rustcompiler-optimization

In Rust, is Option compiled to a runtime check or an instruction jump?


In Rust, Option is defined as:

pub enum Option<T> {
    None,
    Some(T),
}

Used like so:

fn may_return_none() -> Option<i32> {
    if is_full_moon {
        None
    } else {
        Some(1)
    }
}

fn main() {
    let optional = may_return_none();
    match optional {
        None => println!("None"),
        Some(v) => println!("Some"),
    }
}

I'm not familiar with Rust internals, but initially I assumed it might work similar to Nullable in .NET, so the compiled logic of my above Rust code would be like so:

// occupies `sizeof(T) + 1` memory space, possibly more depending on `Bool`'s alignment, so `Nullable<Int32>` consumes 5 bytes.
struct Nullable<T> {
    Bool hasValue;
    T value;
}

Nullable<Int32> MayReturnNone() {
    if( isFullMoon )
        // as a `struct`, the Nullable<Int32> instance is returned via the stack
        return Nullable<Int32>() { HasValue = false }
    else
        return Nullable<Int32>() { HasValue = true, Value = 1 }
}

void Test() {
    Nullable<Int32> optional = may_return_none();
    if( !optional.HasValue ) println("None");
    else                     println("Some");
}

However this isn't a zero-cost abstraction because of the space required for the Bool hasValue flag - and Rust makes a point of providing zero-cost abstractions.

I realise that Option could be implemented via a direct return-jump by the compiler, though it would need the exact jump-to values to be provided as arguments on the stack - as though you can push multiple return addresses:

(Psuedocode)

mayReturnNone(returnToIfNone, returnToIfHasValue) {

    if( isFullMoon ) {
        cleanup-current-stackframe
        jump-to returnToIfNone
    else {
        cleanup-current-stackframe
        push-stack 1
        jump-to returnToIfHasValue
    }

test() {

    mayReturnNone( instructionAddressOf( ifHasValue ), instructionAddressOf( ifNoValue ) )
ifHasValue:
    println("Some")
ifNoValue:
    println("None")
}

Is this how it's implemented? This approach also works for other enum types in Rust - but this specific application I've demonstrated is very brittle and breaks if you want to execute code in-between the call to mayReturnNone and the match statement, for example (as mayReturnNone will jump directly to the match, skipping intermediate instructions).


Solution

  • It depends entirely on optimization. Consider this implementation (playground):

    #![feature(asm)]
    
    extern crate rand;
    
    use rand::Rng;
    
    #[inline(never)]
    fn is_full_moon() -> bool {
        rand::thread_rng().gen()
    }
    
    fn may_return_none() -> Option<i32> {
        if is_full_moon() { None } else { Some(1) }
    }
    
    #[inline(never)]
    fn usage() {
        let optional = may_return_none();
        match optional {
            None => unsafe { asm!("nop") },
            Some(v) => unsafe { asm!("nop; nop") },
        }
    }
    
    fn main() {
        usage();
    }
    

    Here, I've used inline assembly instead of printing because it doesn't clutter up the resulting output as much. Here's the assembly for usage when compiled in release mode:

        .section    .text._ZN10playground5usage17hc2760d0a512fe6f1E,"ax",@progbits
        .p2align    4, 0x90
        .type   _ZN10playground5usage17hc2760d0a512fe6f1E,@function
    _ZN10playground5usage17hc2760d0a512fe6f1E:
        .cfi_startproc
        pushq   %rax
    .Ltmp6:
        .cfi_def_cfa_offset 16
        callq   _ZN10playground12is_full_moon17h78e56c4ffd6b7730E
        testb   %al, %al
        je  .LBB1_2
        #APP
        nop
        #NO_APP
        popq    %rax
        retq
    .LBB1_2:
        #APP
        nop
        nop
        #NO_APP
        popq    %rax
        retq
    .Lfunc_end1:
        .size   _ZN10playground5usage17hc2760d0a512fe6f1E, .Lfunc_end1-_ZN10playground5usage17hc2760d0a512fe6f1E
        .cfi_endproc
    

    The quick rundown is:

    1. It calls the is_full_moon function (callq _ZN10playground12is_full_moon17h78e56c4ffd6b7730E).
    2. The result of the random value is tested (testb %al, %al)
    3. One branch goes to the nop, the other goes to the nop; nop

    Everything else has been optimized out. The function may_return_none basically never exists; no Option was ever created, the value of 1 was never materialized.

    I'm sure that various people have different opinions, but I don't think I could have written this any more optimized.


    Likewise, if we use the value in the Some (which I changed to 42 to find easier):

    Some(v) => unsafe { asm!("nop; nop" : : "r"(v)) },
    

    Then the value is inlined in the branch that uses it:

        .section    .text._ZN10playground5usage17hc2760d0a512fe6f1E,"ax",@progbits
        .p2align    4, 0x90
        .type   _ZN10playground5usage17hc2760d0a512fe6f1E,@function
    _ZN10playground5usage17hc2760d0a512fe6f1E:
        .cfi_startproc
        pushq   %rax
    .Ltmp6:
        .cfi_def_cfa_offset 16
        callq   _ZN10playground12is_full_moon17h78e56c4ffd6b7730E
        testb   %al, %al
        je  .LBB1_2
        #APP
        nop
        #NO_APP
        popq    %rax
        retq
    .LBB1_2:
        movl    $42, %eax  ;; Here it is
        #APP
        nop
        nop
        #NO_APP
        popq    %rax
        retq
    .Lfunc_end1:
        .size   _ZN10playground5usage17hc2760d0a512fe6f1E, .Lfunc_end1-_ZN10playground5usage17hc2760d0a512fe6f1E
        .cfi_endproc
    

    However, nothing can "optimize" around a contractural obligation; if a function has to return an Option, it has to return an Option:

    #[inline(never)]
    pub fn may_return_none() -> Option<i32> {
        if is_full_moon() { None } else { Some(42) }
    }
    

    This makes some Deep Magic assembly:

        .section    .text._ZN10playground15may_return_none17ha1178226d153ece2E,"ax",@progbits
        .p2align    4, 0x90
        .type   _ZN10playground15may_return_none17ha1178226d153ece2E,@function
    _ZN10playground15may_return_none17ha1178226d153ece2E:
        .cfi_startproc
        pushq   %rax
    .Ltmp6:
        .cfi_def_cfa_offset 16
        callq   _ZN10playground12is_full_moon17h78e56c4ffd6b7730E
        movabsq $180388626432, %rdx
        leaq    1(%rdx), %rcx
        testb   %al, %al
        cmovneq %rdx, %rcx
        movq    %rcx, %rax
        popq    %rcx
        retq
    .Lfunc_end1:
        .size   _ZN10playground15may_return_none17ha1178226d153ece2E, .Lfunc_end1-_ZN10playground15may_return_none17ha1178226d153ece2E
        .cfi_endproc
    

    Let's hope I get this right...

    1. Load the 64-bit value 0x2A00000000 to %rdx. 0x2A is 42. This is our Option being built; it's the None variant.
    2. Load %rdx + 1 into %rcx. This is the Some variant.
    3. We test the random value
    4. Depending on the result of the test, move the invalid value to %rcx or not
    5. Move %rcx to %rax - the return register

    The main point here is that regardless of optimization, a function that says it's going to return data in a specific format has to do so. Only when it's inlined with other code is it valid to remove that abstraction.