performance rust llvm llvm-codegen

Why does LLVM appear to ignore Rust's assume intrinsic?


LLVM appears to ignore core::intrinsics::assume(..) calls. They do end up in the LLVM IR, but they don't change the resulting machine code. For example, take the following (nonsensical) code:

/// Pops the last element of `xs`, if there is one, and pushes it straight back.
///
/// The vector holds the same elements before and after the call; the function only
/// serves to show what code the compiler generates for a `pop` followed by a `push`.
pub fn one(xs: &mut Vec<i32>) {
    if let Some(x) = xs.pop() {
        // The generated code for this `push` still contains the capacity check and
        // the reallocation path, even though an element was removed just before.
        xs.push(x);
    }
}

This compiles to a whole lot of assembly:

example::one:
        push    rbp
        push    r15
        push    r14
        push    r12
        push    rbx
        mov     rbx, qword ptr [rdi + 16]
        test    rbx, rbx
        je      .LBB0_9
        mov     r14, rdi
        lea     rsi, [rbx - 1]
        mov     qword ptr [rdi + 16], rsi
        mov     rdi, qword ptr [rdi]
        mov     ebp, dword ptr [rdi + 4*rbx - 4]
        cmp     rsi, qword ptr [r14 + 8]
        jne     .LBB0_8
        lea     rax, [rsi + rsi]
        cmp     rax, rbx
        cmova   rbx, rax
        mov     ecx, 4
        xor     r15d, r15d
        mov     rax, rbx
        mul     rcx
        mov     r12, rax
        setno   al
        jo      .LBB0_11
        mov     r15b, al
        shl     r15, 2
        test    rsi, rsi
        je      .LBB0_4
        shl     rsi, 2
        mov     edx, 4
        mov     rcx, r12
        call    qword ptr [rip + __rust_realloc@GOTPCREL]
        mov     rdi, rax
        test    rax, rax
        je      .LBB0_10
.LBB0_7:
        mov     qword ptr [r14], rdi
        mov     qword ptr [r14 + 8], rbx
        mov     rsi, qword ptr [r14 + 16]
.LBB0_8:
        or      ebp, 1
        mov     dword ptr [rdi + 4*rsi], ebp
        add     qword ptr [r14 + 16], 1
.LBB0_9:
        pop     rbx
        pop     r12
        pop     r14
        pop     r15
        pop     rbp
        ret
.LBB0_4:
        mov     rdi, r12
        mov     rsi, r15
        call    qword ptr [rip + __rust_alloc@GOTPCREL]
        mov     rdi, rax
        test    rax, rax
        jne     .LBB0_7
.LBB0_10:
        mov     rdi, r12
        mov     rsi, r15
        call    qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
        ud2
.LBB0_11:
        call    qword ptr [rip + alloc::raw_vec::capacity_overflow@GOTPCREL]
        ud2

Now we could tell the compiler that xs is no longer full (at capacity) after the pop(), so that the push() never needs to reallocate (this is nightly-only):

#![feature(core_intrinsics)]

/// Pops the last element of `xs`, if there is one, and pushes it straight back,
/// first telling the optimizer that the vector has spare capacity at that point.
///
/// Uses `core::intrinsics::assume`, which is only available on nightly behind the
/// `core_intrinsics` feature gate.
pub fn one(xs: &mut Vec<i32>) {
    if let Some(x) = xs.pop() {
        // SAFETY: `pop()` just returned `Some`, so the length was decreased by one, and a
        // `Vec`'s length never exceeds its capacity; hence `len < capacity` holds here.
        unsafe {
            core::intrinsics::assume(xs.len() < xs.capacity());
        }
        // The hint above is meant to let the compiler drop the growth path of `push`.
        xs.push(x);
    }
}

Yet despite the assume showing up in the LLVM IR, the assembly is unchanged. If, however, we use core::hint::unreachable_unchecked() to create a diverging path for the case we assume cannot happen, such as:

/// Pops the last element of `xs`, if there is one, and pushes it straight back,
/// marking the "vector is full after the pop" case as unreachable.
pub fn one(xs: &mut Vec<i32>) {
    if let Some(x) = xs.pop() {
        if xs.len() >= xs.capacity() {
            // SAFETY: `pop()` just returned `Some`, so the length was decreased by one, and
            // a `Vec`'s length never exceeds its capacity; this branch can never be taken.
            unsafe { core::hint::unreachable_unchecked() }
        }
        // With the full-vector case declared impossible, `push` cannot need to grow.
        xs.push(x);
    }
}

We get the following:

example::one:
        mov     rax, qword ptr [rdi + 16]
        test    rax, rax
        je      .LBB0_2
        mov     qword ptr [rdi + 16], rax
.LBB0_2:
        ret

This is essentially a no-op, which is not too bad. Of course, we could have left the value in place by using:

/// Leaves `xs` untouched: borrows the last element, if any, and discards the borrow.
pub fn one(xs: &mut Vec<i32>) {
    // Mapping to `()` simply throws the `&mut i32` away, so nothing is read or written.
    xs.last_mut().map(|_e| ());
}

Which compiles down to what we'd expect:

example::one:
        ret

Why does LLVM appear to ignore the assume intrinsic?


Solution

  • On recent versions of rustc, this now compiles to just a ret, thanks to improvements in both rustc and LLVM. LLVM did not deliberately ignore the intrinsic; at the time, its optimizer simply was not able to make use of the assumption in this case, whereas newer versions can.