I'm using bpf_dynptr_from_mem and the BPF verifier fails on the size argument (total_size) with R2 unbounded memory access, even though I check the size against the buffer limit (MAX_DATA_SIZE) beforehand.
__u32 total_size = insn_len + orig_data->sig_len;
// total_size &= (MAX_DATA_SIZE - 1);
if (total_size >= MAX_DATA_SIZE) {
bpf_printk("Insufficient buffer size\n");
ret = -E2BIG;
goto out;
}
bpf_dynptr_from_mem(combined_buf->data, total_size, 0, &combined_data_ptr);
Relevant Verifier Log:
; __u32 total_size = insn_len + orig_data->sig_len;
123: (0f) r2 += r9 ; R2_w=scalar(smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff)) R9=scalar(id=6,smin=smin32=0,smax=umax=smax32=umax32=0xffff8,var_off=(0x0; 0xffff8))
124: (bf) r1 = r2 ; R1_w=scalar(id=9,smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff)) R2_w=scalar(id=9,smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff))
125: (67) r1 <<= 32 ; R1_w=scalar(smax=0x7fffffff00000000,umax=0xffffffff00000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xffffffff00000000))
126: (77) r1 >>= 32 ; R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
127: (b7) r3 = 1048576 ; R3=0x100000
; if (total_size >= MAX_DATA_SIZE) {
128: (2d) if r3 > r1 goto pc+3 132: R0=scalar(id=8) R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xfffff,var_off=(0x0; 0xfffff)) R2=scalar(id=9,smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff)) R3=0x100000 R6=map_value(map=modified_signat,ks=4,vs=4100) R7=map_value(map=combined_data_m,ks=4,vs=1052672) R8=map_value(map=original_progra,ks=4,vs=1052680,off=0x101004) R9=scalar(id=6,smin=smin32=0,smax=umax=smax32=umax32=0xffff8,var_off=(0x0; 0xffff8)) R10=fp0 fp-32=dynptr_local(id=4,dynptr_id=1) fp-48=dynptr_local(id=5,dynptr_id=1) fp-72=mmmmmmmm fp-88=map_value(map=combined_data_m,ks=4,vs=1052672) fp-96=map_value(map=original_progra,ks=4,vs=1052680,off=0x101004) fp-104=map_value(map=original_progra,ks=4,vs=1052680,off=0x100004)
; bpf_printk("Insufficient buffer size\n");
132: (b7) r8 = 0 ; R8_w=0
133: (bf) r4 = r10 ; R4_w=fp0 R10=fp0
; bpf_dynptr_from_mem(combined_buf->data, total_size, 0, &combined_data_ptr);
134: (07) r4 += -64 ; R4_w=fp-64
135: (bf) r1 = r7 ; R1_w=map_value(map=combined_data_m,ks=4,vs=1052672) R7=map_value(map=combined_data_m,ks=4,vs=1052672)
136: (b7) r3 = 0 ; R3_w=0
137: (85) call bpf_dynptr_from_mem#197
R2 unbounded memory access, use 'var &= const' or 'if (var < const)'
When I change __u32 to __u64, the log of verifier is
; __u64 total_size = insn_len + orig_data->sig_len;
123: (0f) r2 += r9 ; R2_w=scalar(smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff)) R9=scalar(id=6,smin=smin32=0,smax=umax=smax32=umax32=0xffff8,var_off=(0x0; 0xffff8))
124: (bf) r1 = r2 ; R1_w=scalar(id=9,smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff)) R2_w=scalar(id=9,smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff))
125: (67) r1 <<= 32 ; R1_w=scalar(smax=0x7fffffff00000000,umax=0xffffffff00000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xffffffff00000000))
126: (77) r1 >>= 32 ; R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
127: (b7) r3 = 1048576 ; R3=0x100000
; if (total_size >= MAX_DATA_SIZE) {
128: (2d) if r3 > r1 goto pc+3 132: R0=scalar(id=8) R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xfffff,var_off=(0x0; 0xfffff)) R2=scalar(id=9,smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff)) R3=0x100000 R6=map_value(map=modified_signat,ks=4,vs=4100) R7=map_value(map=combined_data_m,ks=4,vs=1052672) R8=map_value(map=original_progra,ks=4,vs=1052680,off=0x101004) R9=scalar(id=6,smin=smin32=0,smax=umax=smax32=umax32=0xffff8,var_off=(0x0; 0xffff8)) R10=fp0 fp-32=dynptr_local(id=4,dynptr_id=1) fp-48=dynptr_local(id=5,dynptr_id=1) fp-72=mmmmmmmm fp-88=map_value(map=combined_data_m,ks=4,vs=1052672) fp-96=map_value(map=original_progra,ks=4,vs=1052680,off=0x101004) fp-104=map_value(map=original_progra,ks=4,vs=1052680,off=0x100004)
; bpf_printk("Insufficient buffer size\n");
132: (b7) r8 = 0 ; R8_w=0
133: (bf) r4 = r10 ; R4_w=fp0 R10=fp0
; bpf_dynptr_from_mem(combined_buf->data, total_size, 0, &combined_data_ptr);
134: (07) r4 += -64 ; R4_w=fp-64
135: (bf) r1 = r7 ; R1_w=map_value(map=combined_data_m,ks=4,vs=1052672) R7=map_value(map=combined_data_m,ks=4,vs=1052672)
136: (b7) r3 = 0 ; R3_w=0
137: (85) call bpf_dynptr_from_mem#197
R2 unbounded memory access, use 'var &= const' or 'if (var < const)'
The libbpf I'm using is libbpf-dev/noble,now 1:1.3.0-2build2 amd64
When I uncomment the second line total_size &= (MAX_DATA_SIZE - 1);
, the program runs well.
This seems due instruction reordering the compiler does for the type conversions.
Lets break down the log:
; __u32 total_size = insn_len + orig_data->sig_len;
123: (0f) r2 += r9 ; R2_w=scalar(smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff)) R9=scalar(id=6,smin=smin32=0,smax=umax=smax32=umax32=0xffff8,var_off=(0x0; 0xffff8))
We add R2(insn_len
) and R9(orig_data->sig_len
). One or both of these are 64-bit at this time. We can see that the max and variable offsets exceed what would fit in a 32-bit number.
124: (bf) r1 = r2 ; R1_w=scalar(id=9,smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff)) R2_w=scalar(id=9,smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff))
We copy R2 to R1. The verifier is smart enough to know that these are the same variable. It assigns both values ID 9, so that something like an if
propagates info back to both registers.
125: (67) r1 <<= 32 ; R1_w=scalar(smax=0x7fffffff00000000,umax=0xffffffff00000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xffffffff00000000))
126: (77) r1 >>= 32 ; R1_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
Now the compiler does the cast. It shifts left by 32 then right to clear the upper 32 bits. The new max value and variable offset reflect this. But because we modified R1, it is no longer the same as R2, and thus no longer has the ID of 9.
127: (b7) r3 = 1048576 ; R3=0x100000
; if (total_size >= MAX_DATA_SIZE) {
128: (2d) if r3 > r1 goto pc+3 132: R0=scalar(id=8) R1=scalar(smin=smin32=0,smax=umax=smax32=umax32=0xfffff,var_off=(0x0; 0xfffff)) R2=scalar(id=9,smin=0,smax=umax=0x1000ffff7,var_off=(0x0; 0x1ffffffff)) R3=0x100000 R6=map_value(map=modified_signat,ks=4,vs=4100) R7=map_value(map=combined_data_m,ks=4,vs=1052672) R8=map_value(map=original_progra,ks=4,vs=1052680,off=0x101004) R9=scalar(id=6,smin=smin32=0,smax=umax=smax32=umax32=0xffff8,var_off=(0x0; 0xffff8)) R10=fp0 fp-32=dynptr_local(id=4,dynptr_id=1) fp-48=dynptr_local(id=5,dynptr_id=1) fp-72=mmmmmmmm fp-88=map_value(map=combined_data_m,ks=4,vs=1052672) fp-96=map_value(map=original_progra,ks=4,vs=1052680,off=0x101004) fp-104=map_value(map=original_progra,ks=4,vs=1052680,off=0x100004)
; bpf_printk("Insufficient buffer size\n");
R3 is loaded with MAX_DATA_SIZE
and compared to R1. The range of R1 gets adjusted, but due to the loss of the link, this does not get applied to R2.
132: (b7) r8 = 0 ; R8_w=0
133: (bf) r4 = r10 ; R4_w=fp0 R10=fp0
; bpf_dynptr_from_mem(combined_buf->data, total_size, 0, &combined_data_ptr);
134: (07) r4 += -64 ; R4_w=fp-64
135: (bf) r1 = r7 ; R1_w=map_value(map=combined_data_m,ks=4,vs=1052672) R7=map_value(map=combined_data_m,ks=4,vs=1052672)
136: (b7) r3 = 0 ; R3_w=0
137: (85) call bpf_dynptr_from_mem#197
R2 unbounded memory access, use 'var &= const' or 'if (var < const)'
After that, R2 gets used without modification, still having the 64-bit possible range.
The signature of bpf_dynptr_from_mem
is:
static long (* const bpf_dynptr_from_mem)(void *data, __u32 size, __u64 flags, struct bpf_dynptr *ptr) = (void *) 197;
So it seems that the compiler saves the 64-bit result of insn_len + orig_data->sig_len
and passes it to the helper as is. Even though the signature says that size
is a 32 bit number.
Even when you change the type of total_size
to 64 bit, it does the cast for the if
statement, but does not use the cast-ed valued to pass to the helper. This is very odd, perhaps because its passed to a __u32
it interpreters MAX_DATA_SIZE
which is a type less literal as such as well.
I am not sure if this is a compiler bug or if its technically allowed because of some sort of undefined behavior or some rule in the calling convention.
Doing total_size &= (MAX_DATA_SIZE - 1)
likely works because its done before the cast, and thus the if statement and linking of the two registers isn't required to ensure R2 is bounded properly.