ebpfxdp-bpfbcc-bpf

Question about how the eBPF Verifier behaves in my specific use case


I have a confusion about how the eBPF verifier behaves in my usecase. In general, I want to implement an eBPF program deployed from a python bcc module to look for my custom option 31 and remove it from packets. However, the verifier give me an error when I use if (option_length == 8 || option_length == 12) in the same command. When I separate them, it is fine, like this:

if (option_length == 8) {
      my code
}
else if (option_length == 12) {
      my duplicated code
}

Here is my full code:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/udp.h>

#define MY_OPTION_TYPE 31
#define MAX_CHECKING 4

static inline __u16 iph_csum(struct iphdr *iph, void *data_end)
{
    __u32 sum = 0;
    __u16 *buf = (__u16 *)iph;
    __u16 ihl = iph->ihl << 2; // Convert IHL to bytes
    iph->check = 0;
    for (__u8 i = 0; i < ihl && i < 60; i += 2)
    {
        if ((void *)(buf + 1) > data_end)
        {
            break;
        }
        sum += *buf++;
    }
    for (__u8 i = 0; sum >> 16 && i < MAX_CHECKING; i += 1)
    {
        sum = (sum & 0xFFFF) + (sum >> 16);
    }
    return ~sum;
}

int inter_op_ebpf(struct xdp_md *ctx)
{
    void *data = (void *)(long)ctx->data;
    void *data_end = (void *)(long)ctx->data_end;

    struct ethhdr *eth = data;

    // Check if the Ethernet header is within bounds
    if ((void *)eth + sizeof(*eth) > data_end)
        return XDP_PASS;

    struct iphdr *ip = data + sizeof(*eth);

    // Check if the IP header is within bounds
    if ((void *)ip + sizeof(*ip) > data_end)
        return XDP_PASS;

    if (ip->version != 4)
    {
        return XDP_PASS;
    }

    // Check if the packet has IP options
    int options_len = (ip->ihl * 4) - sizeof(struct iphdr);
    __u8 *options = (__u8 *)(ip + 1);

    __u8 is_register = 1;
    __u8 is_exist_custom_option = 0;
    if (options_len > 0 && (void *)(options + 4) < data_end)
    {
        __u8 option_type = options[0];
        if (option_type == MY_OPTION_TYPE)
        {
            is_exist_custom_option = 1;
            __u8 option_length = options[1];

            __u8 *data_bytes = (__u8 *)data;
            int shift_data_length = sizeof(*eth) + sizeof(struct iphdr);

            if (option_length == 8 || option_length == 12) {
                for (int i = shift_data_length - 1; i >= 0; i--) {
                    if ((void *)(data_bytes + i + option_length + 1) > data_end)
                        return XDP_PASS;

                    data_bytes[i + option_length] = data_bytes[i];
                }
            }
      
            int ret = bpf_xdp_adjust_head(ctx, option_length); // Cut option length bytes at the head
            if (ret < 0)
            {
                return XDP_PASS; // If adjustment fails, pass the packet
            }

            data = (void *)(long)ctx->data;
            data_end = (void *)(long)ctx->data_end;
        
            eth = data;
        
            // Check if the Ethernet header is within bounds
            if ((void *)eth + sizeof(*eth) > data_end)
                return XDP_DROP;
        
            ip = data + sizeof(*eth);
        
            // Check if the IP header is within bounds
            if ((void *)ip + sizeof(*ip) > data_end)
                return XDP_DROP;
        
            if (ip->version != 4)
            {
                return XDP_DROP;
            }

            int new_header_size = sizeof(struct iphdr); // 20 + 8 = 28 bytes
            ip->ihl = new_header_size / 4;
            ip->tot_len = htons(ntohs(ip->tot_len) - option_length);
        }
        ip->check = iph_csum(ip, data_end);
    }
    return XDP_PASS;
}

Here is the full error message:

bpf: Failed to load program: Permission denied
btf_vmlinux is malformed
; int inter_op_ebpf(struct xdp_md *ctx)
0: (bf) r7 = r1
1: (b7) r6 = 2
; void *data_end = (void *)(long)ctx->data_end;
2: (61) r2 = *(u32 *)(r7 +4)
; void *data = (void *)(long)ctx->data;
3: (61) r1 = *(u32 *)(r7 +0)
; if ((void *)eth + sizeof(*eth) > data_end)
4: (bf) r3 = r1
5: (07) r3 += 14
; if ((void *)eth + sizeof(*eth) > data_end)
6: (2d) if r3 > r2 goto pc+304
 R1_w=pkt(id=0,off=0,r=14,imm=0) R2_w=pkt_end(id=0,off=0,imm=0) R3_w=pkt(id=0,off=14,r=14,imm=0) R6_w=inv2 R7_w=ctx(id=0,off=0,imm=0) R10=fp0
; if ((void *)ip + sizeof(*ip) > data_end)
7: (bf) r4 = r1
8: (07) r4 += 34
; if ((void *)ip + sizeof(*ip) > data_end)
9: (2d) if r4 > r2 goto pc+301
 R1_w=pkt(id=0,off=0,r=34,imm=0) R2_w=pkt_end(id=0,off=0,imm=0) R3_w=pkt(id=0,off=14,r=34,imm=0) R4_w=pkt(id=0,off=34,r=34,imm=0) R6_w=inv2 R7_w=ctx(id=0,off=0,imm=0) R10=fp0
; if (ip->version != 4)
10: (71) r5 = *(u8 *)(r3 +0)
; if (ip->version != 4)
11: (bf) r0 = r5
12: (57) r0 &= 240
; if (ip->version != 4)
13: (55) if r0 != 0x40 goto pc+297
 R0=inv64 R1=pkt(id=0,off=0,r=34,imm=0) R2=pkt_end(id=0,off=0,imm=0) R3=pkt(id=0,off=14,r=34,imm=0) R4=pkt(id=0,off=34,r=34,imm=0) R5=inv(id=1,umax_value=255,var_off=(0x0; 0xff)) R6=inv2 R7=ctx(id=0,off=0,imm=0) R10=fp0
14: (bf) r0 = r1
15: (07) r0 += 38
; if (options_len > 0 && (void *)(options + 4) < data_end)
16: (3d) if r0 >= r2 goto pc+294
 R0_w=pkt(id=0,off=38,r=39,imm=0) R1=pkt(id=0,off=0,r=39,imm=0) R2=pkt_end(id=0,off=0,imm=0) R3=pkt(id=0,off=14,r=39,imm=0) R4=pkt(id=0,off=34,r=39,imm=0) R5=inv(id=1,umax_value=255,var_off=(0x0; 0xff)) R6=inv2 R7=ctx(id=0,off=0,imm=0) R10=fp0
17: (bf) r0 = r5
18: (57) r0 &= 14
19: (b7) r8 = 6
20: (2d) if r8 > r0 goto pc+290
 R0_w=inv(id=0,umin_value=6,umax_value=14,var_off=(0x0; 0xe)) R1=pkt(id=0,off=0,r=39,imm=0) R2=pkt_end(id=0,off=0,imm=0) R3=pkt(id=0,off=14,r=39,imm=0) R4=pkt(id=0,off=34,r=39,imm=0) R5=inv(id=1,umax_value=255,var_off=(0x0; 0xff)) R6=inv2 R7=ctx(id=0,off=0,imm=0) R8_w=inv6 R10=fp0
; __u8 option_type = options[0];
21: (71) r4 = *(u8 *)(r4 +0)
; if (option_type == MY_OPTION_TYPE)
22: (55) if r4 != 0x1f goto pc+258
 R0=inv(id=0,umin_value=6,umax_value=14,var_off=(0x0; 0xe)) R1=pkt(id=0,off=0,r=39,imm=0) R2=pkt_end(id=0,off=0,imm=0) R3=pkt(id=0,off=14,r=39,imm=0) R4=inv31 R5=inv(id=1,umax_value=255,var_off=(0x0; 0xff)) R6=inv2 R7=ctx(id=0,off=0,imm=0) R8=inv6 R10=fp0
; __u8 option_length = options[1];
23: (71) r8 = *(u8 *)(r1 +35)
; if (option_length == 8 || option_length == 12) {
24: (bf) r3 = r8
25: (47) r3 |= 4
26: (15) if r3 == 0xc goto pc+1

from 26 to 28: R0=inv(id=0,umin_value=6,umax_value=14,var_off=(0x0; 0xe)) R1=pkt(id=0,off=0,r=39,imm=0) R2=pkt_end(id=0,off=0,imm=0) R3_w=inv12 R4=inv31 R5=inv(id=1,umax_value=255,var_off=(0x0; 0xff)) R6=inv2 R7=ctx(id=0,off=0,imm=0) R8_w=inv(id=2,umax_value=255,var_off=(0x0; 0xff)) R10=fp0
; 
28: (bf) r3 = r1
29: (0f) r3 += r8
last_idx 29 first_idx 22
regs=100 stack=0 before 28: (bf) r3 = r1
regs=100 stack=0 before 26: (15) if r3 == 0xc goto pc+1
regs=100 stack=0 before 25: (47) r3 |= 4
regs=100 stack=0 before 24: (bf) r3 = r8
regs=100 stack=0 before 23: (71) r8 = *(u8 *)(r1 +35)
; if ((void *)(data_bytes + i + option_length + 1) > data_end)
30: (bf) r4 = r3
31: (07) r4 += 34
; if ((void *)(data_bytes + i + option_length + 1) > data_end)
32: (2d) if r4 > r2 goto pc+278
 R0=inv(id=0,umin_value=6,umax_value=14,var_off=(0x0; 0xe)) R1=pkt(id=0,off=0,r=39,imm=0) R2=pkt_end(id=0,off=0,imm=0) R3_w=pkt(id=9,off=0,r=34,umax_value=255,var_off=(0x0; 0xff)) R4_w=pkt(id=9,off=34,r=34,umax_value=255,var_off=(0x0; 0xff)) R5=inv(id=1,umax_value=255,var_off=(0x0; 0xff)) R6=inv2 R7=ctx(id=0,off=0,imm=0) R8_w=invP(id=2,umax_value=255,var_off=(0x0; 0xff)) R10=fp0
; data_bytes[i + option_length] = data_bytes[i];
33: (bf) r4 = r8
34: (0f) r4 += r1
; data_bytes[i + option_length] = data_bytes[i];
35: (71) r5 = *(u8 *)(r1 +33)
; data_bytes[i + option_length] = data_bytes[i];
36: (73) *(u8 *)(r4 +33) = r5
invalid access to packet, off=33 size=1, R4(id=10,off=33,r=0)
R4 offset is outside of the packet
processed 196 insns (limit 1000000) max_states_per_insn 1 total_states 12 peak_states 12 mark_read 6

Traceback (most recent call last):
  File "a.py", line 9, in <module>
    fn = b.load_func("inter_op_ebpf", BPF.XDP)
  File "/usr/lib/python3/dist-packages/bcc/__init__.py", line 523, in load_func
    raise Exception("Failed to load BPF program %s: %s" %
Exception: Failed to load BPF program b'inter_op_ebpf': Permission denied

It is worth noting that I cannot do either if (option_length <= 32).

Could anyone explain me how the verifier works in my case? Thank you.


Solution

  • TL;DR. The verifier seems to get lost because of how the compiler optimized the code. You might be able to encourage the compiler to generate code that the verifier can track.


    Verifier Error Explanation

    28: (bf) r3 = r1
    29: (0f) r3 += r8
    ; if ((void *)(data_bytes + i + option_length + 1) > data_end)
    30: (bf) r4 = r3
    31: (07) r4 += 34
    ; if ((void *)(data_bytes + i + option_length + 1) > data_end)
    32: (2d) if r4 > r2 goto pc+278
    ; data_bytes[i + option_length] = data_bytes[i];
    33: (bf) r4 = r8
    34: (0f) r4 += r1
    ; data_bytes[i + option_length] = data_bytes[i];
    35: (71) r5 = *(u8 *)(r1 +33)
    ; data_bytes[i + option_length] = data_bytes[i];
    36: (73) *(u8 *)(r4 +33) = r5
    invalid access to packet, off=33 size=1, R4(id=10,off=33,r=0)
    R4 offset is outside of the packet
    

    The verifier complains that the packet access is outside the known bounds of the packet. It says you're trying to access the packet at offset 33 (off=33) with an access size of 1 byte (size=1), but the known packet size is 0 (r=0).

    Obviously, you've checked the packet size above (instructions 28--32) so that shouldn't happen. The check was done with registers r1 (data_bytes) and r8 (option_length) added into r3, copied to r4 and shifted by 34 bytes (shift_data_length). That is compared to r2, the packet's end, at instruction 32.

    After that comparison, we can see the following verifier state (which I removed above for clarity):

      R0=inv(id=0,umin_value=6,umax_value=14,var_off=(0x0; 0xe))
      R1=pkt(id=0,off=0,r=39,imm=0) R2=pkt_end(id=0,off=0,imm=0)
      R3_w=pkt(id=9,off=0,r=34,umax_value=255,var_off=(0x0; 0xff))
      R4_w=pkt(id=9,off=34,r=34,umax_value=255,var_off=(0x0; 0xff))
      R5=inv(id=1,umax_value=255,var_off=(0x0; 0xff))
      R6=inv2 R7=ctx(id=0,off=0,imm=0)
      R8_w=invP(id=2,umax_value=255,var_off=(0x0; 0xff)) R10=fp0
    

    We can see the range (r=) of R4 was properly updated to 34 to signify that the known packet size is 34.

    So why doesn't it work? Unfortunately, R4 is not used as is for the packet access. Instead, the whole thing is recomputed into R4 (instructions 33--34), losing what we just verified.


    Potential Solution

    I'd try to rewrite the C code like this to encourage the compiler to compute R4 only once:

    for (int i = shift_data_length - 1; i >= 0; i--) {
        char *ptr = data_bytes + option_length;
        if (ptr + i + 1 > data_end)
            return XDP_PASS;
    
        ptr[i] = data_bytes[i];
    }
    

    Note it's not guaranteed to work and you may have to tweak it further to get the compiler to generate something the verifier can handle.