cebpfbpf

BPF verifier rejects with "Permission denied (13)!" when using bpf_trace_printk()


I'm trying to load an example BPF filter but I'm getting the following error:

Prog section 'classifier' rejected: Permission denied (13)!
 - Type:         3
 - Instructions: 58 (0 over limit)
 - License:      GPL

Verifier analysis:

0: (bf) r6 = r1
1: (61) r2 = *(u32 *)(r6 +80)
2: (61) r1 = *(u32 *)(r6 +76)
3: (bf) r3 = r1
4: (07) r3 += 14
5: (2d) if r3 > r2 goto pc+50
 R1_w=pkt(id=0,off=0,r=14,imm=0) R2_w=pkt_end(id=0,off=0,imm=0) R3_w=pkt(id=0,off=14,r=14,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0
6: (bf) r3 = r1
7: (07) r3 += 15
8: (2d) if r3 > r2 goto pc+47
 R1_w=pkt(id=0,off=0,r=15,imm=0) R2_w=pkt_end(id=0,off=0,imm=0) R3_w=pkt(id=0,off=15,r=15,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0
9: (71) r3 = *(u8 *)(r1 +13)
10: (67) r3 <<= 8
11: (71) r4 = *(u8 *)(r1 +12)
12: (4f) r3 |= r4
13: (57) r3 &= 65535
14: (55) if r3 != 0x8 goto pc+41
 R1=pkt(id=0,off=0,r=15,imm=0) R2=pkt_end(id=0,off=0,imm=0) R3=inv8 R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R6=ctx(id=0,off=0,imm=0) R10=fp0
15: (71) r3 = *(u8 *)(r1 +23)
invalid access to packet, off=23 size=1, R1(id=0,off=23,r=15)
R1 offset is outside of the packet
processed 16 insns (limit 1000000) max_states_per_insn 0 total_states 1 peak_states 1 mark_read 1

Error fetching program/map!
Unable to load program

I'm using the following commands to compile and load:

# clang -O2 -Wall -I/usr/include/x86_64-linux-gnu -target bpf -c classifier.c -o classifier.o
# tc filter add dev wlp0s20f3 ingress bpf obj classifier.o flowid 0:

If I comment out the line trace_printk("Yes! It is HTTP!\n"); it loads. trace_printk() is a macro, I've also tried to call bpf_trace_printk() directly but with the same result.

#include <linux/if_ether.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
//#include <bpf/bpf_helpers.h>

#define SEC(NAME) __attribute__((section(NAME), used))

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define __bpf_htons(x) __builtin_bswap16(x)
#define __bpf_constant_htons(x) ___constant_swab16(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define __bpf_htons(x) (x)
#define __bpf_constant_htons(x) (x)
#else
#error "Fix your compiler's __BYTE_ORDER__?!"
#endif

#define bpf_htons(x) \
  (__builtin_constant_p(x) ? __bpf_constant_htons(x) : __bpf_htons(x))

static long (*bpf_trace_printk)(const char *fmt, __u32 fmt_size,
                               ...) = (void *)BPF_FUNC_trace_printk;

#define trace_printk(fmt, ...)                                       \
    do {                                                         \
        char _fmt[] = fmt;                                   \
        bpf_trace_printk(_fmt, sizeof(_fmt), ##__VA_ARGS__); \
    } while (0)

static inline int is_http(struct __sk_buff *skb, __u64 nh_off);
unsigned long long load_byte(void *skb,
                             unsigned long long off) asm("llvm.bpf.load.byte");

SEC("classifier")
static inline int classification(struct __sk_buff *skb) {
    void *data_end = (void *)(long)skb->data_end;
    void *data = (void *)(long)skb->data;
    struct ethhdr *eth = data;

    __u16 h_proto;
    __u64 nh_off = 0;
    nh_off = sizeof(*eth);

    if (data + nh_off > data_end) {
        return TC_ACT_OK;
    }

    h_proto = eth->h_proto;

    if (h_proto == bpf_htons(ETH_P_IP)) {
        if (is_http(skb, nh_off) == 1) {
            trace_printk("Yes! It is HTTP!\n"); // (ERROR)
        }
    }

    return TC_ACT_OK;
}

static inline int is_http(struct __sk_buff *skb, __u64 nh_off) {
    void *data_end = (void *)(long)skb->data_end;
    void *data = (void *)(long)skb->data;
    struct iphdr *iph = data + nh_off;

    if ((void*)iph + 1 > data_end) {
        return 0;
    }

    if (iph->protocol != IPPROTO_TCP) {
        return 0;
    }
    __u32 tcp_hlen = 0;
    __u32 ip_hlen = 0;
    __u32 poffset = 0;
    __u32 plength = 0;
    __u32 ip_total_length = iph->tot_len;

    ip_hlen = iph->ihl << 2;

    if (ip_hlen < sizeof(*iph)) {
        return 0;
    }

    struct tcphdr *tcph = data + nh_off + sizeof(*iph);

    if ((void*)tcph + 1 > data_end) {
        return 0;
    }

    tcp_hlen = tcph->doff << 2;

    poffset = ETH_HLEN + ip_hlen + tcp_hlen;
    plength = ip_total_length - ip_hlen - tcp_hlen;
    if (plength >= 7) {
        unsigned long p[7];
        int i = 0;
        for (i = 0; i < 7; i++) {

            p[i] = load_byte(skb, poffset + i);
        }
        if ((p[0] == 'H') && (p[1] == 'T') && (p[2] == 'T') && (p[3] == 'P')) {
            return 1;
        }
    }

    return 0;
}

char _license[] SEC("license") = "GPL";

Solution

  • trace_printk isn't actually the issue here. It just seems that way because once you comment that statement, the verifier optimizes out the call to is_http(); it's not needed anymore.

    What's going on?

    The verifier is rejecting your BPF program with the following error:

    invalid access to packet, off=23 size=1, R1(id=0,off=23,r=15)
    R1 offset is outside of the packet
    

    That means you are trying to access the packet at offset 23 even though you only verified that it is 15 bytes long.

    Where the bug?

    I suspect the bug is in those lines:

    if ((void*)iph + 1 > data_end) {
        return 0;
    }
    

    Here, you are checking that data_end is larger or equal to iph plus one byte. We can see it in the verifier output:

    3: (bf) r3 = r1
    4: (07) r3 += 14
    5: (2d) if r3 > r2 goto pc+50
     R1_w=pkt(id=0,off=0,r=14,imm=0) R2_w=pkt_end(id=0,off=0,imm=0) R3_w=pkt(id=0,off=14,r=14,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0
    6: (bf) r3 = r1
    7: (07) r3 += 15
    8: (2d) if r3 > r2 goto pc+47
     R1_w=pkt(id=0,off=0,r=15,imm=0) R2_w=pkt_end(id=0,off=0,imm=0) R3_w=pkt(id=0,off=15,r=15,imm=0) R6_w=ctx(id=0,off=0,imm=0) R10=fp0
    

    The first comparison is for data + nh_off > data_end, the second is for (void*)iph + 1 > data_end. As you can see there's only one byte in difference.

    What's the fix?

    I think you want:

    if ((void*)(iph + 1) > data_end) {
        return 0;
    }
    

    And the same for the next packet bounds check (tcp).