sockets linux-kernel ebpf bpf

Not able to read and write a TCP option at the same time using the eBPF BPF_PROG_TYPE_SOCK_OPS program type


I am currently writing a program for testing an experimental TCP option.

For that, I activate two flags: BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG and BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG.

Then, I use the functions bpf_reserve_hdr_opt and bpf_store_hdr_opt to, respectively, reserve space for and write the option.

My problem is the following, when only activating the BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG, I am able to write the option, everything is fine, I can observe the option in wireshark. But when I also activate BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG, then the BPF_SOCK_OPS_WRITE_HDR_OPT_CB operation is never triggered...

I really feel like I am missing something here. I could have two eBPF programs, one for writing and the other for reading, but I would really like to understand what is going on. Thanks in advance!

Code that does not write the option:

// objective: reading options using TCP SOCK OPS eBPF program type
#include <linux/bpf.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <netinet/tcp.h>

#define TCP_EXPERIMENT2 42
#define TR_OPTION_ID 254 // experimental option id
#define TR_OPTION_LEN 4 // for now small lenght, for testing

/*
 * sockops program from the question: it requests both the write and the
 * parse-all header-option callbacks, then dispatches on ctx->op.
 *
 * Returns 0 on every failure path (flag setup, reserve, store) and 1
 * otherwise.
 *
 * NOTE(review): this listing is the reproduction case. As written, the
 * BPF_SOCK_OPS_WRITE_HDR_OPT_CB branch is reported as never being reached.
 */
SEC("sockops")
int bgf_dummy_option(struct bpf_sock_ops *ctx) 
{
    // NOTE(review): the names look swapped -- data_end is read from skb_data
    // and data from skb_data_end. Neither local is used below.
    void *data_end = (void *)(__u64)ctx->skb_data;
    void *data = (void *)(__u64)ctx->skb_data_end;

    int rv = 0;

    // setup flags, by default reserve opt space and write opt are not triggered
    rv = bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
    if (rv < 0) 
    {
        bpf_printk("Failed to setup flag:: BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG, %d", rv);
        return 0;
    }

    // IMPORTANT NOTE: when parse all is activated with the write, we never hook the write
    // NOTE(review): the helper sets the whole callback-flag field to its
    // argument, so this second call replaces the WRITE flag set just above
    // instead of adding to it. Only the parse-all flag is left enabled.
    rv = bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
    if (rv < 0) 
    {
        bpf_printk("Failed to setup flag:: BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG, %d", rv);
        return 0;
    }

    switch (ctx->op)
    {
        // Ask for TR_OPTION_LEN bytes of option space in the outgoing header.
        case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
            bpf_printk("setting BPF opt len, op == %d", ctx->op);
            long opt_ptr = bpf_reserve_hdr_opt(ctx, TR_OPTION_LEN, 0);
            if (opt_ptr < 0) {
                bpf_printk("Failed to reserve TCP option space\n");
                return 0;
            }
            return 1;
        // Store the option bytes: kind, length, then two payload bytes.
        case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
            bpf_printk("Writting option, op == %d", ctx->op);


            // TODO: load option with a struct instead of an array
            __u8 *tcp_option;
            tcp_option=(__u8[]){TR_OPTION_ID, TR_OPTION_LEN, 0x01, 0x02};

            int ret = bpf_store_hdr_opt(ctx, tcp_option, TR_OPTION_LEN, 0);
            if (ret < 0) {
                bpf_printk("Failed to write TCP option (error %d)\n", ret);
                return 0;
            }

            bpf_printk("Adding experimental option\n");

            return 1;
        // only this operation is triggered if both parsing and writing flag are activated
        case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:        
            bpf_printk("Only reading operation activated");
            return 1;
        // Every other op is accepted without any action.
        default:
            return 1;
    }

    // Not reached: every switch branch above returns.
    return 1;
}

Code that writes the option (without the reading flag activated):

// objective: reading options using TCP SOCK OPS eBPF program type
#include <linux/bpf.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <netinet/tcp.h>

#define TCP_EXPERIMENT2 42
#define TR_OPTION_ID 254 // experimental option id
#define TR_OPTION_LEN 4 // for now small lenght, for testing

/*
 * sockops program, write-only variant: requests the header-option write
 * callback and appends a TR_OPTION_LEN-byte experimental TCP option
 * (kind TR_OPTION_ID, two payload bytes) to outgoing segments.
 *
 * Returns 0 when enabling the flag, reserving option space or storing the
 * option fails; returns 1 for every other path, including ops that are
 * not handled here.
 */
SEC("sockops")
int bgf_dummy_option(struct bpf_sock_ops *ctx)
{
    // setup flags, by default reserve opt space and write opt are not triggered
    int rv = bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
    if (rv < 0)
    {
        bpf_printk("Failed to setup flag:: BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG, %d", rv);
        return 0;
    }

    switch (ctx->op)
    {
        // Reserve room for the option before the header is built.
        case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
        {
            bpf_printk("setting BPF opt len, op == %d", ctx->op);

            long reserved = bpf_reserve_hdr_opt(ctx, TR_OPTION_LEN, 0);
            if (reserved < 0) {
                bpf_printk("Failed to reserve TCP option space\n");
                return 0;
            }
            return 1;
        }

        // Fill the space reserved above with the option bytes.
        case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
        {
            bpf_printk("Writting option, op == %d", ctx->op);

            // TODO: load option with a struct instead of an array
            // Layout: kind, total length, then two payload bytes.
            __u8 tcp_option[TR_OPTION_LEN] = {TR_OPTION_ID, TR_OPTION_LEN, 0x01, 0x02};

            int ret = bpf_store_hdr_opt(ctx, tcp_option, TR_OPTION_LEN, 0);
            if (ret < 0) {
                bpf_printk("Failed to write TCP option (error %d)\n", ret);
                return 0;
            }

            bpf_printk("Adding experimental option\n");
            return 1;
        }

        // No parse flag is requested in this variant, so this branch is not
        // expected to run; it is kept so the switch matches the read+write
        // listing.
        case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
            bpf_printk("Only reading operation activated");
            return 1;

        // Every other op is accepted without any action.
        default:
            return 1;
    }
}

I was expecting both writing and reading to be possible in the same program for the BPF_PROG_TYPE_SOCK_OPS program type.


Solution

  • You need to call bpf_sock_ops_cb_flags_set only once with all the flags you want to set:

    // setup flags, by default reserve opt space and write opt are not triggered
    // bpf_sock_ops_cb_flags_set() overwrites the socket's whole callback-flag
    // field, so every wanted flag must go into a single call. OR-ing in
    // ctx->bpf_sock_ops_cb_flags keeps callbacks that were already enabled.
    int flags = ctx->bpf_sock_ops_cb_flags
              | BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG
              | BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG;
    rv = bpf_sock_ops_cb_flags_set(ctx, flags);
    // Per the helper's manpage: negative means error, a positive value holds
    // the bits the kernel could not set, and 0 means all flags were applied.
    if (rv < 0)
    {
        bpf_printk("Failed to setup flags: %d", rv);
        return 0;
    }
    

    This is described in the manpage for helpers:

    Attempt to set the value of the bpf_sock_ops_cb_flags field for the full TCP socket associated to bpf_sock_ops to argval.

    So the whole field is set by the helper, and the second argument is actually a bitmask of flags rather than a single flag. That also makes it possible to unset flags with the helper, for example bpf_sock_ops_cb_flags_set(ctx, 0) to clear all flags.