I am currently writing a program for testing an experimental TCP option.
For that, I activate two flags: BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG and BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG.
Then, I use the functions bpf_reserve_hdr_opt and bpf_store_hdr_opt to, respectively, reserve space for and write the option.
My problem is the following: when I activate only BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG, I am able to write the option and everything is fine; I can observe the option in Wireshark. But when I also activate BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG, the BPF_SOCK_OPS_WRITE_HDR_OPT_CB operation is never triggered...
I really feel like I am missing something here. I could have two eBPF programs, one for writing and the other for reading, but I would really like to understand what is going on. Thanks in advance!
Code that does not write the option:
// objective: reading options using TCP SOCK OPS eBPF program type
#include <linux/bpf.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <netinet/tcp.h>
#define TCP_EXPERIMENT2 42
#define TR_OPTION_ID 254 // experimental option id
#define TR_OPTION_LEN 4 // for now small lenght, for testing
/*
 * Sockops program used as the failing reproduction: it asks for both the
 * "write header option" and the "parse all header options" callbacks, but
 * does so with two separate bpf_sock_ops_cb_flags_set() calls.
 *
 * Returns 0 when one of the helpers reports an error and 1 otherwise.
 */
SEC("sockops")
int bgf_dummy_option(struct bpf_sock_ops *ctx)
{
    int rv;

    // setup flags, by default reserve opt space and write opt are not triggered
    rv = bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
    if (rv < 0) {
        bpf_printk("Failed to setup flag:: BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG, %d", rv);
        return 0;
    }

    // IMPORTANT NOTE: when parse all is activated with the write, we never hook the write.
    // The helper assigns the socket's whole callback-flags field to its argument,
    // so this second call replaces the write flag requested above instead of
    // adding to it. This is the behaviour this listing demonstrates.
    rv = bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
    if (rv < 0) {
        bpf_printk("Failed to setup flag:: BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG, %d", rv);
        return 0;
    }

    switch (ctx->op) {
    case BPF_SOCK_OPS_HDR_OPT_LEN_CB: {
        bpf_printk("setting BPF opt len, op == %d", ctx->op);

        // Reserve TR_OPTION_LEN bytes of TCP option space; negative means failure.
        long err = bpf_reserve_hdr_opt(ctx, TR_OPTION_LEN, 0);
        if (err < 0) {
            bpf_printk("Failed to reserve TCP option space\n");
            return 0;
        }
        break;
    }

    case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: {
        bpf_printk("Writting option, op == %d", ctx->op);

        // TODO: load option with a struct instead of an array
        // Layout: option kind, total option length, then two payload bytes.
        __u8 tcp_option[TR_OPTION_LEN] = {TR_OPTION_ID, TR_OPTION_LEN, 0x01, 0x02};

        int ret = bpf_store_hdr_opt(ctx, tcp_option, sizeof(tcp_option), 0);
        if (ret < 0) {
            bpf_printk("Failed to write TCP option (error %d)\n", ret);
            return 0;
        }
        bpf_printk("Adding experimental option\n");
        break;
    }

    // only this operation is triggered if both parsing and writing flag are activated
    case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
        bpf_printk("Only reading operation activated");
        break;

    default:
        break;
    }

    return 1;
}
Code that writes the option (without the reading flag activated):
// objective: reading options using TCP SOCK OPS eBPF program type
#include <linux/bpf.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <netinet/tcp.h>
#define TCP_EXPERIMENT2 42
#define TR_OPTION_ID 254 // experimental option id
#define TR_OPTION_LEN 4 // for now small lenght, for testing
/*
 * Sockops program that writes an experimental TCP option. Only the
 * "write header option" callback flag is requested here.
 *
 * Returns 0 when one of the helpers reports an error and 1 otherwise.
 */
SEC("sockops")
int bgf_dummy_option(struct bpf_sock_ops *ctx)
{
    int rv;

    // setup flags, by default reserve opt space and write opt are not triggered
    rv = bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
    if (rv < 0) {
        bpf_printk("Failed to setup flag:: BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG, %d", rv);
        return 0;
    }

    switch (ctx->op) {
    case BPF_SOCK_OPS_HDR_OPT_LEN_CB: {
        bpf_printk("setting BPF opt len, op == %d", ctx->op);

        // Reserve TR_OPTION_LEN bytes of TCP option space; negative means failure.
        long err = bpf_reserve_hdr_opt(ctx, TR_OPTION_LEN, 0);
        if (err < 0) {
            bpf_printk("Failed to reserve TCP option space\n");
            return 0;
        }
        break;
    }

    case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: {
        bpf_printk("Writting option, op == %d", ctx->op);

        // TODO: load option with a struct instead of an array
        // Layout: option kind, total option length, then two payload bytes.
        __u8 tcp_option[TR_OPTION_LEN] = {TR_OPTION_ID, TR_OPTION_LEN, 0x01, 0x02};

        int ret = bpf_store_hdr_opt(ctx, tcp_option, sizeof(tcp_option), 0);
        if (ret < 0) {
            bpf_printk("Failed to write TCP option (error %d)\n", ret);
            return 0;
        }
        bpf_printk("Adding experimental option\n");
        break;
    }

    // only this operation is triggered if both parsing and writing flag are activated
    case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
        bpf_printk("Only reading operation activated");
        break;

    default:
        break;
    }

    return 1;
}
I was expecting both writing and reading to be possible for the BPF_PROG_SOCK_OPS program type in the same program.
You need to call bpf_sock_ops_cb_flags_set
only once with all the flags you want to set:
// Request every wanted callback in ONE call: by default the reserve-space and
// write-option callbacks are not triggered, and the helper sets the whole
// flags field to the value it is given.
const int wanted_cb_flags = BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
                            BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG;

rv = bpf_sock_ops_cb_flags_set(ctx, wanted_cb_flags);
if (rv < 0) {
    bpf_printk("Failed to setup flags:, %d", rv);
    return 0;
}
This is described in the manpage for helpers:
Attempt to set the value of the bpf_sock_ops_cb_flags field for the full TCP socket associated to bpf_sock_ops to argval.
So the whole field is set by the helper, and that second argument is actually a bitmask of all the flags you want enabled, not a single flag to add. That also makes it possible to unset flags with that helper, for example bpf_sock_ops_cb_flags_set(ctx, 0)
to clear all flags.