I am trying to write a program that distributes UDP messages across different sockets. Because I need to access skb->data, I use sk_skb/verdict
as the hook point.
However, when I use bpf_sk_redirect_map
to redirect the packet, it always returns SK_DROP.
Below are the BPF code and the userspace code, which together form a minimal reproducible example. The kernel version is Linux wusheng 6.11.7-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.11.7-1 (2024-11-09) x86_64 GNU/Linux.
BPF code:
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
/*
 * Sockmap indexed by local port number (host byte order, as reported by
 * skb->local_port). Each populated slot holds the socket that traffic
 * arriving on that port should be redirected to.
 */
struct
{
    __uint(type, BPF_MAP_TYPE_SOCKMAP);
    /*
     * Keys are port numbers in the range 0..65535, so 65536 slots are
     * required; with only 65535 entries, key 65535 would be out of range.
     */
    __uint(max_entries, 65536);
    __type(key, __u32);
    __type(value, __u64);
} sock_map SEC(".maps");
/*
 * sk_skb verdict program used to reproduce the redirect failure.
 *
 * Uses the packet's local port as the sockmap key, attempts an ingress
 * redirect to the socket stored under that key, and traces the helper's
 * return value together with the state of the looked-up socket.
 *
 * Always returns SK_PASS, regardless of what the redirect helper returned.
 */
SEC("sk_skb/verdict")
int prog_skb_verdict(struct __sk_buff *skb)
{
    __u32 port_key = skb->local_port;
    /* Same value the trace would show for "no socket": -1 converted to __u32. */
    __u32 target_state = (__u32)-1;
    struct bpf_sock *target;
    long redir_ret;

    bpf_printk("socket port %d => %d", skb->local_port, skb->remote_port);
    bpf_printk("socket addr %d => %d", skb->local_ip4, skb->remote_ip4);

    /* The lookup is only for diagnostics; the redirect does its own lookup. */
    target = bpf_map_lookup_elem(&sock_map, &port_key);
    redir_ret = bpf_sk_redirect_map(skb, &sock_map, port_key, BPF_F_INGRESS);

    if (target != NULL)
        target_state = target->state;
    bpf_printk("socket redir ret=%d sock_find=%d sk_state=%d", redir_ret, target != NULL, target_state);

    /* A socket obtained from the map lookup must be released before returning. */
    if (target != NULL)
        bpf_sk_release(target);

    return SK_PASS;
}
char _license[] SEC("license") = "GPL";
userspace code
#include <stdio.h>
#include <stdlib.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <arpa/inet.h>
#define MAP_NAME "/sys/fs/bpf/sock_map"
#define BPF_OBJECT_FILE "test_sockmap_skb_verdict_attach.bpf.o"
/*
 * Userspace half of the reproducer.
 *
 * Loads the BPF object, attaches prog_skb_verdict to sock_map as the
 * BPF_SK_SKB_VERDICT program, creates two UDP sockets bound to ports 18080
 * and 18081, stores each socket in the map under the *other* socket's port,
 * and then echoes every datagram received on sock1 back to its sender.
 *
 * Returns 1 if any setup step fails. The echo loop never terminates, so the
 * final return is not reached in practice.
 */
int main(int argc, char **argv) {
    struct bpf_object *bpf_obj;
    int prog_fd, sock_map_fd, sock1, sock2;
    struct sockaddr_in addr = {0}, addr2 = {0};

    (void)argc;
    (void)argv;

    // Load the eBPF object file
    bpf_obj = bpf_object__open_file(BPF_OBJECT_FILE, NULL);
    if (!bpf_obj) {
        fprintf(stderr, "Failed to open BPF object file\n");
        return 1;
    }
    if (bpf_object__load(bpf_obj)) {
        fprintf(stderr, "Failed to load BPF program\n");
        return 1;
    }

    // Get the program's FD
    prog_fd = bpf_program__fd(bpf_object__find_program_by_name(bpf_obj, "prog_skb_verdict"));
    if (prog_fd < 0) {
        fprintf(stderr, "Failed to find BPF program FD\n");
        return 1;
    }

    // Get the sock_map's FD
    sock_map_fd = bpf_object__find_map_fd_by_name(bpf_obj, "sock_map");
    if (sock_map_fd < 0) {
        fprintf(stderr, "Failed to find sock_map FD\n");
        return 1;
    }
    printf("socket map fd: %d\n", sock_map_fd);

    if (bpf_prog_attach(prog_fd, sock_map_fd, BPF_SK_SKB_VERDICT, 0)) {
        perror("bpf_prog_attach");
        return 1;
    }

    // Create the two sockets
    sock1 = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    sock2 = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (sock1 < 0 || sock2 < 0) {
        perror("socket");
        return 1;
    }

    printf("start bind socket\n");
    addr.sin_family = AF_INET;
    addr.sin_port = htons(18080);
    addr.sin_addr.s_addr = htonl(INADDR_ANY);
    addr2.sin_family = AF_INET;
    addr2.sin_port = htons(18081);
    addr2.sin_addr.s_addr = htonl(INADDR_ANY);
    if (bind(sock1, (struct sockaddr *) &addr, sizeof(addr))) {
        perror("sock1 bind fail");
        return 1;
    }
    if (bind(sock2, (struct sockaddr *) &addr2, sizeof(addr2))) {
        perror("sock2 bind fail");
        return 1;
    }

    // Add the sockets to sock_map
    //
    // The mapping is deliberately crossed: key 18081 -> sock1 (bound to
    // 18080) and key 18080 -> sock2 (bound to 18081).
    //
    // NOTE: sock_map_redirect_allowed() in net/core/sock_map.c accepts a
    // non-TCP socket only while its state is TCP_ESTABLISHED. These sockets
    // are bound but never connect()ed, which is why bpf_sk_redirect_map()
    // returns SK_DROP in this reproducer.
#if 1
    uint32_t key_for_sock1 = 18081;
    uint64_t sock1_val = sock1;
    if (bpf_map_update_elem(sock_map_fd, &key_for_sock1, &sock1_val, BPF_ANY)) {
        perror("bpf_map_update_elem sock1");
        return 1;
    }
    uint32_t key_for_sock2 = 18080;
    uint64_t sock2_val = sock2;
    if (bpf_map_update_elem(sock_map_fd, &key_for_sock2, &sock2_val, BPF_ANY)) {
        perror("bpf_map_update_elem sock2");
        return 1;
    }
#endif
    printf("BPF program loaded and sockets added to sock_map.\n");

    // Keep listening for data
#define BUFLEN 1500
    // One extra byte so a full-size datagram can still be NUL-terminated.
    char buf[BUFLEN + 1];
    struct sockaddr_in si_other;
    socklen_t slen;
    ssize_t recv_len;

    while (1) {
        printf("Waiting for data...");
        fflush(stdout);

        // recvfrom() overwrites slen with the actual address length, so it
        // has to be reset to the buffer size before every call.
        slen = sizeof(si_other);

        // Try to receive some data; this is a blocking call
        recv_len = recvfrom(sock1, buf, BUFLEN, 0, (struct sockaddr *) &si_other, &slen);
        if (recv_len == -1) {
            perror("recvfrom()");
            // Nothing was received: buf and si_other are not valid.
            continue;
        }

        // Print details of the client/peer and the data received
        printf("Received packet from %s:%d\n", inet_ntoa(si_other.sin_addr), ntohs(si_other.sin_port));
        buf[recv_len] = 0;
        printf("Data: %s\n", buf);

        // Now reply to the client with the same data
        if (sendto(sock1, buf, (size_t)recv_len, 0, (struct sockaddr *) &si_other, slen) == -1) {
            perror("sendto()");
        }
    }

    // Not reached: the loop above never exits, so the sockets and the BPF
    // object are released by the kernel when the process is terminated.
    return 0;
}
Log output when sending a UDP packet to port 18081:
<idle>-0 [005] ..s21 328.378048: bpf_trace_printk: socket port 18081 => 0
<idle>-0 [005] ..s21 328.378052: bpf_trace_printk: socket addr 0 => 0
<idle>-0 [005] ..s21 328.378053: bpf_trace_printk: socket redir ret=0 sock_find=1 sk_state=7
As the log shows, even though the UDP socket was found by the lookup, the return value of bpf_sk_redirect_map
is 0 (SK_DROP).
I tried to find the answer in the kernel code, but I am not sure whether sk->sk_state
in the kernel code has the same meaning as bpf_sock->state.
// net/core/sock_map.c
/*
 * Decide whether @sk may be used as the target of a sockmap redirect.
 * TCP sockets qualify in any state except TCP_LISTEN; every other socket
 * type (including UDP) qualifies only while its state is TCP_ESTABLISHED.
 */
static bool sock_map_redirect_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
return sk->sk_state != TCP_LISTEN;
else
return sk->sk_state == TCP_ESTABLISHED;
}
/*
 * BPF helper: request that @skb be redirected to the socket stored in @map
 * under @key.
 *
 * Returns SK_DROP if @flags contains anything other than BPF_F_INGRESS, if
 * no socket is stored under @key, or if sock_map_redirect_allowed() rejects
 * the socket. Otherwise records the redirect on @skb and returns SK_PASS.
 */
BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct bpf_map *, map, u32, key, u64, flags)
{
struct sock *sk;
/* BPF_F_INGRESS is the only flag accepted. */
if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP;
sk = __sock_map_lookup_elem(map, key);
/* A missing entry and a socket in a disallowed state are both reported as SK_DROP. */
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
/* Presumably stores the target socket and ingress flag on the skb for later processing. */
skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
return SK_PASS;
}
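You seem to be almost there. As the kernel code indicates, redirecting to UDP sockets is supported, but only for connected (established) UDP sockets. Your log shows sk_state=7, which is TCP_CLOSE, the state of an unconnected UDP socket, rather than TCP_ESTABLISHED (1), so the check below fails and the helper returns SK_DROP.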
/*
 * Decide whether @sk may be used as the target of a sockmap redirect.
 * TCP sockets qualify in any state except TCP_LISTEN; every other socket
 * type (including UDP) qualifies only while its state is TCP_ESTABLISHED.
 */
static bool sock_map_redirect_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
return sk->sk_state != TCP_LISTEN;
else
/* Non-TCP sockets take this branch. */
return sk->sk_state == TCP_ESTABLISHED;
}
"I tried to find the answer in the kernel code, but I am not sure whether sk->sk_state in the code has the same meaning as bpf_sock->state."
Yes, these both translate to the same field.
So, in your userspace program where you create the sockets, you should call connect
on them; that should solve your issue. There is a great blog post by Cloudflare on unconnected vs. connected UDP sockets that goes into the details and implications.