Skip to content

Instantly share code, notes, and snippets.

@pavel-odintsov
Last active August 27, 2023 16:47
Show Gist options
  • Save pavel-odintsov/46ab7f719d184e6f7e7d892a54dc1a2d to your computer and use it in GitHub Desktop.
Save pavel-odintsov/46ab7f719d184e6f7e7d892a54dc1a2d to your computer and use it in GitHub Desktop.

Revisions

  1. pavel-odintsov revised this gist Aug 27, 2023. No changes.
  2. pavel-odintsov created this gist Aug 24, 2023.
    304 changes: 304 additions & 0 deletions reuseport_bpf.c
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,304 @@
    /*
    * Exercise BPF filters for SO_REUSEPORT. The code below uses a classic BPF
    * program to read the first word from an incoming packet (expected to be in
    * network byte order), calculate a modulus of that number, and then dispatch
    * the packet to the Nth socket using the result. main() sets up such receive
    * groups over UDP for IPv4, for IPv6, and for IPv6 receivers configured with
    * an IPv4 send family. Only the classic BPF variant is present in this file.
    */

    #include <errno.h>
    #include <error.h>
    #include <fcntl.h>
    #include <linux/bpf.h>
    #include <linux/filter.h>
    #include <linux/unistd.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/epoll.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/resource.h>
    #include <unistd.h>

    #include "../kselftest.h"

    /*
    * Parameters describing one receive-group scenario. Instances are passed
    * by value to the helpers below.
    */
    struct test_params {
    int recv_family; /* address family of the receive sockets (socket()/bind()) */
    int send_family; /* address family of the sending socket in send_from() */
    int protocol; /* passed as the type argument of socket(), e.g. SOCK_DGRAM */
    size_t recv_socks; /* number of sockets in the receive group */
    uint16_t recv_port; /* port the receive group binds to; also the send destination */
    uint16_t send_port_min; /* base source port; test_recv_order() adds a counter to it */
    };

    /*
     * Address length handed to bind() and sendto() throughout this file: the
     * size of a whole struct sockaddr_storage, regardless of address family.
     */
    static size_t sockaddr_size(void)
    {
        const size_t storage_len = sizeof(struct sockaddr_storage);

        return storage_len;
    }

    /*
     * Allocate a zero-filled sockaddr_storage describing the wildcard ("any")
     * address of the requested family together with the given port.
     *
     * family: AF_INET or AF_INET6. Any other value terminates the program
     *         through error().
     * port:   port number in host byte order; stored in network byte order.
     *
     * Returns a heap pointer that the caller must release with free(). The
     * function never returns NULL: an allocation failure also terminates the
     * program through error().
     */
    static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
    {
        struct sockaddr_storage *addr;
        struct sockaddr_in *addr4;
        struct sockaddr_in6 *addr6;

        /* calloc() zero-fills; check the result before touching the memory. */
        addr = calloc(1, sizeof(*addr));
        if (!addr)
            error(1, errno, "failed to allocate sockaddr");

        switch (family) {
        case AF_INET:
            addr4 = (struct sockaddr_in *)addr;
            addr4->sin_family = AF_INET;
            addr4->sin_addr.s_addr = htonl(INADDR_ANY);
            addr4->sin_port = htons(port);
            break;
        case AF_INET6:
            addr6 = (struct sockaddr_in6 *)addr;
            addr6->sin6_family = AF_INET6;
            addr6->sin6_addr = in6addr_any;
            addr6->sin6_port = htons(port);
            break;
        default:
            error(1, 0, "Unsupported family %d", family);
        }
        return (struct sockaddr *)addr;
    }

    /*
     * Build a heap-allocated loopback address for the given family and port.
     * The address starts out as the result of new_any_sockaddr() and has its
     * address field overwritten with the loopback constant of the family.
     *
     * Returns the pointer obtained from new_any_sockaddr(); the caller frees
     * it. An unsupported family terminates the program through error().
     */
    static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
    {
        struct sockaddr *result = new_any_sockaddr(family, port);

        if (family == AF_INET) {
            struct sockaddr_in *v4 = (struct sockaddr_in *)result;

            v4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
        } else if (family == AF_INET6) {
            struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)result;

            v6->sin6_addr = in6addr_loopback;
        } else {
            error(1, 0, "Unsupported family %d", family);
        }

        return result;
    }

    /*
     * Attach a three-instruction classic BPF program to fd with
     * SO_ATTACH_REUSEPORT_CBPF. The program loads the 32-bit word at offset 0,
     * reduces it modulo `mod`, and returns the remainder.
     *
     * Terminates the program through error() when setsockopt() fails.
     */
    static void attach_cbpf(int fd, uint16_t mod)
    {
        struct sock_filter insns[] = {
            /* A = (uint32_t)skb[0] */
            BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 0),
            /* A = A % mod */
            BPF_STMT(BPF_ALU | BPF_MOD, mod),
            /* return A */
            BPF_STMT(BPF_RET | BPF_A, 0),
        };
        struct sock_fprog prog = {
            .filter = insns,
            .len = sizeof(insns) / sizeof(insns[0]),
        };
        int rc;

        rc = setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF,
                        &prog, sizeof(prog));
        if (rc != 0)
            error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
    }

    /*
     * Create p.recv_socks sockets, enable SO_REUSEPORT on each, and bind them
     * all to the wildcard address of p.recv_family on p.recv_port.
     *
     * p:          scenario parameters (family, socket type, count, port).
     * fd:         output array with room for p.recv_socks descriptors.
     * mod:        modulus forwarded to attach_bpf.
     * attach_bpf: called once, on the first socket, after SO_REUSEPORT is set
     *             and before that socket is bound.
     *
     * The sockets are only bound here; nothing else is configured on them.
     * Any failure terminates the program through error().
     */
    static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
                                 void (*attach_bpf)(int, uint16_t))
    {
        struct sockaddr * const bind_addr =
            new_any_sockaddr(p.recv_family, p.recv_port);
        const int enable = 1;
        int idx = 0;

        while (idx < p.recv_socks) {
            const int sock = socket(p.recv_family, p.protocol, 0);

            fd[idx] = sock;
            if (sock < 0)
                error(1, errno, "failed to create recv %d", idx);

            if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, &enable,
                           sizeof(enable)) != 0)
                error(1, errno, "failed to set SO_REUSEPORT on %d", idx);

            /* Only the first socket of the group carries the program. */
            if (idx == 0)
                attach_bpf(sock, mod);

            if (bind(sock, bind_addr, sockaddr_size()) != 0)
                error(1, errno, "failed to bind recv socket %d", idx);

            ++idx;
        }

        free(bind_addr);
    }

    /*
     * Send one message of len bytes from buf to the loopback address of
     * p.send_family on p.recv_port, using a fresh socket bound to source
     * port sport.
     *
     * The socket has SO_REUSEADDR set before bind(), the message is sent with
     * the MSG_FASTOPEN flag, and the socket is closed before returning.
     * Any failure terminates the program through error().
     */
    static void send_from(struct test_params p, uint16_t sport, char *buf,
                          size_t len)
    {
        struct sockaddr * const src = new_any_sockaddr(p.send_family, sport);
        struct sockaddr * const dst =
            new_loopback_sockaddr(p.send_family, p.recv_port);
        const int sock = socket(p.send_family, p.protocol, 0);
        const int reuse = 1;

        if (sock < 0)
            error(1, errno, "failed to create send socket");

        if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &reuse,
                       sizeof(reuse)) != 0)
            error(1, errno, "failed to set reuseaddr");

        if (bind(sock, src, sockaddr_size()) != 0)
            error(1, errno, "failed to bind send socket");

        if (sendto(sock, buf, len, MSG_FASTOPEN, dst, sockaddr_size()) < 0)
            error(1, errno, "failed to send message");

        close(sock);
        free(src);
        free(dst);
    }

    /*
     * Send p.recv_socks * 2 messages and check which socket of the receive
     * group each one arrives on.
     *
     * p:   scenario parameters; p.recv_socks is the number of entries in fd.
     * fd:  the receive sockets, in group order.
     * mod: modulus used to compute the expected socket index.
     *
     * Message number `data` carries htonl(data) as its 4-byte payload and is
     * sent from source port p.send_port_min + data. After each send the
     * function waits on epoll for a readable socket, receives the message
     * (accepting a connection first when p.protocol is SOCK_STREAM), and
     * requires the index of the ready socket to equal sport % mod.
     *
     * Any failure or mismatch terminates the program through error().
     *
     * NOTE(review): the expectation is derived from the source port while the
     * payload carries `data`; sport % mod equals data % mod only when
     * p.send_port_min is a multiple of mod. Confirm callers guarantee this.
     */
    static void test_recv_order(const struct test_params p, int fd[], int mod)
    {
        char recv_buf[8], send_buf[8];
        struct msghdr msg;
        struct iovec recv_io = { recv_buf, 8 };
        struct epoll_event ev;
        int epfd, conn, i, sport, expected;
        uint32_t data, ndata;

        epfd = epoll_create(1);
        if (epfd < 0)
            error(1, errno, "failed to create epoll");
        for (i = 0; i < p.recv_socks; ++i) {
            ev.events = EPOLLIN;
            ev.data.fd = fd[i];
            if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
                error(1, errno, "failed to register sock %d epoll", i);
        }

        memset(&msg, 0, sizeof(msg));
        msg.msg_iov = &recv_io;
        msg.msg_iovlen = 1;

        for (data = 0; data < p.recv_socks * 2; ++data) {
            sport = p.send_port_min + data;
            ndata = htonl(data);
            memcpy(send_buf, &ndata, sizeof(ndata));
            send_from(p, sport, send_buf, sizeof(ndata));

            /* Block until one socket of the group becomes readable. */
            i = epoll_wait(epfd, &ev, 1, -1);
            if (i < 0)
                error(1, errno, "epoll wait failed");

            if (p.protocol == SOCK_STREAM) {
                conn = accept(ev.data.fd, NULL, NULL);
                if (conn < 0)
                    error(1, errno, "error accepting");
                i = recvmsg(conn, &msg, 0);
                close(conn);
            } else {
                i = recvmsg(ev.data.fd, &msg, 0);
            }
            if (i < 0)
                error(1, errno, "recvmsg error");
            if (i != sizeof(ndata))
                error(1, 0, "expected size %zu got %d",
                      sizeof(ndata), i);

            /* Translate the ready descriptor back into its group index. */
            for (i = 0; i < p.recv_socks; ++i)
                if (ev.data.fd == fd[i])
                    break;
            memcpy(&ndata, recv_buf, sizeof(ndata));
            fprintf(stderr, "Socket %d: %u\n", i, ntohl(ndata));

            expected = (sport % mod);
            if (i != expected)
                error(1, 0, "expected socket %d", expected);
        }

        /* Release the epoll instance so repeated calls do not leak fds. */
        close(epfd);
    }

    /*
     * Build a receive group of p.recv_socks sockets with a classic BPF
     * program (modulus p.recv_socks) attached to the first one, then close
     * every socket again so the next scenario starts from a clean port.
     *
     * Failures inside build_recv_group() terminate the program.
     *
     * NOTE(review): test_recv_order() is defined in this file but is not
     * called here, so no traffic is sent through the group. Confirm whether
     * that is intentional.
     */
    static void test_reuseport_cbpf(struct test_params p)
    {
        int fd[p.recv_socks];
        size_t i;

        fprintf(stderr, "Testing CBPF mod %zu...\n", p.recv_socks);
        build_recv_group(p, fd, p.recv_socks, attach_cbpf);

        /* Without this the sockets stay bound for the rest of the process. */
        for (i = 0; i < p.recv_socks; ++i)
            close(fd[i]);
    }

    /* RLIMIT_MEMLOCK values captured at startup; main_dtor() restores them. */
    static struct rlimit rlim_old;

    /*
     * Runs before main(). Saves the current RLIMIT_MEMLOCK limits in rlim_old
     * and, unless the soft limit is already unlimited, tries to raise both the
     * soft and the hard limit by 1 MiB.
     *
     * The raise is best effort: the result of setrlimit() is deliberately
     * ignored. If the limits cannot be read, nothing is changed.
     */
    static __attribute__((constructor)) void main_ctor(void)
    {
        const rlim_t extra = 1UL << 20;
        struct rlimit rlim_new;

        if (getrlimit(RLIMIT_MEMLOCK, &rlim_old) != 0)
            return;

        if (rlim_old.rlim_cur == RLIM_INFINITY)
            return;

        rlim_new.rlim_cur = rlim_old.rlim_cur + extra;
        /*
         * An unlimited hard limit must stay unlimited: adding to
         * RLIM_INFINITY would wrap around to a tiny value and make the
         * request invalid.
         */
        if (rlim_old.rlim_max == RLIM_INFINITY)
            rlim_new.rlim_max = RLIM_INFINITY;
        else
            rlim_new.rlim_max = rlim_old.rlim_max + extra;

        (void)setrlimit(RLIMIT_MEMLOCK, &rlim_new);
    }

    /*
     * Destructor counterpart of main_ctor(): hands the RLIMIT_MEMLOCK values
     * stored in rlim_old back to setrlimit(). The result is not checked.
     */
    static __attribute__((destructor)) void main_dtor(void)
    {
        const struct rlimit *saved = &rlim_old;

        (void)setrlimit(RLIMIT_MEMLOCK, saved);
    }

    int main(void)
    {
    fprintf(stderr, "---- IPv4 UDP ----\n");
    /* NOTE: UDP socket lookups traverse a different code path when there
    * are > 10 sockets in a group. Run the bpf test through both paths.
    */
    test_reuseport_cbpf((struct test_params) {
    .recv_family = AF_INET,
    .send_family = AF_INET,
    .protocol = SOCK_DGRAM,
    .recv_socks = 10,
    .recv_port = 8001,
    .send_port_min = 9020});
    test_reuseport_cbpf((struct test_params) {
    .recv_family = AF_INET,
    .send_family = AF_INET,
    .protocol = SOCK_DGRAM,
    .recv_socks = 20,
    .recv_port = 8001,
    .send_port_min = 9020});

    fprintf(stderr, "---- IPv6 UDP ----\n");
    test_reuseport_cbpf((struct test_params) {
    .recv_family = AF_INET6,
    .send_family = AF_INET6,
    .protocol = SOCK_DGRAM,
    .recv_socks = 10,
    .recv_port = 8004,
    .send_port_min = 9060});
    test_reuseport_cbpf((struct test_params) {
    .recv_family = AF_INET6,
    .send_family = AF_INET6,
    .protocol = SOCK_DGRAM,
    .recv_socks = 20,
    .recv_port = 8004,
    .send_port_min = 9060});
    test_reuseport_cbpf((struct test_params) {
    .recv_family = AF_INET6,
    .send_family = AF_INET,
    .protocol = SOCK_DGRAM,
    .recv_socks = 10,
    .recv_port = 8007,
    .send_port_min = 9100});
    test_reuseport_cbpf((struct test_params) {
    .recv_family = AF_INET6,
    .send_family = AF_INET,
    .protocol = SOCK_DGRAM,
    .recv_socks = 20,
    .recv_port = 8007,
    .send_port_min = 9100});

    fprintf(stderr, "SUCCESS\n");
    return 0;
    }