// SPDX-License-Identifier: GPL-2.0 /* * Test for sockmap/sockhash redirection. * * BPF_MAP_TYPE_SOCKMAP * BPF_MAP_TYPE_SOCKHASH * x * sk_msg-to-egress * sk_msg-to-ingress * sk_skb-to-egress * sk_skb-to-ingress * x * AF_INET, SOCK_STREAM * AF_INET6, SOCK_STREAM * AF_INET, SOCK_DGRAM * AF_INET6, SOCK_DGRAM * AF_UNIX, SOCK_STREAM * AF_UNIX, SOCK_DGRAM * AF_VSOCK, SOCK_STREAM * AF_VSOCK, SOCK_SEQPACKET */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "linux/const.h" #include "test_progs.h" #include "sockmap_helpers.h" #include "test_sockmap_redir.skel.h" /* The meaning of SUPPORTED is "will redirect packet as expected". */ #define SUPPORTED _BITUL(0) /* Note on sk_skb-to-ingress ->af_vsock: * * Peer socket may receive the packet some time after the return from sendmsg(). * In a typical usage scenario, recvmsg() will block until the redirected packet * appears in the destination queue, or timeout if the packet was dropped. By * that point, the verdict map has already been updated to reflect what has * happened. * * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg() * takes place. Which means we may race the execution of the verdict logic and * read map_verd before it has been updated, i.e. we might observe * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1. * * This confuses the selftest logic: if there was no packet dropped, where's the * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0 * (which implies the verdict program has not been ran) just re-read the verdict * map again. */ #define UNSUPPORTED_RACY_VERD _BITUL(1) enum prog_type { SK_MSG_EGRESS, SK_MSG_INGRESS, SK_SKB_EGRESS, SK_SKB_INGRESS, }; enum { SEND_INNER = 0, SEND_OUTER, }; enum { RECV_INNER = 0, RECV_OUTER, }; struct maps { int in; int out; int verd; }; struct combo_spec { enum prog_type prog_type; const char *in, *out; }; struct redir_spec { const char *name; int idx_send; int idx_recv; enum prog_type prog_type; }; struct socket_spec { int family; int sotype; int send_flags; int in[2]; int out[2]; }; static int socket_spec_pairs(struct socket_spec *s) { return create_socket_pairs(s->family, s->sotype, &s->in[0], &s->out[0], &s->in[1], &s->out[1]); } static void socket_spec_close(struct socket_spec *s) { xclose(s->in[0]); xclose(s->in[1]); xclose(s->out[0]); xclose(s->out[1]); } static void get_redir_params(struct redir_spec *redir, struct test_sockmap_redir *skel, int *prog_fd, enum bpf_attach_type *attach_type, int *redirect_flags) { enum prog_type type = redir->prog_type; struct bpf_program *prog; bool sk_msg; sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS; prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict; *prog_fd = bpf_program__fd(prog); *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT; if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS) *redirect_flags = BPF_F_INGRESS; else *redirect_flags = 0; } static void try_recv(const char *prefix, int fd, int flags, bool expect_success) { ssize_t n; char buf; errno = 0; n = recv(fd, &buf, 1, flags); if (n < 0 && expect_success) FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n); if (!n && !expect_success) FAIL("%s: expected failure: retval=%zd", prefix, n); } static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out, int sd_recv, int map_verd, int status) { unsigned int drop, pass; char recv_buf; ssize_t n; get_verdict: if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) || xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass)) return; if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) { sched_yield(); goto get_verdict; } if (pass != 0) { FAIL("unsupported: wanted verdict pass 0, have %u", pass); return; } /* If nothing was dropped, packet should have reached the peer */ if (drop == 0) { errno = 0; n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC); if (n != 1) FAIL_ERRNO("unsupported: packet missing, retval=%zd", n); } /* Ensure queues are empty */ try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false); if (sd_in != sd_send) try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false); try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false); if (sd_recv != sd_out) try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false); } static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer, int sd_in, int sd_out, int sd_recv, struct maps *maps, int status) { unsigned int drop, pass; char *send_buf = "ab"; char recv_buf = '\0'; ssize_t n, len = 1; /* Zero out the verdict map */ if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) || xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY)) return; if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST)) return; if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST)) goto del_in; /* Last byte is OOB data when send_flags has MSG_OOB bit set */ if (send_flags & MSG_OOB) len++; n = send(sd_send, send_buf, len, send_flags); if (n >= 0 && n < len) FAIL("incomplete send"); if (n < 0) { /* sk_msg redirect combo not supported? */ if (status & SUPPORTED || errno != EACCES) FAIL_ERRNO("send"); goto out; } if (!(status & SUPPORTED)) { handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv, maps->verd, status); goto out; } errno = 0; n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC); if (n != 1) { FAIL_ERRNO("recv_timeout()"); goto out; } /* Check verdict _after_ recv(); af_vsock may need time to catch up */ if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) || xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass)) goto out; if (drop != 0 || pass != 1) FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u", drop, pass); if (recv_buf != send_buf[0]) FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]); if (send_flags & MSG_OOB) { /* Fail reading OOB while in sockmap */ try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out, MSG_OOB | MSG_DONTWAIT, false); /* Remove sd_out from sockmap */ xbpf_map_delete_elem(maps->out, &u32(0)); /* Check that OOB was dropped on redirect */ try_recv("recv(sd_out, MSG_OOB)", sd_out, MSG_OOB | MSG_DONTWAIT, false); goto del_in; } out: xbpf_map_delete_elem(maps->out, &u32(0)); del_in: xbpf_map_delete_elem(maps->in, &u32(0)); } static int is_redir_supported(enum prog_type type, const char *in, const char *out) { /* Matching based on strings returned by socket_kind_to_str(): * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq * Plus a wildcard: any * Not in use: u_seq, v_dgr */ struct combo_spec *c, combos[] = { /* Send to local: TCP -> any, but vsock */ { SK_MSG_INGRESS, "tcp", "tcp" }, { SK_MSG_INGRESS, "tcp", "udp" }, { SK_MSG_INGRESS, "tcp", "u_str" }, { SK_MSG_INGRESS, "tcp", "u_dgr" }, /* Send to egress: TCP -> TCP */ { SK_MSG_EGRESS, "tcp", "tcp" }, /* Ingress to egress: any -> any */ { SK_SKB_EGRESS, "any", "any" }, /* Ingress to local: any -> any, but vsock */ { SK_SKB_INGRESS, "any", "tcp" }, { SK_SKB_INGRESS, "any", "udp" }, { SK_SKB_INGRESS, "any", "u_str" }, { SK_SKB_INGRESS, "any", "u_dgr" }, }; for (c = combos; c < combos + ARRAY_SIZE(combos); c++) { if (c->prog_type == type && (!strcmp(c->in, "any") || strstarts(in, c->in)) && (!strcmp(c->out, "any") || strstarts(out, c->out))) return SUPPORTED; } return 0; } static int get_support_status(enum prog_type type, const char *in, const char *out) { int status = is_redir_supported(type, in, out); if (type == SK_SKB_INGRESS && strstarts(out, "v_")) status |= UNSUPPORTED_RACY_VERD; return status; } static void test_socket(enum bpf_map_type type, struct redir_spec *redir, struct maps *maps, struct socket_spec *s_in, struct socket_spec *s_out) { int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status; const char *in_str, *out_str; char s[MAX_TEST_NAME]; fd_in = s_in->in[0]; fd_out = s_out->out[0]; fd_send = s_in->in[redir->idx_send]; fd_peer = s_in->in[redir->idx_send ^ 1]; fd_recv = s_out->out[redir->idx_recv]; flags = s_in->send_flags; in_str = socket_kind_to_str(fd_in); out_str = socket_kind_to_str(fd_out); status = get_support_status(redir->prog_type, in_str, out_str); snprintf(s, sizeof(s), "%-4s %-17s %-5s %s %-5s%6s", /* hash sk_skb-to-ingress u_str → v_str (OOB) */ type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash", redir->name, in_str, status & SUPPORTED ? "→" : " ", out_str, (flags & MSG_OOB) ? "(OOB)" : ""); if (!test__start_subtest(s)) return; test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv, maps, status); } static void test_redir(enum bpf_map_type type, struct redir_spec *redir, struct maps *maps) { struct socket_spec *s, sockets[] = { { AF_INET, SOCK_STREAM }, // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */ { AF_INET6, SOCK_STREAM }, { AF_INET, SOCK_DGRAM }, { AF_INET6, SOCK_DGRAM }, { AF_UNIX, SOCK_STREAM }, { AF_UNIX, SOCK_STREAM, MSG_OOB }, { AF_UNIX, SOCK_DGRAM }, // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */ { AF_VSOCK, SOCK_STREAM }, // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */ { AF_VSOCK, SOCK_SEQPACKET }, }; for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) if (socket_spec_pairs(s)) goto out; /* Intra-proto */ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) test_socket(type, redir, maps, s, s); /* Cross-proto */ for (int i = 0; i < ARRAY_SIZE(sockets); i++) { for (int j = 0; j < ARRAY_SIZE(sockets); j++) { struct socket_spec *out = &sockets[j]; struct socket_spec *in = &sockets[i]; /* Skip intra-proto and between variants */ if (out->send_flags || (in->family == out->family && in->sotype == out->sotype)) continue; test_socket(type, redir, maps, in, out); } } out: while (--s >= sockets) socket_spec_close(s); } static void test_map(enum bpf_map_type type) { struct redir_spec *r, redirs[] = { { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS }, { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS }, { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS }, { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS }, }; for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) { enum bpf_attach_type attach_type; struct test_sockmap_redir *skel; struct maps maps; int prog_fd; skel = test_sockmap_redir__open_and_load(); if (!skel) { FAIL("open_and_load"); return; } switch (type) { case BPF_MAP_TYPE_SOCKMAP: maps.in = bpf_map__fd(skel->maps.nop_map); maps.out = bpf_map__fd(skel->maps.sock_map); break; case BPF_MAP_TYPE_SOCKHASH: maps.in = bpf_map__fd(skel->maps.nop_hash); maps.out = bpf_map__fd(skel->maps.sock_hash); break; default: FAIL("Unsupported bpf_map_type"); return; } skel->bss->redirect_type = type; maps.verd = bpf_map__fd(skel->maps.verdict_map); get_redir_params(r, skel, &prog_fd, &attach_type, &skel->bss->redirect_flags); if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0)) return; test_redir(type, r, &maps); if (xbpf_prog_detach2(prog_fd, maps.in, attach_type)) return; test_sockmap_redir__destroy(skel); } } void serial_test_sockmap_redir(void) { test_map(BPF_MAP_TYPE_SOCKMAP); test_map(BPF_MAP_TYPE_SOCKHASH); }