diff options
25 files changed, 928 insertions, 16 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 18b592fde896..e53ceee1df37 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2264,6 +2264,9 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_nf(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index d4ee3ccd3753..39a999abb0ce 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -79,6 +79,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm, #endif BPF_PROG_TYPE(BPF_PROG_TYPE_SYSCALL, bpf_syscall, void *, void *) +#ifdef CONFIG_NETFILTER +BPF_PROG_TYPE(BPF_PROG_TYPE_NETFILTER, netfilter, + struct bpf_nf_ctx, struct bpf_nf_ctx) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index c8e03bcaecaa..0762444e3767 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -80,6 +80,7 @@ typedef unsigned int nf_hookfn(void *priv, enum nf_hook_ops_type { NF_HOOK_OP_UNDEFINED, NF_HOOK_OP_NF_TABLES, + NF_HOOK_OP_BPF, }; struct nf_hook_ops { diff --git a/include/net/netfilter/nf_bpf_link.h b/include/net/netfilter/nf_bpf_link.h new file mode 100644 index 000000000000..6c984b0ea838 --- /dev/null +++ b/include/net/netfilter/nf_bpf_link.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +struct bpf_nf_ctx { + const struct nf_hook_state *state; + struct sk_buff *skb; +}; + +#if IS_ENABLED(CONFIG_NETFILTER_BPF_LINK) +int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +#else +static inline int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} +#endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4b20a7269bee..1bb11a6ee667 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -986,6 +986,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ + BPF_PROG_TYPE_NETFILTER, }; enum bpf_attach_type { @@ -1050,6 +1051,7 @@ enum bpf_link_type { BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, BPF_LINK_TYPE_STRUCT_OPS = 9, + BPF_LINK_TYPE_NETFILTER = 10, MAX_BPF_LINK_TYPE, }; @@ -1560,6 +1562,12 @@ union bpf_attr { */ __u64 cookie; } tracing; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; }; } link_create; @@ -6410,6 +6418,12 @@ struct bpf_link_info { struct { __u32 map_id; } struct_ops; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; }; } __attribute__((aligned(8))); diff --git a/include/uapi/linux/netfilter/nfnetlink_hook.h b/include/uapi/linux/netfilter/nfnetlink_hook.h index bbcd285b22e1..84a561a74b98 100644 --- a/include/uapi/linux/netfilter/nfnetlink_hook.h +++ b/include/uapi/linux/netfilter/nfnetlink_hook.h @@ -32,8 +32,12 @@ enum nfnl_hook_attributes { /** * enum nfnl_hook_chain_info_attributes - chain description * - * NFNLA_HOOK_INFO_DESC: nft chain and table name (enum nft_table_attributes) (NLA_NESTED) - * NFNLA_HOOK_INFO_TYPE: chain type (enum nfnl_hook_chaintype) (NLA_U32) + * @NFNLA_HOOK_INFO_DESC: nft chain and table name (NLA_NESTED) + * @NFNLA_HOOK_INFO_TYPE: chain type (enum nfnl_hook_chaintype) (NLA_U32) + * + * NFNLA_HOOK_INFO_DESC depends on NFNLA_HOOK_INFO_TYPE value: + * NFNL_HOOK_TYPE_NFTABLES: enum nft_table_attributes + * NFNL_HOOK_TYPE_BPF: enum nfnl_hook_bpf_attributes */ enum nfnl_hook_chain_info_attributes { NFNLA_HOOK_INFO_UNSPEC, @@ -55,10 +59,24 @@ enum nfnl_hook_chain_desc_attributes { /** * enum nfnl_hook_chaintype - chain type * - * @NFNL_HOOK_TYPE_NFTABLES nf_tables base chain + * @NFNL_HOOK_TYPE_NFTABLES: nf_tables base chain + * @NFNL_HOOK_TYPE_BPF: bpf program */ enum nfnl_hook_chaintype { NFNL_HOOK_TYPE_NFTABLES = 0x1, + NFNL_HOOK_TYPE_BPF, +}; + +/** + * enum nfnl_hook_bpf_attributes - bpf prog description + * + * @NFNLA_HOOK_BPF_ID: bpf program id (NLA_U32) + */ +enum nfnl_hook_bpf_attributes { + NFNLA_HOOK_BPF_UNSPEC, + NFNLA_HOOK_BPF_ID, + __NFNLA_HOOK_BPF_MAX, }; +#define NFNLA_HOOK_BPF_MAX (__NFNLA_HOOK_BPF_MAX - 1) #endif /* _NFNL_HOOK_H */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7db4ec125fbd..6b682b8e4b50 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -25,6 +25,9 @@ #include <linux/bsearch.h> #include <linux/kobject.h> #include <linux/sysfs.h> + +#include <net/netfilter/nf_bpf_link.h> + #include <net/sock.h> #include "../tools/lib/bpf/relo_core.h" @@ -212,6 +215,7 @@ enum btf_kfunc_hook { BTF_KFUNC_HOOK_SK_SKB, BTF_KFUNC_HOOK_SOCKET_FILTER, BTF_KFUNC_HOOK_LWT, + BTF_KFUNC_HOOK_NETFILTER, BTF_KFUNC_HOOK_MAX, }; @@ -7802,6 +7806,8 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_LWT_XMIT: case BPF_PROG_TYPE_LWT_SEG6LOCAL: return BTF_KFUNC_HOOK_LWT; + case BPF_PROG_TYPE_NETFILTER: + return BTF_KFUNC_HOOK_NETFILTER; default: return BTF_KFUNC_HOOK_MAX; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index bcf1a1920ddd..14f39c1e573e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -35,6 +35,7 @@ #include <linux/rcupdate_trace.h> #include <linux/memcontrol.h> #include <linux/trace_events.h> +#include <net/netfilter/nf_bpf_link.h> #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ @@ -2462,6 +2463,7 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_EXT: /* extends any prog */ + case BPF_PROG_TYPE_NETFILTER: return true; case BPF_PROG_TYPE_CGROUP_SKB: /* always unpriv */ @@ -4588,6 +4590,7 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) switch (prog->type) { case BPF_PROG_TYPE_EXT: + case BPF_PROG_TYPE_NETFILTER: break; case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_TRACEPOINT: @@ -4654,6 +4657,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_XDP: ret = bpf_xdp_link_attach(attr, prog); break; + case BPF_PROG_TYPE_NETFILTER: + ret = bpf_nf_link_attach(attr, prog); + break; #endif case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_TRACEPOINT: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1e05355facdc..fc7281d39e46 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13816,6 +13816,9 @@ static int check_return_code(struct bpf_verifier_env *env) } break; + case BPF_PROG_TYPE_NETFILTER: + range = tnum_range(NF_DROP, NF_ACCEPT); + break; case BPF_PROG_TYPE_EXT: /* freplace program can return anything as its return value * depends on the to-be-replaced kernel func or bpf program. diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index f170e8a17974..e79e3a415ca9 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -19,7 +19,9 @@ #include <linux/error-injection.h> #include <linux/smp.h> #include <linux/sock_diag.h> +#include <linux/netfilter.h> #include <net/xdp.h> +#include <net/netfilter/nf_bpf_link.h> #define CREATE_TRACE_POINTS #include <trace/events/bpf_test_run.h> @@ -1691,6 +1693,162 @@ out: return err; } +static int verify_and_copy_hook_state(struct nf_hook_state *state, + const struct nf_hook_state *user, + struct net_device *dev) +{ + if (user->in || user->out) + return -EINVAL; + + if (user->net || user->sk || user->okfn) + return -EINVAL; + + switch (user->pf) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + switch (state->hook) { + case NF_INET_PRE_ROUTING: + state->in = dev; + break; + case NF_INET_LOCAL_IN: + state->in = dev; + break; + case NF_INET_FORWARD: + state->in = dev; + state->out = dev; + break; + case NF_INET_LOCAL_OUT: + state->out = dev; + break; + case NF_INET_POST_ROUTING: + state->out = dev; + break; + } + + break; + default: + return -EINVAL; + } + + state->pf = user->pf; + state->hook = user->hook; + + return 0; +} + +static __be16 nfproto_eth(int nfproto) +{ + switch (nfproto) { + case NFPROTO_IPV4: + return htons(ETH_P_IP); + case NFPROTO_IPV6: + break; + } + + return htons(ETH_P_IPV6); +} + +int bpf_prog_test_run_nf(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + struct net *net = current->nsproxy->net_ns; + struct net_device *dev = net->loopback_dev; + struct nf_hook_state *user_ctx, hook_state = { + .pf = NFPROTO_IPV4, + .hook = NF_INET_LOCAL_OUT, + }; + u32 size = kattr->test.data_size_in; + u32 repeat = kattr->test.repeat; + struct bpf_nf_ctx ctx = { + .state = &hook_state, + }; + struct sk_buff *skb = NULL; + u32 retval, duration; + void *data; + int ret; + + if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) + return -EINVAL; + + if (size < sizeof(struct iphdr)) + return -EINVAL; + + data = bpf_test_init(kattr, kattr->test.data_size_in, size, + NET_SKB_PAD + NET_IP_ALIGN, + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (IS_ERR(data)) + return PTR_ERR(data); + + if (!repeat) + repeat = 1; + + user_ctx = bpf_ctx_init(kattr, sizeof(struct nf_hook_state)); + if (IS_ERR(user_ctx)) { + kfree(data); + return PTR_ERR(user_ctx); + } + + if (user_ctx) { + ret = verify_and_copy_hook_state(&hook_state, user_ctx, dev); + if (ret) + goto out; + } + + skb = slab_build_skb(data); + if (!skb) { + ret = -ENOMEM; + goto out; + } + + data = NULL; /* data released via kfree_skb */ + + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); + __skb_put(skb, size); + + ret = -EINVAL; + + if (hook_state.hook != NF_INET_LOCAL_OUT) { + if (size < ETH_HLEN + sizeof(struct iphdr)) + goto out; + + skb->protocol = eth_type_trans(skb, dev); + switch (skb->protocol) { + case htons(ETH_P_IP): + if (hook_state.pf == NFPROTO_IPV4) + break; + goto out; + case htons(ETH_P_IPV6): + if (size < ETH_HLEN + sizeof(struct ipv6hdr)) + goto out; + if (hook_state.pf == NFPROTO_IPV6) + break; + goto out; + default: + ret = -EPROTO; + goto out; + } + + skb_reset_network_header(skb); + } else { + skb->protocol = nfproto_eth(hook_state.pf); + } + + ctx.skb = skb; + + ret = bpf_test_run(prog, &ctx, repeat, &retval, &duration, false); + if (ret) + goto out; + + ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration); + +out: + kfree(user_ctx); + kfree_skb(skb); + kfree(data); + return ret; +} + static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { .owner = THIS_MODULE, .set = &test_sk_check_kfunc_ids, diff --git a/net/core/filter.c b/net/core/filter.c index 44fb997434ad..d9ce04ca22ce 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11717,6 +11717,7 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_IN, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); } late_initcall(bpf_kfunc_init); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index d0bf630482c1..441d1f134110 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -30,6 +30,9 @@ config NETFILTER_FAMILY_BRIDGE config NETFILTER_FAMILY_ARP bool +config NETFILTER_BPF_LINK + def_bool BPF_SYSCALL + config NETFILTER_NETLINK_HOOK tristate "Netfilter base hook dump support" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5ffef1cd6143..d4958e7e7631 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -22,6 +22,7 @@ nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o endif obj-$(CONFIG_NETFILTER) = netfilter.o +obj-$(CONFIG_NETFILTER_BPF_LINK) += nf_bpf_link.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 358220b58521..f0783e42108b 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -119,6 +119,18 @@ nf_hook_entries_grow(const struct nf_hook_entries *old, for (i = 0; i < old_entries; i++) { if (orig_ops[i] != &dummy_ops) alloc_entries++; + + /* Restrict BPF hook type to force a unique priority, not + * shared at attach time. + * + * This is mainly to avoid ordering issues between two + * different bpf programs, this doesn't prevent a normal + * hook at same priority as a bpf one (we don't want to + * prevent defrag, conntrack, iptables etc from attaching). + */ + if (reg->priority == orig_ops[i]->priority && + reg->hook_ops_type == NF_HOOK_OP_BPF) + return ERR_PTR(-EBUSY); } } diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c new file mode 100644 index 000000000000..c36da56d756f --- /dev/null +++ b/net/netfilter/nf_bpf_link.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/netfilter.h> + +#include <net/netfilter/nf_bpf_link.h> +#include <uapi/linux/netfilter_ipv4.h> + +static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb, + const struct nf_hook_state *s) +{ + const struct bpf_prog *prog = bpf_prog; + struct bpf_nf_ctx ctx = { + .state = s, + .skb = skb, + }; + + return bpf_prog_run(prog, &ctx); +} + +struct bpf_nf_link { + struct bpf_link link; + struct nf_hook_ops hook_ops; + struct net *net; + u32 dead; +}; + +static void bpf_nf_link_release(struct bpf_link *link) +{ + struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); + + if (nf_link->dead) + return; + + /* prevent hook-not-found warning splat from netfilter core when + * .detach was already called + */ + if (!cmpxchg(&nf_link->dead, 0, 1)) + nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops); +} + +static void bpf_nf_link_dealloc(struct bpf_link *link) +{ + struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); + + kfree(nf_link); +} + +static int bpf_nf_link_detach(struct bpf_link *link) +{ + bpf_nf_link_release(link); + return 0; +} + +static void bpf_nf_link_show_info(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); + + seq_printf(seq, "pf:\t%u\thooknum:\t%u\tprio:\t%d\n", + nf_link->hook_ops.pf, nf_link->hook_ops.hooknum, + nf_link->hook_ops.priority); +} + +static int bpf_nf_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link); + + info->netfilter.pf = nf_link->hook_ops.pf; + info->netfilter.hooknum = nf_link->hook_ops.hooknum; + info->netfilter.priority = nf_link->hook_ops.priority; + info->netfilter.flags = 0; + + return 0; +} + +static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog) +{ + return -EOPNOTSUPP; +} + +static const struct bpf_link_ops bpf_nf_link_lops = { + .release = bpf_nf_link_release, + .dealloc = bpf_nf_link_dealloc, + .detach = bpf_nf_link_detach, + .show_fdinfo = bpf_nf_link_show_info, + .fill_link_info = bpf_nf_link_fill_link_info, + .update_prog = bpf_nf_link_update, +}; + +static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr) +{ + switch (attr->link_create.netfilter.pf) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + if (attr->link_create.netfilter.hooknum >= NF_INET_NUMHOOKS) + return -EPROTO; + break; + default: + return -EAFNOSUPPORT; + } + + if (attr->link_create.netfilter.flags) + return -EOPNOTSUPP; + + /* make sure conntrack confirm is always last. + * + * In the future, if userspace can e.g. request defrag, then + * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG" + * should fail. + */ + switch (attr->link_create.netfilter.priority) { + case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */ + case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */ + } + + return 0; +} + +int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct net *net = current->nsproxy->net_ns; + struct bpf_link_primer link_primer; + struct bpf_nf_link *link; + int err; + + if (attr->link_create.flags) + return -EINVAL; + + err = bpf_nf_check_pf_and_hooks(attr); + if (err) + return err; + + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) + return -ENOMEM; + + bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog); + + link->hook_ops.hook = nf_hook_run_bpf; + link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF; + link->hook_ops.priv = prog; + + link->hook_ops.pf = attr->link_create.netfilter.pf; + link->hook_ops.priority = attr->link_create.netfilter.priority; + link->hook_ops.hooknum = attr->link_create.netfilter.hooknum; + + link->net = net; + link->dead = false; + + err = bpf_link_prime(&link->link, &link_primer); + if (err) { + kfree(link); + return err; + } + + err = nf_register_net_hook(net, &link->hook_ops); + if (err) { + bpf_link_cleanup(&link_primer); + return err; + } + + return bpf_link_settle(&link_primer); +} + +const struct bpf_prog_ops netfilter_prog_ops = { + .test_run = bpf_prog_test_run_nf, +}; + +static bool nf_ptr_to_btf_id(struct bpf_insn_access_aux *info, const char *name) +{ + struct btf *btf; + s32 type_id; + + btf = bpf_get_btf_vmlinux(); + if (IS_ERR_OR_NULL(btf)) + return false; + + type_id = btf_find_by_name_kind(btf, name, BTF_KIND_STRUCT); + if (WARN_ON_ONCE(type_id < 0)) + return false; + + info->btf = btf; + info->btf_id = type_id; + info->reg_type = PTR_TO_BTF_ID | PTR_TRUSTED; + return true; +} + +static bool nf_is_valid_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off >= sizeof(struct bpf_nf_ctx)) + return false; + + if (type == BPF_WRITE) + return false; + + switch (off) { + case bpf_ctx_range(struct bpf_nf_ctx, skb): + if (size != sizeof_field(struct bpf_nf_ctx, skb)) + return false; + + return nf_ptr_to_btf_id(info, "sk_buff"); + case bpf_ctx_range(struct bpf_nf_ctx, state): + if (size != sizeof_field(struct bpf_nf_ctx, state)) + return false; + + return nf_ptr_to_btf_id(info, "nf_hook_state"); + default: + return false; + } + + return false; +} + +static const struct bpf_func_proto * +bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return bpf_base_func_proto(func_id); +} + +const struct bpf_verifier_ops netfilter_verifier_ops = { + .is_valid_access = nf_is_valid_access, + .get_func_proto = bpf_nf_func_proto, +}; diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 8120aadf6a0f..ade8ee1988b1 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -5,6 +5,7 @@ * Author: Florian Westphal <fw@strlen.de> */ +#include <linux/bpf.h> #include <linux/module.h> #include <linux/kallsyms.h> #include <linux/kernel.h> @@ -57,35 +58,76 @@ struct nfnl_dump_hook_data { u8 hook; }; +static struct nlattr *nfnl_start_info_type(struct sk_buff *nlskb, enum nfnl_hook_chaintype t) +{ + struct nlattr *nest = nla_nest_start(nlskb, NFNLA_HOOK_CHAIN_INFO); + int ret; + + if (!nest) + return NULL; + + ret = nla_put_be32(nlskb, NFNLA_HOOK_INFO_TYPE, htonl(t)); + if (ret == 0) + return nest; + + nla_nest_cancel(nlskb, nest); + return NULL; +} + +static int nfnl_hook_put_bpf_prog_info(struct sk_buff *nlskb, + const struct nfnl_dump_hook_data *ctx, + unsigned int seq, + const struct bpf_prog *prog) +{ + struct nlattr *nest, *nest2; + int ret; + + if (!IS_ENABLED(CONFIG_NETFILTER_BPF_LINK)) + return 0; + + if (WARN_ON_ONCE(!prog)) + return 0; + + nest = nfnl_start_info_type(nlskb, NFNL_HOOK_TYPE_BPF); + if (!nest) + return -EMSGSIZE; + + nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC); + if (!nest2) + goto cancel_nest; + + ret = nla_put_be32(nlskb, NFNLA_HOOK_BPF_ID, htonl(prog->aux->id)); + if (ret) + goto cancel_nest; + + nla_nest_end(nlskb, nest2); + nla_nest_end(nlskb, nest); + return 0; + +cancel_nest: + nla_nest_cancel(nlskb, nest); + return -EMSGSIZE; +} + static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb, const struct nfnl_dump_hook_data *ctx, unsigned int seq, - const struct nf_hook_ops *ops) + struct nft_chain *chain) { struct net *net = sock_net(nlskb->sk); struct nlattr *nest, *nest2; - struct nft_chain *chain; int ret = 0; - if (ops->hook_ops_type != NF_HOOK_OP_NF_TABLES) - return 0; - - chain = ops->priv; if (WARN_ON_ONCE(!chain)) return 0; if (!nft_is_active(net, chain)) return 0; - nest = nla_nest_start(nlskb, NFNLA_HOOK_CHAIN_INFO); + nest = nfnl_start_info_type(nlskb, NFNL_HOOK_TYPE_NFTABLES); if (!nest) return -EMSGSIZE; - ret = nla_put_be32(nlskb, NFNLA_HOOK_INFO_TYPE, - htonl(NFNL_HOOK_TYPE_NFTABLES)); - if (ret) - goto cancel_nest; - nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC); if (!nest2) goto cancel_nest; @@ -171,7 +213,20 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb, if (ret) goto nla_put_failure; - ret = nfnl_hook_put_nft_chain_info(nlskb, ctx, seq, ops); + switch (ops->hook_ops_type) { + case NF_HOOK_OP_NF_TABLES: + ret = nfnl_hook_put_nft_chain_info(nlskb, ctx, seq, ops->priv); + break; + case NF_HOOK_OP_BPF: + ret = nfnl_hook_put_bpf_prog_info(nlskb, ctx, seq, ops->priv); + break; + case NF_HOOK_OP_UNDEFINED: + break; + default: + WARN_ON_ONCE(1); + break; + } + if (ret) goto nla_put_failure; diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index f985b79cca27..d98dbc50cf4c 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -3,6 +3,8 @@ #include <errno.h> #include <linux/err.h> +#include <linux/netfilter.h> +#include <linux/netfilter_arp.h> #include <net/if.h> #include <stdio.h> #include <unistd.h> @@ -135,6 +137,18 @@ static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr) } } +void netfilter_dump_json(const struct bpf_link_info *info, json_writer_t *wtr) +{ + jsonw_uint_field(json_wtr, "pf", + info->netfilter.pf); + jsonw_uint_field(json_wtr, "hook", + info->netfilter.hooknum); + jsonw_int_field(json_wtr, "prio", + info->netfilter.priority); + jsonw_uint_field(json_wtr, "flags", + info->netfilter.flags); +} + static int get_prog_info(int prog_id, struct bpf_prog_info *info) { __u32 len = sizeof(*info); @@ -195,6 +209,10 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) info->netns.netns_ino); show_link_attach_type_json(info->netns.attach_type, json_wtr); break; + case BPF_LINK_TYPE_NETFILTER: + netfilter_dump_json(info, json_wtr); + break; + default: break; } @@ -263,6 +281,68 @@ static void show_iter_plain(struct bpf_link_info *info) } } +static const char * const pf2name[] = { + [NFPROTO_INET] = "inet", + [NFPROTO_IPV4] = "ip", + [NFPROTO_ARP] = "arp", + [NFPROTO_NETDEV] = "netdev", + [NFPROTO_BRIDGE] = "bridge", + [NFPROTO_IPV6] = "ip6", +}; + +static const char * const inethook2name[] = { + [NF_INET_PRE_ROUTING] = "prerouting", + [NF_INET_LOCAL_IN] = "input", + [NF_INET_FORWARD] = "forward", + [NF_INET_LOCAL_OUT] = "output", + [NF_INET_POST_ROUTING] = "postrouting", +}; + +static const char * const arphook2name[] = { + [NF_ARP_IN] = "input", + [NF_ARP_OUT] = "output", +}; + +void netfilter_dump_plain(const struct bpf_link_info *info) +{ + const char *hookname = NULL, *pfname = NULL; + unsigned int hook = info->netfilter.hooknum; + unsigned int pf = info->netfilter.pf; + + if (pf < ARRAY_SIZE(pf2name)) + pfname = pf2name[pf]; + + switch (pf) { + case NFPROTO_BRIDGE: /* bridge shares numbers with enum nf_inet_hooks */ + case NFPROTO_IPV4: + case NFPROTO_IPV6: + case NFPROTO_INET: + if (hook < ARRAY_SIZE(inethook2name)) + hookname = inethook2name[hook]; + break; + case NFPROTO_ARP: + if (hook < ARRAY_SIZE(arphook2name)) + hookname = arphook2name[hook]; + default: + break; + } + + if (pfname) + printf("\n\t%s", pfname); + else + printf("\n\tpf: %d", pf); + + if (hookname) + printf(" %s", hookname); + else + printf(", hook %u,", hook); + + printf(" prio %d", info->netfilter.priority); + + if (info->netfilter.flags) + printf(" flags 0x%x", info->netfilter.flags); +} + static int show_link_close_plain(int fd, struct bpf_link_info *info) { struct bpf_prog_info prog_info; @@ -301,6 +381,9 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\tnetns_ino %u ", info->netns.netns_ino); show_link_attach_type_plain(info->netns.attach_type); break; + case BPF_LINK_TYPE_NETFILTER: + netfilter_dump_plain(info); + break; default: break; } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 9a95788e654d..a49534d7eafa 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -267,4 +267,7 @@ static inline bool hashmap__empty(struct hashmap *map) int pathname_concat(char *buf, int buf_sz, const char *path, const char *name); +/* print netfilter bpf_link info */ +void netfilter_dump_plain(const struct bpf_link_info *info); +void netfilter_dump_json(const struct bpf_link_info *info, json_writer_t *wtr); #endif diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index c40e44c938ae..26a49965bf71 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -647,6 +647,108 @@ static int do_detach(int argc, char **argv) return 0; } +static int netfilter_link_compar(const void *a, const void *b) +{ + const struct bpf_link_info *nfa = a; + const struct bpf_link_info *nfb = b; + int delta; + + delta = nfa->netfilter.pf - nfb->netfilter.pf; + if (delta) + return delta; + + delta = nfa->netfilter.hooknum - nfb->netfilter.hooknum; + if (delta) + return delta; + + if (nfa->netfilter.priority < nfb->netfilter.priority) + return -1; + if (nfa->netfilter.priority > nfb->netfilter.priority) + return 1; + + return nfa->netfilter.flags - nfb->netfilter.flags; +} + +static void show_link_netfilter(void) +{ + unsigned int nf_link_len = 0, nf_link_count = 0; + struct bpf_link_info *nf_link_info = NULL; + __u32 id = 0; + + while (true) { + struct bpf_link_info info; + int fd, err; + __u32 len; + + err = bpf_link_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + break; + p_err("can't get next link: %s (id %d)", strerror(errno), id); + break; + } + + fd = bpf_link_get_fd_by_id(id); + if (fd < 0) { + p_err("can't get link by id (%u): %s", id, strerror(errno)); + continue; + } + + memset(&info, 0, sizeof(info)); + len = sizeof(info); + + err = bpf_link_get_info_by_fd(fd, &info, &len); + + close(fd); + + if (err) { + p_err("can't get link info for fd %d: %s", fd, strerror(errno)); + continue; + } + + if (info.type != BPF_LINK_TYPE_NETFILTER) + continue; + + if (nf_link_count >= nf_link_len) { + static const unsigned int max_link_count = INT_MAX / sizeof(info); + struct bpf_link_info *expand; + + if (nf_link_count > max_link_count) { + p_err("cannot handle more than %u links\n", max_link_count); + break; + } + + nf_link_len += 16; + + expand = realloc(nf_link_info, nf_link_len * sizeof(info)); + if (!expand) { + p_err("realloc: %s", strerror(errno)); + break; + } + + nf_link_info = expand; + } + + nf_link_info[nf_link_count] = info; + nf_link_count++; + } + + qsort(nf_link_info, nf_link_count, sizeof(*nf_link_info), netfilter_link_compar); + + for (id = 0; id < nf_link_count; id++) { + NET_START_OBJECT; + if (json_output) + netfilter_dump_json(&nf_link_info[id], json_wtr); + else + netfilter_dump_plain(&nf_link_info[id]); + + NET_DUMP_UINT("id", " prog_id %u", nf_link_info[id].prog_id); + NET_END_OBJECT; + } + + free(nf_link_info); +} + static int do_show(int argc, char **argv) { struct bpf_attach_info attach_info = {}; @@ -701,6 +803,10 @@ static int do_show(int argc, char **argv) NET_DUMP_UINT("id", "id %u", attach_info.flow_dissector_id); NET_END_ARRAY("\n"); + NET_START_ARRAY("netfilter", "%s:\n"); + show_link_netfilter(); + NET_END_ARRAY("\n"); + NET_END_OBJECT; if (json_output) jsonw_end_array(json_wtr); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4b20a7269bee..1bb11a6ee667 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -986,6 +986,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ + BPF_PROG_TYPE_NETFILTER, }; enum bpf_attach_type { @@ -1050,6 +1051,7 @@ enum bpf_link_type { BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, BPF_LINK_TYPE_STRUCT_OPS = 9, + BPF_LINK_TYPE_NETFILTER = 10, MAX_BPF_LINK_TYPE, }; @@ -1560,6 +1562,12 @@ union bpf_attr { */ __u64 cookie; } tracing; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; }; } link_create; @@ -6410,6 +6418,12 @@ struct bpf_link_info { struct { __u32 map_id; } struct_ops; + struct { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + } netfilter; }; } __attribute__((aligned(8))); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2600d8384252..1cbacf9e71f3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -130,6 +130,7 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_LINK_TYPE_NETFILTER] = "netfilter", }; static const char * const map_type_name[] = { @@ -201,6 +202,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_LSM] = "lsm", [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", [BPF_PROG_TYPE_SYSCALL] = "syscall", + [BPF_PROG_TYPE_NETFILTER] = "netfilter", }; static int __base_pr(enum libbpf_print_level level, const char *format, @@ -8710,6 +8712,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), + SEC_DEF("netfilter", NETFILTER, 0, SEC_NONE), }; static size_t custom_sec_def_cnt; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 4f3bc968ff8e..6065f408a59c 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -180,6 +180,7 @@ static int probe_prog_load(enum bpf_prog_type prog_type, case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_SYSCTL: + case BPF_PROG_TYPE_NETFILTER: break; default: return -EOPNOTSUPP; diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 7c68d78da9ea..7534f5499d11 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -29,6 +29,8 @@ #include "verifier_map_ret_val.skel.h" #include "verifier_masking.skel.h" #include "verifier_meta_access.skel.h" +#include "verifier_netfilter_ctx.skel.h" +#include "verifier_netfilter_retcode.skel.h" #include "verifier_raw_stack.skel.h" #include "verifier_raw_tp_writable.skel.h" #include "verifier_reg_equal.skel.h" @@ -103,6 +105,8 @@ void test_verifier_map_ptr(void) { RUN(verifier_map_ptr); } void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); } void test_verifier_masking(void) { RUN(verifier_masking); } void test_verifier_meta_access(void) { RUN(verifier_meta_access); } +void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } +void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); } void test_verifier_reg_equal(void) { RUN(verifier_reg_equal); } diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c new file mode 100644 index 000000000000..65bba330e7e5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include "bpf_misc.h" + +#include <bpf/bpf_endian.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +SEC("netfilter") +__description("netfilter invalid context access, size too short") +__failure __msg("invalid bpf_context access") +__naked void with_invalid_ctx_access_test1(void) +{ + asm volatile (" \ + r2 = *(u8*)(r1 + %[__bpf_nf_ctx_state]); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__bpf_nf_ctx_state, offsetof(struct bpf_nf_ctx, state)) + : __clobber_all); +} + +SEC("netfilter") +__description("netfilter invalid context access, size too short") +__failure __msg("invalid bpf_context access") +__naked void with_invalid_ctx_access_test2(void) +{ + asm volatile (" \ + r2 = *(u16*)(r1 + %[__bpf_nf_ctx_skb]); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__bpf_nf_ctx_skb, offsetof(struct bpf_nf_ctx, skb)) + : __clobber_all); +} + +SEC("netfilter") +__description("netfilter invalid context access, past end of ctx") +__failure __msg("invalid bpf_context access") +__naked void with_invalid_ctx_access_test3(void) +{ + asm volatile (" \ + r2 = *(u64*)(r1 + %[__bpf_nf_ctx_size]); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(__bpf_nf_ctx_size, sizeof(struct bpf_nf_ctx)) + : __clobber_all); +} + +SEC("netfilter") +__description("netfilter invalid context, write") +__failure __msg("invalid bpf_context access") +__naked void with_invalid_ctx_access_test4(void) +{ + asm volatile (" \ + r2 = r1; \ + *(u64*)(r2 + 0) = r1; \ + r0 = 1; \ + exit; \ +" : + : __imm_const(__bpf_nf_ctx_skb, offsetof(struct bpf_nf_ctx, skb)) + : __clobber_all); +} + +#define NF_DROP 0 +#define NF_ACCEPT 1 + +SEC("netfilter") +__description("netfilter valid context read and invalid write") +__failure __msg("only read is supported") +int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx) +{ + struct nf_hook_state *state = (void *)ctx->state; + + state->sk = NULL; + return NF_ACCEPT; +} + +extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym; +extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, + void *buffer, uint32_t buffer__sz) __ksym; + +SEC("netfilter") +__description("netfilter test prog with skb and state read access") +__success __failure_unpriv +__retval(0) +int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx) +{ + const struct nf_hook_state *state = ctx->state; + struct sk_buff *skb = ctx->skb; + const struct iphdr *iph; + const struct tcphdr *th; + u8 buffer_iph[20] = {}; + u8 buffer_th[40] = {}; + struct bpf_dynptr ptr; + uint8_t ihl; + + if (skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr)) + return NF_ACCEPT; + + iph = bpf_dynptr_slice(&ptr, 0, buffer_iph, sizeof(buffer_iph)); + if (!iph) + return NF_ACCEPT; + + if (state->pf != 2) + return NF_ACCEPT; + + ihl = iph->ihl << 2; + + th = bpf_dynptr_slice(&ptr, ihl, buffer_th, sizeof(buffer_th)); + if (!th) + return NF_ACCEPT; + + return th->dest == bpf_htons(22) ? NF_ACCEPT : NF_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c new file mode 100644 index 000000000000..353ae6da00e1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("netfilter") +__description("bpf_exit with invalid return code. test1") +__failure __msg("R0 is not a known value") +__naked void with_invalid_return_code_test1(void) +{ + asm volatile (" \ + r0 = *(u64*)(r1 + 0); \ + exit; \ +" ::: __clobber_all); +} + +SEC("netfilter") +__description("bpf_exit with valid return code. test2") +__success +__naked void with_valid_return_code_test2(void) +{ + asm volatile (" \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("netfilter") +__description("bpf_exit with valid return code. test3") +__success +__naked void with_valid_return_code_test3(void) +{ + asm volatile (" \ + r0 = 1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("netfilter") +__description("bpf_exit with invalid return code. test4") +__failure __msg("R0 has value (0x2; 0x0)") +__naked void with_invalid_return_code_test4(void) +{ + asm volatile (" \ + r0 = 2; \ + exit; \ +" ::: __clobber_all); +} |