summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-09-30 17:58:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-09-30 17:58:11 -0700
commitae28ed4578e6d5a481e39c5a9827f27048661fdd (patch)
treefd29a311fe5f4ab052c4973fca50bca55e82bf94 /net
parent4b81e2eb9e4db8f6094c077d0c8b27c264901c1b (diff)
parent4ef77dd584cfd915526328f516fec59e3a54d66e (diff)
Merge tag 'bpf-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov: - Support pulling non-linear xdp data with bpf_xdp_pull_data() kfunc (Amery Hung) Applied as a stable branch in bpf-next and net-next trees. - Support reading skb metadata via bpf_dynptr (Jakub Sitnicki) Also a stable branch in bpf-next and net-next trees. - Enforce expected_attach_type for tailcall compatibility (Daniel Borkmann) - Replace path-sensitive with path-insensitive live stack analysis in the verifier (Eduard Zingerman) This is a significant change in the verification logic. More details, motivation, long term plans are in the cover letter/merge commit. - Support signed BPF programs (KP Singh) This is another major feature that took years to materialize. Algorithm details are in the cover letter/marge commit - Add support for may_goto instruction to s390 JIT (Ilya Leoshkevich) - Add support for may_goto instruction to arm64 JIT (Puranjay Mohan) - Fix USDT SIB argument handling in libbpf (Jiawei Zhao) - Allow uprobe-bpf program to change context registers (Jiri Olsa) - Support signed loads from BPF arena (Kumar Kartikeya Dwivedi and Puranjay Mohan) - Allow access to union arguments in tracing programs (Leon Hwang) - Optimize rcu_read_lock() + migrate_disable() combination where it's used in BPF subsystem (Menglong Dong) - Introduce bpf_task_work_schedule*() kfuncs to schedule deferred execution of BPF callback in the context of a specific task using the kernel’s task_work infrastructure (Mykyta Yatsenko) - Enforce RCU protection for KF_RCU_PROTECTED kfuncs (Kumar Kartikeya Dwivedi) - Add stress test for rqspinlock in NMI (Kumar Kartikeya Dwivedi) - Improve the precision of tnum multiplier verifier operation (Nandakumar Edamana) - Use tnums to improve is_branch_taken() logic (Paul Chaignon) - Add support for atomic operations in arena in riscv JIT (Pu Lehui) - Report arena faults to BPF error stream (Puranjay Mohan) - Search for tracefs at /sys/kernel/tracing first in bpftool (Quentin Monnet) - Add bpf_strcasecmp() kfunc (Rong Tao) - Support lookup_and_delete_elem command in BPF_MAP_STACK_TRACE (Tao Chen) * tag 'bpf-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (197 commits) libbpf: Replace AF_ALG with open coded SHA-256 selftests/bpf: Add stress test for rqspinlock in NMI selftests/bpf: Add test case for different expected_attach_type bpf: Enforce expected_attach_type for tailcall compatibility bpftool: Remove duplicate string.h header bpf: Remove duplicate crypto/sha2.h header libbpf: Fix error when st-prefix_ops and ops from differ btf selftests/bpf: Test changing packet data from kfunc selftests/bpf: Add stacktrace map lookup_and_delete_elem test case selftests/bpf: Refactor stacktrace_map case with skeleton bpf: Add lookup_and_delete_elem for BPF_MAP_STACK_TRACE selftests/bpf: Fix flaky bpf_cookie selftest selftests/bpf: Test changing packet data from global functions with a kfunc bpf: Emit struct bpf_xdp_sock type in vmlinux BTF selftests/bpf: Task_work selftest cleanup fixes MAINTAINERS: Delete inactive maintainers from AF_XDP bpf: Mark kfuncs as __noclone selftests/bpf: Add kprobe multi write ctx attach test selftests/bpf: Add kprobe write ctx attach test selftests/bpf: Add uprobe context ip register change test ...
Diffstat (limited to 'net')
-rw-r--r--net/bpf/test_run.c59
-rw-r--r--net/core/filter.c210
2 files changed, 221 insertions, 48 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 9728dbd4c66c..dfb03ee0bb62 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -524,27 +524,27 @@ __bpf_kfunc int bpf_fentry_test1(int a)
}
EXPORT_SYMBOL_GPL(bpf_fentry_test1);
-int noinline bpf_fentry_test2(int a, u64 b)
+noinline int bpf_fentry_test2(int a, u64 b)
{
return a + b;
}
-int noinline bpf_fentry_test3(char a, int b, u64 c)
+noinline int bpf_fentry_test3(char a, int b, u64 c)
{
return a + b + c;
}
-int noinline bpf_fentry_test4(void *a, char b, int c, u64 d)
+noinline int bpf_fentry_test4(void *a, char b, int c, u64 d)
{
return (long)a + b + c + d;
}
-int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
+noinline int bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
{
return a + (long)b + c + d + e;
}
-int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
+noinline int bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
{
return a + (long)b + c + d + (long)e + f;
}
@@ -553,13 +553,13 @@ struct bpf_fentry_test_t {
struct bpf_fentry_test_t *a;
};
-int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
+noinline int bpf_fentry_test7(struct bpf_fentry_test_t *arg)
{
- asm volatile ("": "+r"(arg));
+ asm volatile ("" : "+r"(arg));
return (long)arg;
}
-int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
+noinline int bpf_fentry_test8(struct bpf_fentry_test_t *arg)
{
return (long)arg->a;
}
@@ -569,12 +569,12 @@ __bpf_kfunc u32 bpf_fentry_test9(u32 *a)
return *a;
}
-int noinline bpf_fentry_test10(const void *a)
+noinline int bpf_fentry_test10(const void *a)
{
return (long)a;
}
-void noinline bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
+noinline void bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
{
}
@@ -598,7 +598,7 @@ __bpf_kfunc int bpf_modify_return_test_tp(int nonce)
return nonce;
}
-int noinline bpf_fentry_shadow_test(int a)
+noinline int bpf_fentry_shadow_test(int a)
{
return a + 1;
}
@@ -665,7 +665,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
void *data;
- if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom)
+ if (user_size > PAGE_SIZE - headroom - tailroom)
return ERR_PTR(-EINVAL);
size = SKB_DATA_ALIGN(size);
@@ -1001,6 +1001,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
+ if (size < ETH_HLEN)
+ return -EINVAL;
+
data = bpf_test_init(kattr, kattr->test.data_size_in,
size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
@@ -1207,9 +1210,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
{
bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES);
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ u32 retval = 0, meta_sz = 0, duration, max_linear_sz, size;
+ u32 linear_sz = kattr->test.data_size_in;
u32 batch_size = kattr->test.batch_size;
- u32 retval = 0, duration, max_data_sz;
- u32 size = kattr->test.data_size_in;
u32 headroom = XDP_PACKET_HEADROOM;
u32 repeat = kattr->test.repeat;
struct netdev_rx_queue *rxqueue;
@@ -1246,39 +1249,45 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (ctx) {
/* There can't be user provided data before the meta data */
- if (ctx->data_meta || ctx->data_end != size ||
+ if (ctx->data_meta || ctx->data_end > kattr->test.data_size_in ||
ctx->data > ctx->data_end ||
unlikely(xdp_metalen_invalid(ctx->data)) ||
(do_live && (kattr->test.data_out || kattr->test.ctx_out)))
goto free_ctx;
/* Meta data is allocated from the headroom */
headroom -= ctx->data;
- }
- max_data_sz = PAGE_SIZE - headroom - tailroom;
- if (size > max_data_sz) {
- /* disallow live data mode for jumbo frames */
- if (do_live)
- goto free_ctx;
- size = max_data_sz;
+ meta_sz = ctx->data;
+ linear_sz = ctx->data_end;
}
- data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
+ max_linear_sz = PAGE_SIZE - headroom - tailroom;
+ linear_sz = min_t(u32, linear_sz, max_linear_sz);
+
+ /* disallow live data mode for jumbo frames */
+ if (do_live && kattr->test.data_size_in > linear_sz)
+ goto free_ctx;
+
+ if (kattr->test.data_size_in - meta_sz < ETH_HLEN)
+ return -EINVAL;
+
+ data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
goto free_ctx;
}
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
- rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+ rxqueue->xdp_rxq.frag_size = PAGE_SIZE;
xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
- xdp_prepare_buff(&xdp, data, headroom, size, true);
+ xdp_prepare_buff(&xdp, data, headroom, linear_sz, true);
sinfo = xdp_get_shared_info_from_buff(&xdp);
ret = xdp_convert_md_to_buff(ctx, &xdp);
if (ret)
goto free_data;
+ size = linear_sz;
if (unlikely(kattr->test.data_size_in > size)) {
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
diff --git a/net/core/filter.c b/net/core/filter.c
index da391e2b0788..2af0a5f1d748 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4153,34 +4153,45 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
return 0;
}
-static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
- enum xdp_mem_type mem_type, bool release)
+static struct xdp_buff *bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+ bool tail, bool release)
{
- struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
+ struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
+ xsk_buff_get_head(xdp);
if (release) {
- xsk_buff_del_tail(zc_frag);
- __xdp_return(0, mem_type, false, zc_frag);
+ xsk_buff_del_frag(zc_frag);
} else {
- zc_frag->data_end -= shrink;
+ if (tail)
+ zc_frag->data_end -= shrink;
+ else
+ zc_frag->data += shrink;
}
+
+ return zc_frag;
}
static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
- int shrink)
+ int shrink, bool tail)
{
enum xdp_mem_type mem_type = xdp->rxq->mem.type;
bool release = skb_frag_size(frag) == shrink;
+ netmem_ref netmem = skb_frag_netmem(frag);
+ struct xdp_buff *zc_frag = NULL;
if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
- bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
- goto out;
+ netmem = 0;
+ zc_frag = bpf_xdp_shrink_data_zc(xdp, shrink, tail, release);
}
- if (release)
- __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
+ if (release) {
+ __xdp_return(netmem, mem_type, false, zc_frag);
+ } else {
+ if (!tail)
+ skb_frag_off_add(frag, shrink);
+ skb_frag_size_sub(frag, shrink);
+ }
-out:
return release;
}
@@ -4198,18 +4209,15 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
len_free += shrink;
offset -= shrink;
- if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
+ if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
n_frags_free++;
- } else {
- skb_frag_size_sub(frag, shrink);
- break;
- }
}
sinfo->nr_frags -= n_frags_free;
sinfo->xdp_frags_size -= len_free;
if (unlikely(!sinfo->nr_frags)) {
xdp_buff_clear_frags_flag(xdp);
+ xdp_buff_clear_frag_pfmemalloc(xdp);
xdp->data_end -= offset;
}
@@ -7431,6 +7439,8 @@ u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
offsetof(struct xdp_sock, FIELD)); \
} while (0)
+ BTF_TYPE_EMIT(struct bpf_xdp_sock);
+
switch (si->off) {
case offsetof(struct bpf_xdp_sock, queue_id):
BPF_XDP_SOCK_GET(queue_id);
@@ -9284,13 +9294,17 @@ static bool sock_addr_is_valid_access(int off, int size,
return false;
info->reg_type = PTR_TO_SOCKET;
break;
- default:
- if (type == BPF_READ) {
- if (size != size_default)
- return false;
- } else {
+ case bpf_ctx_range(struct bpf_sock_addr, user_family):
+ case bpf_ctx_range(struct bpf_sock_addr, family):
+ case bpf_ctx_range(struct bpf_sock_addr, type):
+ case bpf_ctx_range(struct bpf_sock_addr, protocol):
+ if (type != BPF_READ)
return false;
- }
+ if (size != size_default)
+ return false;
+ break;
+ default:
+ return false;
}
return true;
@@ -11990,6 +12004,16 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return func;
}
+/**
+ * bpf_skb_meta_pointer() - Gets a mutable pointer within the skb metadata area.
+ * @skb: socket buffer carrying the metadata
+ * @offset: offset into the metadata area, must be <= skb_metadata_len()
+ */
+void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
+{
+ return skb_metadata_end(skb) - skb_metadata_len(skb) + offset;
+}
+
__bpf_kfunc_start_defs();
__bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
struct bpf_dynptr *ptr__uninit)
@@ -12007,6 +12031,42 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
return 0;
}
+/**
+ * bpf_dynptr_from_skb_meta() - Initialize a dynptr to the skb metadata area.
+ * @skb_: socket buffer carrying the metadata
+ * @flags: future use, must be zero
+ * @ptr__uninit: dynptr to initialize
+ *
+ * Set up a dynptr for access to the metadata area earlier allocated from the
+ * XDP context with bpf_xdp_adjust_meta(). Serves as an alternative to
+ * &__sk_buff->data_meta.
+ *
+ * If passed @skb_ is a clone which shares the data with the original, the
+ * dynptr will be read-only. This limitation may be lifted in the future.
+ *
+ * Return:
+ * * %0 - dynptr ready to use
+ * * %-EINVAL - invalid flags, dynptr set to null
+ */
+__bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags,
+ struct bpf_dynptr *ptr__uninit)
+{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit;
+ struct sk_buff *skb = (struct sk_buff *)skb_;
+
+ if (flags) {
+ bpf_dynptr_set_null(ptr);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb));
+
+ if (skb_cloned(skb))
+ bpf_dynptr_set_rdonly(ptr);
+
+ return 0;
+}
+
__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags,
struct bpf_dynptr *ptr__uninit)
{
@@ -12160,6 +12220,98 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
return 0;
}
+/**
+ * bpf_xdp_pull_data() - Pull in non-linear xdp data.
+ * @x: &xdp_md associated with the XDP buffer
+ * @len: length of data to be made directly accessible in the linear part
+ *
+ * Pull in data in case the XDP buffer associated with @x is non-linear and
+ * not all @len are in the linear data area.
+ *
+ * Direct packet access allows reading and writing linear XDP data through
+ * packet pointers (i.e., &xdp_md->data + offsets). The amount of data which
+ * ends up in the linear part of the xdp_buff depends on the NIC and its
+ * configuration. When a frag-capable XDP program wants to directly access
+ * headers that may be in the non-linear area, call this kfunc to make sure
+ * the data is available in the linear area. Alternatively, use dynptr or
+ * bpf_xdp_{load,store}_bytes() to access data without pulling.
+ *
+ * This kfunc can also be used with bpf_xdp_adjust_head() to decapsulate
+ * headers in the non-linear data area.
+ *
+ * A call to this kfunc may reduce headroom. If there is not enough tailroom
+ * in the linear data area, metadata and data will be shifted down.
+ *
+ * A call to this kfunc is susceptible to change the buffer geometry.
+ * Therefore, at load time, all checks on pointers previously done by the
+ * verifier are invalidated and must be performed again, if the kfunc is used
+ * in combination with direct packet access.
+ *
+ * Return:
+ * * %0 - success
+ * * %-EINVAL - invalid len
+ */
+__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
+{
+ struct xdp_buff *xdp = (struct xdp_buff *)x;
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ int i, delta, shift, headroom, tailroom, n_frags_free = 0;
+ void *data_hard_end = xdp_data_hard_end(xdp);
+ int data_len = xdp->data_end - xdp->data;
+ void *start;
+
+ if (len <= data_len)
+ return 0;
+
+ if (unlikely(len > xdp_get_buff_len(xdp)))
+ return -EINVAL;
+
+ start = xdp_data_meta_unsupported(xdp) ? xdp->data : xdp->data_meta;
+
+ headroom = start - xdp->data_hard_start - sizeof(struct xdp_frame);
+ tailroom = data_hard_end - xdp->data_end;
+
+ delta = len - data_len;
+ if (unlikely(delta > tailroom + headroom))
+ return -EINVAL;
+
+ shift = delta - tailroom;
+ if (shift > 0) {
+ memmove(start - shift, start, xdp->data_end - start);
+
+ xdp->data_meta -= shift;
+ xdp->data -= shift;
+ xdp->data_end -= shift;
+ }
+
+ for (i = 0; i < sinfo->nr_frags && delta; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ u32 shrink = min_t(u32, delta, skb_frag_size(frag));
+
+ memcpy(xdp->data_end, skb_frag_address(frag), shrink);
+
+ xdp->data_end += shrink;
+ sinfo->xdp_frags_size -= shrink;
+ delta -= shrink;
+ if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
+ n_frags_free++;
+ }
+
+ if (unlikely(n_frags_free)) {
+ memmove(sinfo->frags, sinfo->frags + n_frags_free,
+ (sinfo->nr_frags - n_frags_free) * sizeof(skb_frag_t));
+
+ sinfo->nr_frags -= n_frags_free;
+
+ if (!sinfo->nr_frags) {
+ xdp_buff_clear_frags_flag(xdp);
+ xdp_buff_clear_frag_pfmemalloc(xdp);
+ }
+ }
+
+ return 0;
+}
+
__bpf_kfunc_end_defs();
int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@@ -12181,8 +12333,13 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
+BTF_KFUNCS_START(bpf_kfunc_check_set_skb_meta)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb_meta, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
+
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
+BTF_ID_FLAGS(func, bpf_xdp_pull_data)
BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)
@@ -12202,6 +12359,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
.set = &bpf_kfunc_check_set_skb,
};
+static const struct btf_kfunc_id_set bpf_kfunc_set_skb_meta = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_skb_meta,
+};
+
static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
.owner = THIS_MODULE,
.set = &bpf_kfunc_check_set_xdp,
@@ -12237,6 +12399,8 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb_meta);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb_meta);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
&bpf_kfunc_set_sock_addr);