summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-05-28 15:52:42 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-05-28 15:52:42 -0700
commit90b83efa6701656e02c86e7df2cb1765ea602d07 (patch)
tree59ac0306b5fe287af6691717ebcdbcc96163c3ca
parent1b98f357dadd6ea613a435fbaef1a5dd7b35fd21 (diff)
parentc5cebb241e27ed0c3f4c1d2ce63089398e0ed17e (diff)
Merge tag 'bpf-next-6.16' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-nextHEADmaster
Pull bpf updates from Alexei Starovoitov: - Fix and improve BTF deduplication of identical BTF types (Alan Maguire and Andrii Nakryiko) - Support up to 12 arguments in BPF trampoline on arm64 (Xu Kuohai and Alexis Lothoré) - Support load-acquire and store-release instructions in BPF JIT on riscv64 (Andrea Parri) - Fix uninitialized values in BPF_{CORE,PROBE}_READ macros (Anton Protopopov) - Streamline allowed helpers across program types (Feng Yang) - Support atomic update for hashtab of BPF maps (Hou Tao) - Implement json output for BPF helpers (Ihor Solodrai) - Several s390 JIT fixes (Ilya Leoshkevich) - Various sockmap fixes (Jiayuan Chen) - Support mmap of vmlinux BTF data (Lorenz Bauer) - Support BPF rbtree traversal and list peeking (Martin KaFai Lau) - Tests for sockmap/sockhash redirection (Michal Luczaj) - Introduce kfuncs for memory reads into dynptrs (Mykyta Yatsenko) - Add support for dma-buf iterators in BPF (T.J. Mercier) - The verifier support for __bpf_trap() (Yonghong Song) * tag 'bpf-next-6.16' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (135 commits) bpf, arm64: Remove unused-but-set function and variable. selftests/bpf: Add tests with stack ptr register in conditional jmp bpf: Do not include stack ptr register in precision backtracking bookkeeping selftests/bpf: enable many-args tests for arm64 bpf, arm64: Support up to 12 function arguments bpf: Check rcu_read_lock_trace_held() in bpf_map_lookup_percpu_elem() bpf: Avoid __bpf_prog_ret0_warn when jit fails bpftool: Add support for custom BTF path in prog load/loadall selftests/bpf: Add unit tests with __bpf_trap() kfunc bpf: Warn with __bpf_trap() kfunc maybe due to uninitialized variable bpf: Remove special_kfunc_set from verifier selftests/bpf: Add test for open coded dmabuf_iter selftests/bpf: Add test for dmabuf_iter bpf: Add open coded dmabuf iterator bpf: Add dmabuf iterator dma-buf: Rename debugfs symbols bpf: Fix error return value in bpf_copy_from_user_dynptr libbpf: Use mmap to parse vmlinux BTF from sysfs selftests: bpf: Add a test for mmapable vmlinux BTF btf: Allow mmap of vmlinux btf ...
-rw-r--r--Documentation/bpf/bpf_iterators.rst117
-rw-r--r--Documentation/bpf/kfuncs.rst17
-rw-r--r--arch/arm64/net/bpf_jit_comp.c242
-rw-r--r--arch/riscv/net/bpf_jit.h15
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c332
-rw-r--r--arch/riscv/net/bpf_jit_core.c3
-rw-r--r--arch/s390/include/asm/nospec-branch.h4
-rw-r--r--arch/s390/net/bpf_jit_comp.c138
-rw-r--r--drivers/dma-buf/dma-buf.c98
-rw-r--r--include/asm-generic/vmlinux.lds.h3
-rw-r--r--include/linux/bpf-cgroup.h8
-rw-r--r--include/linux/bpf.h20
-rw-r--r--include/linux/bpf_verifier.h24
-rw-r--r--include/linux/dma-buf.h4
-rw-r--r--include/uapi/linux/bpf.h19
-rw-r--r--kernel/bpf/Makefile3
-rw-r--r--kernel/bpf/bpf_struct_ops.c2
-rw-r--r--kernel/bpf/btf.c45
-rw-r--r--kernel/bpf/cgroup.c32
-rw-r--r--kernel/bpf/core.c29
-rw-r--r--kernel/bpf/dmabuf_iter.c150
-rw-r--r--kernel/bpf/hashtab.c148
-rw-r--r--kernel/bpf/helpers.c133
-rw-r--r--kernel/bpf/syscall.c10
-rw-r--r--kernel/bpf/sysfs_btf.c32
-rw-r--r--kernel/bpf/verifier.c636
-rw-r--r--kernel/sched/ext.c15
-rw-r--r--kernel/trace/bpf_trace.c321
-rw-r--r--kernel/trace/trace_uprobe.c2
-rw-r--r--net/bpf/test_run.c8
-rw-r--r--net/core/filter.c14
-rw-r--r--net/core/skmsg.c56
-rw-r--r--net/tls/tls_sw.c15
-rw-r--r--scripts/Makefile.btf2
-rwxr-xr-xscripts/bpf_doc.py119
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst10
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool4
-rw-r--r--tools/bpf/bpftool/cgroup.c14
-rw-r--r--tools/bpf/bpftool/link.c3
-rw-r--r--tools/bpf/bpftool/prog.c12
-rw-r--r--tools/include/uapi/linux/bpf.h19
-rw-r--r--tools/lib/bpf/bpf_core_read.h6
-rw-r--r--tools/lib/bpf/bpf_helpers.h8
-rw-r--r--tools/lib/bpf/btf.c226
-rw-r--r--tools/lib/bpf/libbpf.c87
-rw-r--r--tools/lib/bpf/libbpf.h6
-rw-r--r--tools/lib/bpf/libbpf.map4
-rw-r--r--tools/lib/bpf/libbpf_internal.h9
-rw-r--r--tools/lib/bpf/linker.c6
-rw-r--r--tools/lib/bpf/nlattr.c15
-rw-r--r--tools/testing/selftests/bpf/DENYLIST1
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch642
-rw-r--r--tools/testing/selftests/bpf/Makefile16
-rw-r--r--tools/testing/selftests/bpf/bench.c4
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_htab_mem.c3
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_sockmap.c598
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h5
-rw-r--r--tools/testing/selftests/bpf/config3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c84
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_split.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_sysfs.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c285
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c192
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fill_link_info.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rbtree.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/socket_helpers.h84
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c297
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c457
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_redir.c465
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_btf_ext.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c22
-rw-r--r--tools/testing/selftests/bpf/progs/bench_sockmap_prog.c65
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h (renamed from tools/testing/selftests/bpf/bpf_arena_spin_lock.h)15
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h5
-rw-r--r--tools/testing/selftests/bpf/progs/dmabuf_iter.c101
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c230
-rw-r--r--tools/testing/selftests/bpf/progs/fd_htab_lookup.c25
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c2
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_peek.c113
-rw-r--r--tools/testing/selftests/bpf/progs/prepare.c1
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_fail.c29
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_search.c206
-rw-r--r--tools/testing/selftests/bpf/progs/set_global_vars.c41
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_ext.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_ktls.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_redir.c68
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_trap.c71
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c12
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_load_acquire.c48
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c58
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_store_release.c39
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_metadata.c13
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c8
-rw-r--r--tools/testing/selftests/bpf/test_loader.c14
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c8
-rw-r--r--tools/testing/selftests/bpf/veristat.c101
108 files changed, 5801 insertions, 1713 deletions
diff --git a/Documentation/bpf/bpf_iterators.rst b/Documentation/bpf/bpf_iterators.rst
index 7f514cb6b052..189e3ec1c6c8 100644
--- a/Documentation/bpf/bpf_iterators.rst
+++ b/Documentation/bpf/bpf_iterators.rst
@@ -2,10 +2,117 @@
BPF Iterators
=============
+--------
+Overview
+--------
+
+BPF supports two separate entities collectively known as "BPF iterators": BPF
+iterator *program type* and *open-coded* BPF iterators. The former is
+a stand-alone BPF program type which, when attached and activated by user,
+will be called once for each entity (task_struct, cgroup, etc) that is being
+iterated. The latter is a set of BPF-side APIs implementing iterator
+functionality and available across multiple BPF program types. Open-coded
+iterators provide similar functionality to BPF iterator programs, but gives
+more flexibility and control to all other BPF program types. BPF iterator
+programs, on the other hand, can be used to implement anonymous or BPF
+FS-mounted special files, whose contents are generated by attached BPF iterator
+program, backed by seq_file functionality. Both are useful depending on
+specific needs.
+
+When adding a new BPF iterator program, it is expected that similar
+functionality will be added as open-coded iterator for maximum flexibility.
+It's also expected that iteration logic and code will be maximally shared and
+reused between two iterator API surfaces.
-----------
-Motivation
-----------
+------------------------
+Open-coded BPF Iterators
+------------------------
+
+Open-coded BPF iterators are implemented as tightly-coupled trios of kfuncs
+(constructor, next element fetch, destructor) and iterator-specific type
+describing on-the-stack iterator state, which is guaranteed by the BPF
+verifier to not be tampered with outside of the corresponding
+constructor/destructor/next APIs.
+
+Each kind of open-coded BPF iterator has its own associated
+struct bpf_iter_<type>, where <type> denotes a specific type of iterator.
+bpf_iter_<type> state needs to live on BPF program stack, so make sure it's
+small enough to fit on BPF stack. For performance reasons its best to avoid
+dynamic memory allocation for iterator state and size the state struct big
+enough to fit everything necessary. But if necessary, dynamic memory
+allocation is a way to bypass BPF stack limitations. Note, state struct size
+is part of iterator's user-visible API, so changing it will break backwards
+compatibility, so be deliberate about designing it.
+
+All kfuncs (constructor, next, destructor) have to be named consistently as
+bpf_iter_<type>_{new,next,destroy}(), respectively. <type> represents iterator
+type, and iterator state should be represented as a matching
+`struct bpf_iter_<type>` state type. Also, all iter kfuncs should have
+a pointer to this `struct bpf_iter_<type>` as the very first argument.
+
+Additionally:
+ - Constructor, i.e., `bpf_iter_<type>_new()`, can have arbitrary extra
+ number of arguments. Return type is not enforced either.
+ - Next method, i.e., `bpf_iter_<type>_next()`, has to return a pointer
+ type and should have exactly one argument: `struct bpf_iter_<type> *`
+ (const/volatile/restrict and typedefs are ignored).
+ - Destructor, i.e., `bpf_iter_<type>_destroy()`, should return void and
+ should have exactly one argument, similar to the next method.
+ - `struct bpf_iter_<type>` size is enforced to be positive and
+ a multiple of 8 bytes (to fit stack slots correctly).
+
+Such strictness and consistency allows to build generic helpers abstracting
+important, but boilerplate, details to be able to use open-coded iterators
+effectively and ergonomically (see libbpf's bpf_for_each() macro). This is
+enforced at kfunc registration point by the kernel.
+
+Constructor/next/destructor implementation contract is as follows:
+ - constructor, `bpf_iter_<type>_new()`, always initializes iterator state on
+ the stack. If any of the input arguments are invalid, constructor should
+ make sure to still initialize it such that subsequent next() calls will
+ return NULL. I.e., on error, *return error and construct empty iterator*.
+ Constructor kfunc is marked with KF_ITER_NEW flag.
+
+ - next method, `bpf_iter_<type>_next()`, accepts pointer to iterator state
+ and produces an element. Next method should always return a pointer. The
+ contract between BPF verifier is that next method *guarantees* that it
+ will eventually return NULL when elements are exhausted. Once NULL is
+ returned, subsequent next calls *should keep returning NULL*. Next method
+ is marked with KF_ITER_NEXT (and should also have KF_RET_NULL as
+ NULL-returning kfunc, of course).
+
+ - destructor, `bpf_iter_<type>_destroy()`, is always called once. Even if
+ constructor failed or next returned nothing. Destructor frees up any
+ resources and marks stack space used by `struct bpf_iter_<type>` as usable
+ for something else. Destructor is marked with KF_ITER_DESTROY flag.
+
+Any open-coded BPF iterator implementation has to implement at least these
+three methods. It is enforced that for any given type of iterator only
+applicable constructor/destructor/next are callable. I.e., verifier ensures
+you can't pass number iterator state into, say, cgroup iterator's next method.
+
+From a 10,000-feet BPF verification point of view, next methods are the points
+of forking a verification state, which are conceptually similar to what
+verifier is doing when validating conditional jumps. Verifier is branching out
+`call bpf_iter_<type>_next` instruction and simulates two outcomes: NULL
+(iteration is done) and non-NULL (new element is returned). NULL is simulated
+first and is supposed to reach exit without looping. After that non-NULL case
+is validated and it either reaches exit (for trivial examples with no real
+loop), or reaches another `call bpf_iter_<type>_next` instruction with the
+state equivalent to already (partially) validated one. State equivalency at
+that point means we technically are going to be looping forever without
+"breaking out" out of established "state envelope" (i.e., subsequent
+iterations don't add any new knowledge or constraints to the verifier state,
+so running 1, 2, 10, or a million of them doesn't matter). But taking into
+account the contract stating that iterator next method *has to* return NULL
+eventually, we can conclude that loop body is safe and will eventually
+terminate. Given we validated logic outside of the loop (NULL case), and
+concluded that loop body is safe (though potentially looping many times),
+verifier can claim safety of the overall program logic.
+
+------------------------
+BPF Iterators Motivation
+------------------------
There are a few existing ways to dump kernel data into user space. The most
popular one is the ``/proc`` system. For example, ``cat /proc/net/tcp6`` dumps
@@ -323,8 +430,8 @@ Now, in the userspace program, pass the pointer of struct to the
::
- link = bpf_program__attach_iter(prog, &opts); iter_fd =
- bpf_iter_create(bpf_link__fd(link));
+ link = bpf_program__attach_iter(prog, &opts);
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
If both *tid* and *pid* are zero, an iterator created from this struct
``bpf_iter_attach_opts`` will include every opened file of every task in the
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index a8f5782bd833..ae468b781d31 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -160,6 +160,23 @@ Or::
...
}
+2.2.6 __prog Annotation
+---------------------------
+This annotation is used to indicate that the argument needs to be fixed up to
+the bpf_prog_aux of the caller BPF program. Any value passed into this argument
+is ignored, and rewritten by the verifier.
+
+An example is given below::
+
+ __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
+ int (callback_fn)(void *map, int *key, void *value),
+ unsigned int flags,
+ void *aux__prog)
+ {
+ struct bpf_prog_aux *aux = aux__prog;
+ ...
+ }
+
.. _BPF_kfunc_nodef:
2.3 Using an existing kernel function
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 634d78422adb..da8b89dd2910 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2113,7 +2113,7 @@ bool bpf_jit_supports_subprog_tailcalls(void)
}
static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
- int args_off, int retval_off, int run_ctx_off,
+ int bargs_off, int retval_off, int run_ctx_off,
bool save_ret)
{
__le32 *branch;
@@ -2155,7 +2155,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
branch = ctx->image + ctx->idx;
emit(A64_NOP, ctx);
- emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
+ emit(A64_ADD_I(1, A64_R(0), A64_SP, bargs_off), ctx);
if (!p->jited)
emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
@@ -2180,7 +2180,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
}
static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
- int args_off, int retval_off, int run_ctx_off,
+ int bargs_off, int retval_off, int run_ctx_off,
__le32 **branches)
{
int i;
@@ -2190,7 +2190,7 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
*/
emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
for (i = 0; i < tl->nr_links; i++) {
- invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
+ invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off,
run_ctx_off, true);
/* if (*(u64 *)(sp + retval_off) != 0)
* goto do_fexit;
@@ -2204,23 +2204,125 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
}
}
-static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
+struct arg_aux {
+ /* how many args are passed through registers, the rest of the args are
+ * passed through stack
+ */
+ int args_in_regs;
+ /* how many registers are used to pass arguments */
+ int regs_for_args;
+ /* how much stack is used for additional args passed to bpf program
+ * that did not fit in original function registers
+ */
+ int bstack_for_args;
+ /* home much stack is used for additional args passed to the
+ * original function when called from trampoline (this one needs
+ * arguments to be properly aligned)
+ */
+ int ostack_for_args;
+};
+
+static int calc_arg_aux(const struct btf_func_model *m,
+ struct arg_aux *a)
{
- int i;
+ int stack_slots, nregs, slots, i;
+
+ /* verifier ensures m->nr_args <= MAX_BPF_FUNC_ARGS */
+ for (i = 0, nregs = 0; i < m->nr_args; i++) {
+ slots = (m->arg_size[i] + 7) / 8;
+ if (nregs + slots <= 8) /* passed through register ? */
+ nregs += slots;
+ else
+ break;
+ }
+
+ a->args_in_regs = i;
+ a->regs_for_args = nregs;
+ a->ostack_for_args = 0;
+ a->bstack_for_args = 0;
+
+ /* the rest arguments are passed through stack */
+ for (; i < m->nr_args; i++) {
+ /* We can not know for sure about exact alignment needs for
+ * struct passed on stack, so deny those
+ */
+ if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
+ return -ENOTSUPP;
+ stack_slots = (m->arg_size[i] + 7) / 8;
+ a->bstack_for_args += stack_slots * 8;
+ a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
+ }
+
+ return 0;
+}
- for (i = 0; i < nregs; i++) {
- emit(A64_STR64I(i, A64_SP, args_off), ctx);
- args_off += 8;
+static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes)
+{
+ if (effective_bytes) {
+ int garbage_bits = 64 - 8 * effective_bytes;
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ /* garbage bits are at the right end */
+ emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
+ emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
+#else
+ /* garbage bits are at the left end */
+ emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
+ emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
+#endif
}
}
-static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
+static void save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off,
+ const struct btf_func_model *m,
+ const struct arg_aux *a,
+ bool for_call_origin)
{
int i;
+ int reg;
+ int doff;
+ int soff;
+ int slots;
+ u8 tmp = bpf2a64[TMP_REG_1];
+
+ /* store arguments to the stack for the bpf program, or restore
+ * arguments from stack for the original function
+ */
+ for (reg = 0; reg < a->regs_for_args; reg++) {
+ emit(for_call_origin ?
+ A64_LDR64I(reg, A64_SP, bargs_off) :
+ A64_STR64I(reg, A64_SP, bargs_off),
+ ctx);
+ bargs_off += 8;
+ }
+
+ soff = 32; /* on stack arguments start from FP + 32 */
+ doff = (for_call_origin ? oargs_off : bargs_off);
+
+ /* save on stack arguments */
+ for (i = a->args_in_regs; i < m->nr_args; i++) {
+ slots = (m->arg_size[i] + 7) / 8;
+ /* verifier ensures arg_size <= 16, so slots equals 1 or 2 */
+ while (slots-- > 0) {
+ emit(A64_LDR64I(tmp, A64_FP, soff), ctx);
+ /* if there is unused space in the last slot, clear
+ * the garbage contained in the space.
+ */
+ if (slots == 0 && !for_call_origin)
+ clear_garbage(ctx, tmp, m->arg_size[i] % 8);
+ emit(A64_STR64I(tmp, A64_SP, doff), ctx);
+ soff += 8;
+ doff += 8;
+ }
+ }
+}
- for (i = 0; i < nregs; i++) {
- emit(A64_LDR64I(i, A64_SP, args_off), ctx);
- args_off += 8;
+static void restore_args(struct jit_ctx *ctx, int bargs_off, int nregs)
+{
+ int reg;
+
+ for (reg = 0; reg < nregs; reg++) {
+ emit(A64_LDR64I(reg, A64_SP, bargs_off), ctx);
+ bargs_off += 8;
}
}
@@ -2243,17 +2345,21 @@ static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
*/
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
struct bpf_tramp_links *tlinks, void *func_addr,
- int nregs, u32 flags)
+ const struct btf_func_model *m,
+ const struct arg_aux *a,
+ u32 flags)
{
int i;
int stack_size;
int retaddr_off;
int regs_off;
int retval_off;
- int args_off;
- int nregs_off;
+ int bargs_off;
+ int nfuncargs_off;
int ip_off;
int run_ctx_off;
+ int oargs_off;
+ int nfuncargs;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2262,31 +2368,38 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
bool is_struct_ops = is_struct_ops_tramp(fentry);
/* trampoline stack layout:
- * [ parent ip ]
- * [ FP ]
- * SP + retaddr_off [ self ip ]
- * [ FP ]
+ * [ parent ip ]
+ * [ FP ]
+ * SP + retaddr_off [ self ip ]
+ * [ FP ]
*
- * [ padding ] align SP to multiples of 16
+ * [ padding ] align SP to multiples of 16
*
- * [ x20 ] callee saved reg x20
- * SP + regs_off [ x19 ] callee saved reg x19
+ * [ x20 ] callee saved reg x20
+ * SP + regs_off [ x19 ] callee saved reg x19
*
- * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
- * BPF_TRAMP_F_RET_FENTRY_RET
+ * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
+ * BPF_TRAMP_F_RET_FENTRY_RET
+ * [ arg reg N ]
+ * [ ... ]
+ * SP + bargs_off [ arg reg 1 ] for bpf
*
- * [ arg reg N ]
- * [ ... ]
- * SP + args_off [ arg reg 1 ]
+ * SP + nfuncargs_off [ arg regs count ]
*
- * SP + nregs_off [ arg regs count ]
+ * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
*
- * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
+ * SP + run_ctx_off [ bpf_tramp_run_ctx ]
*
- * SP + run_ctx_off [ bpf_tramp_run_ctx ]
+ * [ stack arg N ]
+ * [ ... ]
+ * SP + oargs_off [ stack arg 1 ] for original func
*/
stack_size = 0;
+ oargs_off = stack_size;
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ stack_size += a->ostack_for_args;
+
run_ctx_off = stack_size;
/* room for bpf_tramp_run_ctx */
stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
@@ -2296,13 +2409,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (flags & BPF_TRAMP_F_IP_ARG)
stack_size += 8;
- nregs_off = stack_size;
+ nfuncargs_off = stack_size;
/* room for args count */
stack_size += 8;
- args_off = stack_size;
+ bargs_off = stack_size;
/* room for args */
- stack_size += nregs * 8;
+ nfuncargs = a->regs_for_args + a->bstack_for_args / 8;
+ stack_size += 8 * nfuncargs;
/* room for return value */
retval_off = stack_size;
@@ -2349,11 +2463,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
/* save arg regs count*/
- emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
- emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);
+ emit(A64_MOVZ(1, A64_R(10), nfuncargs, 0), ctx);
+ emit(A64_STR64I(A64_R(10), A64_SP, nfuncargs_off), ctx);
- /* save arg regs */
- save_args(ctx, args_off, nregs);
+ /* save args for bpf */
+ save_args(ctx, bargs_off, oargs_off, m, a, false);
/* save callee saved registers */
emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2369,7 +2483,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
for (i = 0; i < fentry->nr_links; i++)
- invoke_bpf_prog(ctx, fentry->links[i], args_off,
+ invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
retval_off, run_ctx_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET);
@@ -2379,12 +2493,13 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (!branches)
return -ENOMEM;
- invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
+ invoke_bpf_mod_ret(ctx, fmod_ret, bargs_off, retval_off,
run_ctx_off, branches);
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- restore_args(ctx, args_off, nregs);
+ /* save args for original func */
+ save_args(ctx, bargs_off, oargs_off, m, a, true);
/* call original func */
emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
@@ -2403,7 +2518,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
for (i = 0; i < fexit->nr_links; i++)
- invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
+ invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
run_ctx_off, false);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
@@ -2417,7 +2532,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
}
if (flags & BPF_TRAMP_F_RESTORE_REGS)
- restore_args(ctx, args_off, nregs);
+ restore_args(ctx, bargs_off, a->regs_for_args);
/* restore callee saved register x19 and x20 */
emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
@@ -2454,21 +2569,6 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
return ctx->idx;
}
-static int btf_func_model_nregs(const struct btf_func_model *m)
-{
- int nregs = m->nr_args;
- int i;
-
- /* extra registers needed for struct argument */
- for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
- /* The arg_size is at most 16 bytes, enforced by the verifier. */
- if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
- nregs += (m->arg_size[i] + 7) / 8 - 1;
- }
-
- return nregs;
-}
-
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr)
{
@@ -2477,14 +2577,14 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
.idx = 0,
};
struct bpf_tramp_image im;
- int nregs, ret;
+ struct arg_aux aaux;
+ int ret;
- nregs = btf_func_model_nregs(m);
- /* the first 8 registers are used for arguments */
- if (nregs > 8)
- return -ENOTSUPP;
+ ret = calc_arg_aux(m, &aaux);
+ if (ret < 0)
+ return ret;
- ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
+ ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, m, &aaux, flags);
if (ret < 0)
return ret;
@@ -2511,9 +2611,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
u32 flags, struct bpf_tramp_links *tlinks,
void *func_addr)
{
- int ret, nregs;
- void *image, *tmp;
u32 size = ro_image_end - ro_image;
+ struct arg_aux aaux;
+ void *image, *tmp;
+ int ret;
/* image doesn't need to be in module memory range, so we can
* use kvmalloc.
@@ -2529,13 +2630,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
.write = true,
};
- nregs = btf_func_model_nregs(m);
- /* the first 8 registers are used for arguments */
- if (nregs > 8)
- return -ENOTSUPP;
jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
- ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
+ ret = calc_arg_aux(m, &aaux);
+ if (ret)
+ goto out;
+ ret = prepare_trampoline(&ctx, im, tlinks, func_addr, m, &aaux, flags);
if (ret > 0 && validate_code(&ctx) < 0) {
ret = -EINVAL;
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index 1d1c78d4cff1..e7b032dfd17f 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -608,6 +608,21 @@ static inline u32 rv_fence(u8 pred, u8 succ)
return rv_i_insn(imm11_0, 0, 0, 0, 0xf);
}
+static inline void emit_fence_r_rw(struct rv_jit_context *ctx)
+{
+ emit(rv_fence(0x2, 0x3), ctx);
+}
+
+static inline void emit_fence_rw_w(struct rv_jit_context *ctx)
+{
+ emit(rv_fence(0x3, 0x1), ctx);
+}
+
+static inline void emit_fence_rw_rw(struct rv_jit_context *ctx)
+{
+ emit(rv_fence(0x3, 0x3), ctx);
+}
+
static inline u32 rv_nop(void)
{
return rv_i_insn(0, 0, 0, 0, 0x13);
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index ca60db75199d..10e01ff06312 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -473,11 +473,212 @@ static inline void emit_kcfi(u32 hash, struct rv_jit_context *ctx)
emit(hash, ctx);
}
-static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
- struct rv_jit_context *ctx)
+static int emit_load_8(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ int insns_start;
+
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lb(rd, off, rs), ctx);
+ else
+ emit(rv_lbu(rd, off, rs), ctx);
+ return ctx->ninsns - insns_start;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lb(rd, 0, RV_REG_T1), ctx);
+ else
+ emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
+ return ctx->ninsns - insns_start;
+}
+
+static int emit_load_16(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ int insns_start;
+
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lh(rd, off, rs), ctx);
+ else
+ emit(rv_lhu(rd, off, rs), ctx);
+ return ctx->ninsns - insns_start;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lh(rd, 0, RV_REG_T1), ctx);
+ else
+ emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
+ return ctx->ninsns - insns_start;
+}
+
+static int emit_load_32(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ int insns_start;
+
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lw(rd, off, rs), ctx);
+ else
+ emit(rv_lwu(rd, off, rs), ctx);
+ return ctx->ninsns - insns_start;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ if (sign_ext)
+ emit(rv_lw(rd, 0, RV_REG_T1), ctx);
+ else
+ emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
+ return ctx->ninsns - insns_start;
+}
+
+static int emit_load_64(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ int insns_start;
+
+ if (is_12b_int(off)) {
+ insns_start = ctx->ninsns;
+ emit_ld(rd, off, rs, ctx);
+ return ctx->ninsns - insns_start;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ insns_start = ctx->ninsns;
+ emit_ld(rd, 0, RV_REG_T1, ctx);
+ return ctx->ninsns - insns_start;
+}
+
+static void emit_store_8(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ if (is_12b_int(off)) {
+ emit(rv_sb(rd, off, rs), ctx);
+ return;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit(rv_sb(RV_REG_T1, 0, rs), ctx);
+}
+
+static void emit_store_16(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ if (is_12b_int(off)) {
+ emit(rv_sh(rd, off, rs), ctx);
+ return;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit(rv_sh(RV_REG_T1, 0, rs), ctx);
+}
+
+static void emit_store_32(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ if (is_12b_int(off)) {
+ emit_sw(rd, off, rs, ctx);
+ return;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit_sw(RV_REG_T1, 0, rs, ctx);
+}
+
+static void emit_store_64(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+{
+ if (is_12b_int(off)) {
+ emit_sd(rd, off, rs, ctx);
+ return;
+ }
+
+ emit_imm(RV_REG_T1, off, ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
+ emit_sd(RV_REG_T1, 0, rs, ctx);
+}
+
+static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn,
+ struct rv_jit_context *ctx)
+{
+ u8 code = insn->code;
+ s32 imm = insn->imm;
+ s16 off = insn->off;
+
+ switch (imm) {
+ /* dst_reg = load_acquire(src_reg + off16) */
+ case BPF_LOAD_ACQ:
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ emit_load_8(false, rd, off, rs, ctx);
+ break;
+ case BPF_H:
+ emit_load_16(false, rd, off, rs, ctx);
+ break;
+ case BPF_W:
+ emit_load_32(false, rd, off, rs, ctx);
+ break;
+ case BPF_DW:
+ emit_load_64(false, rd, off, rs, ctx);
+ break;
+ }
+ emit_fence_r_rw(ctx);
+
+ /* If our next insn is a redundant zext, return 1 to tell
+ * build_body() to skip it.
+ */
+ if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1]))
+ return 1;
+ break;
+ /* store_release(dst_reg + off16, src_reg) */
+ case BPF_STORE_REL:
+ emit_fence_rw_w(ctx);
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ emit_store_8(rd, off, rs, ctx);
+ break;
+ case BPF_H:
+ emit_store_16(rd, off, rs, ctx);
+ break;
+ case BPF_W:
+ emit_store_32(rd, off, rs, ctx);
+ break;
+ case BPF_DW:
+ emit_store_64(rd, off, rs, ctx);
+ break;
+ }
+ break;
+ default:
+ pr_err_once("bpf-jit: invalid atomic load/store opcode %02x\n", imm);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int emit_atomic_rmw(u8 rd, u8 rs, const struct bpf_insn *insn,
+ struct rv_jit_context *ctx)
{
- u8 r0;
+ u8 r0, code = insn->code;
+ s16 off = insn->off;
+ s32 imm = insn->imm;
int jmp_offset;
+ bool is64;
+
+ if (BPF_SIZE(code) != BPF_W && BPF_SIZE(code) != BPF_DW) {
+ pr_err_once("bpf-jit: 1- and 2-byte RMW atomics are not supported\n");
+ return -EINVAL;
+ }
+ is64 = BPF_SIZE(code) == BPF_DW;
if (off) {
if (is_12b_int(off)) {
@@ -554,9 +755,14 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx);
jmp_offset = ninsns_rvoff(-6);
emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
- emit(rv_fence(0x3, 0x3), ctx);
+ emit_fence_rw_rw(ctx);
break;
+ default:
+ pr_err_once("bpf-jit: invalid atomic RMW opcode %02x\n", imm);
+ return -EINVAL;
}
+
+ return 0;
}
#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
@@ -1650,8 +1856,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
{
- int insn_len, insns_start;
bool sign_ext;
+ int insn_len;
sign_ext = BPF_MODE(insn->code) == BPF_MEMSX ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
@@ -1663,78 +1869,16 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
switch (BPF_SIZE(code)) {
case BPF_B:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lb(rd, off, rs), ctx);
- else
- emit(rv_lbu(rd, off, rs), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lb(rd, 0, RV_REG_T1), ctx);
- else
- emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
- insn_len = ctx->ninsns - insns_start;
+ insn_len = emit_load_8(sign_ext, rd, off, rs, ctx);
break;
case BPF_H:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lh(rd, off, rs), ctx);
- else
- emit(rv_lhu(rd, off, rs), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lh(rd, 0, RV_REG_T1), ctx);
- else
- emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
- insn_len = ctx->ninsns - insns_start;
+ insn_len = emit_load_16(sign_ext, rd, off, rs, ctx);
break;
case BPF_W:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lw(rd, off, rs), ctx);
- else
- emit(rv_lwu(rd, off, rs), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lw(rd, 0, RV_REG_T1), ctx);
- else
- emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
- insn_len = ctx->ninsns - insns_start;
+ insn_len = emit_load_32(sign_ext, rd, off, rs, ctx);
break;
case BPF_DW:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- emit_ld(rd, off, rs, ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- emit_ld(rd, 0, RV_REG_T1, ctx);
- insn_len = ctx->ninsns - insns_start;
+ insn_len = emit_load_64(sign_ext, rd, off, rs, ctx);
break;
}
@@ -1879,49 +2023,27 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* STX: *(size *)(dst + off) = src */
case BPF_STX | BPF_MEM | BPF_B:
- if (is_12b_int(off)) {
- emit(rv_sb(rd, off, rs), ctx);
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit(rv_sb(RV_REG_T1, 0, rs), ctx);
+ emit_store_8(rd, off, rs, ctx);
break;
case BPF_STX | BPF_MEM | BPF_H:
- if (is_12b_int(off)) {
- emit(rv_sh(rd, off, rs), ctx);
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit(rv_sh(RV_REG_T1, 0, rs), ctx);
+ emit_store_16(rd, off, rs, ctx);
break;
case BPF_STX | BPF_MEM | BPF_W:
- if (is_12b_int(off)) {
- emit_sw(rd, off, rs, ctx);
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit_sw(RV_REG_T1, 0, rs, ctx);
+ emit_store_32(rd, off, rs, ctx);
break;
case BPF_STX | BPF_MEM | BPF_DW:
- if (is_12b_int(off)) {
- emit_sd(rd, off, rs, ctx);
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit_sd(RV_REG_T1, 0, rs, ctx);
+ emit_store_64(rd, off, rs, ctx);
break;
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
- emit_atomic(rd, rs, off, imm,
- BPF_SIZE(code) == BPF_DW, ctx);
+ if (bpf_atomic_is_load_store(insn))
+ ret = emit_atomic_ld_st(rd, rs, insn, ctx);
+ else
+ ret = emit_atomic_rmw(rd, rs, insn, ctx);
+ if (ret)
+ return ret;
break;
case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index f8cd2f70a7fb..f6ca5cfa6b2f 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -26,9 +26,8 @@ static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
int ret;
ret = bpf_jit_emit_insn(insn, ctx, extra_pass);
- /* BPF_LD | BPF_IMM | BPF_DW: skip the next instruction. */
if (ret > 0)
- i++;
+ i++; /* skip the next instruction */
if (offset)
offset[i] = ctx->ninsns;
if (ret < 0)
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index 192835a3e24d..c7c96282f011 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -26,8 +26,6 @@ static inline bool nospec_uses_trampoline(void)
return __is_defined(CC_USING_EXPOLINE) && !nospec_disable;
}
-#ifdef CONFIG_EXPOLINE_EXTERN
-
void __s390_indirect_jump_r1(void);
void __s390_indirect_jump_r2(void);
void __s390_indirect_jump_r3(void);
@@ -44,8 +42,6 @@ void __s390_indirect_jump_r13(void);
void __s390_indirect_jump_r14(void);
void __s390_indirect_jump_r15(void);
-#endif
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_EXPOLINE_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 0776dfde2dba..c7f8313ba449 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -48,8 +48,6 @@ struct bpf_jit {
int lit64; /* Current position in 64-bit literal pool */
int base_ip; /* Base address for literal pool */
int exit_ip; /* Address of exit */
- int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
- int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int excnt; /* Number of exception table entries */
int prologue_plt_ret; /* Return address for prologue hotpatch PLT */
@@ -127,6 +125,18 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
jit->seen_regs |= (1 << r1);
}
+static s32 off_to_pcrel(struct bpf_jit *jit, u32 off)
+{
+ return off - jit->prg;
+}
+
+static s64 ptr_to_pcrel(struct bpf_jit *jit, const void *ptr)
+{
+ if (jit->prg_buf)
+ return (const u8 *)ptr - ((const u8 *)jit->prg_buf + jit->prg);
+ return 0;
+}
+
#define REG_SET_SEEN(b1) \
({ \
reg_set_seen(jit, b1); \
@@ -201,7 +211,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT4_PCREL_RIC(op, mask, target) \
({ \
- int __rel = ((target) - jit->prg) / 2; \
+ int __rel = off_to_pcrel(jit, target) / 2; \
_EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \
})
@@ -239,7 +249,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target) \
({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
+ unsigned int rel = off_to_pcrel(jit, target) / 2; \
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \
(op2) | (mask) << 12); \
REG_SET_SEEN(b1); \
@@ -248,7 +258,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target) \
({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
+ unsigned int rel = off_to_pcrel(jit, target) / 2; \
_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \
(rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \
REG_SET_SEEN(b1); \
@@ -257,29 +267,41 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
- int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \
+ int rel = off_to_pcrel(jit, addrs[(i) + (off) + 1]) / 2;\
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
+static void emit6_pcrel_ril(struct bpf_jit *jit, u32 op, s64 pcrel)
+{
+ u32 pc32dbl = (s32)(pcrel / 2);
+
+ _EMIT6(op | pc32dbl >> 16, pc32dbl & 0xffff);
+}
+
+static void emit6_pcrel_rilb(struct bpf_jit *jit, u32 op, u8 b, s64 pcrel)
+{
+ emit6_pcrel_ril(jit, op | reg_high(b) << 16, pcrel);
+ REG_SET_SEEN(b);
+}
+
#define EMIT6_PCREL_RILB(op, b, target) \
-({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
- _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
- REG_SET_SEEN(b); \
-})
+ emit6_pcrel_rilb(jit, op, b, off_to_pcrel(jit, target))
-#define EMIT6_PCREL_RIL(op, target) \
-({ \
- unsigned int rel = (int)((target) - jit->prg) / 2; \
- _EMIT6((op) | rel >> 16, rel & 0xffff); \
-})
+#define EMIT6_PCREL_RILB_PTR(op, b, target_ptr) \
+ emit6_pcrel_rilb(jit, op, b, ptr_to_pcrel(jit, target_ptr))
+
+static void emit6_pcrel_rilc(struct bpf_jit *jit, u32 op, u8 mask, s64 pcrel)
+{
+ emit6_pcrel_ril(jit, op | mask << 20, pcrel);
+}
#define EMIT6_PCREL_RILC(op, mask, target) \
-({ \
- EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \
-})
+ emit6_pcrel_rilc(jit, op, mask, off_to_pcrel(jit, target))
+
+#define EMIT6_PCREL_RILC_PTR(op, mask, target_ptr) \
+ emit6_pcrel_rilc(jit, op, mask, ptr_to_pcrel(jit, target_ptr))
#define _EMIT6_IMM(op, imm) \
({ \
@@ -503,7 +525,7 @@ static void bpf_skip(struct bpf_jit *jit, int size)
{
if (size >= 6 && !is_valid_rel(size)) {
/* brcl 0xf,size */
- EMIT6_PCREL_RIL(0xc0f4000000, size);
+ EMIT6_PCREL_RILC(0xc0040000, 0xf, size);
size -= 6;
} else if (size >= 4 && is_valid_rel(size)) {
/* brc 0xf,size */
@@ -605,43 +627,30 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
}
/* Setup stack and backchain */
if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* lgr %w1,%r15 (backchain) */
- EMIT4(0xb9040000, REG_W1, REG_15);
+ /* lgr %w1,%r15 (backchain) */
+ EMIT4(0xb9040000, REG_W1, REG_15);
/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
/* aghi %r15,-STK_OFF */
EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* stg %w1,152(%r15) (backchain) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
- REG_15, 152);
+ /* stg %w1,152(%r15) (backchain) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+ REG_15, 152);
}
}
/*
- * Emit an expoline for a jump that follows
+ * Jump using a register either directly or via an expoline thunk
*/
-static void emit_expoline(struct bpf_jit *jit)
-{
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
-}
-
-/*
- * Emit __s390_indirect_jump_r1 thunk if necessary
- */
-static void emit_r1_thunk(struct bpf_jit *jit)
-{
- if (nospec_uses_trampoline()) {
- jit->r1_thunk_ip = jit->prg;
- emit_expoline(jit);
- /* br %r1 */
- _EMIT2(0x07f1);
- }
-}
+#define EMIT_JUMP_REG(reg) do { \
+ if (nospec_uses_trampoline()) \
+ /* brcl 0xf,__s390_indirect_jump_rN */ \
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0x0f, \
+ __s390_indirect_jump_r ## reg); \
+ else \
+ /* br %rN */ \
+ _EMIT2(0x07f0 | reg); \
+} while (0)
/*
* Call r1 either directly or via __s390_indirect_jump_r1 thunk
@@ -650,7 +659,8 @@ static void call_r1(struct bpf_jit *jit)
{
if (nospec_uses_trampoline())
/* brasl %r14,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+ __s390_indirect_jump_r1);
else
/* basr %r14,%r1 */
EMIT2(0x0d00, REG_14, REG_1);
@@ -666,16 +676,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
- if (nospec_uses_trampoline()) {
- jit->r14_thunk_ip = jit->prg;
- /* Generate __s390_indirect_jump_r14 thunk */
- emit_expoline(jit);
- }
- /* br %r14 */
- _EMIT2(0x07fe);
-
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- emit_r1_thunk(jit);
+ EMIT_JUMP_REG(14);
jit->prg = ALIGN(jit->prg, 8);
jit->prologue_plt = jit->prg;
@@ -1877,7 +1878,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* aghi %r1,tail_call_start */
EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
/* brcl 0xf,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
+ __s390_indirect_jump_r1);
} else {
/* bc 0xf,tail_call_start(%r1) */
_EMIT4(0x47f01000 + jit->tail_call_start);
@@ -2585,9 +2587,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
if (nr_stack_args > MAX_NR_STACK_ARGS)
return -ENOTSUPP;
- /* Return to %r14, since func_addr and %r0 are not available. */
- if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) ||
- (flags & BPF_TRAMP_F_INDIRECT))
+ /* Return to %r14 in the struct_ops case. */
+ if (flags & BPF_TRAMP_F_INDIRECT)
flags |= BPF_TRAMP_F_SKIP_FRAME;
/*
@@ -2847,17 +2848,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
0xf000 | tjit->tccnt_off);
/* aghi %r15,stack_size */
EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
- /* Emit an expoline for the following indirect jump. */
- if (nospec_uses_trampoline())
- emit_expoline(jit);
if (flags & BPF_TRAMP_F_SKIP_FRAME)
- /* br %r14 */
- _EMIT2(0x07fe);
+ EMIT_JUMP_REG(14);
else
- /* br %r1 */
- _EMIT2(0x07f1);
-
- emit_r1_thunk(jit);
+ EMIT_JUMP_REG(1);
return 0;
}
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 0c48d41dd5eb..890ecac04dac 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -19,7 +19,9 @@
#include <linux/anon_inodes.h>
#include <linux/export.h>
#include <linux/debugfs.h>
+#include <linux/list.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/seq_file.h>
#include <linux/sync_file.h>
#include <linux/poll.h>
@@ -35,35 +37,91 @@
static inline int is_dma_buf_file(struct file *);
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-static DEFINE_MUTEX(debugfs_list_mutex);
-static LIST_HEAD(debugfs_list);
+static DEFINE_MUTEX(dmabuf_list_mutex);
+static LIST_HEAD(dmabuf_list);
-static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf)
+static void __dma_buf_list_add(struct dma_buf *dmabuf)
{
- mutex_lock(&debugfs_list_mutex);
- list_add(&dmabuf->list_node, &debugfs_list);
- mutex_unlock(&debugfs_list_mutex);
+ mutex_lock(&dmabuf_list_mutex);
+ list_add(&dmabuf->list_node, &dmabuf_list);
+ mutex_unlock(&dmabuf_list_mutex);
}
-static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf)
+static void __dma_buf_list_del(struct dma_buf *dmabuf)
{
if (!dmabuf)
return;
- mutex_lock(&debugfs_list_mutex);
+ mutex_lock(&dmabuf_list_mutex);
list_del(&dmabuf->list_node);
- mutex_unlock(&debugfs_list_mutex);
+ mutex_unlock(&dmabuf_list_mutex);
}
-#else
-static void __dma_buf_debugfs_list_add(struct dma_buf *dmabuf)
+
+/**
+ * dma_buf_iter_begin - begin iteration through global list of all DMA buffers
+ *
+ * Returns the first buffer in the global list of DMA-bufs that's not in the
+ * process of being destroyed. Increments that buffer's reference count to
+ * prevent buffer destruction. Callers must release the reference, either by
+ * continuing iteration with dma_buf_iter_next(), or with dma_buf_put().
+ *
+ * Return:
+ * * First buffer from global list, with refcount elevated
+ * * NULL if no active buffers are present
+ */
+struct dma_buf *dma_buf_iter_begin(void)
{
+ struct dma_buf *ret = NULL, *dmabuf;
+
+ /*
+ * The list mutex does not protect a dmabuf's refcount, so it can be
+ * zeroed while we are iterating. We cannot call get_dma_buf() since the
+ * caller may not already own a reference to the buffer.
+ */
+ mutex_lock(&dmabuf_list_mutex);
+ list_for_each_entry(dmabuf, &dmabuf_list, list_node) {
+ if (file_ref_get(&dmabuf->file->f_ref)) {
+ ret = dmabuf;
+ break;
+ }
+ }
+ mutex_unlock(&dmabuf_list_mutex);
+ return ret;
}
-static void __dma_buf_debugfs_list_del(struct dma_buf *dmabuf)
+/**
+ * dma_buf_iter_next - continue iteration through global list of all DMA buffers
+ * @dmabuf: [in] pointer to dma_buf
+ *
+ * Decrements the reference count on the provided buffer. Returns the next
+ * buffer from the remainder of the global list of DMA-bufs with its reference
+ * count incremented. Callers must release the reference, either by continuing
+ * iteration with dma_buf_iter_next(), or with dma_buf_put().
+ *
+ * Return:
+ * * Next buffer from global list, with refcount elevated
+ * * NULL if no additional active buffers are present
+ */
+struct dma_buf *dma_buf_iter_next(struct dma_buf *dmabuf)
{
+ struct dma_buf *ret = NULL;
+
+ /*
+ * The list mutex does not protect a dmabuf's refcount, so it can be
+ * zeroed while we are iterating. We cannot call get_dma_buf() since the
+ * caller may not already own a reference to the buffer.
+ */
+ mutex_lock(&dmabuf_list_mutex);
+ dma_buf_put(dmabuf);
+ list_for_each_entry_continue(dmabuf, &dmabuf_list, list_node) {
+ if (file_ref_get(&dmabuf->file->f_ref)) {
+ ret = dmabuf;
+ break;
+ }
+ }
+ mutex_unlock(&dmabuf_list_mutex);
+ return ret;
}
-#endif
static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
{
@@ -115,7 +173,7 @@ static int dma_buf_file_release(struct inode *inode, struct file *file)
if (!is_dma_buf_file(file))
return -EINVAL;
- __dma_buf_debugfs_list_del(file->private_data);
+ __dma_buf_list_del(file->private_data);
return 0;
}
@@ -685,7 +743,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
file->f_path.dentry->d_fsdata = dmabuf;
dmabuf->file = file;
- __dma_buf_debugfs_list_add(dmabuf);
+ __dma_buf_list_add(dmabuf);
return dmabuf;
@@ -1563,7 +1621,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
size_t size = 0;
int ret;
- ret = mutex_lock_interruptible(&debugfs_list_mutex);
+ ret = mutex_lock_interruptible(&dmabuf_list_mutex);
if (ret)
return ret;
@@ -1572,7 +1630,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\tname\n",
"size", "flags", "mode", "count", "ino");
- list_for_each_entry(buf_obj, &debugfs_list, list_node) {
+ list_for_each_entry(buf_obj, &dmabuf_list, list_node) {
ret = dma_resv_lock_interruptible(buf_obj->resv, NULL);
if (ret)
@@ -1609,11 +1667,11 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
seq_printf(s, "\nTotal %d objects, %zu bytes\n", count, size);
- mutex_unlock(&debugfs_list_mutex);
+ mutex_unlock(&dmabuf_list_mutex);
return 0;
error_unlock:
- mutex_unlock(&debugfs_list_mutex);
+ mutex_unlock(&dmabuf_list_mutex);
return ret;
}
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 66409bc3a4e0..fa5f19b8d53a 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -667,10 +667,11 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG)
*/
#ifdef CONFIG_DEBUG_INFO_BTF
#define BTF \
+ . = ALIGN(PAGE_SIZE); \
.BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \
BOUNDED_SECTION_BY(.BTF, _BTF) \
} \
- . = ALIGN(4); \
+ . = ALIGN(PAGE_SIZE); \
.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) { \
*(.BTF_ids) \
}
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 60d1511b4f4d..70c8b94e797a 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -426,8 +426,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
const struct bpf_func_proto *
cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
-const struct bpf_func_proto *
-cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
#else
static inline void cgroup_bpf_lifetime_notifier_init(void)
@@ -466,12 +464,6 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
-static inline const struct bpf_func_proto *
-cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
-{
- return NULL;
-}
-
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3f0cc89c0622..5b25d278409b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -346,6 +346,12 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
}
}
+#if IS_ENABLED(CONFIG_DEBUG_KERNEL)
+#define BPF_WARN_ONCE(cond, format...) WARN_ONCE(cond, format)
+#else
+#define BPF_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
+#endif
+
static inline u32 btf_field_type_size(enum btf_field_type type)
{
switch (type) {
@@ -1349,6 +1355,20 @@ u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len);
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
+int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset,
+ void *src, u32 len, u64 flags);
+void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
+ void *buffer__opt, u32 buffer__szk);
+
+static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
+{
+ u32 size = __bpf_dynptr_size(ptr);
+
+ if (len > size || offset > size - len)
+ return -E2BIG;
+
+ return 0;
+}
#ifdef CONFIG_BPF_JIT
int bpf_trampoline_link_prog(struct bpf_tramp_link *link,
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9734544b6957..256274acb1d8 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -356,7 +356,11 @@ enum {
INSN_F_SPI_MASK = 0x3f, /* 6 bits */
INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */
- INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */
+ INSN_F_STACK_ACCESS = BIT(9),
+
+ INSN_F_DST_REG_STACK = BIT(10), /* dst_reg is PTR_TO_STACK */
+ INSN_F_SRC_REG_STACK = BIT(11), /* src_reg is PTR_TO_STACK */
+ /* total 12 bits are used now. */
};
static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES);
@@ -365,9 +369,9 @@ static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8);
struct bpf_insn_hist_entry {
u32 idx;
/* insn idx can't be bigger than 1 million */
- u32 prev_idx : 22;
- /* special flags, e.g., whether insn is doing register stack spill/load */
- u32 flags : 10;
+ u32 prev_idx : 20;
+ /* special INSN_F_xxx flags */
+ u32 flags : 12;
/* additional registers that need precision tracking when this
* jump is backtracked, vector of six 10-bit records
*/
@@ -591,6 +595,7 @@ struct bpf_insn_aux_data {
* bpf_fastcall pattern.
*/
u8 fastcall_spills_num:3;
+ u8 arg_prog:4;
/* below fields are initialized once */
unsigned int orig_idx; /* original instruction index */
@@ -838,6 +843,17 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
u32 insn_off,
const char *prefix_fmt, ...);
+#define verifier_bug_if(cond, env, fmt, args...) \
+ ({ \
+ bool __cond = (cond); \
+ if (unlikely(__cond)) { \
+ BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args); \
+ bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args); \
+ } \
+ (__cond); \
+ })
+#define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args)
+
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *cur = env->cur_state;
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 544f8f8c3f44..d58e329ac0e7 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -361,10 +361,8 @@ struct dma_buf {
*/
struct module *owner;
-#if IS_ENABLED(CONFIG_DEBUG_FS)
/** @list_node: node for dma_buf accounting and debugging. */
struct list_head list_node;
-#endif
/** @priv: exporter specific private data for this buffer object. */
void *priv;
@@ -609,4 +607,6 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map);
void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map);
int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map);
+struct dma_buf *dma_buf_iter_begin(void);
+struct dma_buf *dma_buf_iter_next(struct dma_buf *dmbuf);
#endif /* __DMA_BUF_H__ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index fd404729b115..85180e4aaa5a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1506,7 +1506,7 @@ union bpf_attr {
__s32 map_token_fd;
};
- struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
__u32 map_fd;
__aligned_u64 key;
union {
@@ -1995,11 +1995,15 @@ union bpf_attr {
* long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
* Store *len* bytes from address *from* into the packet
- * associated to *skb*, at *offset*. *flags* are a combination of
- * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
- * checksum for the packet after storing the bytes) and
- * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
- * **->swhash** and *skb*\ **->l4hash** to 0).
+ * associated to *skb*, at *offset*. The *flags* are a combination
+ * of the following values:
+ *
+ * **BPF_F_RECOMPUTE_CSUM**
+ * Automatically update *skb*\ **->csum** after storing the
+ * bytes.
+ * **BPF_F_INVALIDATE_HASH**
+ * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\
+ * **->l4hash** to 0.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -2051,7 +2055,7 @@ union bpf_attr {
* untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
* for updates resulting in a null checksum the value is set to
* **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
- * the checksum is to be computed against a pseudo-header.
+ * that the modified header field is part of the pseudo-header.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
@@ -6723,6 +6727,7 @@ struct bpf_link_info {
__u32 name_len;
__u32 offset; /* offset from file_name */
__u64 cookie;
+ __u64 ref_ctr_offset;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 70502f038b92..3a335c50e6e3 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -53,6 +53,9 @@ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o
obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o
obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o
obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o
+ifeq ($(CONFIG_DMA_SHARED_BUFFER),y)
+obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o
+endif
CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE)
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index db13ee70d94d..96113633e391 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -601,7 +601,7 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
if (model->ret_size > 0)
flags |= BPF_TRAMP_F_RET_FENTRY_RET;
- size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
+ size = arch_bpf_trampoline_size(model, flags, tlinks, stub_func);
if (size <= 0)
return size ? : -EFAULT;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 324c47ab377a..1d2cf898e21e 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -26,6 +26,7 @@
#include <linux/bsearch.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
+#include <linux/overflow.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -3957,7 +3958,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
/* This needs to be kzalloc to zero out padding and unused fields, see
* comment in btf_record_equal.
*/
- rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN);
+ rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL | __GFP_NOWARN);
if (!rec)
return ERR_PTR(-ENOMEM);
@@ -5583,7 +5584,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
if (id < 0)
continue;
- new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
+ new_aof = krealloc(aof, struct_size(new_aof, ids, aof->cnt + 1),
GFP_KERNEL | __GFP_NOWARN);
if (!new_aof) {
ret = -ENOMEM;
@@ -5610,7 +5611,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
if (ret != BTF_FIELD_FOUND)
continue;
- new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]),
+ new_aof = krealloc(aof, struct_size(new_aof, ids, aof->cnt + 1),
GFP_KERNEL | __GFP_NOWARN);
if (!new_aof) {
ret = -ENOMEM;
@@ -5647,7 +5648,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
continue;
parse:
tab_cnt = tab ? tab->cnt : 0;
- new_tab = krealloc(tab, offsetof(struct btf_struct_metas, types[tab_cnt + 1]),
+ new_tab = krealloc(tab, struct_size(new_tab, types, tab_cnt + 1),
GFP_KERNEL | __GFP_NOWARN);
if (!new_tab) {
ret = -ENOMEM;
@@ -6383,12 +6384,11 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
return prog->aux->attach_btf;
}
-static bool is_int_ptr(struct btf *btf, const struct btf_type *t)
+static bool is_void_or_int_ptr(struct btf *btf, const struct btf_type *t)
{
/* skip modifiers */
t = btf_type_skip_modifiers(btf, t->type, NULL);
-
- return btf_type_is_int(t);
+ return btf_type_is_void(t) || btf_type_is_int(t);
}
u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto,
@@ -6777,14 +6777,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
}
- if (t->type == 0)
- /* This is a pointer to void.
- * It is the same as scalar from the verifier safety pov.
- * No further pointer walking is allowed.
- */
- return true;
-
- if (is_int_ptr(btf, t))
+ /*
+ * If it's a pointer to void, it's the same as scalar from the verifier
+ * safety POV. Either way, no futher pointer walking is allowed.
+ */
+ if (is_void_or_int_ptr(btf, t))
return true;
/* this is a pointer to another type */
@@ -6830,10 +6827,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
/* Is this a func with potential NULL args? */
if (strcmp(tname, raw_tp_null_args[i].func))
continue;
- if (raw_tp_null_args[i].mask & (0x1 << (arg * 4)))
+ if (raw_tp_null_args[i].mask & (0x1ULL << (arg * 4)))
info->reg_type |= PTR_MAYBE_NULL;
/* Is the current arg IS_ERR? */
- if (raw_tp_null_args[i].mask & (0x2 << (arg * 4)))
+ if (raw_tp_null_args[i].mask & (0x2ULL << (arg * 4)))
ptr_err_raw_tp = true;
break;
}
@@ -7663,7 +7660,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
return 0;
if (!prog->aux->func_info) {
- bpf_log(log, "Verifier bug\n");
+ verifier_bug(env, "func_info undefined");
return -EFAULT;
}
@@ -7687,7 +7684,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
tname = btf_name_by_offset(btf, fn_t->name_off);
if (prog->aux->func_info_aux[subprog].unreliable) {
- bpf_log(log, "Verifier bug in function %s()\n", tname);
+ verifier_bug(env, "unreliable BTF for function %s()", tname);
return -EFAULT;
}
if (prog_type == BPF_PROG_TYPE_EXT)
@@ -8564,7 +8561,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
/* Grow set */
set = krealloc(tab->sets[hook],
- offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]),
+ struct_size(set, pairs, set_cnt + add_set->cnt),
GFP_KERNEL | __GFP_NOWARN);
if (!set) {
ret = -ENOMEM;
@@ -8850,7 +8847,7 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c
}
tab = krealloc(btf->dtor_kfunc_tab,
- offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]),
+ struct_size(tab, dtors, tab_cnt + add_cnt),
GFP_KERNEL | __GFP_NOWARN);
if (!tab) {
ret = -ENOMEM;
@@ -9408,8 +9405,7 @@ btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops,
tab = btf->struct_ops_tab;
if (!tab) {
- tab = kzalloc(offsetof(struct btf_struct_ops_tab, ops[4]),
- GFP_KERNEL);
+ tab = kzalloc(struct_size(tab, ops, 4), GFP_KERNEL);
if (!tab)
return -ENOMEM;
tab->capacity = 4;
@@ -9422,8 +9418,7 @@ btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops,
if (tab->cnt == tab->capacity) {
new_tab = krealloc(tab,
- offsetof(struct btf_struct_ops_tab,
- ops[tab->capacity * 2]),
+ struct_size(tab, ops, tab->capacity * 2),
GFP_KERNEL);
if (!new_tab)
return -ENOMEM;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index cad0194552fb..9122c39870bf 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1687,10 +1687,6 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
if (func_proto)
return func_proto;
- func_proto = cgroup_current_func_proto(func_id, prog);
- if (func_proto)
- return func_proto;
-
switch (func_id) {
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
@@ -2238,10 +2234,6 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
if (func_proto)
return func_proto;
- func_proto = cgroup_current_func_proto(func_id, prog);
- if (func_proto)
- return func_proto;
-
switch (func_id) {
case BPF_FUNC_sysctl_get_name:
return &bpf_sysctl_get_name_proto;
@@ -2385,10 +2377,6 @@ cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
if (func_proto)
return func_proto;
- func_proto = cgroup_current_func_proto(func_id, prog);
- if (func_proto)
- return func_proto;
-
switch (func_id) {
#ifdef CONFIG_NET
case BPF_FUNC_get_netns_cookie:
@@ -2635,23 +2623,3 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
}
-
-/* Common helpers for cgroup hooks with valid process context. */
-const struct bpf_func_proto *
-cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
-{
- switch (func_id) {
- case BPF_FUNC_get_current_uid_gid:
- return &bpf_get_current_uid_gid_proto;
- case BPF_FUNC_get_current_comm:
- return &bpf_get_current_comm_proto;
-#ifdef CONFIG_CGROUP_NET_CLASSID
- case BPF_FUNC_get_cgroup_classid:
- return &bpf_get_cgroup_classid_curr_proto;
-#endif
- case BPF_FUNC_current_task_under_cgroup:
- return &bpf_current_task_under_cgroup_proto;
- default:
- return NULL;
- }
-}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ba6b6118cf50..c20babbf998f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2358,8 +2358,8 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
return 0;
}
-bool bpf_prog_map_compatible(struct bpf_map *map,
- const struct bpf_prog *fp)
+static bool __bpf_prog_map_compatible(struct bpf_map *map,
+ const struct bpf_prog *fp)
{
enum bpf_prog_type prog_type = resolve_prog_type(fp);
bool ret;
@@ -2368,14 +2368,6 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
if (fp->kprobe_override)
return false;
- /* XDP programs inserted into maps are not guaranteed to run on
- * a particular netdev (and can run outside driver context entirely
- * in the case of devmap and cpumap). Until device checks
- * are implemented, prohibit adding dev-bound programs to program maps.
- */
- if (bpf_prog_is_dev_bound(aux))
- return false;
-
spin_lock(&map->owner.lock);
if (!map->owner.type) {
/* There's no owner yet where we could check for
@@ -2409,6 +2401,19 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
return ret;
}
+bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp)
+{
+ /* XDP programs inserted into maps are not guaranteed to run on
+ * a particular netdev (and can run outside driver context entirely
+ * in the case of devmap and cpumap). Until device checks
+ * are implemented, prohibit adding dev-bound programs to program maps.
+ */
+ if (bpf_prog_is_dev_bound(fp->aux))
+ return false;
+
+ return __bpf_prog_map_compatible(map, fp);
+}
+
static int bpf_check_tail_call(const struct bpf_prog *fp)
{
struct bpf_prog_aux *aux = fp->aux;
@@ -2421,7 +2426,7 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
if (!map_type_contains_progs(map))
continue;
- if (!bpf_prog_map_compatible(map, fp)) {
+ if (!__bpf_prog_map_compatible(map, fp)) {
ret = -EINVAL;
goto out;
}
@@ -2469,7 +2474,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
/* In case of BPF to BPF calls, verifier did all the prep
* work with regards to JITing, etc.
*/
- bool jit_needed = false;
+ bool jit_needed = fp->jit_requested;
if (fp->bpf_func)
goto finalize;
diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c
new file mode 100644
index 000000000000..4dd7ef7c145c
--- /dev/null
+++ b/kernel/bpf/dmabuf_iter.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Google LLC */
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+#include <linux/dma-buf.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+
+static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ if (*pos)
+ return NULL;
+
+ return dma_buf_iter_begin();
+}
+
+static void *dmabuf_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct dma_buf *dmabuf = v;
+
+ ++*pos;
+
+ return dma_buf_iter_next(dmabuf);
+}
+
+struct bpf_iter__dmabuf {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct dma_buf *, dmabuf);
+};
+
+static int __dmabuf_seq_show(struct seq_file *seq, void *v, bool in_stop)
+{
+ struct bpf_iter_meta meta = {
+ .seq = seq,
+ };
+ struct bpf_iter__dmabuf ctx = {
+ .meta = &meta,
+ .dmabuf = v,
+ };
+ struct bpf_prog *prog = bpf_iter_get_info(&meta, in_stop);
+
+ if (prog)
+ return bpf_iter_run_prog(prog, &ctx);
+
+ return 0;
+}
+
+static int dmabuf_iter_seq_show(struct seq_file *seq, void *v)
+{
+ return __dmabuf_seq_show(seq, v, false);
+}
+
+static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v)
+{
+ struct dma_buf *dmabuf = v;
+
+ if (dmabuf)
+ dma_buf_put(dmabuf);
+}
+
+static const struct seq_operations dmabuf_iter_seq_ops = {
+ .start = dmabuf_iter_seq_start,
+ .next = dmabuf_iter_seq_next,
+ .stop = dmabuf_iter_seq_stop,
+ .show = dmabuf_iter_seq_show,
+};
+
+static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux,
+ struct seq_file *seq)
+{
+ seq_puts(seq, "dmabuf iter\n");
+}
+
+static const struct bpf_iter_seq_info dmabuf_iter_seq_info = {
+ .seq_ops = &dmabuf_iter_seq_ops,
+ .init_seq_private = NULL,
+ .fini_seq_private = NULL,
+ .seq_priv_size = 0,
+};
+
+static struct bpf_iter_reg bpf_dmabuf_reg_info = {
+ .target = "dmabuf",
+ .feature = BPF_ITER_RESCHED,
+ .show_fdinfo = bpf_iter_dmabuf_show_fdinfo,
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__dmabuf, dmabuf),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &dmabuf_iter_seq_info,
+};
+
+DEFINE_BPF_ITER_FUNC(dmabuf, struct bpf_iter_meta *meta, struct dma_buf *dmabuf)
+BTF_ID_LIST_SINGLE(bpf_dmabuf_btf_id, struct, dma_buf)
+
+static int __init dmabuf_iter_init(void)
+{
+ bpf_dmabuf_reg_info.ctx_arg_info[0].btf_id = bpf_dmabuf_btf_id[0];
+ return bpf_iter_reg_target(&bpf_dmabuf_reg_info);
+}
+
+late_initcall(dmabuf_iter_init);
+
+struct bpf_iter_dmabuf {
+ /*
+ * opaque iterator state; having __u64 here allows to preserve correct
+ * alignment requirements in vmlinux.h, generated from BTF
+ */
+ __u64 __opaque[1];
+} __aligned(8);
+
+/* Non-opaque version of bpf_iter_dmabuf */
+struct bpf_iter_dmabuf_kern {
+ struct dma_buf *dmabuf;
+} __aligned(8);
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it)
+{
+ struct bpf_iter_dmabuf_kern *kit = (void *)it;
+
+ BUILD_BUG_ON(sizeof(*kit) > sizeof(*it));
+ BUILD_BUG_ON(__alignof__(*kit) != __alignof__(*it));
+
+ kit->dmabuf = NULL;
+ return 0;
+}
+
+__bpf_kfunc struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it)
+{
+ struct bpf_iter_dmabuf_kern *kit = (void *)it;
+
+ if (kit->dmabuf)
+ kit->dmabuf = dma_buf_iter_next(kit->dmabuf);
+ else
+ kit->dmabuf = dma_buf_iter_begin();
+
+ return kit->dmabuf;
+}
+
+__bpf_kfunc void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it)
+{
+ struct bpf_iter_dmabuf_kern *kit = (void *)it;
+
+ if (kit->dmabuf)
+ dma_buf_put(kit->dmabuf);
+}
+
+__bpf_kfunc_end_defs();
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 92b606d60020..71f9931ac64c 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -175,20 +175,30 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}
+static inline bool is_fd_htab(const struct bpf_htab *htab)
+{
+ return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS;
+}
+
+static inline void *htab_elem_value(struct htab_elem *l, u32 key_size)
+{
+ return l->key + round_up(key_size, 8);
+}
+
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
void __percpu *pptr)
{
- *(void __percpu **)(l->key + roundup(key_size, 8)) = pptr;
+ *(void __percpu **)htab_elem_value(l, key_size) = pptr;
}
static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
- return *(void __percpu **)(l->key + roundup(key_size, 8));
+ return *(void __percpu **)htab_elem_value(l, key_size);
}
static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
{
- return *(void **)(l->key + roundup(map->key_size, 8));
+ return *(void **)htab_elem_value(l, map->key_size);
}
static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
@@ -196,9 +206,13 @@ static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
}
+/* Both percpu and fd htab support in-place update, so no need for
+ * extra elem. LRU itself can remove the least used element, so
+ * there is no need for an extra elem during map_update.
+ */
static bool htab_has_extra_elems(struct bpf_htab *htab)
{
- return !htab_is_percpu(htab) && !htab_is_lru(htab);
+ return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
}
static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
@@ -215,10 +229,10 @@ static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
elem = get_htab_elem(htab, i);
if (btf_record_has_field(htab->map.record, BPF_TIMER))
bpf_obj_free_timer(htab->map.record,
- elem->key + round_up(htab->map.key_size, 8));
+ htab_elem_value(elem, htab->map.key_size));
if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
bpf_obj_free_workqueue(htab->map.record,
- elem->key + round_up(htab->map.key_size, 8));
+ htab_elem_value(elem, htab->map.key_size));
cond_resched();
}
}
@@ -245,7 +259,8 @@ static void htab_free_prealloced_fields(struct bpf_htab *htab)
cond_resched();
}
} else {
- bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
+ bpf_obj_free_fields(htab->map.record,
+ htab_elem_value(elem, htab->map.key_size));
cond_resched();
}
cond_resched();
@@ -453,8 +468,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
- bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
- attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
/* percpu_lru means each cpu has its own LRU list.
* it is different from BPF_MAP_TYPE_PERCPU_HASH where
* the map's value itself is percpu. percpu_lru has
@@ -549,10 +562,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
if (err)
goto free_map_locked;
- if (!percpu && !lru) {
- /* lru itself can remove the least used element, so
- * there is no need for an extra elem during map_update.
- */
+ if (htab_has_extra_elems(htab)) {
err = alloc_extra_elems(htab);
if (err)
goto free_prealloc;
@@ -670,7 +680,7 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
struct htab_elem *l = __htab_map_lookup_elem(map, key);
if (l)
- return l->key + round_up(map->key_size, 8);
+ return htab_elem_value(l, map->key_size);
return NULL;
}
@@ -709,7 +719,7 @@ static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
if (l) {
if (mark)
bpf_lru_node_set_ref(&l->lru_node);
- return l->key + round_up(map->key_size, 8);
+ return htab_elem_value(l, map->key_size);
}
return NULL;
@@ -763,7 +773,7 @@ static void check_and_free_fields(struct bpf_htab *htab,
for_each_possible_cpu(cpu)
bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
} else {
- void *map_value = elem->key + round_up(htab->map.key_size, 8);
+ void *map_value = htab_elem_value(elem, htab->map.key_size);
bpf_obj_free_fields(htab->map.record, map_value);
}
@@ -968,8 +978,7 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
{
- return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
- BITS_PER_LONG == 64;
+ return is_fd_htab(htab) && BITS_PER_LONG == 64;
}
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
@@ -1039,11 +1048,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
htab_elem_set_ptr(l_new, key_size, pptr);
} else if (fd_htab_map_needs_adjust(htab)) {
size = round_up(size, 8);
- memcpy(l_new->key + round_up(key_size, 8), value, size);
+ memcpy(htab_elem_value(l_new, key_size), value, size);
} else {
- copy_map_value(&htab->map,
- l_new->key + round_up(key_size, 8),
- value);
+ copy_map_value(&htab->map, htab_elem_value(l_new, key_size), value);
}
l_new->hash = hash;
@@ -1072,10 +1079,9 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct htab_elem *l_new = NULL, *l_old;
+ struct htab_elem *l_new, *l_old;
struct hlist_nulls_head *head;
unsigned long flags;
- void *old_map_ptr;
struct bucket *b;
u32 key_size, hash;
int ret;
@@ -1106,7 +1112,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
if (l_old) {
/* grab the element lock and update value in place */
copy_map_value_locked(map,
- l_old->key + round_up(key_size, 8),
+ htab_elem_value(l_old, key_size),
value, false);
return 0;
}
@@ -1134,7 +1140,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
* and update element in place
*/
copy_map_value_locked(map,
- l_old->key + round_up(key_size, 8),
+ htab_elem_value(l_old, key_size),
value, false);
ret = 0;
goto err;
@@ -1156,24 +1162,14 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
hlist_nulls_del_rcu(&l_old->hash_node);
/* l_old has already been stashed in htab->extra_elems, free
- * its special fields before it is available for reuse. Also
- * save the old map pointer in htab of maps before unlock
- * and release it after unlock.
+ * its special fields before it is available for reuse.
*/
- old_map_ptr = NULL;
- if (htab_is_prealloc(htab)) {
- if (map->ops->map_fd_put_ptr)
- old_map_ptr = fd_htab_map_get_ptr(map, l_old);
+ if (htab_is_prealloc(htab))
check_and_free_fields(htab, l_old);
- }
}
htab_unlock_bucket(b, flags);
- if (l_old) {
- if (old_map_ptr)
- map->ops->map_fd_put_ptr(map, old_map_ptr, true);
- if (!htab_is_prealloc(htab))
- free_htab_elem(htab, l_old);
- }
+ if (l_old && !htab_is_prealloc(htab))
+ free_htab_elem(htab, l_old);
return 0;
err:
htab_unlock_bucket(b, flags);
@@ -1220,8 +1216,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
l_new = prealloc_lru_pop(htab, key, hash);
if (!l_new)
return -ENOMEM;
- copy_map_value(&htab->map,
- l_new->key + round_up(map->key_size, 8), value);
+ copy_map_value(&htab->map, htab_elem_value(l_new, map->key_size), value);
ret = htab_lock_bucket(b, &flags);
if (ret)
@@ -1255,13 +1250,14 @@ err_lock_bucket:
return ret;
}
-static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
void *value, u64 map_flags,
- bool onallcpus)
+ bool percpu, bool onallcpus)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct htab_elem *l_new = NULL, *l_old;
+ struct htab_elem *l_new, *l_old;
struct hlist_nulls_head *head;
+ void *old_map_ptr = NULL;
unsigned long flags;
struct bucket *b;
u32 key_size, hash;
@@ -1292,21 +1288,29 @@ static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
goto err;
if (l_old) {
- /* per-cpu hash map can update value in-place */
- pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
- value, onallcpus);
+ /* Update value in-place */
+ if (percpu) {
+ pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
+ value, onallcpus);
+ } else {
+ void **inner_map_pptr = htab_elem_value(l_old, key_size);
+
+ old_map_ptr = *inner_map_pptr;
+ WRITE_ONCE(*inner_map_pptr, *(void **)value);
+ }
} else {
l_new = alloc_htab_elem(htab, key, value, key_size,
- hash, true, onallcpus, NULL);
+ hash, percpu, onallcpus, NULL);
if (IS_ERR(l_new)) {
ret = PTR_ERR(l_new);
goto err;
}
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
}
- ret = 0;
err:
htab_unlock_bucket(b, flags);
+ if (old_map_ptr)
+ map->ops->map_fd_put_ptr(map, old_map_ptr, true);
return ret;
}
@@ -1383,7 +1387,7 @@ err_lock_bucket:
static long htab_percpu_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 map_flags)
{
- return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
+ return htab_map_update_elem_in_place(map, key, value, map_flags, true, false);
}
static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
@@ -1500,10 +1504,10 @@ static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
/* We only free timer on uref dropping to zero */
if (btf_record_has_field(htab->map.record, BPF_TIMER))
bpf_obj_free_timer(htab->map.record,
- l->key + round_up(htab->map.key_size, 8));
+ htab_elem_value(l, htab->map.key_size));
if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
bpf_obj_free_workqueue(htab->map.record,
- l->key + round_up(htab->map.key_size, 8));
+ htab_elem_value(l, htab->map.key_size));
}
cond_resched_rcu();
}
@@ -1615,15 +1619,12 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
off += roundup_value_size;
}
} else {
- u32 roundup_key_size = round_up(map->key_size, 8);
+ void *src = htab_elem_value(l, map->key_size);
if (flags & BPF_F_LOCK)
- copy_map_value_locked(map, value, l->key +
- roundup_key_size,
- true);
+ copy_map_value_locked(map, value, src, true);
else
- copy_map_value(map, value, l->key +
- roundup_key_size);
+ copy_map_value(map, value, src);
/* Zeroing special fields in the temp buffer */
check_and_init_map_value(map, value);
}
@@ -1680,12 +1681,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
bool is_percpu)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
void __user *uvalues = u64_to_user_ptr(attr->batch.values);
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
u32 batch, max_count, size, bucket_size, map_id;
+ u32 bucket_cnt, total, key_size, value_size;
struct htab_elem *node_to_free = NULL;
u64 elem_map_flags, map_flags;
struct hlist_nulls_head *head;
@@ -1720,7 +1721,6 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
return -ENOENT;
key_size = htab->map.key_size;
- roundup_key_size = round_up(htab->map.key_size, 8);
value_size = htab->map.value_size;
size = round_up(value_size, 8);
if (is_percpu)
@@ -1812,8 +1812,8 @@ again_nocopy:
off += size;
}
} else {
- value = l->key + roundup_key_size;
- if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ value = htab_elem_value(l, key_size);
+ if (is_fd_htab(htab)) {
struct bpf_map **inner_map = value;
/* Actual value is the id of the inner map */
@@ -2063,11 +2063,11 @@ static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
{
struct bpf_iter_seq_hash_map_info *info = seq->private;
- u32 roundup_key_size, roundup_value_size;
struct bpf_iter__bpf_map_elem ctx = {};
struct bpf_map *map = info->map;
struct bpf_iter_meta meta;
int ret = 0, off = 0, cpu;
+ u32 roundup_value_size;
struct bpf_prog *prog;
void __percpu *pptr;
@@ -2077,10 +2077,9 @@ static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
ctx.meta = &meta;
ctx.map = info->map;
if (elem) {
- roundup_key_size = round_up(map->key_size, 8);
ctx.key = elem->key;
if (!info->percpu_value_buf) {
- ctx.value = elem->key + roundup_key_size;
+ ctx.value = htab_elem_value(elem, map->key_size);
} else {
roundup_value_size = round_up(map->value_size, 8);
pptr = htab_elem_get_ptr(elem, map->key_size);
@@ -2165,7 +2164,6 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
struct htab_elem *elem;
- u32 roundup_key_size;
int i, num_elems = 0;
void __percpu *pptr;
struct bucket *b;
@@ -2180,7 +2178,6 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_
is_percpu = htab_is_percpu(htab);
- roundup_key_size = round_up(map->key_size, 8);
/* migration has been disabled, so percpu value prepared here will be
* the same as the one seen by the bpf program with
* bpf_map_lookup_elem().
@@ -2196,7 +2193,7 @@ static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_
pptr = htab_elem_get_ptr(elem, map->key_size);
val = this_cpu_ptr(pptr);
} else {
- val = elem->key + roundup_key_size;
+ val = htab_elem_value(elem, map->key_size);
}
num_elems++;
ret = callback_fn((u64)(long)map, (u64)(long)key,
@@ -2411,8 +2408,8 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
ret = __htab_lru_percpu_map_update_elem(map, key, value,
map_flags, true);
else
- ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
- true);
+ ret = htab_map_update_elem_in_place(map, key, value, map_flags,
+ true, true);
rcu_read_unlock();
return ret;
@@ -2536,24 +2533,23 @@ int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
return ret;
}
-/* only called from syscall */
+/* Only called from syscall */
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags)
{
void *ptr;
int ret;
- u32 ufd = *(u32 *)value;
- ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
+ ptr = map->ops->map_fd_get_ptr(map, map_file, *(int *)value);
if (IS_ERR(ptr))
return PTR_ERR(ptr);
/* The htab bucket lock is always held during update operations in fd
* htab map, and the following rcu_read_lock() is only used to avoid
- * the WARN_ON_ONCE in htab_map_update_elem().
+ * the WARN_ON_ONCE in htab_map_update_elem_in_place().
*/
rcu_read_lock();
- ret = htab_map_update_elem(map, key, &ptr, map_flags);
+ ret = htab_map_update_elem_in_place(map, key, &ptr, map_flags, false, false);
rcu_read_unlock();
if (ret)
map->ops->map_fd_put_ptr(map, ptr, false);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index e3a2662f4e33..b71e428ad936 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -23,6 +23,7 @@
#include <linux/btf_ids.h>
#include <linux/bpf_mem_alloc.h>
#include <linux/kasan.h>
+#include <linux/bpf_verifier.h>
#include "../../lib/kstrtox.h"
@@ -129,7 +130,8 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = {
BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu)
{
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu);
}
@@ -1713,16 +1715,6 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
memset(ptr, 0, sizeof(*ptr));
}
-static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
-{
- u32 size = __bpf_dynptr_size(ptr);
-
- if (len > size || offset > size - len)
- return -E2BIG;
-
- return 0;
-}
-
BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr)
{
int err;
@@ -1809,8 +1801,8 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
.arg5_type = ARG_ANYTHING,
};
-static int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
- u32 len, u64 flags)
+int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
+ u32 len, u64 flags)
{
enum bpf_dynptr_type type;
int err;
@@ -1912,6 +1904,12 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
+const struct bpf_func_proto bpf_perf_event_read_proto __weak;
+const struct bpf_func_proto bpf_send_signal_proto __weak;
+const struct bpf_func_proto bpf_send_signal_thread_proto __weak;
+const struct bpf_func_proto bpf_get_task_stack_sleepable_proto __weak;
+const struct bpf_func_proto bpf_get_task_stack_proto __weak;
+const struct bpf_func_proto bpf_get_branch_snapshot_proto __weak;
const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
@@ -1965,6 +1963,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_pid_tgid_proto;
case BPF_FUNC_get_ns_current_pid_tgid:
return &bpf_get_ns_current_pid_tgid_proto;
+ case BPF_FUNC_get_current_uid_gid:
+ return &bpf_get_current_uid_gid_proto;
default:
break;
}
@@ -2022,7 +2022,21 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_cgroup_id_proto;
case BPF_FUNC_get_current_ancestor_cgroup_id:
return &bpf_get_current_ancestor_cgroup_id_proto;
+ case BPF_FUNC_current_task_under_cgroup:
+ return &bpf_current_task_under_cgroup_proto;
#endif
+#ifdef CONFIG_CGROUP_NET_CLASSID
+ case BPF_FUNC_get_cgroup_classid:
+ return &bpf_get_cgroup_classid_curr_proto;
+#endif
+ case BPF_FUNC_task_storage_get:
+ if (bpf_prog_check_recur(prog))
+ return &bpf_task_storage_get_recur_proto;
+ return &bpf_task_storage_get_proto;
+ case BPF_FUNC_task_storage_delete:
+ if (bpf_prog_check_recur(prog))
+ return &bpf_task_storage_delete_recur_proto;
+ return &bpf_task_storage_delete_proto;
default:
break;
}
@@ -2037,6 +2051,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_task_proto;
case BPF_FUNC_get_current_task_btf:
return &bpf_get_current_task_btf_proto;
+ case BPF_FUNC_get_current_comm:
+ return &bpf_get_current_comm_proto;
case BPF_FUNC_probe_read_user:
return &bpf_probe_read_user_proto;
case BPF_FUNC_probe_read_kernel:
@@ -2047,6 +2063,10 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_probe_read_kernel_str:
return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_kernel_str_proto;
+ case BPF_FUNC_copy_from_user:
+ return &bpf_copy_from_user_proto;
+ case BPF_FUNC_copy_from_user_task:
+ return &bpf_copy_from_user_task_proto;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
case BPF_FUNC_snprintf:
@@ -2057,6 +2077,19 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return bpf_get_trace_vprintk_proto();
case BPF_FUNC_perf_event_read_value:
return bpf_get_perf_event_read_value_proto();
+ case BPF_FUNC_perf_event_read:
+ return &bpf_perf_event_read_proto;
+ case BPF_FUNC_send_signal:
+ return &bpf_send_signal_proto;
+ case BPF_FUNC_send_signal_thread:
+ return &bpf_send_signal_thread_proto;
+ case BPF_FUNC_get_task_stack:
+ return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
+ : &bpf_get_task_stack_proto;
+ case BPF_FUNC_get_branch_snapshot:
+ return &bpf_get_branch_snapshot_proto;
+ case BPF_FUNC_find_vma:
+ return &bpf_find_vma_proto;
default:
return NULL;
}
@@ -2293,6 +2326,26 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
return __bpf_list_del(head, true);
}
+__bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
+{
+ struct list_head *h = (struct list_head *)head;
+
+ if (list_empty(h) || unlikely(!h->next))
+ return NULL;
+
+ return (struct bpf_list_node *)h->next;
+}
+
+__bpf_kfunc struct bpf_list_node *bpf_list_back(struct bpf_list_head *head)
+{
+ struct list_head *h = (struct list_head *)head;
+
+ if (list_empty(h) || unlikely(!h->next))
+ return NULL;
+
+ return (struct bpf_list_node *)h->prev;
+}
+
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
struct bpf_rb_node *node)
{
@@ -2366,6 +2419,33 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
return (struct bpf_rb_node *)rb_first_cached(r);
}
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root)
+{
+ struct rb_root_cached *r = (struct rb_root_cached *)root;
+
+ return (struct bpf_rb_node *)r->rb_root.rb_node;
+}
+
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node)
+{
+ struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
+
+ if (READ_ONCE(node_internal->owner) != root)
+ return NULL;
+
+ return (struct bpf_rb_node *)node_internal->rb_node.rb_left;
+}
+
+__bpf_kfunc struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct bpf_rb_node *node)
+{
+ struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
+
+ if (READ_ONCE(node_internal->owner) != root)
+ return NULL;
+
+ return (struct bpf_rb_node *)node_internal->rb_node.rb_right;
+}
+
/**
* bpf_task_acquire - Acquire a reference to a task. A task acquired by this
* kfunc which is not stored in a map as a kptr, must be released by calling
@@ -2923,9 +3003,9 @@ __bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags)
__bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
int (callback_fn)(void *map, int *key, void *value),
unsigned int flags,
- void *aux__ign)
+ void *aux__prog)
{
- struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__ign;
+ struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__prog;
struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
if (flags)
@@ -3194,6 +3274,10 @@ __bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag)
local_irq_restore(*flags__irq_flag);
}
+__bpf_kfunc void __bpf_trap(void)
+{
+}
+
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids)
@@ -3209,11 +3293,16 @@ BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_left, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_rbtree_right, KF_RET_NULL)
#ifdef CONFIG_CGROUPS
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
@@ -3294,6 +3383,20 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_local_irq_save)
BTF_ID_FLAGS(func, bpf_local_irq_restore)
+BTF_ID_FLAGS(func, bpf_probe_read_user_dynptr)
+BTF_ID_FLAGS(func, bpf_probe_read_kernel_dynptr)
+BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr)
+BTF_ID_FLAGS(func, bpf_probe_read_kernel_str_dynptr)
+BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+#ifdef CONFIG_DMA_SHARED_BUFFER
+BTF_ID_FLAGS(func, bpf_iter_dmabuf_new, KF_ITER_NEW | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
+#endif
+BTF_ID_FLAGS(func, __bpf_trap)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 64c3393e8270..4b5f29168618 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -36,6 +36,7 @@
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
#include <linux/tracepoint.h>
+#include <linux/overflow.h>
#include <net/netfilter/nf_bpf_link.h>
#include <net/netkit.h>
@@ -693,7 +694,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
if (IS_ERR_OR_NULL(rec))
return NULL;
- size = offsetof(struct btf_record, fields[rec->cnt]);
+ size = struct_size(rec, fields, rec->cnt);
new_rec = kmemdup(rec, size, GFP_KERNEL | __GFP_NOWARN);
if (!new_rec)
return ERR_PTR(-ENOMEM);
@@ -748,7 +749,7 @@ bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *r
return false;
if (rec_a->cnt != rec_b->cnt)
return false;
- size = offsetof(struct btf_record, fields[rec_a->cnt]);
+ size = struct_size(rec_a, fields, rec_a->cnt);
/* btf_parse_fields uses kzalloc to allocate a btf_record, so unused
* members are zeroed out. So memcmp is safe to do without worrying
* about padding/unused fields.
@@ -3799,14 +3800,14 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event,
static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
struct bpf_link_info *info)
{
+ u64 ref_ctr_offset, offset;
char __user *uname;
- u64 addr, offset;
u32 ulen, type;
int err;
uname = u64_to_user_ptr(info->perf_event.uprobe.file_name);
ulen = info->perf_event.uprobe.name_len;
- err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr,
+ err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &ref_ctr_offset,
&type, NULL);
if (err)
return err;
@@ -3818,6 +3819,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event,
info->perf_event.uprobe.name_len = ulen;
info->perf_event.uprobe.offset = offset;
info->perf_event.uprobe.cookie = event->bpf_cookie;
+ info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset;
return 0;
}
#endif
diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
index 81d6cf90584a..941d0d2427e3 100644
--- a/kernel/bpf/sysfs_btf.c
+++ b/kernel/bpf/sysfs_btf.c
@@ -7,14 +7,46 @@
#include <linux/kobject.h>
#include <linux/init.h>
#include <linux/sysfs.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/btf.h>
/* See scripts/link-vmlinux.sh, gen_btf() func for details */
extern char __start_BTF[];
extern char __stop_BTF[];
+static int btf_sysfs_vmlinux_mmap(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *attr,
+ struct vm_area_struct *vma)
+{
+ unsigned long pages = PAGE_ALIGN(attr->size) >> PAGE_SHIFT;
+ size_t vm_size = vma->vm_end - vma->vm_start;
+ phys_addr_t addr = virt_to_phys(__start_BTF);
+ unsigned long pfn = addr >> PAGE_SHIFT;
+
+ if (attr->private != __start_BTF || !PAGE_ALIGNED(addr))
+ return -EINVAL;
+
+ if (vma->vm_pgoff)
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_MAYSHARE))
+ return -EACCES;
+
+ if (pfn + pages < pfn)
+ return -EINVAL;
+
+ if ((vm_size >> PAGE_SHIFT) > pages)
+ return -EINVAL;
+
+ vm_flags_mod(vma, VM_DONTDUMP, VM_MAYEXEC | VM_MAYWRITE);
+ return remap_pfn_range(vma, vma->vm_start, pfn, vm_size, vma->vm_page_prot);
+}
+
static struct bin_attribute bin_attr_btf_vmlinux __ro_after_init = {
.attr = { .name = "vmlinux", .mode = 0444, },
.read_new = sysfs_bin_attr_simple_read,
+ .mmap = btf_sysfs_vmlinux_mmap,
};
struct kobject *btf_kobj;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 54c6953a8b84..a7d6e0c5928b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -322,6 +322,7 @@ struct bpf_kfunc_call_arg_meta {
struct btf *arg_btf;
u32 arg_btf_id;
bool arg_owning_ref;
+ bool arg_prog;
struct {
struct btf_field *field;
@@ -1923,11 +1924,8 @@ static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
u32 steps = 0;
while (topmost && topmost->loop_entry) {
- if (steps++ > st->dfs_depth) {
- WARN_ONCE(true, "verifier bug: infinite loop in get_loop_entry\n");
- verbose(env, "verifier bug: infinite loop in get_loop_entry()\n");
+ if (verifier_bug_if(steps++ > st->dfs_depth, env, "infinite loop"))
return ERR_PTR(-EFAULT);
- }
topmost = topmost->loop_entry;
}
return topmost;
@@ -3459,12 +3457,11 @@ static int mark_reg_read(struct bpf_verifier_env *env,
/* if read wasn't screened by an earlier write ... */
if (writes && state->live & REG_LIVE_WRITTEN)
break;
- if (parent->live & REG_LIVE_DONE) {
- verbose(env, "verifier BUG type %s var_off %lld off %d\n",
- reg_type_str(env, parent->type),
- parent->var_off.value, parent->off);
+ if (verifier_bug_if(parent->live & REG_LIVE_DONE, env,
+ "type %s var_off %lld off %d",
+ reg_type_str(env, parent->type),
+ parent->var_off.value, parent->off))
return -EFAULT;
- }
/* The first condition is more likely to be true than the
* second, checked it first.
*/
@@ -3649,16 +3646,16 @@ static int insn_def_regno(const struct bpf_insn *insn)
case BPF_ST:
return -1;
case BPF_STX:
- if ((BPF_MODE(insn->code) == BPF_ATOMIC ||
- BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) &&
- (insn->imm & BPF_FETCH)) {
+ if (BPF_MODE(insn->code) == BPF_ATOMIC ||
+ BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
if (insn->imm == BPF_CMPXCHG)
return BPF_REG_0;
- else
+ else if (insn->imm == BPF_LOAD_ACQ)
+ return insn->dst_reg;
+ else if (insn->imm & BPF_FETCH)
return insn->src_reg;
- } else {
- return -1;
}
+ return -1;
default:
return insn->dst_reg;
}
@@ -3857,14 +3854,14 @@ static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_s
/* atomic instructions push insn_flags twice, for READ and
* WRITE sides, but they should agree on stack slot
*/
- WARN_ONCE((env->cur_hist_ent->flags & insn_flags) &&
- (env->cur_hist_ent->flags & insn_flags) != insn_flags,
- "verifier insn history bug: insn_idx %d cur flags %x new flags %x\n",
- env->insn_idx, env->cur_hist_ent->flags, insn_flags);
+ verifier_bug_if((env->cur_hist_ent->flags & insn_flags) &&
+ (env->cur_hist_ent->flags & insn_flags) != insn_flags,
+ env, "insn history: insn_idx %d cur flags %x new flags %x",
+ env->insn_idx, env->cur_hist_ent->flags, insn_flags);
env->cur_hist_ent->flags |= insn_flags;
- WARN_ONCE(env->cur_hist_ent->linked_regs != 0,
- "verifier insn history bug: insn_idx %d linked_regs != 0: %#llx\n",
- env->insn_idx, env->cur_hist_ent->linked_regs);
+ verifier_bug_if(env->cur_hist_ent->linked_regs != 0, env,
+ "insn history: insn_idx %d linked_regs: %#llx",
+ env->insn_idx, env->cur_hist_ent->linked_regs);
env->cur_hist_ent->linked_regs = linked_regs;
return 0;
}
@@ -3987,8 +3984,7 @@ static inline u32 bt_empty(struct backtrack_state *bt)
static inline int bt_subprog_enter(struct backtrack_state *bt)
{
if (bt->frame == MAX_CALL_FRAMES - 1) {
- verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame);
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(bt->env, "subprog enter from frame %d", bt->frame);
return -EFAULT;
}
bt->frame++;
@@ -3998,8 +3994,7 @@ static inline int bt_subprog_enter(struct backtrack_state *bt)
static inline int bt_subprog_exit(struct backtrack_state *bt)
{
if (bt->frame == 0) {
- verbose(bt->env, "BUG subprog exit from frame 0\n");
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(bt->env, "subprog exit from frame 0");
return -EFAULT;
}
bt->frame--;
@@ -4277,14 +4272,15 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* should be literally next instruction in
* caller program
*/
- WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug");
+ verifier_bug_if(idx + 1 != subseq_idx, env,
+ "extra insn from subprog");
/* r1-r5 are invalidated after subprog call,
* so for global func call it shouldn't be set
* anymore
*/
if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
- verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "global subprog unexpected regs %x",
+ bt_reg_mask(bt));
return -EFAULT;
}
/* global subprog always sets R0 */
@@ -4298,16 +4294,17 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* the current frame should be zero by now
*/
if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
- verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "static subprog unexpected regs %x",
+ bt_reg_mask(bt));
return -EFAULT;
}
/* we are now tracking register spills correctly,
* so any instance of leftover slots is a bug
*/
if (bt_stack_mask(bt) != 0) {
- verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug (subprog leftover stack slots)");
+ verifier_bug(env,
+ "static subprog leftover stack slots %llx",
+ bt_stack_mask(bt));
return -EFAULT;
}
/* propagate r1-r5 to the caller */
@@ -4330,13 +4327,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* not actually arguments passed directly to callback subprogs
*/
if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
- verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "callback unexpected regs %x",
+ bt_reg_mask(bt));
return -EFAULT;
}
if (bt_stack_mask(bt) != 0) {
- verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug (callback leftover stack slots)");
+ verifier_bug(env, "callback leftover stack slots %llx",
+ bt_stack_mask(bt));
return -EFAULT;
}
/* clear r1-r5 in callback subprog's mask */
@@ -4355,11 +4352,11 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
/* regular helper call sets R0 */
bt_clear_reg(bt, BPF_REG_0);
if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
- /* if backtracing was looking for registers R1-R5
+ /* if backtracking was looking for registers R1-R5
* they should have been found already.
*/
- verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "backtracking call unexpected regs %x",
+ bt_reg_mask(bt));
return -EFAULT;
}
} else if (opcode == BPF_EXIT) {
@@ -4377,8 +4374,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
bt_clear_reg(bt, i);
if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
- verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "backtracking exit unexpected regs %x",
+ bt_reg_mask(bt));
return -EFAULT;
}
@@ -4413,8 +4410,10 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* before it would be equally necessary to
* propagate it to dreg.
*/
- bt_set_reg(bt, dreg);
- bt_set_reg(bt, sreg);
+ if (!hist || !(hist->flags & INSN_F_SRC_REG_STACK))
+ bt_set_reg(bt, sreg);
+ if (!hist || !(hist->flags & INSN_F_DST_REG_STACK))
+ bt_set_reg(bt, dreg);
} else if (BPF_SRC(insn->code) == BPF_K) {
/* dreg <cond> K
* Only dreg still needs precision before
@@ -4719,9 +4718,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
return 0;
}
- verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n",
- st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "backtracking func entry subprog %d reg_mask %x stack_mask %llx",
+ st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
return -EFAULT;
}
@@ -4757,8 +4755,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
* It means the backtracking missed the spot where
* particular register was initialized with a constant.
*/
- verbose(env, "BUG backtracking idx %d\n", i);
- WARN_ONCE(1, "verifier backtracking bug");
+ verifier_bug(env, "backtracking idx %d", i);
return -EFAULT;
}
}
@@ -4783,12 +4780,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
for_each_set_bit(i, mask, 64) {
- if (i >= func->allocated_stack / BPF_REG_SIZE) {
- verbose(env, "BUG backtracking (stack slot %d, total slots %d)\n",
- i, func->allocated_stack / BPF_REG_SIZE);
- WARN_ONCE(1, "verifier backtracking bug (stack slot out of bounds)");
+ if (verifier_bug_if(i >= func->allocated_stack / BPF_REG_SIZE,
+ env, "stack slot %d, total slots %d",
+ i, func->allocated_stack / BPF_REG_SIZE))
return -EFAULT;
- }
if (!is_spilled_scalar_reg(&func->stack[i])) {
bt_clear_frame_slot(bt, fr, i);
@@ -6561,21 +6556,18 @@ continue_func:
/* find the callee */
next_insn = i + insn[i].imm + 1;
sidx = find_subprog(env, next_insn);
- if (sidx < 0) {
- WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
- next_insn);
+ if (verifier_bug_if(sidx < 0, env, "callee not found at insn %d", next_insn))
return -EFAULT;
- }
if (subprog[sidx].is_async_cb) {
if (subprog[sidx].has_tail_call) {
- verbose(env, "verifier bug. subprog has tail_call and async cb\n");
+ verifier_bug(env, "subprog has tail_call and async cb");
return -EFAULT;
}
/* async callbacks don't increase bpf prog stack size unless called directly */
if (!bpf_pseudo_call(insn + i))
continue;
if (subprog[sidx].is_exception_cb) {
- verbose(env, "insn %d cannot call exception cb directly\n", i);
+ verbose(env, "insn %d cannot call exception cb directly", i);
return -EINVAL;
}
}
@@ -6675,11 +6667,8 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
int start = idx + insn->imm + 1, subprog;
subprog = find_subprog(env, start);
- if (subprog < 0) {
- WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
- start);
+ if (verifier_bug_if(subprog < 0, env, "get stack depth: no program at insn %d", start))
return -EFAULT;
- }
return env->subprog_info[subprog].stack_depth;
}
#endif
@@ -7984,7 +7973,7 @@ static int check_stack_range_initialized(
slot = -i - 1;
spi = slot / BPF_REG_SIZE;
if (state->allocated_stack <= slot) {
- verbose(env, "verifier bug: allocated_stack too small\n");
+ verbose(env, "allocated_stack too small\n");
return -EFAULT;
}
@@ -8413,7 +8402,7 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno,
return -EINVAL;
}
if (meta->map_ptr) {
- verbose(env, "verifier bug. Two map pointers in a timer helper\n");
+ verifier_bug(env, "Two map pointers in a timer helper");
return -EFAULT;
}
meta->map_uid = reg->map_uid;
@@ -10285,8 +10274,7 @@ static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int calls
}
if (state->frame[state->curframe + 1]) {
- verbose(env, "verifier bug. Frame %d already allocated\n",
- state->curframe + 1);
+ verifier_bug(env, "Frame %d already allocated", state->curframe + 1);
return -EFAULT;
}
@@ -10400,8 +10388,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
if (err)
return err;
} else {
- bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n",
- i, arg->arg_type);
+ verifier_bug(env, "unrecognized arg#%d type %d", i, arg->arg_type);
return -EFAULT;
}
}
@@ -10464,13 +10451,13 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
env->subprog_info[subprog].is_cb = true;
if (bpf_pseudo_kfunc_call(insn) &&
!is_callback_calling_kfunc(insn->imm)) {
- verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
- func_id_name(insn->imm), insn->imm);
+ verifier_bug(env, "kfunc %s#%d not marked as callback-calling",
+ func_id_name(insn->imm), insn->imm);
return -EFAULT;
} else if (!bpf_pseudo_kfunc_call(insn) &&
!is_callback_calling_function(insn->imm)) { /* helper */
- verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
- func_id_name(insn->imm), insn->imm);
+ verifier_bug(env, "helper %s#%d not marked as callback-calling",
+ func_id_name(insn->imm), insn->imm);
return -EFAULT;
}
@@ -10522,10 +10509,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
target_insn = *insn_idx + insn->imm + 1;
subprog = find_subprog(env, target_insn);
- if (subprog < 0) {
- verbose(env, "verifier bug. No program starts at insn %d\n", target_insn);
+ if (verifier_bug_if(subprog < 0, env, "target of func call at insn %d is not a program",
+ target_insn))
return -EFAULT;
- }
caller = state->frame[state->curframe];
err = btf_check_subprog_call(env, subprog, caller->regs);
@@ -11124,7 +11110,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
fmt_map_off);
if (err) {
- verbose(env, "verifier bug\n");
+ verbose(env, "failed to retrieve map value address\n");
return -EFAULT;
}
fmt = (char *)(long)fmt_addr + fmt_map_off;
@@ -11897,6 +11883,11 @@ static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param
return btf_param_match_suffix(btf, arg, "__irq_flag");
}
+static bool is_kfunc_arg_prog(const struct btf *btf, const struct btf_param *arg)
+{
+ return btf_param_match_suffix(btf, arg, "__prog");
+}
+
static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
const struct btf_param *arg,
const char *name)
@@ -11987,6 +11978,16 @@ static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_p
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
}
+static bool is_rbtree_node_type(const struct btf_type *t)
+{
+ return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]);
+}
+
+static bool is_list_node_type(const struct btf_type *t)
+{
+ return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]);
+}
+
static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
const struct btf_param *arg)
{
@@ -12069,6 +12070,8 @@ enum special_kfunc_type {
KF_bpf_list_push_back_impl,
KF_bpf_list_pop_front,
KF_bpf_list_pop_back,
+ KF_bpf_list_front,
+ KF_bpf_list_back,
KF_bpf_cast_to_kern_ctx,
KF_bpf_rdonly_cast,
KF_bpf_rcu_read_lock,
@@ -12076,6 +12079,9 @@ enum special_kfunc_type {
KF_bpf_rbtree_remove,
KF_bpf_rbtree_add_impl,
KF_bpf_rbtree_first,
+ KF_bpf_rbtree_root,
+ KF_bpf_rbtree_left,
+ KF_bpf_rbtree_right,
KF_bpf_dynptr_from_skb,
KF_bpf_dynptr_from_xdp,
KF_bpf_dynptr_slice,
@@ -12101,41 +12107,9 @@ enum special_kfunc_type {
KF_bpf_res_spin_unlock,
KF_bpf_res_spin_lock_irqsave,
KF_bpf_res_spin_unlock_irqrestore,
+ KF___bpf_trap,
};
-BTF_SET_START(special_kfunc_set)
-BTF_ID(func, bpf_obj_new_impl)
-BTF_ID(func, bpf_obj_drop_impl)
-BTF_ID(func, bpf_refcount_acquire_impl)
-BTF_ID(func, bpf_list_push_front_impl)
-BTF_ID(func, bpf_list_push_back_impl)
-BTF_ID(func, bpf_list_pop_front)
-BTF_ID(func, bpf_list_pop_back)
-BTF_ID(func, bpf_cast_to_kern_ctx)
-BTF_ID(func, bpf_rdonly_cast)
-BTF_ID(func, bpf_rbtree_remove)
-BTF_ID(func, bpf_rbtree_add_impl)
-BTF_ID(func, bpf_rbtree_first)
-#ifdef CONFIG_NET
-BTF_ID(func, bpf_dynptr_from_skb)
-BTF_ID(func, bpf_dynptr_from_xdp)
-#endif
-BTF_ID(func, bpf_dynptr_slice)
-BTF_ID(func, bpf_dynptr_slice_rdwr)
-BTF_ID(func, bpf_dynptr_clone)
-BTF_ID(func, bpf_percpu_obj_new_impl)
-BTF_ID(func, bpf_percpu_obj_drop_impl)
-BTF_ID(func, bpf_throw)
-BTF_ID(func, bpf_wq_set_callback_impl)
-#ifdef CONFIG_CGROUPS
-BTF_ID(func, bpf_iter_css_task_new)
-#endif
-#ifdef CONFIG_BPF_LSM
-BTF_ID(func, bpf_set_dentry_xattr)
-BTF_ID(func, bpf_remove_dentry_xattr)
-#endif
-BTF_SET_END(special_kfunc_set)
-
BTF_ID_LIST(special_kfunc_list)
BTF_ID(func, bpf_obj_new_impl)
BTF_ID(func, bpf_obj_drop_impl)
@@ -12144,6 +12118,8 @@ BTF_ID(func, bpf_list_push_front_impl)
BTF_ID(func, bpf_list_push_back_impl)
BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
+BTF_ID(func, bpf_list_front)
+BTF_ID(func, bpf_list_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rcu_read_lock)
@@ -12151,6 +12127,9 @@ BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add_impl)
BTF_ID(func, bpf_rbtree_first)
+BTF_ID(func, bpf_rbtree_root)
+BTF_ID(func, bpf_rbtree_left)
+BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -12194,6 +12173,7 @@ BTF_ID(func, bpf_res_spin_lock)
BTF_ID(func, bpf_res_spin_unlock)
BTF_ID(func, bpf_res_spin_lock_irqsave)
BTF_ID(func, bpf_res_spin_unlock_irqrestore)
+BTF_ID(func, __bpf_trap)
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -12579,14 +12559,19 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
- btf_id == special_kfunc_list[KF_bpf_list_pop_back];
+ btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
+ btf_id == special_kfunc_list[KF_bpf_list_front] ||
+ btf_id == special_kfunc_list[KF_bpf_list_back];
}
static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
{
return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
- btf_id == special_kfunc_list[KF_bpf_rbtree_first];
+ btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
+ btf_id == special_kfunc_list[KF_bpf_rbtree_right];
}
static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
@@ -12686,7 +12671,9 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
break;
case BPF_RB_NODE:
ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
- kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
+ kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
break;
default:
verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
@@ -12906,6 +12893,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (is_kfunc_arg_ignore(btf, &args[i]))
continue;
+ if (is_kfunc_arg_prog(btf, &args[i])) {
+ /* Used to reject repeated use of __prog. */
+ if (meta->arg_prog) {
+ verbose(env, "Only 1 prog->aux argument supported per-kfunc\n");
+ return -EFAULT;
+ }
+ meta->arg_prog = true;
+ cur_aux(env)->arg_prog = regno;
+ continue;
+ }
+
if (btf_type_is_scalar(t)) {
if (reg->type != SCALAR_VALUE) {
verbose(env, "R%d is not a scalar\n", regno);
@@ -13200,22 +13198,22 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return ret;
break;
case KF_ARG_PTR_TO_RB_NODE:
- if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
- if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
- verbose(env, "rbtree_remove node input must be non-owning ref\n");
+ if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+ if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ verbose(env, "arg#%d expected pointer to allocated object\n", i);
return -EINVAL;
}
- if (in_rbtree_lock_required_cb(env)) {
- verbose(env, "rbtree_remove not allowed in rbtree cb\n");
+ if (!reg->ref_obj_id) {
+ verbose(env, "allocated object must be referenced\n");
return -EINVAL;
}
} else {
- if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
- verbose(env, "arg#%d expected pointer to allocated object\n", i);
+ if (!type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) {
+ verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
return -EINVAL;
}
- if (!reg->ref_obj_id) {
- verbose(env, "allocated object must be referenced\n");
+ if (in_rbtree_lock_required_cb(env)) {
+ verbose(env, "%s not allowed in rbtree cb\n", func_name);
return -EINVAL;
}
}
@@ -13420,6 +13418,178 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
return 0;
}
+/* check special kfuncs and return:
+ * 1 - not fall-through to 'else' branch, continue verification
+ * 0 - fall-through to 'else' branch
+ * < 0 - not fall-through to 'else' branch, return error
+ */
+static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
+ struct bpf_reg_state *regs, struct bpf_insn_aux_data *insn_aux,
+ const struct btf_type *ptr_type, struct btf *desc_btf)
+{
+ const struct btf_type *ret_t;
+ int err = 0;
+
+ if (meta->btf != btf_vmlinux)
+ return 0;
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
+ meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ struct btf_struct_meta *struct_meta;
+ struct btf *ret_btf;
+ u32 ret_btf_id;
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
+ return -ENOMEM;
+
+ if (((u64)(u32)meta->arg_constant.value) != meta->arg_constant.value) {
+ verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
+ return -EINVAL;
+ }
+
+ ret_btf = env->prog->aux->btf;
+ ret_btf_id = meta->arg_constant.value;
+
+ /* This may be NULL due to user not supplying a BTF */
+ if (!ret_btf) {
+ verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
+ return -EINVAL;
+ }
+
+ ret_t = btf_type_by_id(ret_btf, ret_btf_id);
+ if (!ret_t || !__btf_type_is_struct(ret_t)) {
+ verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
+ return -EINVAL;
+ }
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
+ verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
+ ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
+ return -EINVAL;
+ }
+
+ if (!bpf_global_percpu_ma_set) {
+ mutex_lock(&bpf_percpu_ma_lock);
+ if (!bpf_global_percpu_ma_set) {
+ /* Charge memory allocated with bpf_global_percpu_ma to
+ * root memcg. The obj_cgroup for root memcg is NULL.
+ */
+ err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
+ if (!err)
+ bpf_global_percpu_ma_set = true;
+ }
+ mutex_unlock(&bpf_percpu_ma_lock);
+ if (err)
+ return err;
+ }
+
+ mutex_lock(&bpf_percpu_ma_lock);
+ err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
+ mutex_unlock(&bpf_percpu_ma_lock);
+ if (err)
+ return err;
+ }
+
+ struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
+ if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
+ verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
+ return -EINVAL;
+ }
+
+ if (struct_meta) {
+ verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
+ return -EINVAL;
+ }
+ }
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
+ regs[BPF_REG_0].btf = ret_btf;
+ regs[BPF_REG_0].btf_id = ret_btf_id;
+ if (meta->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
+ regs[BPF_REG_0].type |= MEM_PERCPU;
+
+ insn_aux->obj_new_size = ret_t->size;
+ insn_aux->kptr_struct_meta = struct_meta;
+ } else if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
+ regs[BPF_REG_0].btf = meta->arg_btf;
+ regs[BPF_REG_0].btf_id = meta->arg_btf_id;
+
+ insn_aux->kptr_struct_meta =
+ btf_find_struct_meta(meta->arg_btf,
+ meta->arg_btf_id);
+ } else if (is_list_node_type(ptr_type)) {
+ struct btf_field *field = meta->arg_list_head.field;
+
+ mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
+ } else if (is_rbtree_node_type(ptr_type)) {
+ struct btf_field *field = meta->arg_rbtree_root.field;
+
+ mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
+ } else if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
+ regs[BPF_REG_0].btf = desc_btf;
+ regs[BPF_REG_0].btf_id = meta->ret_btf_id;
+ } else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
+ ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
+ if (!ret_t || !btf_type_is_struct(ret_t)) {
+ verbose(env,
+ "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
+ return -EINVAL;
+ }
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
+ regs[BPF_REG_0].btf = desc_btf;
+ regs[BPF_REG_0].btf_id = meta->arg_constant.value;
+ } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
+ meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
+ enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->initialized_dynptr.type);
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+
+ if (!meta->arg_constant.found) {
+ verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
+ return -EFAULT;
+ }
+
+ regs[BPF_REG_0].mem_size = meta->arg_constant.value;
+
+ /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
+ regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
+ regs[BPF_REG_0].type |= MEM_RDONLY;
+ } else {
+ /* this will set env->seen_direct_write to true */
+ if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
+ verbose(env, "the prog does not allow writes to packet data\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!meta->initialized_dynptr.id) {
+ verbose(env, "verifier internal error: no dynptr id\n");
+ return -EFAULT;
+ }
+ regs[BPF_REG_0].dynptr_id = meta->initialized_dynptr.id;
+
+ /* we don't need to set BPF_REG_0's ref obj id
+ * because packet slices are not refcounted (see
+ * dynptr_type_refcounted)
+ */
+ } else {
+ return 0;
+ }
+
+ return 1;
+}
+
static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -13434,7 +13604,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
struct bpf_insn_aux_data *insn_aux;
int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
- const struct btf_type *ret_t;
struct btf *desc_btf;
/* skip for now, but return error when we find this in fixup_kfunc_call */
@@ -13476,6 +13645,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return err;
}
__mark_btf_func_reg_size(env, regs, BPF_REG_0, sizeof(u32));
+ } else if (!insn->off && insn->imm == special_kfunc_list[KF___bpf_trap]) {
+ verbose(env, "unexpected __bpf_trap() due to uninitialized variable?\n");
+ return -EFAULT;
}
if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
@@ -13654,165 +13826,10 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
mark_btf_func_reg_size(env, BPF_REG_0, t->size);
} else if (btf_type_is_ptr(t)) {
ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
-
- if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
- if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
- meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
- struct btf_struct_meta *struct_meta;
- struct btf *ret_btf;
- u32 ret_btf_id;
-
- if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
- return -ENOMEM;
-
- if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
- verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
- return -EINVAL;
- }
-
- ret_btf = env->prog->aux->btf;
- ret_btf_id = meta.arg_constant.value;
-
- /* This may be NULL due to user not supplying a BTF */
- if (!ret_btf) {
- verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
- return -EINVAL;
- }
-
- ret_t = btf_type_by_id(ret_btf, ret_btf_id);
- if (!ret_t || !__btf_type_is_struct(ret_t)) {
- verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
- return -EINVAL;
- }
-
- if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
- if (ret_t->size > BPF_GLOBAL_PERCPU_MA_MAX_SIZE) {
- verbose(env, "bpf_percpu_obj_new type size (%d) is greater than %d\n",
- ret_t->size, BPF_GLOBAL_PERCPU_MA_MAX_SIZE);
- return -EINVAL;
- }
-
- if (!bpf_global_percpu_ma_set) {
- mutex_lock(&bpf_percpu_ma_lock);
- if (!bpf_global_percpu_ma_set) {
- /* Charge memory allocated with bpf_global_percpu_ma to
- * root memcg. The obj_cgroup for root memcg is NULL.
- */
- err = bpf_mem_alloc_percpu_init(&bpf_global_percpu_ma, NULL);
- if (!err)
- bpf_global_percpu_ma_set = true;
- }
- mutex_unlock(&bpf_percpu_ma_lock);
- if (err)
- return err;
- }
-
- mutex_lock(&bpf_percpu_ma_lock);
- err = bpf_mem_alloc_percpu_unit_init(&bpf_global_percpu_ma, ret_t->size);
- mutex_unlock(&bpf_percpu_ma_lock);
- if (err)
- return err;
- }
-
- struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
- if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
- if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
- verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
- return -EINVAL;
- }
-
- if (struct_meta) {
- verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
- return -EINVAL;
- }
- }
-
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
- regs[BPF_REG_0].btf = ret_btf;
- regs[BPF_REG_0].btf_id = ret_btf_id;
- if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
- regs[BPF_REG_0].type |= MEM_PERCPU;
-
- insn_aux->obj_new_size = ret_t->size;
- insn_aux->kptr_struct_meta = struct_meta;
- } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
- regs[BPF_REG_0].btf = meta.arg_btf;
- regs[BPF_REG_0].btf_id = meta.arg_btf_id;
-
- insn_aux->kptr_struct_meta =
- btf_find_struct_meta(meta.arg_btf,
- meta.arg_btf_id);
- } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
- meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
- struct btf_field *field = meta.arg_list_head.field;
-
- mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
- } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
- meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
- struct btf_field *field = meta.arg_rbtree_root.field;
-
- mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
- } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
- regs[BPF_REG_0].btf = desc_btf;
- regs[BPF_REG_0].btf_id = meta.ret_btf_id;
- } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
- ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
- if (!ret_t || !btf_type_is_struct(ret_t)) {
- verbose(env,
- "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
- return -EINVAL;
- }
-
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
- regs[BPF_REG_0].btf = desc_btf;
- regs[BPF_REG_0].btf_id = meta.arg_constant.value;
- } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
- meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
- enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
-
- mark_reg_known_zero(env, regs, BPF_REG_0);
-
- if (!meta.arg_constant.found) {
- verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
- return -EFAULT;
- }
-
- regs[BPF_REG_0].mem_size = meta.arg_constant.value;
-
- /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
- regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
-
- if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
- regs[BPF_REG_0].type |= MEM_RDONLY;
- } else {
- /* this will set env->seen_direct_write to true */
- if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
- verbose(env, "the prog does not allow writes to packet data\n");
- return -EINVAL;
- }
- }
-
- if (!meta.initialized_dynptr.id) {
- verbose(env, "verifier internal error: no dynptr id\n");
- return -EFAULT;
- }
- regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
-
- /* we don't need to set BPF_REG_0's ref obj id
- * because packet slices are not refcounted (see
- * dynptr_type_refcounted)
- */
- } else {
- verbose(env, "kernel function %s unhandled dynamic return type\n",
- meta.func_name);
- return -EFAULT;
- }
+ err = check_special_kfunc(env, &meta, regs, insn_aux, ptr_type, desc_btf);
+ if (err) {
+ if (err < 0)
+ return err;
} else if (btf_type_is_void(ptr_type)) {
/* kfunc returning 'void *' is equivalent to returning scalar */
mark_reg_unknown(env, regs, BPF_REG_0);
@@ -13881,14 +13898,14 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (is_kfunc_ret_null(&meta))
regs[BPF_REG_0].id = id;
regs[BPF_REG_0].ref_obj_id = id;
- } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
+ } else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) {
ref_set_non_owning(env, &regs[BPF_REG_0]);
}
if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
regs[BPF_REG_0].id = ++env->id_gen;
} else if (btf_type_is_void(t)) {
- if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
+ if (meta.btf == btf_vmlinux) {
if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
insn_aux->kptr_struct_meta =
@@ -16377,6 +16394,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_reg_state *eq_branch_regs;
struct linked_regs linked_regs = {};
u8 opcode = BPF_OP(insn->code);
+ int insn_flags = 0;
bool is_jmp32;
int pred = -1;
int err;
@@ -16435,6 +16453,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
insn->src_reg);
return -EACCES;
}
+
+ if (src_reg->type == PTR_TO_STACK)
+ insn_flags |= INSN_F_SRC_REG_STACK;
+ if (dst_reg->type == PTR_TO_STACK)
+ insn_flags |= INSN_F_DST_REG_STACK;
} else {
if (insn->src_reg != BPF_REG_0) {
verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
@@ -16444,6 +16467,15 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
memset(src_reg, 0, sizeof(*src_reg));
src_reg->type = SCALAR_VALUE;
__mark_reg_known(src_reg, insn->imm);
+
+ if (dst_reg->type == PTR_TO_STACK)
+ insn_flags |= INSN_F_DST_REG_STACK;
+ }
+
+ if (insn_flags) {
+ err = push_insn_history(env, this_branch, insn_flags, 0);
+ if (err)
+ return err;
}
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
@@ -19659,10 +19691,9 @@ process_bpf_exit:
return err;
break;
} else {
- if (WARN_ON_ONCE(env->cur_state->loop_entry)) {
- verbose(env, "verifier bug: env->cur_state->loop_entry != NULL\n");
+ if (verifier_bug_if(env->cur_state->loop_entry, env,
+ "broken loop detection"))
return -EFAULT;
- }
do_print_state = true;
continue;
}
@@ -20720,10 +20751,9 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
if (bpf_pseudo_kfunc_call(&insn))
continue;
- if (WARN_ON(load_reg == -1)) {
- verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
+ if (verifier_bug_if(load_reg == -1, env,
+ "zext_dst is set, but no reg is defined"))
return -EFAULT;
- }
zext_patch[0] = insn;
zext_patch[1].dst_reg = load_reg;
@@ -21040,11 +21070,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* propagated in any case.
*/
subprog = find_subprog(env, i + insn->imm + 1);
- if (subprog < 0) {
- WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
- i + insn->imm + 1);
+ if (verifier_bug_if(subprog < 0, env, "No program to jit at insn %d",
+ i + insn->imm + 1))
return -EFAULT;
- }
/* temporarily remember subprog id inside insn instead of
* aux_data, since next loop will split up all insns into funcs
*/
@@ -21487,13 +21515,17 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
- } else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
- struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) };
+ }
- insn_buf[0] = ld_addrs[0];
- insn_buf[1] = ld_addrs[1];
- insn_buf[2] = *insn;
- *cnt = 3;
+ if (env->insn_aux_data[insn_idx].arg_prog) {
+ u32 regno = env->insn_aux_data[insn_idx].arg_prog;
+ struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(regno, (long)env->prog->aux) };
+ int idx = *cnt;
+
+ insn_buf[idx++] = ld_addrs[0];
+ insn_buf[idx++] = ld_addrs[1];
+ insn_buf[idx++] = *insn;
+ *cnt = idx;
}
return 0;
}
@@ -22403,7 +22435,7 @@ next_insn:
continue;
/* We need two slots in case timed may_goto is supported. */
if (stack_slots > slots) {
- verbose(env, "verifier bug: stack_slots supports may_goto only\n");
+ verifier_bug(env, "stack_slots supports may_goto only");
return -EFAULT;
}
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 793e288f63cf..2c41c78be61e 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -5791,21 +5791,8 @@ static int bpf_scx_btf_struct_access(struct bpf_verifier_log *log,
return -EACCES;
}
-static const struct bpf_func_proto *
-bpf_scx_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
-{
- switch (func_id) {
- case BPF_FUNC_task_storage_get:
- return &bpf_task_storage_get_proto;
- case BPF_FUNC_task_storage_delete:
- return &bpf_task_storage_delete_proto;
- default:
- return bpf_base_func_proto(func_id, prog);
- }
-}
-
static const struct bpf_verifier_ops bpf_scx_verifier_ops = {
- .get_func_proto = bpf_scx_get_func_proto,
+ .get_func_proto = bpf_base_func_proto,
.is_valid_access = bpf_scx_is_valid_access,
.btf_struct_access = bpf_scx_btf_struct_access,
};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 187dc37d61d4..132c8be6f635 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -572,7 +572,7 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
return value;
}
-static const struct bpf_func_proto bpf_perf_event_read_proto = {
+const struct bpf_func_proto bpf_perf_event_read_proto = {
.func = bpf_perf_event_read,
.gpl_only = true,
.ret_type = RET_INTEGER,
@@ -882,7 +882,7 @@ BPF_CALL_1(bpf_send_signal, u32, sig)
return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0);
}
-static const struct bpf_func_proto bpf_send_signal_proto = {
+const struct bpf_func_proto bpf_send_signal_proto = {
.func = bpf_send_signal,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -894,7 +894,7 @@ BPF_CALL_1(bpf_send_signal_thread, u32, sig)
return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0);
}
-static const struct bpf_func_proto bpf_send_signal_thread_proto = {
+const struct bpf_func_proto bpf_send_signal_thread_proto = {
.func = bpf_send_signal_thread,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1185,7 +1185,7 @@ BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
return entry_cnt * br_entry_size;
}
-static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
+const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
.func = bpf_get_branch_snapshot,
.gpl_only = true,
.ret_type = RET_INTEGER,
@@ -1430,56 +1430,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
const struct bpf_func_proto *func_proto;
switch (func_id) {
- case BPF_FUNC_map_lookup_elem:
- return &bpf_map_lookup_elem_proto;
- case BPF_FUNC_map_update_elem:
- return &bpf_map_update_elem_proto;
- case BPF_FUNC_map_delete_elem:
- return &bpf_map_delete_elem_proto;
- case BPF_FUNC_map_push_elem:
- return &bpf_map_push_elem_proto;
- case BPF_FUNC_map_pop_elem:
- return &bpf_map_pop_elem_proto;
- case BPF_FUNC_map_peek_elem:
- return &bpf_map_peek_elem_proto;
- case BPF_FUNC_map_lookup_percpu_elem:
- return &bpf_map_lookup_percpu_elem_proto;
- case BPF_FUNC_ktime_get_ns:
- return &bpf_ktime_get_ns_proto;
- case BPF_FUNC_ktime_get_boot_ns:
- return &bpf_ktime_get_boot_ns_proto;
- case BPF_FUNC_tail_call:
- return &bpf_tail_call_proto;
- case BPF_FUNC_get_current_task:
- return &bpf_get_current_task_proto;
- case BPF_FUNC_get_current_task_btf:
- return &bpf_get_current_task_btf_proto;
- case BPF_FUNC_task_pt_regs:
- return &bpf_task_pt_regs_proto;
- case BPF_FUNC_get_current_uid_gid:
- return &bpf_get_current_uid_gid_proto;
- case BPF_FUNC_get_current_comm:
- return &bpf_get_current_comm_proto;
- case BPF_FUNC_trace_printk:
- return bpf_get_trace_printk_proto();
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
- case BPF_FUNC_get_numa_node_id:
- return &bpf_get_numa_node_id_proto;
- case BPF_FUNC_perf_event_read:
- return &bpf_perf_event_read_proto;
- case BPF_FUNC_get_prandom_u32:
- return &bpf_get_prandom_u32_proto;
- case BPF_FUNC_probe_read_user:
- return &bpf_probe_read_user_proto;
- case BPF_FUNC_probe_read_kernel:
- return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
- NULL : &bpf_probe_read_kernel_proto;
- case BPF_FUNC_probe_read_user_str:
- return &bpf_probe_read_user_str_proto;
- case BPF_FUNC_probe_read_kernel_str:
- return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
- NULL : &bpf_probe_read_kernel_str_proto;
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
case BPF_FUNC_probe_read:
return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
@@ -1488,65 +1440,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_compat_str_proto;
#endif
-#ifdef CONFIG_CGROUPS
- case BPF_FUNC_cgrp_storage_get:
- return &bpf_cgrp_storage_get_proto;
- case BPF_FUNC_cgrp_storage_delete:
- return &bpf_cgrp_storage_delete_proto;
- case BPF_FUNC_current_task_under_cgroup:
- return &bpf_current_task_under_cgroup_proto;
-#endif
- case BPF_FUNC_send_signal:
- return &bpf_send_signal_proto;
- case BPF_FUNC_send_signal_thread:
- return &bpf_send_signal_thread_proto;
- case BPF_FUNC_perf_event_read_value:
- return &bpf_perf_event_read_value_proto;
- case BPF_FUNC_ringbuf_output:
- return &bpf_ringbuf_output_proto;
- case BPF_FUNC_ringbuf_reserve:
- return &bpf_ringbuf_reserve_proto;
- case BPF_FUNC_ringbuf_submit:
- return &bpf_ringbuf_submit_proto;
- case BPF_FUNC_ringbuf_discard:
- return &bpf_ringbuf_discard_proto;
- case BPF_FUNC_ringbuf_query:
- return &bpf_ringbuf_query_proto;
- case BPF_FUNC_jiffies64:
- return &bpf_jiffies64_proto;
- case BPF_FUNC_get_task_stack:
- return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
- : &bpf_get_task_stack_proto;
- case BPF_FUNC_copy_from_user:
- return &bpf_copy_from_user_proto;
- case BPF_FUNC_copy_from_user_task:
- return &bpf_copy_from_user_task_proto;
- case BPF_FUNC_snprintf_btf:
- return &bpf_snprintf_btf_proto;
- case BPF_FUNC_per_cpu_ptr:
- return &bpf_per_cpu_ptr_proto;
- case BPF_FUNC_this_cpu_ptr:
- return &bpf_this_cpu_ptr_proto;
- case BPF_FUNC_task_storage_get:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_get_recur_proto;
- return &bpf_task_storage_get_proto;
- case BPF_FUNC_task_storage_delete:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_delete_recur_proto;
- return &bpf_task_storage_delete_proto;
- case BPF_FUNC_for_each_map_elem:
- return &bpf_for_each_map_elem_proto;
- case BPF_FUNC_snprintf:
- return &bpf_snprintf_proto;
case BPF_FUNC_get_func_ip:
return &bpf_get_func_ip_proto_tracing;
- case BPF_FUNC_get_branch_snapshot:
- return &bpf_get_branch_snapshot_proto;
- case BPF_FUNC_find_vma:
- return &bpf_find_vma_proto;
- case BPF_FUNC_trace_vprintk:
- return bpf_get_trace_vprintk_proto();
default:
break;
}
@@ -1858,7 +1753,7 @@ static struct pt_regs *get_bpf_raw_tp_regs(void)
struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
- if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+ if (nest_level > ARRAY_SIZE(tp_regs->regs)) {
this_cpu_dec(bpf_raw_tp_nest_level);
return ERR_PTR(-EBUSY);
}
@@ -2987,6 +2882,9 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (sizeof(u64) != sizeof(void *))
return -EOPNOTSUPP;
+ if (attr->link_create.flags)
+ return -EINVAL;
+
if (!is_kprobe_multi(prog))
return -EINVAL;
@@ -3376,6 +3274,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (sizeof(u64) != sizeof(void *))
return -EOPNOTSUPP;
+ if (attr->link_create.flags)
+ return -EINVAL;
+
if (!is_uprobe_multi(prog))
return -EINVAL;
@@ -3417,7 +3318,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
}
if (pid) {
+ rcu_read_lock();
task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
+ rcu_read_unlock();
if (!task) {
err = -ESRCH;
goto error_path_put;
@@ -3565,6 +3468,146 @@ static int __init bpf_kprobe_multi_kfuncs_init(void)
late_initcall(bpf_kprobe_multi_kfuncs_init);
+typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk);
+
+/*
+ * The __always_inline is to make sure the compiler doesn't
+ * generate indirect calls into callbacks, which is expensive,
+ * on some kernel configurations. This allows compiler to put
+ * direct calls into all the specific callback implementations
+ * (copy_user_data_sleepable, copy_user_data_nofault, and so on)
+ */
+static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u32 doff, u32 size,
+ const void *unsafe_src,
+ copy_fn_t str_copy_fn,
+ struct task_struct *tsk)
+{
+ struct bpf_dynptr_kern *dst;
+ u32 chunk_sz, off;
+ void *dst_slice;
+ int cnt, err;
+ char buf[256];
+
+ dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
+ if (likely(dst_slice))
+ return str_copy_fn(dst_slice, unsafe_src, size, tsk);
+
+ dst = (struct bpf_dynptr_kern *)dptr;
+ if (bpf_dynptr_check_off_len(dst, doff, size))
+ return -E2BIG;
+
+ for (off = 0; off < size; off += chunk_sz - 1) {
+ chunk_sz = min_t(u32, sizeof(buf), size - off);
+ /* Expect str_copy_fn to return count of copied bytes, including
+ * zero terminator. Next iteration increment off by chunk_sz - 1 to
+ * overwrite NUL.
+ */
+ cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
+ if (cnt < 0)
+ return cnt;
+ err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0);
+ if (err)
+ return err;
+ if (cnt < chunk_sz || chunk_sz == 1) /* we are done */
+ return off + cnt;
+ }
+ return off;
+}
+
+static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u32 doff,
+ u32 size, const void *unsafe_src,
+ copy_fn_t copy_fn, struct task_struct *tsk)
+{
+ struct bpf_dynptr_kern *dst;
+ void *dst_slice;
+ char buf[256];
+ u32 off, chunk_sz;
+ int err;
+
+ dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
+ if (likely(dst_slice))
+ return copy_fn(dst_slice, unsafe_src, size, tsk);
+
+ dst = (struct bpf_dynptr_kern *)dptr;
+ if (bpf_dynptr_check_off_len(dst, doff, size))
+ return -E2BIG;
+
+ for (off = 0; off < size; off += chunk_sz) {
+ chunk_sz = min_t(u32, sizeof(buf), size - off);
+ err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
+ if (err)
+ return err;
+ err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
+}
+
+static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ int ret;
+
+ if (!tsk) { /* Read from the current task */
+ ret = copy_from_user(dst, (const void __user *)unsafe_src, size);
+ if (ret)
+ return -EFAULT;
+ return 0;
+ }
+
+ ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0);
+ if (ret != size)
+ return -EFAULT;
+ return 0;
+}
+
+static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return copy_from_kernel_nofault(dst, unsafe_src, size);
+}
+
+static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
+}
+
+static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ int ret;
+
+ if (unlikely(size == 0))
+ return 0;
+
+ if (tsk) {
+ ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0);
+ } else {
+ ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1);
+ /* strncpy_from_user does not guarantee NUL termination */
+ if (ret >= 0)
+ ((char *)dst)[ret] = '\0';
+ }
+
+ if (ret < 0)
+ return ret;
+ return ret + 1;
+}
+
+static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return strncpy_from_kernel_nofault(dst, unsafe_src, size);
+}
+
__bpf_kfunc_start_defs();
__bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
@@ -3576,4 +3619,62 @@ __bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid
return bpf_send_signal_common(sig, type, task, value);
}
+__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign,
+ copy_kernel_data_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign,
+ copy_kernel_str_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_sleepable, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_sleepable, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign,
+ struct task_struct *tsk)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_sleepable, tsk);
+}
+
+__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void __user *unsafe_ptr__ign,
+ struct task_struct *tsk)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_sleepable, tsk);
+}
+
__bpf_kfunc_end_defs();
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 35cf76c75dd7..f95a2c3d5b1b 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1489,7 +1489,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
: BPF_FD_TYPE_UPROBE;
*filename = tu->filename;
*probe_offset = tu->offset;
- *probe_addr = 0;
+ *probe_addr = tu->ref_ctr_offset;
return 0;
}
#endif /* CONFIG_PERF_EVENTS */
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 7cb192cbd65f..aaf13a7d58ed 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -569,6 +569,11 @@ __bpf_kfunc u32 bpf_fentry_test9(u32 *a)
return *a;
}
+int noinline bpf_fentry_test10(const void *a)
+{
+ return (long)a;
+}
+
void noinline bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
{
}
@@ -699,7 +704,8 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 ||
bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 ||
bpf_fentry_test8(&arg) != 0 ||
- bpf_fentry_test9(&retval) != 0)
+ bpf_fentry_test9(&retval) != 0 ||
+ bpf_fentry_test10((void *)0) != 0)
goto out;
break;
case BPF_MODIFY_RETURN:
diff --git a/net/core/filter.c b/net/core/filter.c
index 577a4504e26f..ab456bf1056e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8023,10 +8023,6 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
if (func_proto)
return func_proto;
- func_proto = cgroup_current_func_proto(func_id, prog);
- if (func_proto)
- return func_proto;
-
switch (func_id) {
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_sock_proto;
@@ -8052,10 +8048,6 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
if (func_proto)
return func_proto;
- func_proto = cgroup_current_func_proto(func_id, prog);
- if (func_proto)
- return func_proto;
-
switch (func_id) {
case BPF_FUNC_bind:
switch (prog->expected_attach_type) {
@@ -8489,18 +8481,12 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_msg_pop_data_proto;
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
- case BPF_FUNC_get_current_uid_gid:
- return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
case BPF_FUNC_get_netns_cookie:
return &bpf_get_netns_cookie_sk_msg_proto;
-#ifdef CONFIG_CGROUP_NET_CLASSID
- case BPF_FUNC_get_cgroup_classid:
- return &bpf_get_cgroup_classid_curr_proto;
-#endif
default:
return bpf_sk_base_func_proto(func_id, prog);
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 0ddc4c718833..34c51eb1a14f 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -530,16 +530,22 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
u32 off, u32 len,
struct sk_psock *psock,
struct sock *sk,
- struct sk_msg *msg)
+ struct sk_msg *msg,
+ bool take_ref)
{
int num_sge, copied;
+ /* skb_to_sgvec will fail when the total number of fragments in
+ * frag_list and frags exceeds MAX_MSG_FRAGS. For example, the
+ * caller may aggregate multiple skbs.
+ */
num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
if (num_sge < 0) {
/* skb linearize may fail with ENOMEM, but lets simply try again
* later if this happens. Under memory pressure we don't want to
* drop the skb. We need to linearize the skb so that the mapping
* in skb_to_sgvec can not error.
+ * Note that skb_linearize requires the skb not to be shared.
*/
if (skb_linearize(skb))
return -EAGAIN;
@@ -556,7 +562,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
msg->sg.start = 0;
msg->sg.size = copied;
msg->sg.end = num_sge;
- msg->skb = skb;
+ msg->skb = take_ref ? skb_get(skb) : skb;
sk_psock_queue_msg(psock, msg);
sk_psock_data_ready(sk, psock);
@@ -564,7 +570,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
}
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
- u32 off, u32 len);
+ u32 off, u32 len, bool take_ref);
static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
@@ -578,7 +584,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* correctly.
*/
if (unlikely(skb->sk == sk))
- return sk_psock_skb_ingress_self(psock, skb, off, len);
+ return sk_psock_skb_ingress_self(psock, skb, off, len, true);
msg = sk_psock_create_ingress_msg(sk, skb);
if (!msg)
return -EAGAIN;
@@ -590,7 +596,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* into user buffers.
*/
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, true);
if (err < 0)
kfree(msg);
return err;
@@ -601,7 +607,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* because the skb is already accounted for here.
*/
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
- u32 off, u32 len)
+ u32 off, u32 len, bool take_ref)
{
struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC);
struct sock *sk = psock->sk;
@@ -610,7 +616,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
if (unlikely(!msg))
return -EAGAIN;
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
if (err < 0)
kfree(msg);
return err;
@@ -619,18 +625,13 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
- int err = 0;
-
if (!ingress) {
if (!sock_writeable(psock->sk))
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- skb_get(skb);
- err = sk_psock_skb_ingress(psock, skb, off, len);
- if (err < 0)
- kfree_skb(skb);
- return err;
+
+ return sk_psock_skb_ingress(psock, skb, off, len);
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -655,12 +656,14 @@ static void sk_psock_backlog(struct work_struct *work)
bool ingress;
int ret;
+ /* Increment the psock refcnt to synchronize with close(fd) path in
+ * sock_map_close(), ensuring we wait for backlog thread completion
+ * before sk_socket freed. If refcnt increment fails, it indicates
+ * sock_map_close() completed with sk_socket potentially already freed.
+ */
+ if (!sk_psock_get(psock->sk))
+ return;
mutex_lock(&psock->work_mutex);
- if (unlikely(state->len)) {
- len = state->len;
- off = state->off;
- }
-
while ((skb = skb_peek(&psock->ingress_skb))) {
len = skb->len;
off = 0;
@@ -670,6 +673,13 @@ static void sk_psock_backlog(struct work_struct *work)
off = stm->offset;
len = stm->full_len;
}
+
+ /* Resume processing from previous partial state */
+ if (unlikely(state->len)) {
+ len = state->len;
+ off = state->off;
+ }
+
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
@@ -680,7 +690,8 @@ static void sk_psock_backlog(struct work_struct *work)
if (ret <= 0) {
if (ret == -EAGAIN) {
sk_psock_skb_state(psock, state, len, off);
-
+ /* Restore redir info we cleared before */
+ skb_bpf_set_redir(skb, psock->sk, ingress);
/* Delay slightly to prioritize any
* other work that might be here.
*/
@@ -697,11 +708,14 @@ static void sk_psock_backlog(struct work_struct *work)
len -= ret;
} while (len);
+ /* The entire skb sent, clear state */
+ sk_psock_skb_state(psock, state, 0, 0);
skb = skb_dequeue(&psock->ingress_skb);
kfree_skb(skb);
}
end:
mutex_unlock(&psock->work_mutex);
+ sk_psock_put(psock->sk, psock);
}
struct sk_psock *sk_psock_init(struct sock *sk, int node)
@@ -1014,7 +1028,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
off = stm->offset;
len = stm->full_len;
}
- err = sk_psock_skb_ingress_self(psock, skb, off, len);
+ err = sk_psock_skb_ingress_self(psock, skb, off, len, false);
}
if (err < 0) {
spin_lock_bh(&psock->ingress_lock);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 914d4e1516a3..fc88e34b7f33 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -908,6 +908,13 @@ more_data:
&msg_redir, send, flags);
lock_sock(sk);
if (err < 0) {
+ /* Regardless of whether the data represented by
+ * msg_redir is sent successfully, we have already
+ * uncharged it via sk_msg_return_zero(). The
+ * msg->sg.size represents the remaining unprocessed
+ * data, which needs to be uncharged here.
+ */
+ sk_mem_uncharge(sk, msg->sg.size);
*copied -= sk_msg_free_nocharge(sk, &msg_redir);
msg->sg.size = 0;
}
@@ -1120,9 +1127,13 @@ alloc_encrypted:
num_async++;
else if (ret == -ENOMEM)
goto wait_for_memory;
- else if (ctx->open_rec && ret == -ENOSPC)
+ else if (ctx->open_rec && ret == -ENOSPC) {
+ if (msg_pl->cork_bytes) {
+ ret = 0;
+ goto send_end;
+ }
goto rollback_iter;
- else if (ret != -EAGAIN)
+ } else if (ret != -EAGAIN)
goto send_end;
}
continue;
diff --git a/scripts/Makefile.btf b/scripts/Makefile.btf
index fbaaec2187e5..db76335dd917 100644
--- a/scripts/Makefile.btf
+++ b/scripts/Makefile.btf
@@ -23,6 +23,8 @@ else
# Switch to using --btf_features for v1.26 and later.
pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j$(JOBS) --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs
+pahole-flags-$(call test-ge, $(pahole-ver), 130) += --btf_features=attributes
+
ifneq ($(KBUILD_EXTMOD),)
module-pahole-flags-$(call test-ge, $(pahole-ver), 128) += --btf_features=distilled_base
endif
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index e74a01a85070..c77dc40f7689 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -8,6 +8,7 @@
from __future__ import print_function
import argparse
+import json
import re
import sys, os
import subprocess
@@ -37,11 +38,17 @@ class APIElement(object):
@desc: textual description of the symbol
@ret: (optional) description of any associated return value
"""
- def __init__(self, proto='', desc='', ret='', attrs=[]):
+ def __init__(self, proto='', desc='', ret=''):
self.proto = proto
self.desc = desc
self.ret = ret
- self.attrs = attrs
+
+ def to_dict(self):
+ return {
+ 'proto': self.proto,
+ 'desc': self.desc,
+ 'ret': self.ret
+ }
class Helper(APIElement):
@@ -51,8 +58,9 @@ class Helper(APIElement):
@desc: textual description of the helper function
@ret: description of the return value of the helper function
"""
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
+ def __init__(self, proto='', desc='', ret='', attrs=[]):
+ super().__init__(proto, desc, ret)
+ self.attrs = attrs
self.enum_val = None
def proto_break_down(self):
@@ -81,6 +89,12 @@ class Helper(APIElement):
return res
+ def to_dict(self):
+ d = super().to_dict()
+ d["attrs"] = self.attrs
+ d.update(self.proto_break_down())
+ return d
+
ATTRS = {
'__bpf_fastcall': 'bpf_fastcall'
@@ -675,7 +689,7 @@ COMMANDS
self.print_elem(command)
-class PrinterHelpers(Printer):
+class PrinterHelpersHeader(Printer):
"""
A printer for dumping collected information about helpers as C header to
be included from BPF program.
@@ -896,6 +910,43 @@ class PrinterHelpers(Printer):
print(') = (void *) %d;' % helper.enum_val)
print('')
+
+class PrinterHelpersJSON(Printer):
+ """
+ A printer for dumping collected information about helpers as a JSON file.
+ @parser: A HeaderParser with Helper objects
+ """
+
+ def __init__(self, parser):
+ self.elements = parser.helpers
+ self.elem_number_check(
+ parser.desc_unique_helpers,
+ parser.define_unique_helpers,
+ "helper",
+ "___BPF_FUNC_MAPPER",
+ )
+
+ def print_all(self):
+ helper_dicts = [helper.to_dict() for helper in self.elements]
+ out_dict = {'helpers': helper_dicts}
+ print(json.dumps(out_dict, indent=4))
+
+
+class PrinterSyscallJSON(Printer):
+ """
+ A printer for dumping collected syscall information as a JSON file.
+ @parser: A HeaderParser with APIElement objects
+ """
+
+ def __init__(self, parser):
+ self.elements = parser.commands
+ self.elem_number_check(parser.desc_syscalls, parser.enum_syscalls, 'syscall', 'bpf_cmd')
+
+ def print_all(self):
+ syscall_dicts = [syscall.to_dict() for syscall in self.elements]
+ out_dict = {'syscall': syscall_dicts}
+ print(json.dumps(out_dict, indent=4))
+
###############################################################################
# If script is launched from scripts/ from kernel tree and can access
@@ -905,9 +956,17 @@ script = os.path.abspath(sys.argv[0])
linuxRoot = os.path.dirname(os.path.dirname(script))
bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h')
+# target -> output format -> printer
printers = {
- 'helpers': PrinterHelpersRST,
- 'syscall': PrinterSyscallRST,
+ 'helpers': {
+ 'rst': PrinterHelpersRST,
+ 'json': PrinterHelpersJSON,
+ 'header': PrinterHelpersHeader,
+ },
+ 'syscall': {
+ 'rst': PrinterSyscallRST,
+ 'json': PrinterSyscallJSON
+ },
}
argParser = argparse.ArgumentParser(description="""
@@ -917,6 +976,8 @@ rst2man utility.
""")
argParser.add_argument('--header', action='store_true',
help='generate C header file')
+argParser.add_argument('--json', action='store_true',
+ help='generate a JSON')
if (os.path.isfile(bpfh)):
argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h',
default=bpfh)
@@ -924,17 +985,35 @@ else:
argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h')
argParser.add_argument('target', nargs='?', default='helpers',
choices=printers.keys(), help='eBPF API target')
-args = argParser.parse_args()
-
-# Parse file.
-headerParser = HeaderParser(args.filename)
-headerParser.run()
-# Print formatted output to standard output.
-if args.header:
- if args.target != 'helpers':
- raise NotImplementedError('Only helpers header generation is supported')
- printer = PrinterHelpers(headerParser)
-else:
- printer = printers[args.target](headerParser)
-printer.print_all()
+def error_die(message: str):
+ argParser.print_usage(file=sys.stderr)
+ print('Error: {}'.format(message), file=sys.stderr)
+ exit(1)
+
+def parse_and_dump():
+ args = argParser.parse_args()
+
+ # Parse file.
+ headerParser = HeaderParser(args.filename)
+ headerParser.run()
+
+ if args.header and args.json:
+ error_die('Use either --header or --json, not both')
+
+ output_format = 'rst'
+ if args.header:
+ output_format = 'header'
+ elif args.json:
+ output_format = 'json'
+
+ try:
+ printer = printers[args.target][output_format](headerParser)
+ # Print formatted output to standard output.
+ printer.print_all()
+ except KeyError:
+ error_die('Unsupported target/format combination: "{}", "{}"'
+ .format(args.target, output_format))
+
+if __name__ == "__main__":
+ parse_and_dump()
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index d6304e01afe0..da3152c16228 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -31,7 +31,7 @@ PROG COMMANDS
| **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }]
| **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }]
| **bpftool** **prog pin** *PROG* *FILE*
-| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
+| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] [**kernel_btf** *BTF_FILE*]
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog tracelog**
@@ -127,7 +127,7 @@ bpftool prog pin *PROG* *FILE*
Note: *FILE* must be located in *bpffs* mount. It must not contain a dot
character ('.'), which is reserved for future extensions of *bpffs*.
-bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | name *NAME* } *MAP*] [{ offload_dev | xdpmeta_dev } *NAME*] [pinmaps *MAP_DIR*] [autoattach]
+bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | name *NAME* } *MAP*] [{ offload_dev | xdpmeta_dev } *NAME*] [pinmaps *MAP_DIR*] [autoattach] [kernel_btf *BTF_FILE*]
Load bpf program(s) from binary *OBJ* and pin as *PATH*. **bpftool prog
load** pins only the first program from the *OBJ* as *PATH*. **bpftool prog
loadall** pins all programs from the *OBJ* under *PATH* directory. **type**
@@ -153,6 +153,12 @@ bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | na
program does not support autoattach, bpftool falls back to regular pinning
for that program instead.
+ The **kernel_btf** option allows specifying an external BTF file to replace
+ the system's own vmlinux BTF file for CO-RE relocations. Note that any
+ other feature relying on BTF (such as fentry/fexit programs, struct_ops)
+ requires the BTF file for the actual kernel running on the host, often
+ exposed at /sys/kernel/btf/vmlinux.
+
Note: *PATH* must be located in *bpffs* mount. It must not contain a dot
character ('.'), which is reserved for future extensions of *bpffs*.
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 1ce409a6cbd9..27512feb5c70 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -505,13 +505,13 @@ _bpftool()
_bpftool_get_map_names
return 0
;;
- pinned|pinmaps)
+ pinned|pinmaps|kernel_btf)
_filedir
return 0
;;
*)
COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
- _bpftool_once_attr 'type pinmaps autoattach'
+ _bpftool_once_attr 'type pinmaps autoattach kernel_btf'
_bpftool_one_of_list 'offload_dev xdpmeta_dev'
return 0
;;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 93b139bfb988..944ebe21a216 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -221,7 +221,7 @@ static int cgroup_has_attached_progs(int cgroup_fd)
for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) {
int count = count_attached_bpf_progs(cgroup_fd, cgroup_attach_types[i]);
- if (count < 0)
+ if (count < 0 && errno != EINVAL)
return -1;
if (count > 0) {
@@ -318,11 +318,11 @@ static int show_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
static int do_show(int argc, char **argv)
{
- enum bpf_attach_type type;
int has_attached_progs;
const char *path;
int cgroup_fd;
int ret = -1;
+ unsigned int i;
query_flags = 0;
@@ -370,14 +370,14 @@ static int do_show(int argc, char **argv)
"AttachFlags", "Name");
btf_vmlinux = libbpf_find_kernel_btf();
- for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) {
/*
* Not all attach types may be supported, so it's expected,
* that some requests will fail.
* If we were able to get the show for at least one
* attach type, let's return 0.
*/
- if (show_bpf_progs(cgroup_fd, type, 0) == 0)
+ if (show_bpf_progs(cgroup_fd, cgroup_attach_types[i], 0) == 0)
ret = 0;
}
@@ -400,9 +400,9 @@ exit:
static int do_show_tree_fn(const char *fpath, const struct stat *sb,
int typeflag, struct FTW *ftw)
{
- enum bpf_attach_type type;
int has_attached_progs;
int cgroup_fd;
+ unsigned int i;
if (typeflag != FTW_D)
return 0;
@@ -434,8 +434,8 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
}
btf_vmlinux = libbpf_find_kernel_btf();
- for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
- show_bpf_progs(cgroup_fd, type, ftw->level);
+ for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++)
+ show_bpf_progs(cgroup_fd, cgroup_attach_types[i], ftw->level);
if (errno == EINVAL)
/* Last attach type does not support query.
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 52fd2c9fac56..3535afc80a49 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -380,6 +380,7 @@ show_perf_event_uprobe_json(struct bpf_link_info *info, json_writer_t *wtr)
u64_to_ptr(info->perf_event.uprobe.file_name));
jsonw_uint_field(wtr, "offset", info->perf_event.uprobe.offset);
jsonw_uint_field(wtr, "cookie", info->perf_event.uprobe.cookie);
+ jsonw_uint_field(wtr, "ref_ctr_offset", info->perf_event.uprobe.ref_ctr_offset);
}
static void
@@ -823,6 +824,8 @@ static void show_perf_event_uprobe_plain(struct bpf_link_info *info)
printf("%s+%#x ", buf, info->perf_event.uprobe.offset);
if (info->perf_event.uprobe.cookie)
printf("cookie %llu ", info->perf_event.uprobe.cookie);
+ if (info->perf_event.uprobe.ref_ctr_offset)
+ printf("ref_ctr_offset 0x%llx ", info->perf_event.uprobe.ref_ctr_offset);
}
static void show_perf_event_tracepoint_plain(struct bpf_link_info *info)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f010295350be..96eea8a67225 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1681,8 +1681,17 @@ offload_dev:
} else if (is_prefix(*argv, "autoattach")) {
auto_attach = true;
NEXT_ARG();
+ } else if (is_prefix(*argv, "kernel_btf")) {
+ NEXT_ARG();
+
+ if (!REQ_ARGS(1))
+ goto err_free_reuse_maps;
+
+ open_opts.btf_custom_path = GET_ARG();
} else {
- p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?",
+ p_err("expected no more arguments, "
+ "'type', 'map', 'offload_dev', 'xdpmeta_dev', 'pinmaps', "
+ "'autoattach', or 'kernel_btf', got: '%s'?",
*argv);
goto err_free_reuse_maps;
}
@@ -2474,6 +2483,7 @@ static int do_help(int argc, char **argv)
" [map { idx IDX | name NAME } MAP]\\\n"
" [pinmaps MAP_DIR]\n"
" [autoattach]\n"
+ " [kernel_btf BTF_FILE]\n"
" %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n"
" %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n"
" %1$s %2$s run PROG \\\n"
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index fd404729b115..85180e4aaa5a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1506,7 +1506,7 @@ union bpf_attr {
__s32 map_token_fd;
};
- struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
__u32 map_fd;
__aligned_u64 key;
union {
@@ -1995,11 +1995,15 @@ union bpf_attr {
* long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
* Store *len* bytes from address *from* into the packet
- * associated to *skb*, at *offset*. *flags* are a combination of
- * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
- * checksum for the packet after storing the bytes) and
- * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
- * **->swhash** and *skb*\ **->l4hash** to 0).
+ * associated to *skb*, at *offset*. The *flags* are a combination
+ * of the following values:
+ *
+ * **BPF_F_RECOMPUTE_CSUM**
+ * Automatically update *skb*\ **->csum** after storing the
+ * bytes.
+ * **BPF_F_INVALIDATE_HASH**
+ * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\
+ * **->l4hash** to 0.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -2051,7 +2055,7 @@ union bpf_attr {
* untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
* for updates resulting in a null checksum the value is set to
* **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
- * the checksum is to be computed against a pseudo-header.
+ * that the modified header field is part of the pseudo-header.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
@@ -6723,6 +6727,7 @@ struct bpf_link_info {
__u32 name_len;
__u32 offset; /* offset from file_name */
__u64 cookie;
+ __u64 ref_ctr_offset;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index c0e13cdf9660..b997c68bd945 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -388,7 +388,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;
#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j
#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__)
+#if defined(__clang__) && (__clang_major__ >= 19)
+#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__))
+#elif defined(__GNUC__) && (__GNUC__ >= 14)
+#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__))
+#else
#define ___type(...) typeof(___arrow(__VA_ARGS__))
+#endif
#define ___read(read_fn, dst, src_type, src, accessor) \
read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor)
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 686824b8b413..a50773d4616e 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -15,6 +15,14 @@
#define __array(name, val) typeof(val) *name[]
#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name
+#ifndef likely
+#define likely(x) (__builtin_expect(!!(x), 1))
+#endif
+
+#ifndef unlikely
+#define unlikely(x) (__builtin_expect(!!(x), 0))
+#endif
+
/*
* Helper macro to place programs, maps, license in
* different sections in elf_bpf file. Section names
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 38bc6b14b066..f1d495dc66bb 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -12,6 +12,7 @@
#include <sys/utsname.h>
#include <sys/param.h>
#include <sys/stat.h>
+#include <sys/mman.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/btf.h>
@@ -120,6 +121,9 @@ struct btf {
/* whether base_btf should be freed in btf_free for this instance */
bool owns_base;
+ /* whether raw_data is a (read-only) mmap */
+ bool raw_data_is_mmap;
+
/* BTF object FD, if loaded into kernel */
int fd;
@@ -951,6 +955,17 @@ static bool btf_is_modifiable(const struct btf *btf)
return (void *)btf->hdr != btf->raw_data;
}
+static void btf_free_raw_data(struct btf *btf)
+{
+ if (btf->raw_data_is_mmap) {
+ munmap(btf->raw_data, btf->raw_size);
+ btf->raw_data_is_mmap = false;
+ } else {
+ free(btf->raw_data);
+ }
+ btf->raw_data = NULL;
+}
+
void btf__free(struct btf *btf)
{
if (IS_ERR_OR_NULL(btf))
@@ -970,7 +985,7 @@ void btf__free(struct btf *btf)
free(btf->types_data);
strset__free(btf->strs_set);
}
- free(btf->raw_data);
+ btf_free_raw_data(btf);
free(btf->raw_data_swapped);
free(btf->type_offs);
if (btf->owns_base)
@@ -996,7 +1011,7 @@ static struct btf *btf_new_empty(struct btf *base_btf)
if (base_btf) {
btf->base_btf = base_btf;
btf->start_id = btf__type_cnt(base_btf);
- btf->start_str_off = base_btf->hdr->str_len;
+ btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
btf->swapped_endian = base_btf->swapped_endian;
}
@@ -1030,7 +1045,7 @@ struct btf *btf__new_empty_split(struct btf *base_btf)
return libbpf_ptr(btf_new_empty(base_btf));
}
-static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
+static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap)
{
struct btf *btf;
int err;
@@ -1050,12 +1065,18 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
btf->start_str_off = base_btf->hdr->str_len;
}
- btf->raw_data = malloc(size);
- if (!btf->raw_data) {
- err = -ENOMEM;
- goto done;
+ if (is_mmap) {
+ btf->raw_data = (void *)data;
+ btf->raw_data_is_mmap = true;
+ } else {
+ btf->raw_data = malloc(size);
+ if (!btf->raw_data) {
+ err = -ENOMEM;
+ goto done;
+ }
+ memcpy(btf->raw_data, data, size);
}
- memcpy(btf->raw_data, data, size);
+
btf->raw_size = size;
btf->hdr = btf->raw_data;
@@ -1083,12 +1104,12 @@ done:
struct btf *btf__new(const void *data, __u32 size)
{
- return libbpf_ptr(btf_new(data, size, NULL));
+ return libbpf_ptr(btf_new(data, size, NULL, false));
}
struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
{
- return libbpf_ptr(btf_new(data, size, base_btf));
+ return libbpf_ptr(btf_new(data, size, base_btf, false));
}
struct btf_elf_secs {
@@ -1148,6 +1169,12 @@ static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs
else
continue;
+ if (sh.sh_type != SHT_PROGBITS) {
+ pr_warn("unexpected section type (%d) of section(%d, %s) from %s\n",
+ sh.sh_type, idx, name, path);
+ goto err;
+ }
+
data = elf_getdata(scn, 0);
if (!data) {
pr_warn("failed to get section(%d, %s) data from %s\n",
@@ -1203,7 +1230,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
if (secs.btf_base_data) {
dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size,
- NULL);
+ NULL, false);
if (IS_ERR(dist_base_btf)) {
err = PTR_ERR(dist_base_btf);
dist_base_btf = NULL;
@@ -1212,7 +1239,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
}
btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size,
- dist_base_btf ?: base_btf);
+ dist_base_btf ?: base_btf, false);
if (IS_ERR(btf)) {
err = PTR_ERR(btf);
goto done;
@@ -1329,7 +1356,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
}
/* finally parse BTF data */
- btf = btf_new(data, sz, base_btf);
+ btf = btf_new(data, sz, base_btf, false);
err_out:
free(data);
@@ -1348,6 +1375,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
return libbpf_ptr(btf_parse_raw(path, base_btf));
}
+static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf)
+{
+ struct stat st;
+ void *data;
+ struct btf *btf;
+ int fd, err;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return libbpf_err_ptr(-errno);
+
+ if (fstat(fd, &st) < 0) {
+ err = -errno;
+ close(fd);
+ return libbpf_err_ptr(err);
+ }
+
+ data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ err = -errno;
+ close(fd);
+
+ if (data == MAP_FAILED)
+ return libbpf_err_ptr(err);
+
+ btf = btf_new(data, st.st_size, base_btf, true);
+ if (IS_ERR(btf))
+ munmap(data, st.st_size);
+
+ return btf;
+}
+
static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
{
struct btf *btf;
@@ -1612,7 +1670,7 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
goto exit_free;
}
- btf = btf_new(ptr, btf_info.btf_size, base_btf);
+ btf = btf_new(ptr, btf_info.btf_size, base_btf, false);
exit_free:
free(ptr);
@@ -1652,10 +1710,8 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
static void btf_invalidate_raw_data(struct btf *btf)
{
- if (btf->raw_data) {
- free(btf->raw_data);
- btf->raw_data = NULL;
- }
+ if (btf->raw_data)
+ btf_free_raw_data(btf);
if (btf->raw_data_swapped) {
free(btf->raw_data_swapped);
btf->raw_data_swapped = NULL;
@@ -4350,46 +4406,109 @@ static inline __u16 btf_fwd_kind(struct btf_type *t)
return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
}
-/* Check if given two types are identical ARRAY definitions */
-static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
+static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2, int depth)
{
struct btf_type *t1, *t2;
+ int k1, k2;
+recur:
+ if (depth <= 0)
+ return false;
t1 = btf_type_by_id(d->btf, id1);
t2 = btf_type_by_id(d->btf, id2);
- if (!btf_is_array(t1) || !btf_is_array(t2))
+
+ k1 = btf_kind(t1);
+ k2 = btf_kind(t2);
+ if (k1 != k2)
return false;
- return btf_equal_array(t1, t2);
-}
+ switch (k1) {
+ case BTF_KIND_UNKN: /* VOID */
+ return true;
+ case BTF_KIND_INT:
+ return btf_equal_int_tag(t1, t2);
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ return btf_compat_enum(t1, t2);
+ case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
+ return btf_equal_common(t1, t2);
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_TYPE_TAG:
+ if (t1->info != t2->info || t1->name_off != t2->name_off)
+ return false;
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_ARRAY: {
+ struct btf_array *a1, *a2;
-/* Check if given two types are identical STRUCT/UNION definitions */
-static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
-{
- const struct btf_member *m1, *m2;
- struct btf_type *t1, *t2;
- int n, i;
+ if (!btf_compat_array(t1, t2))
+ return false;
- t1 = btf_type_by_id(d->btf, id1);
- t2 = btf_type_by_id(d->btf, id2);
+ a1 = btf_array(t1);
+ a2 = btf_array(t1);
- if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
- return false;
+ if (a1->index_type != a2->index_type &&
+ !btf_dedup_identical_types(d, a1->index_type, a2->index_type, depth - 1))
+ return false;
- if (!btf_shallow_equal_struct(t1, t2))
- return false;
+ if (a1->type != a2->type &&
+ !btf_dedup_identical_types(d, a1->type, a2->type, depth - 1))
+ return false;
- m1 = btf_members(t1);
- m2 = btf_members(t2);
- for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
- if (m1->type != m2->type &&
- !btf_dedup_identical_arrays(d, m1->type, m2->type) &&
- !btf_dedup_identical_structs(d, m1->type, m2->type))
+ return true;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m1, *m2;
+ int i, n;
+
+ if (!btf_shallow_equal_struct(t1, t2))
return false;
+
+ m1 = btf_members(t1);
+ m2 = btf_members(t2);
+ for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
+ if (m1->type == m2->type)
+ continue;
+ if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1))
+ return false;
+ }
+ return true;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p1, *p2;
+ int i, n;
+
+ if (!btf_compat_fnproto(t1, t2))
+ return false;
+
+ if (t1->type != t2->type &&
+ !btf_dedup_identical_types(d, t1->type, t2->type, depth - 1))
+ return false;
+
+ p1 = btf_params(t1);
+ p2 = btf_params(t2);
+ for (i = 0, n = btf_vlen(t1); i < n; i++, p1++, p2++) {
+ if (p1->type == p2->type)
+ continue;
+ if (!btf_dedup_identical_types(d, p1->type, p2->type, depth - 1))
+ return false;
+ }
+ return true;
+ }
+ default:
+ return false;
}
- return true;
}
+
/*
* Check equivalence of BTF type graph formed by candidate struct/union (we'll
* call it "candidate graph" in this description for brevity) to a type graph
@@ -4508,19 +4627,13 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
* different fields within the *same* struct. This breaks type
* equivalence check, which makes an assumption that candidate
* types sub-graph has a consistent and deduped-by-compiler
- * types within a single CU. So work around that by explicitly
- * allowing identical array types here.
+ * types within a single CU. And similar situation can happen
+ * with struct/union sometimes, and event with pointers.
+ * So accommodate cases like this doing a structural
+ * comparison recursively, but avoiding being stuck in endless
+ * loops by limiting the depth up to which we check.
*/
- if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
- return 1;
- /* It turns out that similar situation can happen with
- * struct/union sometimes, sigh... Handle the case where
- * structs/unions are exactly the same, down to the referenced
- * type IDs. Anything more complicated (e.g., if referenced
- * types are different, but equivalent) is *way more*
- * complicated and requires a many-to-many equivalence mapping.
- */
- if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
+ if (btf_dedup_identical_types(d, hypot_type_id, cand_id, 16))
return 1;
return 0;
}
@@ -5268,7 +5381,10 @@ struct btf *btf__load_vmlinux_btf(void)
pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",
sysfs_btf_path);
} else {
- btf = btf__parse(sysfs_btf_path, NULL);
+ btf = btf_parse_raw_mmap(sysfs_btf_path, NULL);
+ if (IS_ERR(btf))
+ btf = btf__parse(sysfs_btf_path, NULL);
+
if (!btf) {
err = -errno;
pr_warn("failed to read kernel BTF from '%s': %s\n",
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6b85060f07b3..e9c641a2fb20 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -60,6 +60,8 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+#define MAX_EVENT_NAME_LEN 64
+
#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
#define BPF_INSN_SZ (sizeof(struct bpf_insn))
@@ -284,7 +286,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)
old_errno = errno;
va_start(args, format);
- __libbpf_pr(level, format, args);
+ print_fn(level, format, args);
va_end(args);
errno = old_errno;
@@ -896,7 +898,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
return -LIBBPF_ERRNO__FORMAT;
}
- if (sec_off + prog_sz > sec_sz) {
+ if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) {
pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
sec_name, sec_off);
return -LIBBPF_ERRNO__FORMAT;
@@ -1725,15 +1727,6 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam
return ERR_PTR(-ENOENT);
}
-/* Some versions of Android don't provide memfd_create() in their libc
- * implementation, so avoid complications and just go straight to Linux
- * syscall.
- */
-static int sys_memfd_create(const char *name, unsigned flags)
-{
- return syscall(__NR_memfd_create, name, flags);
-}
-
#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC 0x0001U
#endif
@@ -9455,6 +9448,30 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log
return 0;
}
+struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog)
+{
+ if (prog->func_info_rec_size != sizeof(struct bpf_func_info))
+ return libbpf_err_ptr(-EOPNOTSUPP);
+ return prog->func_info;
+}
+
+__u32 bpf_program__func_info_cnt(const struct bpf_program *prog)
+{
+ return prog->func_info_cnt;
+}
+
+struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog)
+{
+ if (prog->line_info_rec_size != sizeof(struct bpf_line_info))
+ return libbpf_err_ptr(-EOPNOTSUPP);
+ return prog->line_info;
+}
+
+__u32 bpf_program__line_info_cnt(const struct bpf_program *prog)
+{
+ return prog->line_info_cnt;
+}
+
#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
.sec = (char *)sec_pfx, \
.prog_type = BPF_PROG_TYPE_##ptype, \
@@ -11121,16 +11138,16 @@ static const char *tracefs_available_filter_functions_addrs(void)
: TRACEFS"/available_filter_functions_addrs";
}
-static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
- const char *kfunc_name, size_t offset)
+static void gen_probe_legacy_event_name(char *buf, size_t buf_sz,
+ const char *name, size_t offset)
{
static int index = 0;
int i;
- snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
- __sync_fetch_and_add(&index, 1));
+ snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(),
+ __sync_fetch_and_add(&index, 1), name, offset);
- /* sanitize binary_path in the probe name */
+ /* sanitize name in the probe name */
for (i = 0; buf[i]; i++) {
if (!isalnum(buf[i]))
buf[i] = '_';
@@ -11255,9 +11272,9 @@ int probe_kern_syscall_wrapper(int token_fd)
return pfd >= 0 ? 1 : 0;
} else { /* legacy mode */
- char probe_name[128];
+ char probe_name[MAX_EVENT_NAME_LEN];
- gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
+ gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
return 0;
@@ -11313,10 +11330,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
func_name, offset,
-1 /* pid */, 0 /* ref_ctr_off */);
} else {
- char probe_name[256];
+ char probe_name[MAX_EVENT_NAME_LEN];
- gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
- func_name, offset);
+ gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
+ func_name, offset);
legacy_probe = strdup(probe_name);
if (!legacy_probe)
@@ -11860,20 +11877,6 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru
return ret;
}
-static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
- const char *binary_path, uint64_t offset)
-{
- int i;
-
- snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
-
- /* sanitize binary_path in the probe name */
- for (i = 0; buf[i]; i++) {
- if (!isalnum(buf[i]))
- buf[i] = '_';
- }
-}
-
static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
const char *binary_path, size_t offset)
{
@@ -12297,13 +12300,14 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
func_offset, pid, ref_ctr_off);
} else {
- char probe_name[PATH_MAX + 64];
+ char probe_name[MAX_EVENT_NAME_LEN];
if (ref_ctr_off)
return libbpf_err_ptr(-EINVAL);
- gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
- binary_path, func_offset);
+ gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
+ strrchr(binary_path, '/') ? : binary_path,
+ func_offset);
legacy_probe = strdup(probe_name);
if (!legacy_probe)
@@ -13371,7 +13375,6 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
attr.sample_type = PERF_SAMPLE_RAW;
- attr.sample_period = sample_period;
attr.wakeup_events = sample_period;
p.attr = &attr;
@@ -14099,6 +14102,12 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
}
link = map_skel->link;
+ if (!link) {
+ pr_warn("map '%s': BPF map skeleton link is uninitialized\n",
+ bpf_map__name(map));
+ continue;
+ }
+
if (*link)
continue;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index fdcee6a71e0f..1137e7d2e1b5 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -940,6 +940,12 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le
LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size);
LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size);
+LIBBPF_API struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog);
+LIBBPF_API __u32 bpf_program__func_info_cnt(const struct bpf_program *prog);
+
+LIBBPF_API struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog);
+LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog);
+
/**
* @brief **bpf_program__set_attach_target()** sets BTF-based attach target
* for supported BPF program types:
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index d8b71f22f197..1205f9a4fe04 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -437,6 +437,10 @@ LIBBPF_1.6.0 {
bpf_linker__add_fd;
bpf_linker__new_fd;
bpf_object__prepare;
+ bpf_program__func_info;
+ bpf_program__func_info_cnt;
+ bpf_program__line_info;
+ bpf_program__line_info_cnt;
btf__add_decl_attr;
btf__add_type_attr;
} LIBBPF_1.5.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 76669c73dcd1..477a3b3389a0 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -667,6 +667,15 @@ static inline int sys_dup3(int oldfd, int newfd, int flags)
return syscall(__NR_dup3, oldfd, newfd, flags);
}
+/* Some versions of Android don't provide memfd_create() in their libc
+ * implementation, so avoid complications and just go straight to Linux
+ * syscall.
+ */
+static inline int sys_memfd_create(const char *name, unsigned flags)
+{
+ return syscall(__NR_memfd_create, name, flags);
+}
+
/* Point *fixed_fd* to the same file that *tmp_fd* points to.
* Regardless of success, *tmp_fd* is closed.
* Whatever *fixed_fd* pointed to is closed silently.
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 800e0ef09c37..a469e5d4fee7 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -573,7 +573,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz,
snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz);
- fd = memfd_create(filename, 0);
+ fd = sys_memfd_create(filename, 0);
if (fd < 0) {
ret = -errno;
pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret));
@@ -1376,7 +1376,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj
} else {
if (!secs_match(dst_sec, src_sec)) {
pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name);
- return -1;
+ return -EINVAL;
}
/* "license" and "version" sections are deduped */
@@ -2223,7 +2223,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
}
} else if (!secs_match(dst_sec, src_sec)) {
pr_warn("sections %s are not compatible\n", src_sec->sec_name);
- return -1;
+ return -EINVAL;
}
/* shdr->sh_link points to SYMTAB */
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index 975e265eab3b..06663f9ea581 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype,
minlen = nla_attr_minlen[pt->type];
if (libbpf_nla_len(nla) < minlen)
- return -1;
+ return -EINVAL;
if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen)
- return -1;
+ return -EINVAL;
if (pt->type == LIBBPF_NLA_STRING) {
char *data = libbpf_nla_data(nla);
if (data[libbpf_nla_len(nla) - 1] != '\0')
- return -1;
+ return -EINVAL;
}
return 0;
@@ -118,19 +118,18 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
if (policy) {
err = validate_nla(nla, maxtype, policy);
if (err < 0)
- goto errout;
+ return err;
}
- if (tb[type])
+ if (tb[type]) {
pr_warn("Attribute of type %#x found multiple times in message, "
"previous attribute is being ignored.\n", type);
+ }
tb[type] = nla;
}
- err = 0;
-errout:
- return err;
+ return 0;
}
/**
diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST
index f748f2c33b22..1789a61d0a9b 100644
--- a/tools/testing/selftests/bpf/DENYLIST
+++ b/tools/testing/selftests/bpf/DENYLIST
@@ -1,5 +1,6 @@
# TEMPORARY
# Alphabetical order
+dynptr/test_probe_read_user_str_dynptr # disabled until https://patchwork.kernel.org/project/linux-mm/patch/20250422131449.57177-1-mykyta.yatsenko5@gmail.com/ makes it into the bpf-next
get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge
stacktrace_build_id
stacktrace_build_id_nmi
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 6d8feda27ce9..12e99c0277a8 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,3 +1 @@
-fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
-fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 66bb50356be0..cf5ed3bee573 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -34,6 +34,9 @@ OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0)
LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
+SKIP_DOCS ?=
+SKIP_LLVM ?=
+
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
@@ -172,6 +175,7 @@ override OUTPUT := $(patsubst %/,%,$(OUTPUT))
endif
endif
+ifneq ($(SKIP_LLVM),1)
ifeq ($(feature-llvm),1)
LLVM_CFLAGS += -DHAVE_LLVM_SUPPORT
LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets
@@ -180,13 +184,14 @@ ifeq ($(feature-llvm),1)
# Prefer linking statically if it's available, otherwise fallback to shared
ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static)
LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
- LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ LLVM_LDLIBS += $(filter-out -lxml2,$(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)))
LLVM_LDLIBS += -lstdc++
else
LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS))
endif
LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
endif
+endif
SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
@@ -358,7 +363,9 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin
endif
+ifneq ($(SKIP_DOCS),1)
all: docs
+endif
docs:
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
@@ -673,9 +680,6 @@ ifneq ($2:$(OUTPUT),:$(shell pwd))
$(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
endif
-$(OUTPUT)/$(TRUNNER_BINARY): LDLIBS += $$(LLVM_LDLIBS)
-$(OUTPUT)/$(TRUNNER_BINARY): LDFLAGS += $$(LLVM_LDFLAGS)
-
# some X.test.o files have runtime dependencies on Y.bpf.o files
$(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
@@ -686,7 +690,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(OUTPUT)/veristat \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
- $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@
$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \
$(OUTPUT)/$(if $2,$2/)bpftool
@@ -811,6 +815,7 @@ $(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.ske
$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
+$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -831,6 +836,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_local_storage_create.o \
$(OUTPUT)/bench_htab_mem.o \
$(OUTPUT)/bench_bpf_crypto.o \
+ $(OUTPUT)/bench_sockmap.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 0fd8c9b0d38f..ddd73d06a1eb 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -283,6 +283,7 @@ extern struct argp bench_local_storage_create_argp;
extern struct argp bench_htab_mem_argp;
extern struct argp bench_trigger_batch_argp;
extern struct argp bench_crypto_argp;
+extern struct argp bench_sockmap_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -297,6 +298,7 @@ static const struct argp_child bench_parsers[] = {
{ &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
{ &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
{ &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
+ { &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 },
{},
};
@@ -555,6 +557,7 @@ extern const struct bench bench_local_storage_create;
extern const struct bench bench_htab_mem;
extern const struct bench bench_crypto_encrypt;
extern const struct bench bench_crypto_decrypt;
+extern const struct bench bench_sockmap;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -621,6 +624,7 @@ static const struct bench *benchs[] = {
&bench_htab_mem,
&bench_crypto_encrypt,
&bench_crypto_decrypt,
+ &bench_sockmap,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
index 926ee822143e..297e32390cd1 100644
--- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
+++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
@@ -279,6 +279,7 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
}
got = read(fd, buf, sizeof(buf) - 1);
+ close(fd);
if (got <= 0) {
*value = 0;
return;
@@ -286,8 +287,6 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
buf[got] = 0;
*value = strtoull(buf, NULL, 0);
-
- close(fd);
}
static void htab_mem_measure(struct bench_res *res)
diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
new file mode 100644
index 000000000000..8ebf563a67a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <error.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/sendfile.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <argp.h>
+#include "bench.h"
+#include "bench_sockmap_prog.skel.h"
+
+#define FILE_SIZE (128 * 1024)
+#define DATA_REPEAT_SIZE 10
+
+static const char snd_data[DATA_REPEAT_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+/* c1 <-> [p1, p2] <-> c2
+ * RX bench(BPF_SK_SKB_STREAM_VERDICT):
+ * ARG_FW_RX_PASS:
+ * send(p2) -> recv(c2) -> bpf skb passthrough -> recv(c2)
+ * ARG_FW_RX_VERDICT_EGRESS:
+ * send(c1) -> verdict skb to tx queuec of p2 -> recv(c2)
+ * ARG_FW_RX_VERDICT_INGRESS:
+ * send(c1) -> verdict skb to rx queuec of c2 -> recv(c2)
+ *
+ * TX bench(BPF_SK_MSG_VERDIC):
+ * ARG_FW_TX_PASS:
+ * send(p2) -> bpf msg passthrough -> send(p2) -> recv(c2)
+ * ARG_FW_TX_VERDICT_INGRESS:
+ * send(p2) -> verdict msg to rx queue of c2 -> recv(c2)
+ * ARG_FW_TX_VERDICT_EGRESS:
+ * send(p1) -> verdict msg to tx queue of p2 -> recv(c2)
+ */
+enum SOCKMAP_ARG_FLAG {
+ ARG_FW_RX_NORMAL = 11000,
+ ARG_FW_RX_PASS,
+ ARG_FW_RX_VERDICT_EGRESS,
+ ARG_FW_RX_VERDICT_INGRESS,
+ ARG_FW_TX_NORMAL,
+ ARG_FW_TX_PASS,
+ ARG_FW_TX_VERDICT_INGRESS,
+ ARG_FW_TX_VERDICT_EGRESS,
+ ARG_CTL_RX_STRP,
+ ARG_CONSUMER_DELAY_TIME,
+ ARG_PRODUCER_DURATION,
+};
+
+#define TXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_TX_NORMAL)
+
+#define TXMODE_BPF_INGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_INGRESS)
+
+#define TXMODE_BPF_EGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_EGRESS)
+
+#define TXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_TX_PASS)
+
+#define TXMODE_BPF() ( \
+ TXMODE_BPF_PASS() || \
+ TXMODE_BPF_INGRESS() || \
+ TXMODE_BPF_EGRESS())
+
+#define TXMODE() ( \
+ TXMODE_NORMAL() || \
+ TXMODE_BPF())
+
+#define RXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_RX_NORMAL)
+
+#define RXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_RX_PASS)
+
+#define RXMODE_BPF_VERDICT_EGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_EGRESS)
+
+#define RXMODE_BPF_VERDICT_INGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_INGRESS)
+
+#define RXMODE_BPF_VERDICT() ( \
+ RXMODE_BPF_VERDICT_INGRESS() || \
+ RXMODE_BPF_VERDICT_EGRESS())
+
+#define RXMODE_BPF() ( \
+ RXMODE_BPF_PASS() || \
+ RXMODE_BPF_VERDICT())
+
+#define RXMODE() ( \
+ RXMODE_NORMAL() || \
+ RXMODE_BPF())
+
+static struct socmap_ctx {
+ struct bench_sockmap_prog *skel;
+ enum SOCKMAP_ARG_FLAG mode;
+ #define c1 fds[0]
+ #define p1 fds[1]
+ #define c2 fds[2]
+ #define p2 fds[3]
+ #define sfd fds[4]
+ int fds[5];
+ long send_calls;
+ long read_calls;
+ long prod_send;
+ long user_read;
+ int file_size;
+ int delay_consumer;
+ int prod_run_time;
+ int strp_size;
+} ctx = {
+ .prod_send = 0,
+ .user_read = 0,
+ .file_size = FILE_SIZE,
+ .mode = ARG_FW_RX_VERDICT_EGRESS,
+ .fds = {0},
+ .delay_consumer = 0,
+ .prod_run_time = 0,
+ .strp_size = 0,
+};
+
+static void bench_sockmap_prog_destroy(void)
+{
+ int i;
+
+ for (i = 0; i < sizeof(ctx.fds); i++) {
+ if (ctx.fds[0] > 0)
+ close(ctx.fds[i]);
+ }
+
+ bench_sockmap_prog__destroy(ctx.skel);
+}
+
+static void init_addr(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static bool set_non_block(int fd, bool blocking)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+
+ if (flags == -1)
+ return false;
+ flags = blocking ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK);
+ return (fcntl(fd, F_SETFL, flags) == 0);
+}
+
+static int create_pair(int *c, int *p, int type)
+{
+ struct sockaddr_storage addr;
+ int err, cfd, pfd;
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+
+ err = getsockname(ctx.sfd, (struct sockaddr *)&addr, &addr_len);
+ if (err) {
+ fprintf(stderr, "getsockname error %d\n", errno);
+ return err;
+ }
+ cfd = socket(AF_INET, type, 0);
+ if (cfd < 0) {
+ fprintf(stderr, "socket error %d\n", errno);
+ return err;
+ }
+
+ err = connect(cfd, (struct sockaddr *)&addr, addr_len);
+ if (err && errno != EINPROGRESS) {
+ fprintf(stderr, "connect error %d\n", errno);
+ return err;
+ }
+
+ pfd = accept(ctx.sfd, NULL, NULL);
+ if (pfd < 0) {
+ fprintf(stderr, "accept error %d\n", errno);
+ return err;
+ }
+ *c = cfd;
+ *p = pfd;
+ return 0;
+}
+
+static int create_sockets(void)
+{
+ struct sockaddr_storage addr;
+ int err, one = 1;
+ socklen_t addr_len;
+
+ init_addr(&addr, &addr_len);
+ ctx.sfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (ctx.sfd < 0) {
+ fprintf(stderr, "socket error:%d\n", errno);
+ return ctx.sfd;
+ }
+ err = setsockopt(ctx.sfd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err) {
+ fprintf(stderr, "setsockopt error:%d\n", errno);
+ return err;
+ }
+
+ err = bind(ctx.sfd, (struct sockaddr *)&addr, addr_len);
+ if (err) {
+ fprintf(stderr, "bind error:%d\n", errno);
+ return err;
+ }
+
+ err = listen(ctx.sfd, SOMAXCONN);
+ if (err) {
+ fprintf(stderr, "listen error:%d\n", errno);
+ return err;
+ }
+
+ err = create_pair(&ctx.c1, &ctx.p1, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 1 error\n");
+ return err;
+ }
+
+ err = create_pair(&ctx.c2, &ctx.p2, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 2 error\n");
+ return err;
+ }
+ printf("create socket fd c1:%d p1:%d c2:%d p2:%d\n",
+ ctx.c1, ctx.p1, ctx.c2, ctx.p2);
+ return 0;
+}
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 2 || env.producer_cnt != 1 ||
+ !env.affinity)
+ goto err;
+ return;
+err:
+ fprintf(stderr, "argument '-c 2 -p 1 -a' is necessary");
+ exit(1);
+}
+
+static int setup_rx_sockmap(void)
+{
+ int verdict, pass, parser, map;
+ int zero = 0, one = 1;
+ int err;
+
+ parser = bpf_program__fd(ctx.skel->progs.prog_skb_parser);
+ verdict = bpf_program__fd(ctx.skel->progs.prog_skb_verdict);
+ pass = bpf_program__fd(ctx.skel->progs.prog_skb_pass);
+ map = bpf_map__fd(ctx.skel->maps.sock_map_rx);
+
+ if (ctx.strp_size != 0) {
+ ctx.skel->bss->pkt_size = ctx.strp_size;
+ err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return err;
+ }
+
+ if (RXMODE_BPF_VERDICT())
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ else if (RXMODE_BPF_PASS())
+ err = bpf_prog_attach(pass, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (RXMODE_BPF_PASS())
+ return bpf_map_update_elem(map, &zero, &ctx.c2, BPF_NOEXIST);
+
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ if (err < 0)
+ return err;
+
+ if (RXMODE_BPF_VERDICT_INGRESS()) {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ } else {
+ err = bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ }
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int setup_tx_sockmap(void)
+{
+ int zero = 0, one = 1;
+ int prog, map;
+ int err;
+
+ map = bpf_map__fd(ctx.skel->maps.sock_map_tx);
+ prog = TXMODE_BPF_PASS() ?
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_pass) :
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_verdict);
+
+ err = bpf_prog_attach(prog, map, BPF_SK_MSG_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (TXMODE_BPF_EGRESS()) {
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ } else {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &zero, &ctx.p2, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ }
+
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void setup(void)
+{
+ int err;
+
+ ctx.skel = bench_sockmap_prog__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "error loading skel\n");
+ exit(1);
+ }
+
+ if (create_sockets()) {
+ fprintf(stderr, "create_net_mode error\n");
+ goto err;
+ }
+
+ if (RXMODE_BPF()) {
+ err = setup_rx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_rx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else if (TXMODE_BPF()) {
+ err = setup_tx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_tx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else {
+ fprintf(stderr, "unknown sockmap bench mode: %d\n", ctx.mode);
+ goto err;
+ }
+
+ return;
+
+err:
+ bench_sockmap_prog_destroy();
+ exit(1);
+}
+
+static void measure(struct bench_res *res)
+{
+ res->drops = atomic_swap(&ctx.prod_send, 0);
+ res->hits = atomic_swap(&ctx.skel->bss->process_byte, 0);
+ res->false_hits = atomic_swap(&ctx.user_read, 0);
+ res->important_hits = atomic_swap(&ctx.send_calls, 0);
+ res->important_hits |= atomic_swap(&ctx.read_calls, 0) << 32;
+}
+
+static void verify_data(int *check_pos, char *buf, int rcv)
+{
+ for (int i = 0 ; i < rcv; i++) {
+ if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
+ fprintf(stderr, "verify data fail");
+ exit(1);
+ }
+ (*check_pos)++;
+ if (*check_pos >= FILE_SIZE)
+ *check_pos = 0;
+ }
+}
+
+static void *consumer(void *input)
+{
+ int rcv, sent;
+ int check_pos = 0;
+ int tid = (long)input;
+ int recv_buf_size = FILE_SIZE;
+ char *buf = malloc(recv_buf_size);
+ int delay_read = ctx.delay_consumer;
+
+ if (!buf) {
+ fprintf(stderr, "fail to init read buffer");
+ return NULL;
+ }
+
+ while (true) {
+ if (tid == 1) {
+ /* consumer 1 is unused for tx test and stream verdict test */
+ if (RXMODE_BPF() || TXMODE())
+ return NULL;
+ /* it's only for RX_NORMAL which service as reserve-proxy mode */
+ rcv = read(ctx.p1, buf, recv_buf_size);
+ if (rcv < 0) {
+ fprintf(stderr, "fail to read p1");
+ return NULL;
+ }
+
+ sent = send(ctx.p2, buf, recv_buf_size, 0);
+ if (sent < 0) {
+ fprintf(stderr, "fail to send p2");
+ return NULL;
+ }
+ } else {
+ if (delay_read != 0) {
+ if (delay_read < 0)
+ return NULL;
+ sleep(delay_read);
+ delay_read = 0;
+ }
+ /* read real endpoint by consumer 0 */
+ atomic_inc(&ctx.read_calls);
+ rcv = read(ctx.c2, buf, recv_buf_size);
+ if (rcv < 0 && errno != EAGAIN) {
+ fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno);
+ return NULL;
+ }
+ verify_data(&check_pos, buf, rcv);
+ atomic_add(&ctx.user_read, rcv);
+ }
+ }
+
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ int off = 0, fp, need_sent, sent;
+ int file_size = ctx.file_size;
+ struct timespec ts1, ts2;
+ int target;
+ FILE *file;
+
+ file = tmpfile();
+ if (!file) {
+ fprintf(stderr, "create file for sendfile");
+ return NULL;
+ }
+
+ /* we need simple verify */
+ for (int i = 0; i < file_size; i++) {
+ if (fwrite(&snd_data[off], sizeof(char), 1, file) != 1) {
+ fprintf(stderr, "init tmpfile error");
+ return NULL;
+ }
+ if (++off >= sizeof(snd_data))
+ off = 0;
+ }
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+
+ fp = fileno(file);
+ need_sent = file_size;
+ clock_gettime(CLOCK_MONOTONIC, &ts1);
+
+ if (RXMODE_BPF_VERDICT())
+ target = ctx.c1;
+ else if (TXMODE_BPF_EGRESS())
+ target = ctx.p1;
+ else
+ target = ctx.p2;
+ set_non_block(target, true);
+ while (true) {
+ if (ctx.prod_run_time) {
+ clock_gettime(CLOCK_MONOTONIC, &ts2);
+ if (ts2.tv_sec - ts1.tv_sec > ctx.prod_run_time)
+ return NULL;
+ }
+
+ errno = 0;
+ atomic_inc(&ctx.send_calls);
+ sent = sendfile(target, fp, NULL, need_sent);
+ if (sent < 0) {
+ if (errno != EAGAIN && errno != ENOMEM && errno != ENOBUFS) {
+ fprintf(stderr, "sendfile return %d, errorno %d:%s\n",
+ sent, errno, strerror(errno));
+ return NULL;
+ }
+ continue;
+ } else if (sent < need_sent) {
+ need_sent -= sent;
+ atomic_add(&ctx.prod_send, sent);
+ continue;
+ }
+ atomic_add(&ctx.prod_send, need_sent);
+ need_sent = file_size;
+ lseek(fp, 0, SEEK_SET);
+ }
+
+ return NULL;
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double speed_mbs, prod_mbs, bpf_mbs, send_hz, read_hz;
+
+ prod_mbs = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
+ speed_mbs = res->false_hits / 1000000.0 / (delta_ns / 1000000000.0);
+ bpf_mbs = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+ send_hz = (res->important_hits & 0xFFFFFFFF) / (delta_ns / 1000000000.0);
+ read_hz = (res->important_hits >> 32) / (delta_ns / 1000000000.0);
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
+ printf("Send Speed %8.3lf MB/s (%8.3lf calls/s), BPF Speed %8.3lf MB/s, "
+ "Rcv Speed %8.3lf MB/s (%8.3lf calls/s)\n",
+ prod_mbs, send_hz, bpf_mbs, speed_mbs, read_hz);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ double verdict_mbs_mean = 0.0;
+ long verdict_total = 0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ verdict_mbs_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ verdict_total += res[i].hits / 1000000.0;
+ }
+
+ printf("Summary: total trans %8.3lu MB \u00B1 %5.3lf MB/s\n",
+ verdict_total, verdict_mbs_mean);
+}
+
+static const struct argp_option opts[] = {
+ { "rx-normal", ARG_FW_RX_NORMAL, NULL, 0,
+ "simple reserve-proxy mode, no bfp enabled"},
+ { "rx-pass", ARG_FW_RX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "rx-strp", ARG_CTL_RX_STRP, "Byte", 0,
+ "enable strparser and set the encapsulation size"},
+ { "rx-verdict-egress", ARG_FW_RX_VERDICT_EGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "rx-verdict-ingress", ARG_FW_RX_VERDICT_INGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "tx-normal", ARG_FW_TX_NORMAL, NULL, 0,
+ "simple c-s mode, no bfp enabled"},
+ { "tx-pass", ARG_FW_TX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "tx-verdict-ingress", ARG_FW_TX_VERDICT_INGRESS, NULL, 0,
+ "forward msg to ingress queue of another socket"},
+ { "tx-verdict-egress", ARG_FW_TX_VERDICT_EGRESS, NULL, 0,
+ "forward msg to egress queue of another socket"},
+ { "delay-consumer", ARG_CONSUMER_DELAY_TIME, "SEC", 0,
+ "delay consumer start"},
+ { "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
+ "producer duration"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_FW_RX_NORMAL...ARG_FW_TX_VERDICT_EGRESS:
+ ctx.mode = key;
+ break;
+ case ARG_CONSUMER_DELAY_TIME:
+ ctx.delay_consumer = strtol(arg, NULL, 10);
+ break;
+ case ARG_PRODUCER_DURATION:
+ ctx.prod_run_time = strtol(arg, NULL, 10);
+ break;
+ case ARG_CTL_RX_STRP:
+ ctx.strp_size = strtol(arg, NULL, 10);
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_sockmap_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* Benchmark performance of creating bpf local storage */
+const struct bench bench_sockmap = {
+ .name = "sockmap",
+ .argp = &bench_sockmap_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 6535c8ae3c46..5e512a1d09d1 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -591,4 +591,9 @@ extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym
extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym;
extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym;
+struct bpf_iter_dmabuf;
+extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
+
#endif
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 3201a962b3dc..f74e1ea0ad3b 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -22,6 +22,8 @@ CONFIG_CRYPTO_AES=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
CONFIG_DUMMY=y
CONFIG_DYNAMIC_FTRACE=y
CONFIG_FPROBE=y
@@ -108,6 +110,7 @@ CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_SYN_COOKIES=y
CONFIG_TEST_BPF=m
+CONFIG_UDMABUF=y
CONFIG_USERFAULTFD=y
CONFIG_VSOCKETS=y
CONFIG_VXLAN=y
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
index 7565fc7690c2..0223fce4db2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
@@ -51,9 +51,11 @@ static void test_arena_spin_lock_size(int size)
struct arena_spin_lock *skel;
pthread_t thread_id[16];
int prog_fd, i, err;
+ int nthreads;
void *ret;
- if (get_nprocs() < 2) {
+ nthreads = MIN(get_nprocs(), ARRAY_SIZE(thread_id));
+ if (nthreads < 2) {
test__skip();
return;
}
@@ -66,25 +68,25 @@ static void test_arena_spin_lock_size(int size)
goto end;
}
skel->bss->cs_count = size;
- skel->bss->limit = repeat * 16;
+ skel->bss->limit = repeat * nthreads;
- ASSERT_OK(pthread_barrier_init(&barrier, NULL, 16), "barrier init");
+ ASSERT_OK(pthread_barrier_init(&barrier, NULL, nthreads), "barrier init");
prog_fd = bpf_program__fd(skel->progs.prog);
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < nthreads; i++) {
err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
if (!ASSERT_OK(err, "pthread_create"))
goto end_barrier;
}
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < nthreads; i++) {
if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
goto end_barrier;
if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
goto end_barrier;
}
- ASSERT_EQ(skel->bss->counter, repeat * 16, "check counter value");
+ ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value");
end_barrier:
pthread_barrier_destroy(&barrier);
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 329c7862b52d..cabc51c2ca6b 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -122,6 +122,85 @@ cleanup:
test_attach_probe_manual__destroy(skel);
}
+/* attach uprobe/uretprobe long event name testings */
+static void test_attach_uprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ struct test_attach_probe_manual *skel;
+ ssize_t uprobe_offset;
+ char path[PATH_MAX] = {0};
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ goto cleanup;
+
+ if (!ASSERT_GT(readlink("/proc/self/exe", path, PATH_MAX - 1), 0, "readlink"))
+ goto cleanup;
+
+ /* manual-attach uprobe/uretprobe */
+ uprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ uprobe_opts.ref_ctr_offset = 0;
+ uprobe_opts.retprobe = false;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ path,
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_uprobe = uprobe_link;
+
+ uprobe_opts.retprobe = true;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ path,
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_uretprobe = uretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+/* attach kprobe/kretprobe long event name testings */
+static void test_attach_kprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ struct bpf_link *kprobe_link, *kretprobe_link;
+ struct test_attach_probe_manual *skel;
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ /* manual-attach kprobe/kretprobe */
+ kprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_kprobe = kprobe_link;
+
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_kretprobe = kretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
static void test_attach_probe_auto(struct test_attach_probe *skel)
{
struct bpf_link *uprobe_err_link;
@@ -323,6 +402,11 @@ void test_attach_probe(void)
if (test__start_subtest("uprobe-ref_ctr"))
test_uprobe_ref_ctr(skel);
+ if (test__start_subtest("uprobe-long_name"))
+ test_attach_uprobe_long_event_name();
+ if (test__start_subtest("kprobe-long_name"))
+ test_attach_kprobe_long_event_name();
+
cleanup:
test_attach_probe__destroy(skel);
ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index dbd13f8e42a7..dd6512fa652b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -63,6 +63,12 @@ static void test_bpf_nf_ct(int mode)
.repeat = 1,
);
+ if (SYS_NOFAIL("iptables-legacy --version")) {
+ fprintf(stdout, "Missing required iptables-legacy tool\n");
+ test__skip();
+ return;
+ }
+
skel = test_bpf_nf__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
index d9024c7a892a..5bc15bb6b7ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
@@ -440,6 +440,105 @@ cleanup:
btf__free(btf1);
}
+/* Ensure module split BTF dedup worked correctly; when dedup fails badly
+ * core kernel types are in split BTF also, so ensure that references to
+ * such types point at base - not split - BTF.
+ *
+ * bpf_testmod_test_write() has multiple core kernel type parameters;
+ *
+ * ssize_t
+ * bpf_testmod_test_write(struct file *file, struct kobject *kobj,
+ * struct bin_attribute *bin_attr,
+ * char *buf, loff_t off, size_t len);
+ *
+ * Ensure each of the FUNC_PROTO params is a core kernel type.
+ *
+ * Do the same for
+ *
+ * __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk);
+ *
+ * ...and
+ *
+ * __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb);
+ *
+ */
+const char *mod_funcs[] = {
+ "bpf_testmod_test_write",
+ "bpf_kfunc_call_test3",
+ "bpf_kfunc_call_test_pass_ctx"
+};
+
+static void test_split_module(void)
+{
+ struct btf *vmlinux_btf, *btf1 = NULL;
+ int i, nr_base_types;
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux_btf"))
+ return;
+ nr_base_types = btf__type_cnt(vmlinux_btf);
+ if (!ASSERT_GT(nr_base_types, 0, "nr_base_types"))
+ goto cleanup;
+
+ btf1 = btf__parse_split("/sys/kernel/btf/bpf_testmod", vmlinux_btf);
+ if (!ASSERT_OK_PTR(btf1, "split_btf"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mod_funcs); i++) {
+ const struct btf_param *p;
+ const struct btf_type *t;
+ __u16 vlen;
+ __u32 id;
+ int j;
+
+ id = btf__find_by_name_kind(btf1, mod_funcs[i], BTF_KIND_FUNC);
+ if (!ASSERT_GE(id, nr_base_types, "func_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "func_id_type"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, t->type);
+ if (!ASSERT_OK_PTR(t, "func_proto_id_type"))
+ goto cleanup;
+ if (!ASSERT_EQ(btf_is_func_proto(t), true, "is_func_proto"))
+ goto cleanup;
+ vlen = btf_vlen(t);
+
+ for (j = 0, p = btf_params(t); j < vlen; j++, p++) {
+ /* bpf_testmod uses resilient split BTF, so any
+ * reference types will be added to split BTF and their
+ * associated targets will be base BTF types; for example
+ * for a "struct sock *" the PTR will be in split BTF
+ * while the "struct sock" will be in base.
+ *
+ * In some cases like loff_t we have to resolve
+ * multiple typedefs hence the while() loop below.
+ *
+ * Note that resilient split BTF generation depends
+ * on pahole version, so we do not assert that
+ * reference types are in split BTF, as if pahole
+ * does not support resilient split BTF they will
+ * also be base BTF types.
+ */
+ id = p->type;
+ do {
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "param_ref_type"))
+ goto cleanup;
+ if (!btf_is_mod(t) && !btf_is_ptr(t) && !btf_is_typedef(t))
+ break;
+ id = t->type;
+ } while (true);
+
+ if (!ASSERT_LT(id, nr_base_types, "verify_base_type"))
+ goto cleanup;
+ }
+ }
+cleanup:
+ btf__free(btf1);
+ btf__free(vmlinux_btf);
+}
+
void test_btf_dedup_split()
{
if (test__start_subtest("split_simple"))
@@ -450,4 +549,6 @@ void test_btf_dedup_split()
test_split_fwd_resolve();
if (test__start_subtest("split_dup_struct_in_cu"))
test_split_dup_struct_in_cu();
+ if (test__start_subtest("split_module"))
+ test_split_module();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
index eef1158676ed..3696fb9a05ed 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -12,10 +12,11 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
vfprintf(ctx, fmt, args);
}
-void test_btf_split() {
+static void __test_btf_split(bool multi)
+{
struct btf_dump *d = NULL;
const struct btf_type *t;
- struct btf *btf1, *btf2;
+ struct btf *btf1, *btf2, *btf3 = NULL;
int str_off, i, err;
btf1 = btf__new_empty();
@@ -63,14 +64,46 @@ void test_btf_split() {
ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen");
ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name");
+ if (multi) {
+ btf3 = btf__new_empty_split(btf2);
+ if (!ASSERT_OK_PTR(btf3, "multi_split_btf"))
+ goto cleanup;
+ } else {
+ btf3 = btf2;
+ }
+
+ btf__add_union(btf3, "u1", 16); /* [5] union u1 { */
+ btf__add_field(btf3, "f1", 4, 0, 0); /* struct s2 f1; */
+ btf__add_field(btf3, "uf2", 1, 0, 0); /* int f2; */
+ /* } */
+
+ if (multi) {
+ t = btf__type_by_id(btf2, 5);
+ ASSERT_NULL(t, "multisplit_type_in_first_split");
+ }
+
+ t = btf__type_by_id(btf3, 5);
+ if (!ASSERT_OK_PTR(t, "split_union_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_union(t), true, "split_union_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "split_union_vlen");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "u1", "split_union_name");
+ ASSERT_EQ(btf__type_cnt(btf3), 6, "split_type_cnt");
+
+ t = btf__type_by_id(btf3, 1);
+ if (!ASSERT_OK_PTR(t, "split_base_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_int(t), true, "split_base_int");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "int", "split_base_type_name");
+
/* BTF-to-C dump of split BTF */
dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
return;
- d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL);
+ d = btf_dump__new(btf3, btf_dump_printf, dump_buf_file, NULL);
if (!ASSERT_OK_PTR(d, "btf_dump__new"))
goto cleanup;
- for (i = 1; i < btf__type_cnt(btf2); i++) {
+ for (i = 1; i < btf__type_cnt(btf3); i++) {
err = btf_dump__dump_type(d, i);
ASSERT_OK(err, "dump_type_ok");
}
@@ -79,12 +112,15 @@ void test_btf_split() {
ASSERT_STREQ(dump_buf,
"struct s1 {\n"
" int f1;\n"
-"};\n"
-"\n"
+"};\n\n"
"struct s2 {\n"
" struct s1 f1;\n"
" int f2;\n"
" int *f3;\n"
+"};\n\n"
+"union u1 {\n"
+" struct s2 f1;\n"
+" int uf2;\n"
"};\n\n", "c_dump");
cleanup:
@@ -94,4 +130,14 @@ cleanup:
btf_dump__free(d);
btf__free(btf1);
btf__free(btf2);
+ if (btf2 != btf3)
+ btf__free(btf3);
+}
+
+void test_btf_split(void)
+{
+ if (test__start_subtest("single_split"))
+ __test_btf_split(false);
+ if (test__start_subtest("multi_split"))
+ __test_btf_split(true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
new file mode 100644
index 000000000000..3923e64c4c1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2025 Isovalent */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static void test_btf_mmap_sysfs(const char *path, struct btf *base)
+{
+ struct stat st;
+ __u64 btf_size, end;
+ void *raw_data = NULL;
+ int fd = -1;
+ long page_size;
+ struct btf *btf = NULL;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ if (!ASSERT_GE(page_size, 0, "get_page_size"))
+ goto cleanup;
+
+ if (!ASSERT_OK(stat(path, &st), "stat_btf"))
+ goto cleanup;
+
+ btf_size = st.st_size;
+ end = (btf_size + page_size - 1) / page_size * page_size;
+
+ fd = open(path, O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open_btf"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_OK_PTR(raw_data, "mmap_btf"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1,
+ "mprotect_writable"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1,
+ "mprotect_executable"))
+ goto cleanup;
+
+ /* Check padding is zeroed */
+ for (int i = btf_size; i < end; i++) {
+ if (((__u8 *)raw_data)[i] != 0) {
+ PRINT_FAIL("tail of BTF is not zero at page offset %d\n", i);
+ goto cleanup;
+ }
+ }
+
+ btf = btf__new_split(raw_data, btf_size, base);
+ if (!ASSERT_OK_PTR(btf, "parse_btf"))
+ goto cleanup;
+
+cleanup:
+ btf__free(btf);
+ if (raw_data && raw_data != MAP_FAILED)
+ munmap(raw_data, btf_size);
+ if (fd >= 0)
+ close(fd);
+}
+
+void test_btf_sysfs(void)
+{
+ test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
new file mode 100644
index 000000000000..6c2b0c3dbcd8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "dmabuf_iter.skel.h"
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
+#include <linux/udmabuf.h>
+
+static int udmabuf = -1;
+static const char udmabuf_test_buffer_name[DMA_BUF_NAME_LEN] = "udmabuf_test_buffer_for_iter";
+static size_t udmabuf_test_buffer_size;
+static int sysheap_dmabuf = -1;
+static const char sysheap_test_buffer_name[DMA_BUF_NAME_LEN] = "sysheap_test_buffer_for_iter";
+static size_t sysheap_test_buffer_size;
+
+static int create_udmabuf(void)
+{
+ struct udmabuf_create create;
+ int dev_udmabuf, memfd, local_udmabuf;
+
+ udmabuf_test_buffer_size = 10 * getpagesize();
+
+ if (!ASSERT_LE(sizeof(udmabuf_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ memfd = memfd_create("memfd_test", MFD_ALLOW_SEALING);
+ if (!ASSERT_OK_FD(memfd, "memfd_create"))
+ return -1;
+
+ if (!ASSERT_OK(ftruncate(memfd, udmabuf_test_buffer_size), "ftruncate"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK), "seal"))
+ goto close_memfd;
+
+ dev_udmabuf = open("/dev/udmabuf", O_RDONLY);
+ if (!ASSERT_OK_FD(dev_udmabuf, "open udmabuf"))
+ goto close_memfd;
+
+ memset(&create, 0, sizeof(create));
+ create.memfd = memfd;
+ create.flags = UDMABUF_FLAGS_CLOEXEC;
+ create.offset = 0;
+ create.size = udmabuf_test_buffer_size;
+
+ local_udmabuf = ioctl(dev_udmabuf, UDMABUF_CREATE, &create);
+ close(dev_udmabuf);
+ if (!ASSERT_OK_FD(local_udmabuf, "udmabuf_create"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(ioctl(local_udmabuf, DMA_BUF_SET_NAME_B, udmabuf_test_buffer_name), "name"))
+ goto close_udmabuf;
+
+ return local_udmabuf;
+
+close_udmabuf:
+ close(local_udmabuf);
+close_memfd:
+ close(memfd);
+ return -1;
+}
+
+static int create_sys_heap_dmabuf(void)
+{
+ sysheap_test_buffer_size = 20 * getpagesize();
+
+ struct dma_heap_allocation_data data = {
+ .len = sysheap_test_buffer_size,
+ .fd = 0,
+ .fd_flags = O_RDWR | O_CLOEXEC,
+ .heap_flags = 0,
+ };
+ int heap_fd, ret;
+
+ if (!ASSERT_LE(sizeof(sysheap_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ heap_fd = open("/dev/dma_heap/system", O_RDONLY);
+ if (!ASSERT_OK_FD(heap_fd, "open dma heap"))
+ return -1;
+
+ ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);
+ close(heap_fd);
+ if (!ASSERT_OK(ret, "syheap alloc"))
+ return -1;
+
+ if (!ASSERT_OK(ioctl(data.fd, DMA_BUF_SET_NAME_B, sysheap_test_buffer_name), "name"))
+ goto close_sysheap_dmabuf;
+
+ return data.fd;
+
+close_sysheap_dmabuf:
+ close(data.fd);
+ return -1;
+}
+
+static int create_test_buffers(void)
+{
+ udmabuf = create_udmabuf();
+ sysheap_dmabuf = create_sys_heap_dmabuf();
+
+ if (udmabuf < 0 || sysheap_dmabuf < 0)
+ return -1;
+
+ return 0;
+}
+
+static void destroy_test_buffers(void)
+{
+ close(udmabuf);
+ udmabuf = -1;
+
+ close(sysheap_dmabuf);
+ sysheap_dmabuf = -1;
+}
+
+enum Fields { INODE, SIZE, NAME, EXPORTER, FIELD_COUNT };
+struct DmabufInfo {
+ unsigned long inode;
+ unsigned long size;
+ char name[DMA_BUF_NAME_LEN];
+ char exporter[32];
+};
+
+static bool check_dmabuf_info(const struct DmabufInfo *bufinfo,
+ unsigned long size,
+ const char *name, const char *exporter)
+{
+ return size == bufinfo->size &&
+ !strcmp(name, bufinfo->name) &&
+ !strcmp(exporter, bufinfo->exporter);
+}
+
+static void subtest_dmabuf_iter_check_no_infinite_reads(struct dmabuf_iter *skel)
+{
+ int iter_fd;
+ char buf[256];
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ while (read(iter_fd, buf, sizeof(buf)) > 0)
+ ; /* Read out all contents */
+
+ /* Next reads should return 0 */
+ ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read");
+
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel)
+{
+ bool found_test_sysheap_dmabuf = false;
+ bool found_test_udmabuf = false;
+ struct DmabufInfo bufinfo;
+ size_t linesize = 0;
+ char *line = NULL;
+ FILE *iter_file;
+ int iter_fd, f = INODE;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ iter_file = fdopen(iter_fd, "r");
+ if (!ASSERT_OK_PTR(iter_file, "fdopen"))
+ goto close_iter_fd;
+
+ while (getline(&line, &linesize, iter_file) != -1) {
+ if (f % FIELD_COUNT == INODE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.inode), 1,
+ "read inode");
+ } else if (f % FIELD_COUNT == SIZE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.size), 1,
+ "read size");
+ } else if (f % FIELD_COUNT == NAME) {
+ ASSERT_EQ(sscanf(line, "%s", bufinfo.name), 1,
+ "read name");
+ } else if (f % FIELD_COUNT == EXPORTER) {
+ ASSERT_EQ(sscanf(line, "%31s", bufinfo.exporter), 1,
+ "read exporter");
+
+ if (check_dmabuf_info(&bufinfo,
+ sysheap_test_buffer_size,
+ sysheap_test_buffer_name,
+ "system"))
+ found_test_sysheap_dmabuf = true;
+ else if (check_dmabuf_info(&bufinfo,
+ udmabuf_test_buffer_size,
+ udmabuf_test_buffer_name,
+ "udmabuf"))
+ found_test_udmabuf = true;
+ }
+ ++f;
+ }
+
+ ASSERT_EQ(f % FIELD_COUNT, INODE, "number of fields");
+
+ ASSERT_TRUE(found_test_sysheap_dmabuf, "found_test_sysheap_dmabuf");
+ ASSERT_TRUE(found_test_udmabuf, "found_test_udmabuf");
+
+ free(line);
+ fclose(iter_file);
+close_iter_fd:
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ char key[DMA_BUF_NAME_LEN];
+ int err, fd;
+ bool found;
+
+ /* No need to attach it, just run it directly */
+ fd = bpf_program__fd(skel->progs.iter_dmabuf_for_each);
+
+ err = bpf_prog_test_run_opts(fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, key), "get next key"))
+ return;
+
+ do {
+ ASSERT_OK(bpf_map_lookup_elem(map_fd, key, &found), "lookup");
+ ASSERT_TRUE(found, "found test buffer");
+ } while (bpf_map_get_next_key(map_fd, key, key));
+}
+
+void test_dmabuf_iter(void)
+{
+ struct dmabuf_iter *skel = NULL;
+ int map_fd;
+ const bool f = false;
+
+ skel = dmabuf_iter__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dmabuf_iter__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.testbuf_hash);
+ if (!ASSERT_OK_FD(map_fd, "map_fd"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, udmabuf_test_buffer_name, &f, BPF_ANY),
+ "insert udmabuf"))
+ goto destroy_skel;
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, sysheap_test_buffer_name, &f, BPF_ANY),
+ "insert sysheap buffer"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(create_test_buffers(), "create_test_buffers"))
+ goto destroy;
+
+ if (!ASSERT_OK(dmabuf_iter__attach(skel), "skel_attach"))
+ goto destroy;
+
+ if (test__start_subtest("no_infinite_reads"))
+ subtest_dmabuf_iter_check_no_infinite_reads(skel);
+ if (test__start_subtest("default_iter"))
+ subtest_dmabuf_iter_check_default_iter(skel);
+ if (test__start_subtest("open_coded"))
+ subtest_dmabuf_iter_check_open_coded(skel, map_fd);
+
+destroy:
+ destroy_test_buffers();
+destroy_skel:
+ dmabuf_iter__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index e29cc16124c2..62e7ec775f24 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -33,10 +33,19 @@ static struct {
{"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
{"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
{"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP},
+ {"test_probe_read_user_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_user_str_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_str_dynptr", SETUP_XDP_PROG},
+ {"test_copy_from_user_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_str_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP},
};
static void verify_success(const char *prog_name, enum test_setup_type setup_type)
{
+ char user_data[384] = {[0 ... 382] = 'a', '\0'};
struct dynptr_success *skel;
struct bpf_program *prog;
struct bpf_link *link;
@@ -58,6 +67,10 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
if (!ASSERT_OK(err, "dynptr_success__load"))
goto cleanup;
+ skel->bss->user_ptr = user_data;
+ skel->data->test_len[0] = sizeof(user_data);
+ memcpy(skel->bss->expected_str, user_data, sizeof(user_data));
+
switch (setup_type) {
case SETUP_SYSCALL_SLEEP:
link = bpf_program__attach(prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
new file mode 100644
index 000000000000..ca46fdd6e1ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <test_progs.h>
+#include "fd_htab_lookup.skel.h"
+
+struct htab_op_ctx {
+ int fd;
+ int loop;
+ unsigned int entries;
+ bool stop;
+};
+
+#define ERR_TO_RETVAL(where, err) ((void *)(long)(((where) << 12) | (-err)))
+
+static void *htab_lookup_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0, value;
+ int inner_fd, err;
+
+ err = bpf_map_lookup_elem(ctx->fd, &key, &value);
+ if (err) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, err);
+ }
+
+ inner_fd = bpf_map_get_fd_by_id(value);
+ if (inner_fd < 0) {
+ /* The old map has been freed */
+ if (inner_fd == -ENOENT)
+ continue;
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, inner_fd);
+ }
+
+ err = bpf_map_lookup_elem(inner_fd, &zero, &value);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+
+ if (value != key) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(4, -EINVAL);
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void *htab_update_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (inner_fd < 0) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, inner_fd);
+ }
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, err);
+ }
+
+ err = bpf_map_update_elem(ctx->fd, &key, &inner_fd, BPF_EXIST);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+ }
+ }
+
+ return NULL;
+}
+
+static int setup_htab(int fd, unsigned int entries)
+{
+ unsigned int i;
+
+ for (i = 0; i < entries; i++) {
+ unsigned int key = i, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "new array"))
+ return -1;
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (!ASSERT_OK(err, "init array")) {
+ close(inner_fd);
+ return -1;
+ }
+
+ err = bpf_map_update_elem(fd, &key, &inner_fd, 0);
+ if (!ASSERT_OK(err, "init outer")) {
+ close(inner_fd);
+ return -1;
+ }
+ close(inner_fd);
+ }
+
+ return 0;
+}
+
+static int get_int_from_env(const char *name, int dft)
+{
+ const char *value;
+
+ value = getenv(name);
+ if (!value)
+ return dft;
+
+ return atoi(value);
+}
+
+void test_fd_htab_lookup(void)
+{
+ unsigned int i, wr_nr = 8, rd_nr = 16;
+ pthread_t tids[wr_nr + rd_nr];
+ struct fd_htab_lookup *skel;
+ struct htab_op_ctx ctx;
+ int err;
+
+ skel = fd_htab_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fd_htab_lookup__open_and_load"))
+ return;
+
+ ctx.fd = bpf_map__fd(skel->maps.outer_map);
+ ctx.loop = get_int_from_env("FD_HTAB_LOOP_NR", 5);
+ ctx.stop = false;
+ ctx.entries = 8;
+
+ err = setup_htab(ctx.fd, ctx.entries);
+ if (err)
+ goto destroy;
+
+ memset(tids, 0, sizeof(tids));
+ for (i = 0; i < wr_nr; i++) {
+ err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+ for (i = 0; i < rd_nr; i++) {
+ err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+
+reap:
+ for (i = 0; i < wr_nr + rd_nr; i++) {
+ void *ret = NULL;
+ char desc[32];
+
+ if (!tids[i])
+ continue;
+
+ snprintf(desc, sizeof(desc), "thread %u", i + 1);
+ err = pthread_join(tids[i], &ret);
+ ASSERT_OK(err, desc);
+ ASSERT_EQ(ret, NULL, desc);
+ }
+destroy:
+ fd_htab_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index e59af2aa6601..e40114620751 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -37,6 +37,7 @@ static noinline void uprobe_func(void)
static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr,
ssize_t offset, ssize_t entry_offset)
{
+ ssize_t ref_ctr_offset = entry_offset /* ref_ctr_offset for uprobes */;
struct bpf_link_info info;
__u32 len = sizeof(info);
char buf[PATH_MAX];
@@ -97,6 +98,7 @@ again:
case BPF_PERF_EVENT_UPROBE:
case BPF_PERF_EVENT_URETPROBE:
ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset");
+ ASSERT_EQ(info.perf_event.uprobe.ref_ctr_offset, ref_ctr_offset, "uprobe_ref_ctr_offset");
ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1,
"name_len");
@@ -241,20 +243,32 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
.retprobe = type == BPF_PERF_EVENT_URETPROBE,
.bpf_cookie = PERF_EVENT_COOKIE,
);
+ const char *sema[1] = {
+ "uprobe_link_info_sema_1",
+ };
+ __u64 *ref_ctr_offset;
struct bpf_link *link;
int link_fd, err;
+ err = elf_resolve_syms_offsets("/proc/self/exe", 1, sema,
+ (unsigned long **) &ref_ctr_offset, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object"))
+ return;
+
+ opts.ref_ctr_offset = *ref_ctr_offset;
link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run,
0, /* self pid */
UPROBE_FILE, uprobe_offset,
&opts);
if (!ASSERT_OK_PTR(link, "attach_uprobe"))
- return;
+ goto out;
link_fd = bpf_link__fd(link);
- err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0);
+ err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, *ref_ctr_offset);
ASSERT_OK(err, "verify_perf_link_info");
bpf_link__destroy(link);
+out:
+ free(ref_ctr_offset);
}
static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies)
diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
index 8e13a3416a21..1de14b111931 100644
--- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
@@ -104,7 +104,7 @@ void test_kmem_cache_iter(void)
goto destroy;
memset(buf, 0, sizeof(buf));
- while (read(iter_fd, buf, sizeof(buf) > 0)) {
+ while (read(iter_fd, buf, sizeof(buf)) > 0) {
/* Read out all contents */
printf("%s", buf);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 77d07e0a4a55..5266c7022863 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -7,6 +7,7 @@
#include "linked_list.skel.h"
#include "linked_list_fail.skel.h"
+#include "linked_list_peek.skel.h"
static char log_buf[1024 * 1024];
@@ -805,3 +806,8 @@ void test_linked_list(void)
test_linked_list_success(LIST_IN_LIST, true);
test_linked_list_success(TEST_ALL, false);
}
+
+void test_linked_list_peek(void)
+{
+ RUN_TESTS(linked_list_peek);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c
index 9818f06c97c5..d8f3d7a45fe9 100644
--- a/tools/testing/selftests/bpf/prog_tests/rbtree.c
+++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c
@@ -8,6 +8,7 @@
#include "rbtree_fail.skel.h"
#include "rbtree_btf_fail__wrong_node_type.skel.h"
#include "rbtree_btf_fail__add_wrong_type.skel.h"
+#include "rbtree_search.skel.h"
static void test_rbtree_add_nodes(void)
{
@@ -187,3 +188,8 @@ void test_rbtree_fail(void)
{
RUN_TESTS(rbtree_fail);
}
+
+void test_rbtree_search(void)
+{
+ RUN_TESTS(rbtree_search);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 0b9bd1d6f7cc..10a0ab954b8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -37,8 +37,10 @@ configure_stack(void)
tc = popen("tc -V", "r");
if (CHECK_FAIL(!tc))
return false;
- if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc)))
+ if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) {
+ pclose(tc);
return false;
+ }
if (strstr(tc_version, ", libbpf "))
prog = "test_sk_assign_libbpf.bpf.o";
else
diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
index 1bdfb79ef009..e02cabcc814e 100644
--- a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
@@ -3,6 +3,7 @@
#ifndef __SOCKET_HELPERS__
#define __SOCKET_HELPERS__
+#include <sys/un.h>
#include <linux/vm_sockets.h>
/* include/linux/net.h */
@@ -169,6 +170,15 @@ static inline void init_addr_loopback6(struct sockaddr_storage *ss,
*len = sizeof(*addr6);
}
+static inline void init_addr_loopback_unix(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_un *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->sun_family = AF_UNIX;
+ *len = sizeof(sa_family_t);
+}
+
static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
socklen_t *len)
{
@@ -190,6 +200,9 @@ static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
case AF_INET6:
init_addr_loopback6(ss, len);
return;
+ case AF_UNIX:
+ init_addr_loopback_unix(ss, len);
+ return;
case AF_VSOCK:
init_addr_loopback_vsock(ss, len);
return;
@@ -315,21 +328,27 @@ static inline int create_pair(int family, int sotype, int *p0, int *p1)
{
__close_fd int s, c = -1, p = -1;
struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
+ socklen_t len;
int err;
s = socket_loopback(family, sotype);
if (s < 0)
return s;
- err = xgetsockname(s, sockaddr(&addr), &len);
- if (err)
- return err;
-
c = xsocket(family, sotype, 0);
if (c < 0)
return c;
+ init_addr_loopback(family, &addr, &len);
+ err = xbind(c, sockaddr(&addr), len);
+ if (err)
+ return err;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
err = connect(c, sockaddr(&addr), len);
if (err) {
if (errno != EINPROGRESS) {
@@ -391,4 +410,59 @@ static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
return err;
}
+static inline const char *socket_kind_to_str(int sock_fd)
+{
+ socklen_t opt_len;
+ int domain, type;
+
+ opt_len = sizeof(domain);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_DOMAIN, &domain, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_DOMAIN)");
+
+ opt_len = sizeof(type);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &type, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_TYPE)");
+
+ switch (domain) {
+ case AF_INET:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp4";
+ case SOCK_DGRAM:
+ return "udp4";
+ }
+ break;
+ case AF_INET6:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp6";
+ case SOCK_DGRAM:
+ return "udp6";
+ }
+ break;
+ case AF_UNIX:
+ switch (type) {
+ case SOCK_STREAM:
+ return "u_str";
+ case SOCK_DGRAM:
+ return "u_dgr";
+ case SOCK_SEQPACKET:
+ return "u_seq";
+ }
+ break;
+ case AF_VSOCK:
+ switch (type) {
+ case SOCK_STREAM:
+ return "v_str";
+ case SOCK_DGRAM:
+ return "v_dgr";
+ case SOCK_SEQPACKET:
+ return "v_seq";
+ }
+ break;
+ }
+
+ return "???";
+}
+
#endif // __SOCKET_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index 3e5571dd578d..d815efac52fd 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -5,12 +5,15 @@
#define MAX_TEST_NAME 80
+#define u32(v) ((u32){(v)})
+#define u64(v) ((u64){(v)})
+
#define __always_unused __attribute__((__unused__))
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_delete"); \
__ret; \
})
@@ -18,7 +21,7 @@
#define xbpf_map_lookup_elem(fd, key, val) \
({ \
int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_lookup"); \
__ret; \
})
@@ -26,7 +29,7 @@
#define xbpf_map_update_elem(fd, key, val, flags) \
({ \
int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_update"); \
__ret; \
})
@@ -35,7 +38,7 @@
({ \
int __ret = \
bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_attach(" #type ")"); \
__ret; \
})
@@ -43,7 +46,7 @@
#define xbpf_prog_detach2(prog, target, type) \
({ \
int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_detach2(" #type ")"); \
__ret; \
})
@@ -66,21 +69,15 @@
__ret; \
})
-static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+static inline int add_to_sockmap(int mapfd, int fd1, int fd2)
{
- u64 value;
- u32 key;
int err;
- key = 0;
- value = fd1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = xbpf_map_update_elem(mapfd, &u32(0), &u64(fd1), BPF_NOEXIST);
if (err)
return err;
- key = 1;
- value = fd2;
- return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ return xbpf_map_update_elem(mapfd, &u32(1), &u64(fd2), BPF_NOEXIST);
}
#endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 0a99fd404f6d..b6c471da5c28 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -3,76 +3,62 @@
/*
* Tests for sockmap/sockhash holding kTLS sockets.
*/
-
+#include <error.h>
#include <netinet/tcp.h>
+#include <linux/tls.h>
#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_ktls.skel.h"
#define MAX_TEST_NAME 80
#define TCP_ULP 31
-static int tcp_server(int family)
+static int init_ktls_pairs(int c, int p)
{
- int err, s;
-
- s = socket(family, SOCK_STREAM, 0);
- if (!ASSERT_GE(s, 0, "socket"))
- return -1;
-
- err = listen(s, SOMAXCONN);
- if (!ASSERT_OK(err, "listen"))
- return -1;
-
- return s;
-}
+ int err;
+ struct tls12_crypto_info_aes_gcm_128 crypto_rx;
+ struct tls12_crypto_info_aes_gcm_128 crypto_tx;
-static int disconnect(int fd)
-{
- struct sockaddr unspec = { AF_UNSPEC };
+ err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
- return connect(fd, &unspec, sizeof(unspec));
+ err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
+
+ memset(&crypto_rx, 0, sizeof(crypto_rx));
+ memset(&crypto_tx, 0, sizeof(crypto_tx));
+ crypto_rx.info.version = TLS_1_2_VERSION;
+ crypto_tx.info.version = TLS_1_2_VERSION;
+ crypto_rx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ crypto_tx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+ err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_tx, sizeof(crypto_tx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_TX)"))
+ goto out;
+
+ err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_rx, sizeof(crypto_rx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_RX)"))
+ goto out;
+ return 0;
+out:
+ return -1;
}
-/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */
-static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
+static int create_ktls_pairs(int family, int sotype, int *c, int *p)
{
- struct sockaddr_storage addr = {0};
- socklen_t len = sizeof(addr);
- int err, cli, srv, zero = 0;
-
- srv = tcp_server(family);
- if (srv == -1)
- return;
-
- err = getsockname(srv, (struct sockaddr *)&addr, &len);
- if (!ASSERT_OK(err, "getsockopt"))
- goto close_srv;
+ int err;
- cli = socket(family, SOCK_STREAM, 0);
- if (!ASSERT_GE(cli, 0, "socket"))
- goto close_srv;
-
- err = connect(cli, (struct sockaddr *)&addr, len);
- if (!ASSERT_OK(err, "connect"))
- goto close_cli;
-
- err = bpf_map_update_elem(map, &zero, &cli, 0);
- if (!ASSERT_OK(err, "bpf_map_update_elem"))
- goto close_cli;
-
- err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
- if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
- goto close_cli;
-
- err = bpf_map_delete_elem(map, &zero);
- if (!ASSERT_OK(err, "bpf_map_delete_elem"))
- goto close_cli;
-
- err = disconnect(cli);
+ err = create_pair(family, sotype, c, p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ return -1;
-close_cli:
- close(cli);
-close_srv:
- close(srv);
+ err = init_ktls_pairs(*c, *p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ return -1;
+ return 0;
}
static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map)
@@ -145,6 +131,189 @@ static const char *fmt_test_name(const char *subtest_name, int family,
return test_name;
}
+static void test_sockmap_ktls_offload(int family, int sotype)
+{
+ int err;
+ int c = 0, p = 0, sent, recvd;
+ char msg[12] = "hello world\0";
+ char rcv[13];
+
+ err = create_ktls_pairs(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_ktls_pairs()"))
+ goto out;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_OK(err, "send(msg)"))
+ goto out;
+
+ recvd = recv(p, rcv, sizeof(rcv), 0);
+ if (!ASSERT_OK(err, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sent, "length mismatch"))
+ goto out;
+
+ ASSERT_OK(memcmp(msg, rcv, sizeof(msg)), "data mismatch");
+
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+}
+
+static void test_sockmap_ktls_tx_cork(int family, int sotype, bool push)
+{
+ int err, off;
+ int i, j;
+ int start_push = 0, push_len = 0;
+ int c = 0, p = 0, one = 1, sent, recvd;
+ int prog_fd, map_fd;
+ char msg[12] = "hello world\0";
+ char rcv[20] = {0};
+ struct test_sockmap_ktls *skel;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ skel->bss->cork_byte = sizeof(msg);
+ if (push) {
+ start_push = 1;
+ push_len = 2;
+ }
+ skel->bss->push_start = start_push;
+ skel->bss->push_end = push_len;
+
+ off = sizeof(msg) / 2;
+ sent = send(c, msg, off, 0);
+ if (!ASSERT_EQ(sent, off, "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "expected no data"))
+ goto out;
+
+ /* send remaining msg */
+ sent = send(c, msg + off, sizeof(msg) - off, 0);
+ if (!ASSERT_EQ(sent, sizeof(msg) - off, "send remaining data"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_OK(err, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sizeof(msg) + push_len, "check length mismatch"))
+ goto out;
+
+ for (i = 0, j = 0; i < recvd;) {
+ /* skip checking the data that has been pushed in */
+ if (i >= start_push && i <= start_push + push_len - 1) {
+ i++;
+ continue;
+ }
+ if (!ASSERT_EQ(rcv[i], msg[j], "data mismatch"))
+ goto out;
+ i++;
+ j++;
+ }
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
+static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push)
+{
+ int c = -1, p = -1, one = 1, two = 2;
+ struct test_sockmap_ktls *skel;
+ unsigned char *data = NULL;
+ struct msghdr msg = {0};
+ struct iovec iov[2];
+ int prog_fd, map_fd;
+ int txrx_buf = 1024;
+ int iov_length = 8192;
+ int err;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ err = setsockopt(c, SOL_SOCKET, SO_RCVBUFFORCE, &txrx_buf, sizeof(int));
+ err |= setsockopt(p, SOL_SOCKET, SO_SNDBUFFORCE, &txrx_buf, sizeof(int));
+ if (!ASSERT_OK(err, "set buf limit"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy_redir);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &two, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+ goto out;
+
+ skel->bss->apply_bytes = 1024;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ data = calloc(iov_length, sizeof(char));
+ if (!data)
+ goto out;
+
+ iov[0].iov_base = data;
+ iov[0].iov_len = iov_length;
+ iov[1].iov_base = data;
+ iov[1].iov_len = iov_length;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 2;
+
+ for (;;) {
+ err = sendmsg(c, &msg, MSG_DONTWAIT);
+ if (err <= 0)
+ break;
+ }
+
+out:
+ if (data)
+ free(data);
+ if (c != -1)
+ close(c);
+ if (p != -1)
+ close(p);
+
+ test_sockmap_ktls__destroy(skel);
+}
+
static void run_tests(int family, enum bpf_map_type map_type)
{
int map;
@@ -153,18 +322,30 @@ static void run_tests(int family, enum bpf_map_type map_type)
if (!ASSERT_GE(map, 0, "bpf_map_create"))
return;
- if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type)))
- test_sockmap_ktls_disconnect_after_delete(family, map);
if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type)))
test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map);
close(map);
}
+static void run_ktls_test(int family, int sotype)
+{
+ if (test__start_subtest("tls simple offload"))
+ test_sockmap_ktls_offload(family, sotype);
+ if (test__start_subtest("tls tx cork"))
+ test_sockmap_ktls_tx_cork(family, sotype, false);
+ if (test__start_subtest("tls tx cork with push"))
+ test_sockmap_ktls_tx_cork(family, sotype, true);
+ if (test__start_subtest("tls tx egress with no buf"))
+ test_sockmap_ktls_tx_no_buf(family, sotype, true);
+}
+
void test_sockmap_ktls(void)
{
run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH);
+ run_ktls_test(AF_INET, SOCK_STREAM);
+ run_ktls_test(AF_INET6, SOCK_STREAM);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 4ee1148d22be..1d98eee7a2c3 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1366,237 +1366,6 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
-static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
- int sock_mapfd, int nop_mapfd,
- int verd_mapfd, enum redir_mode mode,
- int send_flags)
-{
- const char *log_prefix = redir_mode_str(mode);
- unsigned int pass;
- int err, n;
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
-
- err = add_to_sockmap(sock_mapfd, peer0, peer1);
- if (err)
- return;
-
- if (nop_mapfd >= 0) {
- err = add_to_sockmap(nop_mapfd, cli0, cli1);
- if (err)
- return;
- }
-
- /* Last byte is OOB data when send_flags has MSG_OOB bit set */
- n = xsend(cli1, "ab", 2, send_flags);
- if (n >= 0 && n < 2)
- FAIL("%s: incomplete send", log_prefix);
- if (n < 2)
- return;
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- return;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
- n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
- if (n < 0)
- FAIL_ERRNO("%s: recv_timeout", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
-
- if (send_flags & MSG_OOB) {
- /* Check that we can't read OOB while in sockmap */
- errno = 0;
- n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
- if (n != -1 || errno != EOPNOTSUPP)
- FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d",
- log_prefix, n, errno);
-
- /* Remove peer1 from sockmap */
- xbpf_map_delete_elem(sock_mapfd, &(int){ 1 });
-
- /* Check that OOB was dropped on redirect */
- errno = 0;
- n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
- if (n != -1 || errno != EINVAL)
- FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d",
- log_prefix, n, errno);
- }
-}
-
-static void unix_redir_to_connected(int sotype, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int sfd[2];
-
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- return;
- c0 = sfd[0], p0 = sfd[1];
-
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- goto close0;
- c1 = sfd[0], p1 = sfd[1];
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close0:
- xclose(c0);
- xclose(p0);
-}
-
-static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int sotype)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
- skel->bss->test_ingress = true;
- unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int sotype)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(AF_UNIX);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
- unix_skb_redir_to_connected(skel, map, sotype);
-}
-
-/* Returns two connected loopback vsock sockets */
-static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
-{
- return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1);
-}
-
-static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
- enum redir_mode mode, int sotype)
-{
- const char *log_prefix = redir_mode_str(mode);
- char a = 'a', b = 'b';
- int u0, u1, v0, v1;
- int sfd[2];
- unsigned int pass;
- int err, n;
- u32 key;
-
- zero_verdict_count(verd_mapfd);
-
- if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
- return;
-
- u0 = sfd[0];
- u1 = sfd[1];
-
- err = vsock_socketpair_connectible(sotype, &v0, &v1);
- if (err) {
- FAIL("vsock_socketpair_connectible() failed");
- goto close_uds;
- }
-
- err = add_to_sockmap(sock_mapfd, u0, v0);
- if (err) {
- FAIL("add_to_sockmap failed");
- goto close_vsock;
- }
-
- n = write(v1, &a, sizeof(a));
- if (n < 0)
- FAIL_ERRNO("%s: write", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete write", log_prefix);
- if (n < 1)
- goto out;
-
- n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
- if (n < 0)
- FAIL("%s: recv() err, errno=%d", log_prefix, errno);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
- if (b != a)
- FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- goto out;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-out:
- key = 0;
- bpf_map_delete_elem(sock_mapfd, &key);
- key = 1;
- bpf_map_delete_elem(sock_mapfd, &key);
-
-close_vsock:
- close(v0);
- close(v1);
-
-close_uds:
- close(u0);
- close(u1);
-}
-
-static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map,
- int sotype)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
- skel->bss->test_ingress = true;
- vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(AF_VSOCK);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
-
- vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
- vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
-}
-
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -1637,224 +1406,6 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
-static int inet_socketpair(int family, int type, int *s, int *c)
-{
- return create_pair(family, type | SOCK_NONBLOCK, s, c);
-}
-
-static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
- enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int err;
-
- err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
- if (err)
- return;
- err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
- if (err)
- goto close_cli0;
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close_cli0:
- xclose(c0);
- xclose(p0);
-}
-
-static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
- skel->bss->test_ingress = true;
- udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(family);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
- udp_skb_redir_to_connected(skel, map, family);
-}
-
-static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int sfd[2];
- int err;
-
- if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
- return;
- c0 = sfd[0], p0 = sfd[1];
-
- err = inet_socketpair(family, type, &p1, &c1);
- if (err)
- goto close;
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close:
- xclose(c0);
- xclose(p0);
-}
-
-static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_EGRESS);
- inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
- REDIR_EGRESS);
- skel->bss->test_ingress = true;
- inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_INGRESS);
- inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
- REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
- int nop_mapfd, int verd_mapfd,
- enum redir_mode mode, int send_flags)
-{
- int c0, c1, p0, p1;
- int sfd[2];
- int err;
-
- err = inet_socketpair(family, type, &p0, &c0);
- if (err)
- return;
-
- if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
- goto close_cli0;
- c1 = sfd[0], p1 = sfd[1];
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd,
- verd_mapfd, mode, send_flags);
-
- xclose(c1);
- xclose(p1);
-close_cli0:
- xclose(c0);
- xclose(p0);
-}
-
-static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int nop_map = bpf_map__fd(skel->maps.nop_map);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, -1, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, -1, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
-
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
-
- /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, MSG_OOB);
-
- skel->bss->test_ingress = true;
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, -1, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, -1, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
-
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
-
- /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, MSG_OOB);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
-{
- const char *family_name, *map_name;
- struct netns_obj *netns;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(family);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
-
- netns = netns_new("sockmap_listen", true);
- if (!ASSERT_OK_PTR(netns, "netns_new"))
- return;
-
- inet_unix_skb_redir_to_connected(skel, map, family);
- unix_inet_skb_redir_to_connected(skel, map, family);
-
- netns_free(netns);
-}
-
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
@@ -1863,8 +1414,6 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_redir(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
- test_udp_redir(skel, map, family);
- test_udp_unix_redir(skel, map, family);
}
void serial_test_sockmap_listen(void)
@@ -1880,16 +1429,10 @@ void serial_test_sockmap_listen(void)
skel->bss->test_sockmap = true;
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
- test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
- test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
- test_vsock_redir(skel, skel->maps.sock_map);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
- test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
- test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
- test_vsock_redir(skel, skel->maps.sock_hash);
test_sockmap_listen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
new file mode 100644
index 000000000000..9c461d93113d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for sockmap/sockhash redirection.
+ *
+ * BPF_MAP_TYPE_SOCKMAP
+ * BPF_MAP_TYPE_SOCKHASH
+ * x
+ * sk_msg-to-egress
+ * sk_msg-to-ingress
+ * sk_skb-to-egress
+ * sk_skb-to-ingress
+ * x
+ * AF_INET, SOCK_STREAM
+ * AF_INET6, SOCK_STREAM
+ * AF_INET, SOCK_DGRAM
+ * AF_INET6, SOCK_DGRAM
+ * AF_UNIX, SOCK_STREAM
+ * AF_UNIX, SOCK_DGRAM
+ * AF_VSOCK, SOCK_STREAM
+ * AF_VSOCK, SOCK_SEQPACKET
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <linux/string.h>
+#include <linux/vm_sockets.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "linux/const.h"
+#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_sockmap_redir.skel.h"
+
+/* The meaning of SUPPORTED is "will redirect packet as expected".
+ */
+#define SUPPORTED _BITUL(0)
+
+/* Note on sk_skb-to-ingress ->af_vsock:
+ *
+ * Peer socket may receive the packet some time after the return from sendmsg().
+ * In a typical usage scenario, recvmsg() will block until the redirected packet
+ * appears in the destination queue, or timeout if the packet was dropped. By
+ * that point, the verdict map has already been updated to reflect what has
+ * happened.
+ *
+ * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg()
+ * takes place. Which means we may race the execution of the verdict logic and
+ * read map_verd before it has been updated, i.e. we might observe
+ * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1.
+ *
+ * This confuses the selftest logic: if there was no packet dropped, where's the
+ * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0
+ * (which implies the verdict program has not been ran) just re-read the verdict
+ * map again.
+ */
+#define UNSUPPORTED_RACY_VERD _BITUL(1)
+
+enum prog_type {
+ SK_MSG_EGRESS,
+ SK_MSG_INGRESS,
+ SK_SKB_EGRESS,
+ SK_SKB_INGRESS,
+};
+
+enum {
+ SEND_INNER = 0,
+ SEND_OUTER,
+};
+
+enum {
+ RECV_INNER = 0,
+ RECV_OUTER,
+};
+
+struct maps {
+ int in;
+ int out;
+ int verd;
+};
+
+struct combo_spec {
+ enum prog_type prog_type;
+ const char *in, *out;
+};
+
+struct redir_spec {
+ const char *name;
+ int idx_send;
+ int idx_recv;
+ enum prog_type prog_type;
+};
+
+struct socket_spec {
+ int family;
+ int sotype;
+ int send_flags;
+ int in[2];
+ int out[2];
+};
+
+static int socket_spec_pairs(struct socket_spec *s)
+{
+ return create_socket_pairs(s->family, s->sotype,
+ &s->in[0], &s->out[0],
+ &s->in[1], &s->out[1]);
+}
+
+static void socket_spec_close(struct socket_spec *s)
+{
+ xclose(s->in[0]);
+ xclose(s->in[1]);
+ xclose(s->out[0]);
+ xclose(s->out[1]);
+}
+
+static void get_redir_params(struct redir_spec *redir,
+ struct test_sockmap_redir *skel, int *prog_fd,
+ enum bpf_attach_type *attach_type,
+ int *redirect_flags)
+{
+ enum prog_type type = redir->prog_type;
+ struct bpf_program *prog;
+ bool sk_msg;
+
+ sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS;
+ prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict;
+
+ *prog_fd = bpf_program__fd(prog);
+ *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT;
+
+ if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS)
+ *redirect_flags = BPF_F_INGRESS;
+ else
+ *redirect_flags = 0;
+}
+
+static void try_recv(const char *prefix, int fd, int flags, bool expect_success)
+{
+ ssize_t n;
+ char buf;
+
+ errno = 0;
+ n = recv(fd, &buf, 1, flags);
+ if (n < 0 && expect_success)
+ FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n);
+ if (!n && !expect_success)
+ FAIL("%s: expected failure: retval=%zd", prefix, n);
+}
+
+static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out,
+ int sd_recv, int map_verd, int status)
+{
+ unsigned int drop, pass;
+ char recv_buf;
+ ssize_t n;
+
+get_verdict:
+ if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass))
+ return;
+
+ if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) {
+ sched_yield();
+ goto get_verdict;
+ }
+
+ if (pass != 0) {
+ FAIL("unsupported: wanted verdict pass 0, have %u", pass);
+ return;
+ }
+
+ /* If nothing was dropped, packet should have reached the peer */
+ if (drop == 0) {
+ errno = 0;
+ n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1)
+ FAIL_ERRNO("unsupported: packet missing, retval=%zd", n);
+ }
+
+ /* Ensure queues are empty */
+ try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false);
+ if (sd_in != sd_send)
+ try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false);
+
+ try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false);
+ if (sd_recv != sd_out)
+ try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false);
+}
+
+static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer,
+ int sd_in, int sd_out, int sd_recv,
+ struct maps *maps, int status)
+{
+ unsigned int drop, pass;
+ char *send_buf = "ab";
+ char recv_buf = '\0';
+ ssize_t n, len = 1;
+
+ /* Zero out the verdict map */
+ if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) ||
+ xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY))
+ return;
+
+ if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST))
+ return;
+
+ if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST))
+ goto del_in;
+
+ /* Last byte is OOB data when send_flags has MSG_OOB bit set */
+ if (send_flags & MSG_OOB)
+ len++;
+ n = send(sd_send, send_buf, len, send_flags);
+ if (n >= 0 && n < len)
+ FAIL("incomplete send");
+ if (n < 0) {
+ /* sk_msg redirect combo not supported? */
+ if (status & SUPPORTED || errno != EACCES)
+ FAIL_ERRNO("send");
+ goto out;
+ }
+
+ if (!(status & SUPPORTED)) {
+ handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv,
+ maps->verd, status);
+ goto out;
+ }
+
+ errno = 0;
+ n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1) {
+ FAIL_ERRNO("recv_timeout()");
+ goto out;
+ }
+
+ /* Check verdict _after_ recv(); af_vsock may need time to catch up */
+ if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass))
+ goto out;
+
+ if (drop != 0 || pass != 1)
+ FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u",
+ drop, pass);
+
+ if (recv_buf != send_buf[0])
+ FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]);
+
+ if (send_flags & MSG_OOB) {
+ /* Fail reading OOB while in sockmap */
+ try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ /* Remove sd_out from sockmap */
+ xbpf_map_delete_elem(maps->out, &u32(0));
+
+ /* Check that OOB was dropped on redirect */
+ try_recv("recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ goto del_in;
+ }
+out:
+ xbpf_map_delete_elem(maps->out, &u32(0));
+del_in:
+ xbpf_map_delete_elem(maps->in, &u32(0));
+}
+
+static int is_redir_supported(enum prog_type type, const char *in,
+ const char *out)
+{
+ /* Matching based on strings returned by socket_kind_to_str():
+ * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq
+ * Plus a wildcard: any
+ * Not in use: u_seq, v_dgr
+ */
+ struct combo_spec *c, combos[] = {
+ /* Send to local: TCP -> any, but vsock */
+ { SK_MSG_INGRESS, "tcp", "tcp" },
+ { SK_MSG_INGRESS, "tcp", "udp" },
+ { SK_MSG_INGRESS, "tcp", "u_str" },
+ { SK_MSG_INGRESS, "tcp", "u_dgr" },
+
+ /* Send to egress: TCP -> TCP */
+ { SK_MSG_EGRESS, "tcp", "tcp" },
+
+ /* Ingress to egress: any -> any */
+ { SK_SKB_EGRESS, "any", "any" },
+
+ /* Ingress to local: any -> any, but vsock */
+ { SK_SKB_INGRESS, "any", "tcp" },
+ { SK_SKB_INGRESS, "any", "udp" },
+ { SK_SKB_INGRESS, "any", "u_str" },
+ { SK_SKB_INGRESS, "any", "u_dgr" },
+ };
+
+ for (c = combos; c < combos + ARRAY_SIZE(combos); c++) {
+ if (c->prog_type == type &&
+ (!strcmp(c->in, "any") || strstarts(in, c->in)) &&
+ (!strcmp(c->out, "any") || strstarts(out, c->out)))
+ return SUPPORTED;
+ }
+
+ return 0;
+}
+
+static int get_support_status(enum prog_type type, const char *in,
+ const char *out)
+{
+ int status = is_redir_supported(type, in, out);
+
+ if (type == SK_SKB_INGRESS && strstarts(out, "v_"))
+ status |= UNSUPPORTED_RACY_VERD;
+
+ return status;
+}
+
+static void test_socket(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps, struct socket_spec *s_in,
+ struct socket_spec *s_out)
+{
+ int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status;
+ const char *in_str, *out_str;
+ char s[MAX_TEST_NAME];
+
+ fd_in = s_in->in[0];
+ fd_out = s_out->out[0];
+ fd_send = s_in->in[redir->idx_send];
+ fd_peer = s_in->in[redir->idx_send ^ 1];
+ fd_recv = s_out->out[redir->idx_recv];
+ flags = s_in->send_flags;
+
+ in_str = socket_kind_to_str(fd_in);
+ out_str = socket_kind_to_str(fd_out);
+ status = get_support_status(redir->prog_type, in_str, out_str);
+
+ snprintf(s, sizeof(s),
+ "%-4s %-17s %-5s %s %-5s%6s",
+ /* hash sk_skb-to-ingress u_str → v_str (OOB) */
+ type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash",
+ redir->name,
+ in_str,
+ status & SUPPORTED ? "→" : " ",
+ out_str,
+ (flags & MSG_OOB) ? "(OOB)" : "");
+
+ if (!test__start_subtest(s))
+ return;
+
+ test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv,
+ maps, status);
+}
+
+static void test_redir(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps)
+{
+ struct socket_spec *s, sockets[] = {
+ { AF_INET, SOCK_STREAM },
+ // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */
+ { AF_INET6, SOCK_STREAM },
+ { AF_INET, SOCK_DGRAM },
+ { AF_INET6, SOCK_DGRAM },
+ { AF_UNIX, SOCK_STREAM },
+ { AF_UNIX, SOCK_STREAM, MSG_OOB },
+ { AF_UNIX, SOCK_DGRAM },
+ // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */
+ { AF_VSOCK, SOCK_STREAM },
+ // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */
+ { AF_VSOCK, SOCK_SEQPACKET },
+ };
+
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ if (socket_spec_pairs(s))
+ goto out;
+
+ /* Intra-proto */
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ test_socket(type, redir, maps, s, s);
+
+ /* Cross-proto */
+ for (int i = 0; i < ARRAY_SIZE(sockets); i++) {
+ for (int j = 0; j < ARRAY_SIZE(sockets); j++) {
+ struct socket_spec *out = &sockets[j];
+ struct socket_spec *in = &sockets[i];
+
+ /* Skip intra-proto and between variants */
+ if (out->send_flags ||
+ (in->family == out->family &&
+ in->sotype == out->sotype))
+ continue;
+
+ test_socket(type, redir, maps, in, out);
+ }
+ }
+out:
+ while (--s >= sockets)
+ socket_spec_close(s);
+}
+
+static void test_map(enum bpf_map_type type)
+{
+ struct redir_spec *r, redirs[] = {
+ { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS },
+ { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS },
+ { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS },
+ { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS },
+ };
+
+ for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) {
+ enum bpf_attach_type attach_type;
+ struct test_sockmap_redir *skel;
+ struct maps maps;
+ int prog_fd;
+
+ skel = test_sockmap_redir__open_and_load();
+ if (!skel) {
+ FAIL("open_and_load");
+ return;
+ }
+
+ switch (type) {
+ case BPF_MAP_TYPE_SOCKMAP:
+ maps.in = bpf_map__fd(skel->maps.nop_map);
+ maps.out = bpf_map__fd(skel->maps.sock_map);
+ break;
+ case BPF_MAP_TYPE_SOCKHASH:
+ maps.in = bpf_map__fd(skel->maps.nop_hash);
+ maps.out = bpf_map__fd(skel->maps.sock_hash);
+ break;
+ default:
+ FAIL("Unsupported bpf_map_type");
+ return;
+ }
+
+ skel->bss->redirect_type = type;
+ maps.verd = bpf_map__fd(skel->maps.verdict_map);
+ get_redir_params(r, skel, &prog_fd, &attach_type,
+ &skel->bss->redirect_flags);
+
+ if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0))
+ return;
+
+ test_redir(type, r, &maps);
+
+ if (xbpf_prog_detach2(prog_fd, maps.in, attach_type))
+ return;
+
+ test_sockmap_redir__destroy(skel);
+ }
+}
+
+void serial_test_sockmap_redir(void)
+{
+ test_map(BPF_MAP_TYPE_SOCKMAP);
+ test_map(BPF_MAP_TYPE_SOCKHASH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index c85798966aec..76d72a59365e 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -56,6 +56,8 @@
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"
+#define MAC_SRC_FWD "00:33:44:55:66:77"
+#define MAC_SRC "00:44:55:66:77:88"
#define IFADDR_STR_LEN 18
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
@@ -207,11 +209,10 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
int err;
if (result->dev_mode == MODE_VETH) {
- SYS(fail, "ip link add src type veth peer name src_fwd");
- SYS(fail, "ip link add dst type veth peer name dst_fwd");
-
- SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
- SYS(fail, "ip link set dst address " MAC_DST);
+ SYS(fail, "ip link add src address " MAC_SRC " type veth "
+ "peer name src_fwd address " MAC_SRC_FWD);
+ SYS(fail, "ip link add dst address " MAC_DST " type veth "
+ "peer name dst_fwd address " MAC_DST_FWD);
} else if (result->dev_mode == MODE_NETKIT) {
err = create_netkit(NETKIT_L3, "src", "src_fwd");
if (!ASSERT_OK(err, "create_ifindex_src"))
diff --git a/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
new file mode 100644
index 000000000000..7d1b478c99a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms Inc. */
+#include <test_progs.h>
+#include "test_btf_ext.skel.h"
+#include "btf_helpers.h"
+
+static void subtest_line_func_info(void)
+{
+ struct test_btf_ext *skel;
+ struct bpf_prog_info info;
+ struct bpf_line_info line_info[128], *libbpf_line_info;
+ struct bpf_func_info func_info[128], *libbpf_func_info;
+ __u32 info_len = sizeof(info), libbbpf_line_info_cnt, libbbpf_func_info_cnt;
+ int err, fd;
+
+ skel = test_btf_ext__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.line_info = ptr_to_u64(&line_info);
+ info.nr_line_info = sizeof(line_info);
+ info.line_info_rec_size = sizeof(*line_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_line_info"))
+ goto out;
+
+ libbpf_line_info = bpf_program__line_info(skel->progs.global_func);
+ libbbpf_line_info_cnt = bpf_program__line_info_cnt(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.func_info = ptr_to_u64(&func_info);
+ info.nr_func_info = sizeof(func_info);
+ info.func_info_rec_size = sizeof(*func_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_func_info"))
+ goto out;
+
+ libbpf_func_info = bpf_program__func_info(skel->progs.global_func);
+ libbbpf_func_info_cnt = bpf_program__func_info_cnt(skel->progs.global_func);
+
+ if (!ASSERT_OK_PTR(libbpf_line_info, "bpf_program__line_info"))
+ goto out;
+ if (!ASSERT_EQ(libbbpf_line_info_cnt, info.nr_line_info, "line_info_cnt"))
+ goto out;
+ if (!ASSERT_OK_PTR(libbpf_func_info, "bpf_program__func_info"))
+ goto out;
+ if (!ASSERT_EQ(libbbpf_func_info_cnt, info.nr_func_info, "func_info_cnt"))
+ goto out;
+ ASSERT_MEMEQ(libbpf_line_info, line_info, libbbpf_line_info_cnt * sizeof(*line_info),
+ "line_info");
+ ASSERT_MEMEQ(libbpf_func_info, func_info, libbbpf_func_info_cnt * sizeof(*func_info),
+ "func_info");
+out:
+ test_btf_ext__destroy(skel);
+}
+
+void test_btf_ext(void)
+{
+ if (test__start_subtest("line_func_info"))
+ subtest_line_func_info();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
index a95b42bf744a..47b56c258f3f 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -63,6 +63,9 @@ static void test_set_global_vars_succeeds(void)
" -G \"var_eb = EB2\" "\
" -G \"var_ec = EC2\" "\
" -G \"var_b = 1\" "\
+ " -G \"struct1.struct2.u.var_u8 = 170\" "\
+ " -G \"union1.struct3.var_u8_l = 0xaa\" "\
+ " -G \"union1.struct3.var_u8_h = 0xaa\" "\
"-vl2 > %s", fix->veristat, fix->tmpfile);
read(fix->fd, fix->output, fix->sz);
@@ -78,6 +81,8 @@ static void test_set_global_vars_succeeds(void)
__CHECK_STR("_w=12 ", "var_eb = EB2");
__CHECK_STR("_w=13 ", "var_ec = EC2");
__CHECK_STR("_w=1 ", "var_b = 1");
+ __CHECK_STR("_w=170 ", "struct1.struct2.u.var_u8 = 170");
+ __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa");
out:
teardown_fixture(fix);
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index e66a57970d28..c9da06741104 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -14,6 +14,7 @@
#include "verifier_bounds_deduction_non_const.skel.h"
#include "verifier_bounds_mix_sign_unsign.skel.h"
#include "verifier_bpf_get_stack.skel.h"
+#include "verifier_bpf_trap.skel.h"
#include "verifier_bswap.skel.h"
#include "verifier_btf_ctx_access.skel.h"
#include "verifier_btf_unreliable_prog.skel.h"
@@ -148,6 +149,7 @@ void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction);
void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); }
void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); }
+void test_verifier_bpf_trap(void) { RUN(verifier_bpf_trap); }
void test_verifier_bswap(void) { RUN(verifier_bswap); }
void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); }
void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 3d47878ef6bf..19f92affc2da 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -351,9 +351,10 @@ void test_xdp_metadata(void)
struct xdp_metadata2 *bpf_obj2 = NULL;
struct xdp_metadata *bpf_obj = NULL;
struct bpf_program *new_prog, *prog;
+ struct bpf_devmap_val devmap_e = {};
+ struct bpf_map *prog_arr, *devmap;
struct nstoken *tok = NULL;
__u32 queue_id = QUEUE_ID;
- struct bpf_map *prog_arr;
struct xsk tx_xsk = {};
struct xsk rx_xsk = {};
__u32 val, key = 0;
@@ -409,6 +410,13 @@ void test_xdp_metadata(void)
bpf_program__set_ifindex(prog, rx_ifindex);
bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+ /* Make sure we can load a dev-bound program that performs
+ * XDP_REDIRECT into a devmap.
+ */
+ new_prog = bpf_object__find_program_by_name(bpf_obj->obj, "redirect");
+ bpf_program__set_ifindex(new_prog, rx_ifindex);
+ bpf_program__set_flags(new_prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
goto out;
@@ -423,6 +431,18 @@ void test_xdp_metadata(void)
"update prog_arr"))
goto out;
+ /* Make sure we can't add dev-bound programs to devmaps. */
+ devmap = bpf_object__find_map_by_name(bpf_obj->obj, "dev_map");
+ if (!ASSERT_OK_PTR(devmap, "no dev_map found"))
+ goto out;
+
+ devmap_e.bpf_prog.fd = val;
+ if (!ASSERT_ERR(bpf_map__update_elem(devmap, &key, sizeof(key),
+ &devmap_e, sizeof(devmap_e),
+ BPF_ANY),
+ "update dev_map"))
+ goto out;
+
/* Attach BPF program to RX interface. */
ret = bpf_xdp_attach(rx_ifindex,
diff --git a/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
new file mode 100644
index 000000000000..079bf3794b3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+long process_byte = 0;
+int verdict_dir = 0;
+int dropped = 0;
+int pkt_size = 0;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return pkt_size;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ int one = 1;
+ int ret = bpf_sk_redirect_map(skb, &sock_map_rx, one, verdict_dir);
+
+ if (ret == SK_DROP)
+ dropped++;
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return ret;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_pass(struct __sk_buff *skb)
+{
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+ int one = 1;
+
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return bpf_msg_redirect_map(msg, &sock_map_tx, one, verdict_dir);
+}
+
+SEC("sk_msg")
+int prog_skmsg_pass(struct sk_msg_md *msg)
+{
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
index fb8dc0768999..d67466c1ff77 100644
--- a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h
+++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
@@ -32,6 +32,7 @@ extern unsigned long CONFIG_NR_CPUS __kconfig;
struct __qspinlock {
union {
atomic_t val;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
struct {
u8 locked;
u8 pending;
@@ -40,6 +41,17 @@ struct __qspinlock {
u16 locked_pending;
u16 tail;
};
+#else
+ struct {
+ u16 tail;
+ u16 locked_pending;
+ };
+ struct {
+ u8 reserved[2];
+ u8 pending;
+ u8 locked;
+ };
+#endif
};
};
@@ -95,9 +107,6 @@ struct arena_qnode {
#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)
-#define likely(x) __builtin_expect(!!(x), 1)
-#define unlikely(x) __builtin_expect(!!(x), 0)
-
struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES];
static inline u32 encode_tail(int cpu, int idx)
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 863df7c0fdd0..6e208e24ba3b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -225,8 +225,9 @@
#define CAN_USE_BPF_ST
#endif
-#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
#define CAN_USE_LOAD_ACQ_STORE_REL
#endif
diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
new file mode 100644
index 000000000000..13cdb11fdeb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+/* From uapi/linux/dma-buf.h */
+#define DMA_BUF_NAME_LEN 32
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, DMA_BUF_NAME_LEN);
+ __type(value, bool);
+ __uint(max_entries, 5);
+} testbuf_hash SEC(".maps");
+
+/*
+ * Fields output by this iterator are delimited by newlines. Convert any
+ * newlines in user-provided printed strings to spaces.
+ */
+static void sanitize_string(char *src, size_t size)
+{
+ for (char *c = src; (size_t)(c - src) < size && *c; ++c)
+ if (*c == '\n')
+ *c = ' ';
+}
+
+SEC("iter/dmabuf")
+int dmabuf_collector(struct bpf_iter__dmabuf *ctx)
+{
+ const struct dma_buf *dmabuf = ctx->dmabuf;
+ struct seq_file *seq = ctx->meta->seq;
+ unsigned long inode = 0;
+ size_t size;
+ const char *pname, *exporter;
+ char name[DMA_BUF_NAME_LEN] = {'\0'};
+
+ if (!dmabuf)
+ return 0;
+
+ if (BPF_CORE_READ_INTO(&inode, dmabuf, file, f_inode, i_ino) ||
+ bpf_core_read(&size, sizeof(size), &dmabuf->size) ||
+ bpf_core_read(&pname, sizeof(pname), &dmabuf->name) ||
+ bpf_core_read(&exporter, sizeof(exporter), &dmabuf->exp_name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (pname) {
+ if (bpf_probe_read_kernel(name, sizeof(name), pname))
+ return 1;
+
+ /* Name strings can be provided by userspace */
+ sanitize_string(name, sizeof(name));
+ }
+
+ BPF_SEQ_PRINTF(seq, "%lu\n%llu\n%s\n%s\n", inode, size, name, exporter);
+ return 0;
+}
+
+SEC("syscall")
+int iter_dmabuf_for_each(const void *ctx)
+{
+ struct dma_buf *d;
+
+ bpf_for_each(dmabuf, d) {
+ char name[DMA_BUF_NAME_LEN];
+ const char *pname;
+ bool *found;
+ long len;
+ int i;
+
+ if (bpf_core_read(&pname, sizeof(pname), &d->name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (!pname)
+ continue;
+
+ len = bpf_probe_read_kernel_str(name, sizeof(name), pname);
+ if (len < 0)
+ return 1;
+
+ /*
+ * The entire name buffer is used as a map key.
+ * Zeroize any uninitialized trailing bytes after the NUL.
+ */
+ bpf_for(i, len, DMA_BUF_NAME_LEN)
+ name[i] = 0;
+
+ found = bpf_map_lookup_elem(&testbuf_hash, name);
+ if (found) {
+ bool t = true;
+
+ bpf_map_update_elem(&testbuf_hash, name, &t, BPF_EXIST);
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index e1fba28e4a86..a0391f9da2d4 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -680,3 +680,233 @@ out:
bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
return XDP_DROP;
}
+
+void *user_ptr;
+/* Contains the copy of the data pointed by user_ptr.
+ * Size 384 to make it not fit into a single kernel chunk when copying
+ * but less than the maximum bpf stack size (512).
+ */
+char expected_str[384];
+__u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257};
+
+typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr);
+
+/* Returns the offset just before the end of the maximum sized xdp fragment.
+ * Any write larger than 32 bytes will be split between 2 fragments.
+ */
+__u32 xdp_near_frag_end_offset(void)
+{
+ const __u32 headroom = 256;
+ const __u32 max_frag_size = __PAGE_SIZE - headroom - sizeof(struct skb_shared_info);
+
+ /* 32 bytes before the approximate end of the fragment */
+ return max_frag_size - 32;
+}
+
+/* Use __always_inline on test_dynptr_probe[_str][_xdp]() and callbacks
+ * of type bpf_read_dynptr_fn_t to prevent compiler from generating
+ * indirect calls that make program fail to load with "unknown opcode" error.
+ */
+static __always_inline void test_dynptr_probe(void *ptr, bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ int i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ err = bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_buf, 0, test_len[i], ptr);
+ if (len > sizeof(buf))
+ break;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_buf, 0, buf, len, 0);
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
+static __always_inline void test_dynptr_probe_str(void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ __u32 cnt, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_buf, 0, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
+static __always_inline void test_dynptr_probe_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+static __always_inline void test_dynptr_probe_str_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 cnt, off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ if (err)
+ return;
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+SEC("xdp")
+int test_probe_read_user_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(user_ptr, bpf_probe_read_user_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, user_ptr, bpf_probe_read_user_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(expected_str, bpf_probe_read_kernel_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, expected_str, bpf_probe_read_kernel_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_user_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(user_ptr, bpf_probe_read_user_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, user_ptr, bpf_probe_read_user_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(expected_str, bpf_probe_read_kernel_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, expected_str, bpf_probe_read_kernel_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_from_user_dynptr);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_from_user_str_dynptr);
+ return 0;
+}
+
+static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_str_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_data_from_user_task);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_data_from_user_task_str);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fd_htab_lookup.c b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
new file mode 100644
index 000000000000..a4a9e1db626f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct inner_map_type);
+} outer_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 427b72954b87..76adf4a8f2da 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -7,8 +7,6 @@
#include "bpf_misc.h"
#include "bpf_compiler.h"
-#define unlikely(x) __builtin_expect(!!(x), 0)
-
static volatile int zero = 0;
int my_pid;
diff --git a/tools/testing/selftests/bpf/progs/linked_list_peek.c b/tools/testing/selftests/bpf/progs/linked_list_peek.c
new file mode 100644
index 000000000000..264e81bfb287
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list_peek.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_list_node l;
+ int key;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_list_head ghead __contains(node_data, l);
+
+#define list_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
+int zero = 0;
+
+SEC("syscall")
+__retval(0)
+long list_peek(void *ctx)
+{
+ struct bpf_list_node *l_n;
+ struct node_data *n;
+ int i, err = 0;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_front(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_back(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+ n->key = i;
+ bpf_spin_lock(&glock);
+ bpf_list_push_back(&ghead, &n->l);
+ bpf_spin_unlock(&glock);
+ }
+
+ bpf_spin_lock(&glock);
+
+ l_n = bpf_list_front(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != 0) {
+ err = __LINE__;
+ goto done;
+ }
+
+ l_n = bpf_list_back(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != NR_NODES - 1) {
+ err = __LINE__;
+ goto done;
+ }
+
+done:
+ bpf_spin_unlock(&glock);
+ return err;
+}
+
+#define TEST_FB(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_list_node *l_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock); \
+ l_n = bpf_list_##op(&ghead); \
+ if (l_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock); \
+ \
+ return !!jiffies; \
+}
+
+#define MSG "call bpf_list_{{(front|back).+}}; R0{{(_w)?}}=ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_FB(front, true)
+TEST_FB(back, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_list_head"
+TEST_FB(front, false)
+TEST_FB(back, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c
index 1f1dd547e4ee..cfc1f48e0d28 100644
--- a/tools/testing/selftests/bpf/progs/prepare.c
+++ b/tools/testing/selftests/bpf/progs/prepare.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2025 Meta */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-//#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index dbd5eee8e25e..4acb6af2dfe3 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -69,11 +69,11 @@ long rbtree_api_nolock_first(void *ctx)
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__retval(0)
long rbtree_api_remove_unadded_node(void *ctx)
{
struct node_data *n, *m;
- struct bpf_rb_node *res;
+ struct bpf_rb_node *res_n, *res_m;
n = bpf_obj_new(typeof(*n));
if (!n)
@@ -88,19 +88,20 @@ long rbtree_api_remove_unadded_node(void *ctx)
bpf_spin_lock(&glock);
bpf_rbtree_add(&groot, &n->node, less);
- /* This remove should pass verifier */
- res = bpf_rbtree_remove(&groot, &n->node);
- n = container_of(res, struct node_data, node);
+ res_n = bpf_rbtree_remove(&groot, &n->node);
- /* This remove shouldn't, m isn't in an rbtree */
- res = bpf_rbtree_remove(&groot, &m->node);
- m = container_of(res, struct node_data, node);
+ res_m = bpf_rbtree_remove(&groot, &m->node);
bpf_spin_unlock(&glock);
- if (n)
- bpf_obj_drop(n);
- if (m)
- bpf_obj_drop(m);
+ bpf_obj_drop(m);
+ if (res_n)
+ bpf_obj_drop(container_of(res_n, struct node_data, node));
+ if (res_m) {
+ bpf_obj_drop(container_of(res_m, struct node_data, node));
+ /* m was not added to the rbtree */
+ return 2;
+ }
+
return 0;
}
@@ -178,7 +179,7 @@ err_out:
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
long rbtree_api_add_release_unlock_escape(void *ctx)
{
struct node_data *n;
@@ -202,7 +203,7 @@ long rbtree_api_add_release_unlock_escape(void *ctx)
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
long rbtree_api_first_release_unlock_escape(void *ctx)
{
struct bpf_rb_node *res;
diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c
new file mode 100644
index 000000000000..098ef970fac1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_search.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_refcount ref;
+ struct bpf_rb_node r0;
+ struct bpf_rb_node r1;
+ int key0;
+ int key1;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock0;
+private(A) struct bpf_rb_root groot0 __contains(node_data, r0);
+
+private(B) struct bpf_spin_lock glock1;
+private(B) struct bpf_rb_root groot1 __contains(node_data, r1);
+
+#define rb_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
+int zero = 0;
+
+static bool less0(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r0);
+ node_b = rb_entry(b, struct node_data, r0);
+
+ return node_a->key0 < node_b->key0;
+}
+
+static bool less1(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r1);
+ node_b = rb_entry(b, struct node_data, r1);
+
+ return node_a->key1 < node_b->key1;
+}
+
+SEC("syscall")
+__retval(0)
+long rbtree_search(void *ctx)
+{
+ struct bpf_rb_node *rb_n, *rb_m, *gc_ns[NR_NODES];
+ long lookup_key = NR_NODES / 2;
+ struct node_data *n, *m;
+ int i, nr_gc = 0;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+
+ m = bpf_refcount_acquire(n);
+
+ n->key0 = i;
+ m->key1 = i;
+
+ bpf_spin_lock(&glock0);
+ bpf_rbtree_add(&groot0, &n->r0, less0);
+ bpf_spin_unlock(&glock0);
+
+ bpf_spin_lock(&glock1);
+ bpf_rbtree_add(&groot1, &m->r1, less1);
+ bpf_spin_unlock(&glock1);
+ }
+
+ n = NULL;
+ bpf_spin_lock(&glock0);
+ rb_n = bpf_rbtree_root(&groot0);
+ while (can_loop) {
+ if (!rb_n) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ n = rb_entry(rb_n, struct node_data, r0);
+ if (lookup_key == n->key0)
+ break;
+ if (nr_gc < NR_NODES)
+ gc_ns[nr_gc++] = rb_n;
+ if (lookup_key < n->key0)
+ rb_n = bpf_rbtree_left(&groot0, rb_n);
+ else
+ rb_n = bpf_rbtree_right(&groot0, rb_n);
+ }
+
+ if (!n || lookup_key != n->key0) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ gc_ns[i] = bpf_rbtree_remove(&groot0, rb_n);
+ }
+
+ m = bpf_refcount_acquire(n);
+ bpf_spin_unlock(&glock0);
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ if (rb_n) {
+ n = rb_entry(rb_n, struct node_data, r0);
+ bpf_obj_drop(n);
+ }
+ }
+
+ if (!m)
+ return __LINE__;
+
+ bpf_spin_lock(&glock1);
+ rb_m = bpf_rbtree_remove(&groot1, &m->r1);
+ bpf_spin_unlock(&glock1);
+ bpf_obj_drop(m);
+ if (!rb_m)
+ return __LINE__;
+ bpf_obj_drop(rb_entry(rb_m, struct node_data, r1));
+
+ return 0;
+}
+
+#define TEST_ROOT(dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_root_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+#define TEST_LR(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ struct node_data *n; \
+ __u64 jiffies = 0; \
+ \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (!rb_n) { \
+ bpf_spin_unlock(&glock0); \
+ return 1; \
+ } \
+ n = rb_entry(rb_n, struct node_data, r0); \
+ n = bpf_refcount_acquire(n); \
+ bpf_spin_unlock(&glock0); \
+ if (!n) \
+ return 1; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_##op(&groot0, &n->r0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+/*
+ * Use a spearate MSG macro instead of passing to TEST_XXX(..., MSG)
+ * to ensure the message itself is not in the bpf prog lineinfo
+ * which the verifier includes in its log.
+ * Otherwise, the test_loader will incorrectly match the prog lineinfo
+ * instead of the log generated by the verifier.
+ */
+#define MSG "call bpf_rbtree_root{{.+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_ROOT(true)
+#undef MSG
+#define MSG "call bpf_rbtree_{{(left|right).+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_LR(left, true)
+TEST_LR(right, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_rb_root"
+TEST_ROOT(false)
+TEST_LR(left, false)
+TEST_LR(right, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c
index 9adb5ba4cd4d..90f5656c3991 100644
--- a/tools/testing/selftests/bpf/progs/set_global_vars.c
+++ b/tools/testing/selftests/bpf/progs/set_global_vars.c
@@ -24,6 +24,44 @@ const volatile enum Enumu64 var_eb = EB1;
const volatile enum Enums64 var_ec = EC1;
const volatile bool var_b = false;
+struct Struct {
+ int:16;
+ __u16 filler;
+ struct {
+ const __u16 filler2;
+ };
+ struct Struct2 {
+ __u16 filler;
+ volatile struct {
+ const int:1;
+ union {
+ const volatile __u8 var_u8;
+ const volatile __s16 filler3;
+ const int:1;
+ } u;
+ };
+ } struct2;
+};
+
+const volatile __u32 stru = 0; /* same prefix as below */
+const volatile struct Struct struct1 = {.struct2 = {.u = {.var_u8 = 1}}};
+
+union Union {
+ __u16 var_u16;
+ struct Struct3 {
+ struct {
+ __u8 var_u8_l;
+ };
+ struct {
+ struct {
+ __u8 var_u8_h;
+ };
+ };
+ } struct3;
+};
+
+const volatile union Union union1 = {.var_u16 = -1};
+
char arr[4] = {0};
SEC("socket")
@@ -43,5 +81,8 @@ int test_set_globals(void *ctx)
a = var_eb;
a = var_ec;
a = var_b;
+ a = struct1.struct2.u.var_u8;
+ a = union1.var_u16;
+
return a;
}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_ext.c b/tools/testing/selftests/bpf/progs/test_btf_ext.c
new file mode 100644
index 000000000000..cdf20331db04
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_ext.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms Inc. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__noinline static void f0(void)
+{
+ __u64 a = 1;
+
+ __sink(a);
+}
+
+SEC("xdp")
+__u64 global_func(struct xdp_md *xdp)
+{
+ f0();
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
new file mode 100644
index 000000000000..8bdb9987c0c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+int cork_byte;
+int push_start;
+int push_end;
+int apply_bytes;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_msg")
+int prog_sk_policy(struct sk_msg_md *msg)
+{
+ if (cork_byte > 0)
+ bpf_msg_cork_bytes(msg, cork_byte);
+ if (push_start > 0 && push_end > 0)
+ bpf_msg_push_data(msg, push_start, push_end, 0);
+
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_sk_policy_redir(struct sk_msg_md *msg)
+{
+ int two = 2;
+
+ bpf_msg_apply_bytes(msg, apply_bytes);
+ return bpf_msg_redirect_map(msg, &sock_map, two, 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_redir.c b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
new file mode 100644
index 000000000000..34d9f4f2f0a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_map, sock_map;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_hash, sock_hash;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, unsigned int);
+} verdict_map;
+
+/* Set by user space */
+int redirect_type;
+int redirect_flags;
+
+#define redirect_map(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_map, \
+ struct sk_msg_md * : bpf_msg_redirect_map \
+ )((__data), &sock_map, (__u32){0}, redirect_flags)
+
+#define redirect_hash(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_hash, \
+ struct sk_msg_md * : bpf_msg_redirect_hash \
+ )((__data), &sock_hash, &(__u32){0}, redirect_flags)
+
+#define DEFINE_PROG(__type, __param) \
+SEC("sk_" XSTR(__type)) \
+int prog_ ## __type ## _verdict(__param data) \
+{ \
+ unsigned int *count; \
+ int verdict; \
+ \
+ if (redirect_type == BPF_MAP_TYPE_SOCKMAP) \
+ verdict = redirect_map(data); \
+ else if (redirect_type == BPF_MAP_TYPE_SOCKHASH) \
+ verdict = redirect_hash(data); \
+ else \
+ verdict = redirect_type - __MAX_BPF_MAP_TYPE; \
+ \
+ count = bpf_map_lookup_elem(&verdict_map, &verdict); \
+ if (count) \
+ (*count)++; \
+ \
+ return verdict; \
+}
+
+DEFINE_PROG(skb, struct __sk_buff *);
+DEFINE_PROG(msg, struct sk_msg_md *);
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
index eb5cca1fce16..7d5293de1952 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -294,7 +294,9 @@ static int tcp_validate_sysctl(struct tcp_syncookie *ctx)
(ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6))
goto err;
- if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7)
+ if (!ctx->attrs.wscale_ok ||
+ !ctx->attrs.snd_wscale ||
+ ctx->attrs.snd_wscale >= BPF_SYNCOOKIE_WSCALE_MASK)
goto err;
if (!ctx->attrs.tstamp_ok)
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
new file mode 100644
index 000000000000..35e2cdc00a01
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if __clang_major__ >= 21 && 0
+SEC("socket")
+__description("__builtin_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_builtin_trap_with_simple_c(void)
+{
+ __builtin_trap();
+}
+#endif
+
+SEC("socket")
+__description("__bpf_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_trap_with_simple_c(void)
+{
+ __bpf_trap();
+}
+
+SEC("socket")
+__description("__bpf_trap as the second-from-last insn")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void bpf_trap_at_func_end(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "call %[__bpf_trap];"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead code __bpf_trap in the middle of code")
+__success
+__naked void dead_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 0 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("reachable __bpf_trap in the middle of code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void live_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 1 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
index 28b939572cda..03942cec07e5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
@@ -65,4 +65,16 @@ __naked void ctx_access_u32_pointer_reject_8(void)
" ::: __clobber_all);
}
+SEC("fentry/bpf_fentry_test10")
+__description("btf_ctx_access const void pointer accept")
+__success __retval(0)
+__naked void ctx_access_const_void_pointer_accept(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 0); /* load 1st argument value (const void pointer) */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
index 77698d5a19e4..74f4f19c10b8 100644
--- a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
+++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
@@ -10,65 +10,81 @@
SEC("socket")
__description("load-acquire, 8-bit")
-__success __success_unpriv __retval(0x12)
+__success __success_unpriv __retval(0)
__naked void load_acquire_8(void)
{
asm volatile (
- "w1 = 0x12;"
+ "r0 = 0;"
+ "w1 = 0xfe;"
"*(u8 *)(r10 - 1) = w1;"
- ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1));
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u8 *)(r10 - 1));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1))
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -1))
: __clobber_all);
}
SEC("socket")
__description("load-acquire, 16-bit")
-__success __success_unpriv __retval(0x1234)
+__success __success_unpriv __retval(0)
__naked void load_acquire_16(void)
{
asm volatile (
- "w1 = 0x1234;"
+ "r0 = 0;"
+ "w1 = 0xfedc;"
"*(u16 *)(r10 - 2) = w1;"
- ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2));
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u16 *)(r10 - 2));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2))
+ BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -2))
: __clobber_all);
}
SEC("socket")
__description("load-acquire, 32-bit")
-__success __success_unpriv __retval(0x12345678)
+__success __success_unpriv __retval(0)
__naked void load_acquire_32(void)
{
asm volatile (
- "w1 = 0x12345678;"
+ "r0 = 0;"
+ "w1 = 0xfedcba09;"
"*(u32 *)(r10 - 4) = w1;"
- ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4));
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u32 *)(r10 - 4));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4))
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -4))
: __clobber_all);
}
SEC("socket")
__description("load-acquire, 64-bit")
-__success __success_unpriv __retval(0x1234567890abcdef)
+__success __success_unpriv __retval(0)
__naked void load_acquire_64(void)
{
asm volatile (
- "r1 = 0x1234567890abcdef ll;"
+ "r0 = 0;"
+ "r1 = 0xfedcba0987654321 ll;"
"*(u64 *)(r10 - 8) = r1;"
- ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8));
+ ".8byte %[load_acquire_insn];" // r2 = load_acquire((u64 *)(r10 - 8));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8))
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8))
: __clobber_all);
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 6662d4b39969..9fe5d255ee37 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -91,8 +91,7 @@ __naked int bpf_end_bswap(void)
::: __clobber_all);
}
-#if defined(ENABLE_ATOMICS_TESTS) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
SEC("?raw_tp")
__success __log_level(2)
@@ -138,7 +137,7 @@ __naked int bpf_store_release(void)
: __clobber_all);
}
-#endif /* load-acquire, store-release */
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
#endif /* v4 instruction */
SEC("?raw_tp")
@@ -179,4 +178,57 @@ __naked int state_loop_first_last_equal(void)
);
}
+__used __naked static void __bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r2 = 2314885393468386424 ll;"
+ "goto +0;"
+ "if r2 <= r10 goto +3;"
+ "if r1 >= -1835016 goto +0;"
+ "if r2 <= 8 goto +0;"
+ "if r3 <= 0 goto +0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("8: (bd) if r2 <= r10 goto pc+3")
+__msg("9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("10: (b5) if r2 <= 0x8 goto pc+0")
+__msg("mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3")
+__msg("mark_precise: frame1: regs=r2 stack= before 7: (05) goto pc+0")
+__naked void bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r3 = 0 ll;"
+ "call __bpf_cond_op_r10;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("3: (bf) r3 = r10")
+__msg("4: (bd) if r3 <= r2 goto pc+1")
+__msg("5: (b5) if r2 <= 0x8 goto pc+2")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (bd) if r3 <= r2 goto pc+1")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10")
+__naked void bpf_cond_op_not_r10(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r2 = 2314885393468386424 ll;"
+ "r3 = r10;"
+ "if r3 <= r2 goto +1;"
+ "if r2 <= 8 goto +2;"
+ "r0 = 2 ll;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c
index c0442d5bb049..72f1eb006074 100644
--- a/tools/testing/selftests/bpf/progs/verifier_store_release.c
+++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c
@@ -6,18 +6,21 @@
#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
-#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
SEC("socket")
__description("store-release, 8-bit")
-__success __success_unpriv __retval(0x12)
+__success __success_unpriv __retval(0)
__naked void store_release_8(void)
{
asm volatile (
+ "r0 = 0;"
"w1 = 0x12;"
".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1);
- "w0 = *(u8 *)(r10 - 1);"
+ "w2 = *(u8 *)(r10 - 1);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -27,13 +30,17 @@ __naked void store_release_8(void)
SEC("socket")
__description("store-release, 16-bit")
-__success __success_unpriv __retval(0x1234)
+__success __success_unpriv __retval(0)
__naked void store_release_16(void)
{
asm volatile (
+ "r0 = 0;"
"w1 = 0x1234;"
".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1);
- "w0 = *(u16 *)(r10 - 2);"
+ "w2 = *(u16 *)(r10 - 2);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -43,13 +50,17 @@ __naked void store_release_16(void)
SEC("socket")
__description("store-release, 32-bit")
-__success __success_unpriv __retval(0x12345678)
+__success __success_unpriv __retval(0)
__naked void store_release_32(void)
{
asm volatile (
+ "r0 = 0;"
"w1 = 0x12345678;"
".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1);
- "w0 = *(u32 *)(r10 - 4);"
+ "w2 = *(u32 *)(r10 - 4);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -59,13 +70,17 @@ __naked void store_release_32(void)
SEC("socket")
__description("store-release, 64-bit")
-__success __success_unpriv __retval(0x1234567890abcdef)
+__success __success_unpriv __retval(0)
__naked void store_release_64(void)
{
asm volatile (
+ "r0 = 0;"
"r1 = 0x1234567890abcdef ll;"
".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
- "r0 = *(u64 *)(r10 - 8);"
+ "r2 = *(u64 *)(r10 - 8);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -271,7 +286,7 @@ __naked void store_release_with_invalid_reg(void)
: __clobber_all);
}
-#else
+#else /* CAN_USE_LOAD_ACQ_STORE_REL */
SEC("socket")
__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test")
@@ -281,6 +296,6 @@ int dummy_test(void)
return 0;
}
-#endif
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
index 31ca229bb3c0..09bb8a038d52 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
@@ -19,6 +19,13 @@ struct {
__type(value, __u32);
} prog_arr SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} dev_map SEC(".maps");
+
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
@@ -95,4 +102,10 @@ int rx(struct xdp_md *ctx)
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
+SEC("xdp")
+int redirect(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&dev_map, ctx->rx_queue_index, XDP_PASS);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index 3220f1d28697..2e54b95ad898 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -134,6 +134,10 @@ bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) {
return bpf_testmod_test_struct_arg_result;
}
+__weak noinline void bpf_testmod_looooooooooooooooooooooooooooooong_name(void)
+{
+}
+
__bpf_kfunc void
bpf_testmod_test_mod_kfunc(int i)
{
@@ -1340,7 +1344,7 @@ static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct
*insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a));
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
- *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id),
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8);
*insn++ = prog->insnsi[0];
@@ -1379,7 +1383,7 @@ static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struc
*insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a));
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
- *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id),
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
*insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6);
*insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2);
*insn++ = BPF_EXIT_INSN();
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 49f2fc61061f..9551d8d5f8f9 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -1042,6 +1042,14 @@ void run_subtest(struct test_loader *tester,
emit_verifier_log(tester->log_buf, false /*force*/);
validate_msgs(tester->log_buf, &subspec->expect_msgs, emit_verifier_log);
+ /* Restore capabilities because the kernel will silently ignore requests
+ * for program info (such as xlated program text) if we are not
+ * bpf-capable. Also, for some reason test_verifier executes programs
+ * with all capabilities restored. Do the same here.
+ */
+ if (restore_capabilities(&caps))
+ goto tobj_cleanup;
+
if (subspec->expect_xlated.cnt) {
err = get_xlated_program_text(bpf_program__fd(tprog),
tester->log_buf, tester->log_buf_sz);
@@ -1067,12 +1075,6 @@ void run_subtest(struct test_loader *tester,
}
if (should_do_test_run(spec, subspec)) {
- /* For some reason test_verifier executes programs
- * with all capabilities restored. Do the same here.
- */
- if (restore_capabilities(&caps))
- goto tobj_cleanup;
-
/* Do bpf_map__attach_struct_ops() for each struct_ops map.
* This should trigger bpf_struct_ops->reg callback on kernel side.
*/
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 447b68509d76..27db34ecf3f5 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -734,7 +734,7 @@ static __u32 btf_raw_types[] = {
BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */
};
-static char bpf_vlog[UINT_MAX >> 8];
+static char bpf_vlog[UINT_MAX >> 5];
static int load_btf_spec(__u32 *types, int types_len,
const char *strings, int strings_len)
@@ -1559,10 +1559,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
test->errstr_unpriv : test->errstr;
opts.expected_attach_type = test->expected_attach_type;
- if (verbose)
- opts.log_level = verif_log_level | 4; /* force stats */
- else if (expected_ret == VERBOSE_ACCEPT)
+ if (expected_ret == VERBOSE_ACCEPT)
opts.log_level = 2;
+ else if (verbose)
+ opts.log_level = verif_log_level | 4; /* force stats */
else
opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
opts.prog_flags = pflags;
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index a18972ffdeb6..b2bb20b00952 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -1486,7 +1486,84 @@ static bool is_preset_supported(const struct btf_type *t)
return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t);
}
-static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t,
+const int btf_find_member(const struct btf *btf,
+ const struct btf_type *parent_type,
+ __u32 parent_offset,
+ const char *member_name,
+ int *member_tid,
+ __u32 *member_offset)
+{
+ int i;
+
+ if (!btf_is_composite(parent_type))
+ return -EINVAL;
+
+ for (i = 0; i < btf_vlen(parent_type); ++i) {
+ const struct btf_member *member;
+ const struct btf_type *member_type;
+ int tid;
+
+ member = btf_members(parent_type) + i;
+ tid = btf__resolve_type(btf, member->type);
+ if (tid < 0)
+ return -EINVAL;
+
+ member_type = btf__type_by_id(btf, tid);
+ if (member->name_off) {
+ const char *name = btf__name_by_offset(btf, member->name_off);
+
+ if (strcmp(member_name, name) == 0) {
+ if (btf_member_bitfield_size(parent_type, i) != 0) {
+ fprintf(stderr, "Bitfield presets are not supported %s\n",
+ name);
+ return -EINVAL;
+ }
+ *member_offset = parent_offset + member->offset;
+ *member_tid = tid;
+ return 0;
+ }
+ } else if (btf_is_composite(member_type)) {
+ int err;
+
+ err = btf_find_member(btf, member_type, parent_offset + member->offset,
+ member_name, member_tid, member_offset);
+ if (!err)
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t,
+ struct btf_var_secinfo *sinfo, const char *var)
+{
+ char expr[256], *saveptr;
+ const struct btf_type *base_type, *member_type;
+ int err, member_tid;
+ char *name;
+ __u32 member_offset = 0;
+
+ base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type));
+ snprintf(expr, sizeof(expr), "%s", var);
+ strtok_r(expr, ".", &saveptr);
+
+ while ((name = strtok_r(NULL, ".", &saveptr))) {
+ err = btf_find_member(btf, base_type, 0, name, &member_tid, &member_offset);
+ if (err) {
+ fprintf(stderr, "Could not find member %s for variable %s\n", name, var);
+ return err;
+ }
+ member_type = btf__type_by_id(btf, member_tid);
+ sinfo->offset += member_offset / 8;
+ sinfo->size = member_type->size;
+ sinfo->type = member_tid;
+ base_type = member_type;
+ }
+ return 0;
+}
+
+static int set_global_var(struct bpf_object *obj, struct btf *btf,
struct bpf_map *map, struct btf_var_secinfo *sinfo,
struct var_preset *preset)
{
@@ -1495,9 +1572,9 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct
long long value = preset->ivalue;
size_t size;
- base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type));
+ base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type));
if (!base_type) {
- fprintf(stderr, "Failed to resolve type %d\n", t->type);
+ fprintf(stderr, "Failed to resolve type %d\n", sinfo->type);
return -EINVAL;
}
if (!is_preset_supported(base_type)) {
@@ -1530,7 +1607,7 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct
if (value >= max_val || value < -max_val) {
fprintf(stderr,
"Variable %s value %lld is out of range [%lld; %lld]\n",
- btf__name_by_offset(btf, t->name_off), value,
+ btf__name_by_offset(btf, base_type->name_off), value,
is_signed ? -max_val : 0, max_val - 1);
return -EINVAL;
}
@@ -1583,14 +1660,20 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i
for (j = 0; j < n; ++j, ++sinfo) {
const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type);
const char *var_name;
+ int var_len;
if (!btf_is_var(var_type))
continue;
var_name = btf__name_by_offset(btf, var_type->name_off);
+ var_len = strlen(var_name);
for (k = 0; k < npresets; ++k) {
- if (strcmp(var_name, presets[k].name) != 0)
+ struct btf_var_secinfo tmp_sinfo;
+
+ if (strncmp(var_name, presets[k].name, var_len) != 0 ||
+ (presets[k].name[var_len] != '\0' &&
+ presets[k].name[var_len] != '.'))
continue;
if (presets[k].applied) {
@@ -1598,13 +1681,17 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i
var_name);
return -EINVAL;
}
+ tmp_sinfo = *sinfo;
+ err = adjust_var_secinfo(btf, var_type,
+ &tmp_sinfo, presets[k].name);
+ if (err)
+ return err;
- err = set_global_var(obj, btf, var_type, map, sinfo, presets + k);
+ err = set_global_var(obj, btf, map, &tmp_sinfo, presets + k);
if (err)
return err;
presets[k].applied = true;
- break;
}
}
}