-rw-r--r-- CREDITS | 6
-rw-r--r-- Documentation/bpf/kfuncs.rst | 19
-rw-r--r-- Documentation/bpf/verifier.rst | 264
-rw-r--r-- MAINTAINERS | 2
-rw-r--r-- arch/arm64/net/Makefile | 2
-rw-r--r-- arch/arm64/net/bpf_jit_comp.c | 127
-rw-r--r-- arch/arm64/net/bpf_timed_may_goto.S | 40
-rw-r--r-- arch/riscv/Kconfig | 1
-rw-r--r-- arch/riscv/include/asm/cmpxchg.h | 6
-rw-r--r-- arch/riscv/kernel/setup.c | 1
-rw-r--r-- arch/riscv/net/bpf_jit.h | 70
-rw-r--r-- arch/riscv/net/bpf_jit_comp64.c | 569
-rw-r--r-- arch/s390/net/Makefile | 2
-rw-r--r-- arch/s390/net/bpf_jit_comp.c | 148
-rw-r--r-- arch/s390/net/bpf_timed_may_goto.S | 45
-rw-r--r-- arch/x86/net/bpf_jit_comp.c | 125
-rw-r--r-- crypto/asymmetric_keys/pkcs7_verify.c | 1
-rw-r--r-- include/linux/bpf.h | 73
-rw-r--r-- include/linux/bpf_verifier.h | 65
-rw-r--r-- include/linux/btf.h | 2
-rw-r--r-- include/linux/cgroup.h | 1
-rw-r--r-- include/linux/filter.h | 17
-rw-r--r-- include/linux/rcupdate.h | 14
-rw-r--r-- include/linux/tnum.h | 6
-rw-r--r-- include/linux/verification.h | 1
-rw-r--r-- include/net/xdp.h | 5
-rw-r--r-- include/net/xdp_sock_drv.h | 21
-rw-r--r-- include/uapi/linux/bpf.h | 22
-rw-r--r-- kernel/bpf/Kconfig | 2
-rw-r--r-- kernel/bpf/Makefile | 2
-rw-r--r-- kernel/bpf/arena.c | 30
-rw-r--r-- kernel/bpf/arraymap.c | 21
-rw-r--r-- kernel/bpf/bpf_cgrp_storage.c | 6
-rw-r--r-- kernel/bpf/bpf_inode_storage.c | 6
-rw-r--r-- kernel/bpf/bpf_iter.c | 6
-rw-r--r-- kernel/bpf/bpf_lru_list.c | 10
-rw-r--r-- kernel/bpf/bpf_struct_ops.c | 12
-rw-r--r-- kernel/bpf/bpf_task_storage.c | 6
-rw-r--r-- kernel/bpf/btf.c | 99
-rw-r--r-- kernel/bpf/cgroup.c | 11
-rw-r--r-- kernel/bpf/core.c | 60
-rw-r--r-- kernel/bpf/cpumap.c | 2
-rw-r--r-- kernel/bpf/devmap.c | 2
-rw-r--r-- kernel/bpf/hashtab.c | 43
-rw-r--r-- kernel/bpf/helpers.c | 612
-rw-r--r-- kernel/bpf/liveness.c | 733
-rw-r--r-- kernel/bpf/local_storage.c | 2
-rw-r--r-- kernel/bpf/log.c | 30
-rw-r--r-- kernel/bpf/memalloc.c | 2
-rw-r--r-- kernel/bpf/stackmap.c | 16
-rw-r--r-- kernel/bpf/syscall.c | 125
-rw-r--r-- kernel/bpf/tnum.c | 63
-rw-r--r-- kernel/bpf/trampoline.c | 18
-rw-r--r-- kernel/bpf/verifier.c | 869
-rw-r--r-- kernel/cgroup/cgroup.c | 24
-rw-r--r-- kernel/events/core.c | 4
-rw-r--r-- kernel/events/uprobes.c | 7
-rw-r--r-- kernel/trace/bpf_trace.c | 201
-rw-r--r-- net/bpf/test_run.c | 59
-rw-r--r-- net/core/filter.c | 210
-rw-r--r-- tools/bpf/bpftool/Documentation/bpftool-gen.rst | 13
-rw-r--r-- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 14
-rw-r--r-- tools/bpf/bpftool/Documentation/bpftool-token.rst | 64
-rw-r--r-- tools/bpf/bpftool/Makefile | 6
-rw-r--r-- tools/bpf/bpftool/bash-completion/bpftool | 37
-rw-r--r-- tools/bpf/bpftool/btf_dumper.c | 2
-rw-r--r-- tools/bpf/bpftool/cgroup.c | 4
-rw-r--r-- tools/bpf/bpftool/common.c | 93
-rw-r--r-- tools/bpf/bpftool/feature.c | 86
-rw-r--r-- tools/bpf/bpftool/gen.c | 68
-rw-r--r-- tools/bpf/bpftool/link.c | 54
-rw-r--r-- tools/bpf/bpftool/main.c | 29
-rw-r--r-- tools/bpf/bpftool/main.h | 21
-rw-r--r-- tools/bpf/bpftool/prog.c | 33
-rw-r--r-- tools/bpf/bpftool/sign.c | 211
-rw-r--r-- tools/bpf/bpftool/token.c | 210
-rw-r--r-- tools/bpf/bpftool/tracelog.c | 11
-rw-r--r-- tools/include/uapi/linux/bpf.h | 22
-rw-r--r-- tools/lib/bpf/bpf.c | 6
-rw-r--r-- tools/lib/bpf/bpf.h | 5
-rw-r--r-- tools/lib/bpf/bpf_gen_internal.h | 2
-rw-r--r-- tools/lib/bpf/gen_loader.c | 47
-rw-r--r-- tools/lib/bpf/libbpf.c | 213
-rw-r--r-- tools/lib/bpf/libbpf.h | 52
-rw-r--r-- tools/lib/bpf/libbpf.map | 3
-rw-r--r-- tools/lib/bpf/libbpf_internal.h | 4
-rw-r--r-- tools/lib/bpf/skel_internal.h | 76
-rw-r--r-- tools/lib/bpf/usdt.bpf.h | 44
-rw-r--r-- tools/lib/bpf/usdt.c | 72
-rw-r--r-- tools/testing/selftests/bpf/.gitignore | 1
-rw-r--r-- tools/testing/selftests/bpf/DENYLIST.s390x | 1
-rw-r--r-- tools/testing/selftests/bpf/Makefile | 43
-rw-r--r-- tools/testing/selftests/bpf/bench.c | 22
-rw-r--r-- tools/testing/selftests/bpf/bench.h | 1
-rw-r--r-- tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c | 555
-rw-r--r-- tools/testing/selftests/bpf/benchs/bench_sockmap.c | 5
-rw-r--r-- tools/testing/selftests/bpf/benchs/bench_trigger.c | 61
-rwxr-xr-x tools/testing/selftests/bpf/benchs/run_bench_trigger.sh | 4
-rw-r--r-- tools/testing/selftests/bpf/bpf_experimental.h | 54
-rw-r--r-- tools/testing/selftests/bpf/bpf_kfuncs.h | 3
-rw-r--r-- tools/testing/selftests/bpf/bpf_util.h | 3
-rw-r--r-- tools/testing/selftests/bpf/cgroup_helpers.c | 20
-rw-r--r-- tools/testing/selftests/bpf/cgroup_helpers.h | 1
-rw-r--r-- tools/testing/selftests/bpf/config | 1
-rw-r--r-- tools/testing/selftests/bpf/config.aarch64 | 12
-rw-r--r-- tools/testing/selftests/bpf/config.ppc64el | 1
-rw-r--r-- tools/testing/selftests/bpf/config.riscv64 | 1
-rw-r--r-- tools/testing/selftests/bpf/config.s390x | 11
-rw-r--r-- tools/testing/selftests/bpf/config.x86_64 | 5
-rw-r--r-- tools/testing/selftests/bpf/network_helpers.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/align.c | 178
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c | 13
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/atomics.c | 10
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/attach_probe.c | 28
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/bpf_cookie.c | 3
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/btf_dump.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c | 71
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/dynptr.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/fd_array.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/fentry_fexit.c | 15
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/fentry_test.c | 9
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/fexit_test.c | 9
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/kernel_flag.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 247
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/map_excl.c | 54
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/module_attach.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c | 50
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c | 125
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/reg_bounds.c | 4
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/res_spin_lock.c | 16
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/spin_lock.c | 12
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/stacktrace_map.c | 71
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c | 4
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c | 2
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/stream.c | 131
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/string_kfuncs.c | 1
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/task_local_data.h | 386
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/task_work_stress.c | 130
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c | 74
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/test_task_local_data.c | 297
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/test_task_work.c | 157
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/test_veristat.c | 44
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/timer.c | 34
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/tracing_struct.c | 29
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/uprobe.c | 156
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/usdt.c | 85
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/verifier.c | 4
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c | 222
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c | 31
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c | 179
-rw-r--r-- tools/testing/selftests/bpf/progs/arena_atomics.c | 9
-rw-r--r-- tools/testing/selftests/bpf/progs/arena_spin_lock.c | 5
-rw-r--r-- tools/testing/selftests/bpf/progs/bpf_cc_cubic.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/bpf_dctcp.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/bpf_misc.h | 24
-rw-r--r-- tools/testing/selftests/bpf/progs/bpf_test_utils.h | 18
-rw-r--r-- tools/testing/selftests/bpf/progs/cgroup_read_xattr.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c | 12
-rw-r--r-- tools/testing/selftests/bpf/progs/dynptr_fail.c | 258
-rw-r--r-- tools/testing/selftests/bpf/progs/dynptr_success.c | 55
-rw-r--r-- tools/testing/selftests/bpf/progs/exceptions_assert.c | 34
-rw-r--r-- tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/iters_state_safety.c | 6
-rw-r--r-- tools/testing/selftests/bpf/progs/iters_task_failure.c | 4
-rw-r--r-- tools/testing/selftests/bpf/progs/iters_testmod.c | 46
-rw-r--r-- tools/testing/selftests/bpf/progs/iters_testmod_seq.c | 6
-rw-r--r-- tools/testing/selftests/bpf/progs/kprobe_write_ctx.c | 22
-rw-r--r-- tools/testing/selftests/bpf/progs/loop1.c | 7
-rw-r--r-- tools/testing/selftests/bpf/progs/loop2.c | 7
-rw-r--r-- tools/testing/selftests/bpf/progs/loop3.c | 7
-rw-r--r-- tools/testing/selftests/bpf/progs/loop6.c | 21
-rw-r--r-- tools/testing/selftests/bpf/progs/lpm_trie.h | 30
-rw-r--r-- tools/testing/selftests/bpf/progs/lpm_trie_bench.c | 230
-rw-r--r-- tools/testing/selftests/bpf/progs/lpm_trie_map.c | 19
-rw-r--r-- tools/testing/selftests/bpf/progs/map_excl.c | 34
-rw-r--r-- tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c | 4
-rw-r--r-- tools/testing/selftests/bpf/progs/rbtree_search.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/stacktrace_map.c (renamed from tools/testing/selftests/bpf/progs/test_stacktrace_map.c) | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/stream.c | 158
-rw-r--r-- tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c | 6
-rw-r--r-- tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c | 1
-rw-r--r-- tools/testing/selftests/bpf/progs/string_kfuncs_success.c | 5
-rw-r--r-- tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c | 59
-rw-r--r-- tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c | 59
-rw-r--r-- tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/struct_ops_refcounted.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c | 3
-rw-r--r-- tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c | 3
-rw-r--r-- tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c | 3
-rw-r--r-- tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c | 3
-rw-r--r-- tools/testing/selftests/bpf/progs/task_local_data.bpf.h | 237
-rw-r--r-- tools/testing/selftests/bpf/progs/task_work.c | 107
-rw-r--r-- tools/testing/selftests/bpf/progs/task_work_fail.c | 96
-rw-r--r-- tools/testing/selftests/bpf/progs/task_work_stress.c | 73
-rw-r--r-- tools/testing/selftests/bpf/progs/test_cls_redirect.c | 6
-rw-r--r-- tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c | 2
-rw-r--r-- tools/testing/selftests/bpf/progs/test_overhead.c | 5
-rw-r--r-- tools/testing/selftests/bpf/progs/test_pinning_devmap.c | 20
-rw-r--r-- tools/testing/selftests/bpf/progs/test_task_local_data.c | 65
-rw-r--r-- tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c | 5
-rw-r--r-- tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c | 1
-rw-r--r-- tools/testing/selftests/bpf/progs/test_uprobe.c | 38
-rw-r--r-- tools/testing/selftests/bpf/progs/test_usdt.c | 31
-rw-r--r-- tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c | 29
-rw-r--r-- tools/testing/selftests/bpf/progs/test_xdp_meta.c | 419
-rw-r--r-- tools/testing/selftests/bpf/progs/test_xdp_pull_data.c | 48
-rw-r--r-- tools/testing/selftests/bpf/progs/timer_interrupt.c | 48
-rw-r--r-- tools/testing/selftests/bpf/progs/tracing_struct.c | 33
-rw-r--r-- tools/testing/selftests/bpf/progs/trigger_bench.c | 12
-rw-r--r-- tools/testing/selftests/bpf/progs/uretprobe_stack.c | 4
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_arena_large.c | 1
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_bounds.c | 79
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c | 27
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_ctx.c | 32
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c | 4
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_ldsx.c | 178
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_live_stack.c | 294
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_loops1.c | 21
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_map_ptr.c | 7
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_may_goto_1.c | 38
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_mul.c | 38
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_precision.c | 16
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_scalar_ids.c | 12
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_sock.c | 48
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_spill_fill.c | 40
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_subprog_precision.c | 6
-rw-r--r-- tools/testing/selftests/bpf/progs/verifier_var_off.c | 6
-rw-r--r-- tools/testing/selftests/bpf/test_kmods/Makefile | 2
-rw-r--r-- tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c | 209
-rw-r--r-- tools/testing/selftests/bpf/test_kmods/bpf_testmod.c | 155
-rw-r--r-- tools/testing/selftests/bpf/test_kmods/bpf_testmod.h | 6
-rw-r--r-- tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h | 4
-rw-r--r-- tools/testing/selftests/bpf/test_lirc_mode2_user.c | 2
-rw-r--r-- tools/testing/selftests/bpf/test_loader.c | 300
-rw-r--r-- tools/testing/selftests/bpf/test_progs.c | 13
-rw-r--r-- tools/testing/selftests/bpf/test_progs.h | 17
-rw-r--r-- tools/testing/selftests/bpf/test_sockmap.c | 2
-rw-r--r-- tools/testing/selftests/bpf/test_tcpnotify_user.c | 20
-rwxr-xr-x tools/testing/selftests/bpf/test_xsk.sh | 2
-rw-r--r-- tools/testing/selftests/bpf/testing_helpers.c | 14
-rw-r--r-- tools/testing/selftests/bpf/testing_helpers.h | 1
-rw-r--r-- tools/testing/selftests/bpf/trace_helpers.c | 234
-rw-r--r-- tools/testing/selftests/bpf/trace_helpers.h | 3
-rw-r--r-- tools/testing/selftests/bpf/verifier/bpf_st_mem.c | 4
-rw-r--r-- tools/testing/selftests/bpf/verifier/calls.c | 8
-rwxr-xr-x tools/testing/selftests/bpf/verify_sig_setup.sh | 11
-rw-r--r-- tools/testing/selftests/bpf/veristat.c | 56
-rw-r--r-- tools/testing/selftests/bpf/xdping.c | 2
-rw-r--r-- tools/testing/selftests/bpf/xsk.h | 4
-rw-r--r-- tools/testing/selftests/bpf/xskxceiver.c | 14
-rw-r--r-- tools/testing/selftests/net/lib/xdp_native.bpf.c | 89
254 files changed, 11831 insertions, 2795 deletions
diff --git a/CREDITS b/CREDITS
index a687c3c35c4c..e47df5e74abe 100644
--- a/CREDITS
+++ b/CREDITS
@@ -3912,6 +3912,12 @@ S: C/ Federico Garcia Lorca 1 10-A
S: Sevilla 41005
S: Spain
+N: Björn Töpel
+E: bjorn@kernel.org
+D: AF_XDP
+S: Gothenburg
+S: Sweden
+
N: Linus Torvalds
E: torvalds@linux-foundation.org
D: Original kernel hacker
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index ae468b781d31..e38941370b90 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -335,9 +335,26 @@ consider doing refcnt != 0 check, especially when returning a KF_ACQUIRE
pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should very likely
also be KF_RET_NULL.
+2.4.8 KF_RCU_PROTECTED flag
+---------------------------
+
+The KF_RCU_PROTECTED flag is used to indicate that the kfunc must be invoked in
+an RCU critical section. This is assumed by default in non-sleepable programs,
+and must be explicitly ensured by calling ``bpf_rcu_read_lock`` for sleepable
+ones.
+
+If the kfunc returns a pointer value, this flag also enforces that the returned
+pointer is RCU protected, and can only be used while the RCU critical section is
+active.
+
+The flag is distinct from the ``KF_RCU`` flag, which only ensures that its
+arguments are at least RCU protected pointers. This may transitively imply that
+RCU protection is ensured, but it does not work in cases of kfuncs which require
+RCU protection but do not take RCU protected arguments.
+
.. _KF_deprecated_flag:
-2.4.8 KF_DEPRECATED flag
+2.4.9 KF_DEPRECATED flag
------------------------
The KF_DEPRECATED flag is used for kfuncs which are scheduled to be
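
As an illustration of the new flag (a minimal sketch, not taken from this series; the kfunc name, the ``example_task`` global and the program type used for registration are hypothetical), a kfunc returning an RCU protected pointer would be declared and registered roughly as follows::

    #include <linux/module.h>
    #include <linux/btf.h>
    #include <linux/btf_ids.h>
    #include <linux/rcupdate.h>

    static struct task_struct __rcu *example_task; /* hypothetical global */

    /*
     * Caller must be inside an RCU read-side critical section, either
     * implicitly (non-sleepable program) or via bpf_rcu_read_lock(); the
     * returned pointer is only usable while that section is held, which
     * KF_RCU_PROTECTED makes the verifier enforce.
     */
    __bpf_kfunc struct task_struct *bpf_example_task_peek(void)
    {
            return rcu_dereference(example_task);
    }

    BTF_KFUNCS_START(example_kfunc_ids)
    BTF_ID_FLAGS(func, bpf_example_task_peek, KF_RCU_PROTECTED | KF_RET_NULL)
    BTF_KFUNCS_END(example_kfunc_ids)

    static const struct btf_kfunc_id_set example_kfunc_set = {
            .owner = THIS_MODULE,
            .set   = &example_kfunc_ids,
    };

    /* e.g. from module init code: */
    /* err = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &example_kfunc_set); */
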
diff --git a/Documentation/bpf/verifier.rst b/Documentation/bpf/verifier.rst
index 95e6f80a407e..510d15bc697b 100644
--- a/Documentation/bpf/verifier.rst
+++ b/Documentation/bpf/verifier.rst
@@ -347,270 +347,6 @@ However, only the value of register ``r1`` is important to successfully finish
verification. The goal of the liveness tracking algorithm is to spot this fact
and figure out that both states are actually equivalent.
-Data structures
-~~~~~~~~~~~~~~~
-
-Liveness is tracked using the following data structures::
-
- enum bpf_reg_liveness {
- REG_LIVE_NONE = 0,
- REG_LIVE_READ32 = 0x1,
- REG_LIVE_READ64 = 0x2,
- REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
- REG_LIVE_WRITTEN = 0x4,
- REG_LIVE_DONE = 0x8,
- };
-
- struct bpf_reg_state {
- ...
- struct bpf_reg_state *parent;
- ...
- enum bpf_reg_liveness live;
- ...
- };
-
- struct bpf_stack_state {
- struct bpf_reg_state spilled_ptr;
- ...
- };
-
- struct bpf_func_state {
- struct bpf_reg_state regs[MAX_BPF_REG];
- ...
- struct bpf_stack_state *stack;
- }
-
- struct bpf_verifier_state {
- struct bpf_func_state *frame[MAX_CALL_FRAMES];
- struct bpf_verifier_state *parent;
- ...
- }
-
-* ``REG_LIVE_NONE`` is an initial value assigned to ``->live`` fields upon new
- verifier state creation;
-
-* ``REG_LIVE_WRITTEN`` means that the value of the register (or stack slot) is
- defined by some instruction verified between this verifier state's parent and
- verifier state itself;
-
-* ``REG_LIVE_READ{32,64}`` means that the value of the register (or stack slot)
- is read by a some child state of this verifier state;
-
-* ``REG_LIVE_DONE`` is a marker used by ``clean_verifier_state()`` to avoid
- processing same verifier state multiple times and for some sanity checks;
-
-* ``->live`` field values are formed by combining ``enum bpf_reg_liveness``
- values using bitwise or.
-
-Register parentage chains
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In order to propagate information between parent and child states, a *register
-parentage chain* is established. Each register or stack slot is linked to a
-corresponding register or stack slot in its parent state via a ``->parent``
-pointer. This link is established upon state creation in ``is_state_visited()``
-and might be modified by ``set_callee_state()`` called from
-``__check_func_call()``.
-
-The rules for correspondence between registers / stack slots are as follows:
-
-* For the current stack frame, registers and stack slots of the new state are
- linked to the registers and stack slots of the parent state with the same
- indices.
-
-* For the outer stack frames, only callee saved registers (r6-r9) and stack
- slots are linked to the registers and stack slots of the parent state with the
- same indices.
-
-* When function call is processed a new ``struct bpf_func_state`` instance is
- allocated, it encapsulates a new set of registers and stack slots. For this
- new frame, parent links for r6-r9 and stack slots are set to nil, parent links
- for r1-r5 are set to match caller r1-r5 parent links.
-
-This could be illustrated by the following diagram (arrows stand for
-``->parent`` pointers)::
-
- ... ; Frame #0, some instructions
- --- checkpoint #0 ---
- 1 : r6 = 42 ; Frame #0
- --- checkpoint #1 ---
- 2 : call foo() ; Frame #0
- ... ; Frame #1, instructions from foo()
- --- checkpoint #2 ---
- ... ; Frame #1, instructions from foo()
- --- checkpoint #3 ---
- exit ; Frame #1, return from foo()
- 3 : r1 = r6 ; Frame #0 <- current state
-
- +-------------------------------+-------------------------------+
- | Frame #0 | Frame #1 |
- Checkpoint +-------------------------------+-------------------------------+
- #0 | r0 | r1-r5 | r6-r9 | fp-8 ... |
- +-------------------------------+
- ^ ^ ^ ^
- | | | |
- Checkpoint +-------------------------------+
- #1 | r0 | r1-r5 | r6-r9 | fp-8 ... |
- +-------------------------------+
- ^ ^ ^
- |_______|_______|_______________
- | | |
- nil nil | | | nil nil
- | | | | | | |
- Checkpoint +-------------------------------+-------------------------------+
- #2 | r0 | r1-r5 | r6-r9 | fp-8 ... | r0 | r1-r5 | r6-r9 | fp-8 ... |
- +-------------------------------+-------------------------------+
- ^ ^ ^ ^ ^
- nil nil | | | | |
- | | | | | | |
- Checkpoint +-------------------------------+-------------------------------+
- #3 | r0 | r1-r5 | r6-r9 | fp-8 ... | r0 | r1-r5 | r6-r9 | fp-8 ... |
- +-------------------------------+-------------------------------+
- ^ ^
- nil nil | |
- | | | |
- Current +-------------------------------+
- state | r0 | r1-r5 | r6-r9 | fp-8 ... |
- +-------------------------------+
- \
- r6 read mark is propagated via these links
- all the way up to checkpoint #1.
- The checkpoint #1 contains a write mark for r6
- because of instruction (1), thus read propagation
- does not reach checkpoint #0 (see section below).
-
-Liveness marks tracking
-~~~~~~~~~~~~~~~~~~~~~~~
-
-For each processed instruction, the verifier tracks read and written registers
-and stack slots. The main idea of the algorithm is that read marks propagate
-back along the state parentage chain until they hit a write mark, which 'screens
-off' earlier states from the read. The information about reads is propagated by
-function ``mark_reg_read()`` which could be summarized as follows::
-
- mark_reg_read(struct bpf_reg_state *state, ...):
- parent = state->parent
- while parent:
- if state->live & REG_LIVE_WRITTEN:
- break
- if parent->live & REG_LIVE_READ64:
- break
- parent->live |= REG_LIVE_READ64
- state = parent
- parent = state->parent
-
-Notes:
-
-* The read marks are applied to the **parent** state while write marks are
- applied to the **current** state. The write mark on a register or stack slot
- means that it is updated by some instruction in the straight-line code leading
- from the parent state to the current state.
-
-* Details about REG_LIVE_READ32 are omitted.
-
-* Function ``propagate_liveness()`` (see section :ref:`read_marks_for_cache_hits`)
- might override the first parent link. Please refer to the comments in the
- ``propagate_liveness()`` and ``mark_reg_read()`` source code for further
- details.
-
-Because stack writes could have different sizes ``REG_LIVE_WRITTEN`` marks are
-applied conservatively: stack slots are marked as written only if write size
-corresponds to the size of the register, e.g. see function ``save_register_state()``.
-
-Consider the following example::
-
- 0: (*u64)(r10 - 8) = 0 ; define 8 bytes of fp-8
- --- checkpoint #0 ---
- 1: (*u32)(r10 - 8) = 1 ; redefine lower 4 bytes
- 2: r1 = (*u32)(r10 - 8) ; read lower 4 bytes defined at (1)
- 3: r2 = (*u32)(r10 - 4) ; read upper 4 bytes defined at (0)
-
-As stated above, the write at (1) does not count as ``REG_LIVE_WRITTEN``. Should
-it be otherwise, the algorithm above wouldn't be able to propagate the read mark
-from (3) to checkpoint #0.
-
-Once the ``BPF_EXIT`` instruction is reached ``update_branch_counts()`` is
-called to update the ``->branches`` counter for each verifier state in a chain
-of parent verifier states. When the ``->branches`` counter reaches zero the
-verifier state becomes a valid entry in a set of cached verifier states.
-
-Each entry of the verifier states cache is post-processed by a function
-``clean_live_states()``. This function marks all registers and stack slots
-without ``REG_LIVE_READ{32,64}`` marks as ``NOT_INIT`` or ``STACK_INVALID``.
-Registers/stack slots marked in this way are ignored in function ``stacksafe()``
-called from ``states_equal()`` when a state cache entry is considered for
-equivalence with a current state.
-
-Now it is possible to explain how the example from the beginning of the section
-works::
-
- 0: call bpf_get_prandom_u32()
- 1: r1 = 0
- 2: if r0 == 0 goto +1
- 3: r0 = 1
- --- checkpoint[0] ---
- 4: r0 = r1
- 5: exit
-
-* At instruction #2 branching point is reached and state ``{ r0 == 0, r1 == 0, pc == 4 }``
- is pushed to states processing queue (pc stands for program counter).
-
-* At instruction #4:
-
- * ``checkpoint[0]`` states cache entry is created: ``{ r0 == 1, r1 == 0, pc == 4 }``;
- * ``checkpoint[0].r0`` is marked as written;
- * ``checkpoint[0].r1`` is marked as read;
-
-* At instruction #5 exit is reached and ``checkpoint[0]`` can now be processed
- by ``clean_live_states()``. After this processing ``checkpoint[0].r1`` has a
- read mark and all other registers and stack slots are marked as ``NOT_INIT``
- or ``STACK_INVALID``
-
-* The state ``{ r0 == 0, r1 == 0, pc == 4 }`` is popped from the states queue
- and is compared against a cached state ``{ r1 == 0, pc == 4 }``, the states
- are considered equivalent.
-
-.. _read_marks_for_cache_hits:
-
-Read marks propagation for cache hits
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Another point is the handling of read marks when a previously verified state is
-found in the states cache. Upon cache hit verifier must behave in the same way
-as if the current state was verified to the program exit. This means that all
-read marks, present on registers and stack slots of the cached state, must be
-propagated over the parentage chain of the current state. Example below shows
-why this is important. Function ``propagate_liveness()`` handles this case.
-
-Consider the following state parentage chain (S is a starting state, A-E are
-derived states, -> arrows show which state is derived from which)::
-
- r1 read
- <------------- A[r1] == 0
- C[r1] == 0
- S ---> A ---> B ---> exit E[r1] == 1
- |
- ` ---> C ---> D
- |
- ` ---> E ^
- |___ suppose all these
- ^ states are at insn #Y
- |
- suppose all these
- states are at insn #X
-
-* Chain of states ``S -> A -> B -> exit`` is verified first.
-
-* While ``B -> exit`` is verified, register ``r1`` is read and this read mark is
- propagated up to state ``A``.
-
-* When chain of states ``C -> D`` is verified the state ``D`` turns out to be
- equivalent to state ``B``.
-
-* The read mark for ``r1`` has to be propagated to state ``C``, otherwise state
- ``C`` might get mistakenly marked as equivalent to state ``E`` even though
- values for register ``r1`` differ between ``C`` and ``E``.
-
Understanding eBPF verifier messages
====================================
diff --git a/MAINTAINERS b/MAINTAINERS
index 331728e93f69..b5bed6f6674a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -27466,10 +27466,8 @@ F: tools/testing/selftests/bpf/*xdp*
K: (?:\b|_)xdp(?:\b|_)
XDP SOCKETS (AF_XDP)
-M: Björn Töpel <bjorn@kernel.org>
M: Magnus Karlsson <magnus.karlsson@intel.com>
M: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
-R: Jonathan Lemon <jonathan.lemon@gmail.com>
R: Stanislav Fomichev <sdf@fomichev.me>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
diff --git a/arch/arm64/net/Makefile b/arch/arm64/net/Makefile
index 5c540efb7d9b..3ae382bfca87 100644
--- a/arch/arm64/net/Makefile
+++ b/arch/arm64/net/Makefile
@@ -2,4 +2,4 @@
#
# ARM64 networking code
#
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_timed_may_goto.o
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 28996e0a9b00..ab83089c3d8f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1066,19 +1066,53 @@ static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
emit(A64_RET(A64_LR), ctx);
}
-#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
+/*
+ * Metadata encoding for exception handling in JITed code.
+ *
+ * Format of `fixup` field in `struct exception_table_entry`:
+ *
+ * Bit layout of `fixup` (32-bit):
+ *
+ * +-----------+--------+-----------+-----------+----------+
+ * | 31-27 | 26-22 | 21 | 20-16 | 15-0 |
+ * | | | | | |
+ * | FIXUP_REG | Unused | ARENA_ACC | ARENA_REG | OFFSET |
+ * +-----------+--------+-----------+-----------+----------+
+ *
+ * - OFFSET (16 bits): Offset used to compute address for Load/Store instruction.
+ * - ARENA_REG (5 bits): Register that is used to calculate the address for load/store when
+ * accessing the arena region.
+ * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
+ * - FIXUP_REG (5 bits): Destination register for the load instruction (cleared on fault) or set to
+ * DONT_CLEAR if it is a store instruction.
+ */
+
+#define BPF_FIXUP_OFFSET_MASK GENMASK(15, 0)
+#define BPF_FIXUP_ARENA_REG_MASK GENMASK(20, 16)
+#define BPF_ARENA_ACCESS BIT(21)
#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
#define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */
bool ex_handler_bpf(const struct exception_table_entry *ex,
struct pt_regs *regs)
{
- off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
+ s16 off = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
+ int arena_reg = FIELD_GET(BPF_FIXUP_ARENA_REG_MASK, ex->fixup);
+ bool is_arena = !!(ex->fixup & BPF_ARENA_ACCESS);
+ bool is_write = (dst_reg == DONT_CLEAR);
+ unsigned long addr;
+
+ if (is_arena) {
+ addr = regs->regs[arena_reg] + off;
+ bpf_prog_report_arena_violation(is_write, addr, regs->pc);
+ }
if (dst_reg != DONT_CLEAR)
regs->regs[dst_reg] = 0;
- regs->pc = (unsigned long)&ex->fixup - offset;
+ /* Skip the faulting instruction */
+ regs->pc += AARCH64_INSN_SIZE;
+
return true;
}
@@ -1088,7 +1122,9 @@ static int add_exception_handler(const struct bpf_insn *insn,
int dst_reg)
{
off_t ins_offset;
- off_t fixup_offset;
+ s16 off = insn->off;
+ bool is_arena;
+ int arena_reg;
unsigned long pc;
struct exception_table_entry *ex;
@@ -1097,11 +1133,16 @@ static int add_exception_handler(const struct bpf_insn *insn,
return 0;
if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
- BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
- BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
- BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
+ BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
+ BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
+ BPF_MODE(insn->code) != BPF_PROBE_MEM32SX &&
+ BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
return 0;
+ is_arena = (BPF_MODE(insn->code) == BPF_PROBE_MEM32) ||
+ (BPF_MODE(insn->code) == BPF_PROBE_MEM32SX) ||
+ (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC);
+
if (!ctx->prog->aux->extable ||
WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
return -EINVAL;
@@ -1120,22 +1161,6 @@ static int add_exception_handler(const struct bpf_insn *insn,
return -ERANGE;
/*
- * Since the extable follows the program, the fixup offset is always
- * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
- * to keep things simple, and put the destination register in the upper
- * bits. We don't need to worry about buildtime or runtime sort
- * modifying the upper bits because the table is already sorted, and
- * isn't part of the main exception table.
- *
- * The fixup_offset is set to the next instruction from the instruction
- * that may fault. The execution will jump to this after handling the
- * fault.
- */
- fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
- if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
- return -ERANGE;
-
- /*
* The offsets above have been calculated using the RO buffer but we
* need to use the R/W buffer for writes.
* switch ex to rw buffer for writing.
@@ -1147,8 +1172,26 @@ static int add_exception_handler(const struct bpf_insn *insn,
if (BPF_CLASS(insn->code) != BPF_LDX)
dst_reg = DONT_CLEAR;
- ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
- FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+ ex->fixup = FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+
+ if (is_arena) {
+ ex->fixup |= BPF_ARENA_ACCESS;
+ /*
+ * insn->src_reg/dst_reg holds the address in the arena region with upper 32-bits
+ * being zero because of a preceding addr_space_cast(r<n>, 0x0, 0x1) instruction.
+ * This address is adjusted with the addition of arena_vm_start (see the
+ * implementation of BPF_PROBE_MEM32 and BPF_PROBE_ATOMIC) before being used for the
+ * memory access. Pass the reg holding the unmodified 32-bit address to
+ * ex_handler_bpf.
+ */
+ if (BPF_CLASS(insn->code) == BPF_LDX)
+ arena_reg = bpf2a64[insn->src_reg];
+ else
+ arena_reg = bpf2a64[insn->dst_reg];
+
+ ex->fixup |= FIELD_PREP(BPF_FIXUP_OFFSET_MASK, off) |
+ FIELD_PREP(BPF_FIXUP_ARENA_REG_MASK, arena_reg);
+ }
ex->type = EX_TYPE_BPF;
@@ -1558,7 +1601,13 @@ emit_cond_jmp:
if (ret < 0)
return ret;
emit_call(func_addr, ctx);
- emit(A64_MOV(1, r0, A64_R(0)), ctx);
+ /*
+ * Call to arch_bpf_timed_may_goto() is emitted by the
+ * verifier and called with custom calling convention with
+ * first argument and return value in BPF_REG_AX (x9).
+ */
+ if (func_addr != (u64)arch_bpf_timed_may_goto)
+ emit(A64_MOV(1, r0, A64_R(0)), ctx);
break;
}
/* tail call */
@@ -1612,7 +1661,11 @@ emit_cond_jmp:
case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
- if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_W:
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEM32SX) {
emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx);
src = tmp2;
}
@@ -1624,7 +1677,8 @@ emit_cond_jmp:
off_adj = off;
}
sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX ||
- BPF_MODE(insn->code) == BPF_PROBE_MEMSX);
+ BPF_MODE(insn->code) == BPF_PROBE_MEMSX ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEM32SX);
switch (BPF_SIZE(code)) {
case BPF_W:
if (is_lsi_offset(off_adj, 2)) {
@@ -1832,9 +1886,11 @@ emit_cond_jmp:
if (ret)
return ret;
- ret = add_exception_handler(insn, ctx, dst);
- if (ret)
- return ret;
+ if (BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) {
+ ret = add_exception_handler(insn, ctx, dst);
+ if (ret)
+ return ret;
+ }
break;
default:
@@ -2767,7 +2823,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
goto out;
}
- bpf_flush_icache(ro_image, ro_image + size);
out:
kvfree(image);
return ret;
@@ -3038,6 +3093,11 @@ bool bpf_jit_bypass_spec_v4(void)
return true;
}
+bool bpf_jit_supports_timed_may_goto(void)
+{
+ return true;
+}
+
bool bpf_jit_inlines_helper_call(s32 imm)
{
switch (imm) {
@@ -3064,8 +3124,7 @@ void bpf_jit_free(struct bpf_prog *prog)
* before freeing it.
*/
if (jit_data) {
- bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size,
- sizeof(jit_data->header->size));
+ bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header);
kfree(jit_data);
}
prog->bpf_func -= cfi_get_offset();
diff --git a/arch/arm64/net/bpf_timed_may_goto.S b/arch/arm64/net/bpf_timed_may_goto.S
new file mode 100644
index 000000000000..894cfcd7b241
--- /dev/null
+++ b/arch/arm64/net/bpf_timed_may_goto.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025 Puranjay Mohan <puranjay@kernel.org> */
+
+#include <linux/linkage.h>
+
+SYM_FUNC_START(arch_bpf_timed_may_goto)
+ /* Allocate stack space and emit frame record */
+ stp x29, x30, [sp, #-64]!
+ mov x29, sp
+
+ /* Save BPF registers R0 - R5 (x7, x0-x4)*/
+ stp x7, x0, [sp, #16]
+ stp x1, x2, [sp, #32]
+ stp x3, x4, [sp, #48]
+
+ /*
+ * Stack depth was passed in BPF_REG_AX (x9), add it to the BPF_FP
+ * (x25) to get the pointer to count and timestamp and pass it as the
+ * first argument in x0.
+ *
+ * Before generating the call to arch_bpf_timed_may_goto, the verifier
+ * generates a load instruction using FP, i.e. REG_AX = *(u64 *)(FP -
+ * stack_off_cnt), so BPF_REG_FP (x25) is always set up by the arm64
+ * jit in this case.
+ */
+ add x0, x9, x25
+ bl bpf_check_timed_may_goto
+ /* BPF_REG_AX(x9) will be stored into count, so move return value to it. */
+ mov x9, x0
+
+ /* Restore BPF registers R0 - R5 (x7, x0-x4) */
+ ldp x7, x0, [sp, #16]
+ ldp x1, x2, [sp, #32]
+ ldp x3, x4, [sp, #48]
+
+ /* Restore FP and LR */
+ ldp x29, x30, [sp], #64
+
+ ret
+SYM_FUNC_END(arch_bpf_timed_may_goto)
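
For reference, the may_goto instruction this trampoline services is what the ``can_loop`` / ``cond_break`` macros from the selftests' ``bpf_experimental.h`` expand to; a minimal, hypothetical program exercising it (the program name, section and loop body are arbitrary) looks like::

    // SPDX-License-Identifier: GPL-2.0
    #include <vmlinux.h>
    #include <bpf/bpf_helpers.h>
    #include "bpf_experimental.h"

    SEC("tc")
    int timed_loop(struct __sk_buff *skb)
    {
            int i, sum = 0;

            /*
             * can_loop expands to a may_goto instruction. On JITs where
             * bpf_jit_supports_timed_may_goto() returns true, such as arm64
             * with this patch, the verifier rewrites it into a call to
             * arch_bpf_timed_may_goto(), so the loop is bounded by elapsed
             * time rather than a fixed iteration budget.
             */
            for (i = 0; i < 1000000 && can_loop; i++)
                    sum += i;

            return sum & 0xff;
    }

    char _license[] SEC("license") = "GPL";
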
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a9ef4eeb72ab..2181dde50d6e 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -708,7 +708,6 @@ config TOOLCHAIN_HAS_ZACAS
config RISCV_ISA_ZACAS
bool "Zacas extension support for atomic CAS"
- depends on TOOLCHAIN_HAS_ZACAS
depends on RISCV_ALTERNATIVE
default y
help
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 80bd52363c68..122e1485d39a 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -134,6 +134,7 @@
({ \
if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
+ IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) && \
riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) && \
riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
r = o; \
@@ -181,6 +182,7 @@
r, p, co, o, n) \
({ \
if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
+ IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) && \
riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
r = o; \
\
@@ -316,7 +318,7 @@
arch_cmpxchg_release((ptr), (o), (n)); \
})
-#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)
+#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS) && defined(CONFIG_TOOLCHAIN_HAS_ZACAS)
#define system_has_cmpxchg128() riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)
@@ -352,7 +354,7 @@ union __u128_halves {
#define arch_cmpxchg128_local(ptr, o, n) \
__arch_cmpxchg128((ptr), (o), (n), "")
-#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */
+#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS && CONFIG_TOOLCHAIN_HAS_ZACAS */
#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index f90cce7a3ace..14235e58c539 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -290,6 +290,7 @@ static void __init riscv_spinlock_init(void)
if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&
IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&
+ IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZACAS) &&
riscv_isa_extension_available(NULL, ZABHA) &&
riscv_isa_extension_available(NULL, ZACAS)) {
using_ext = "using Zabha";
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index e7b032dfd17f..632ced07bca4 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -13,21 +13,15 @@
#include <linux/filter.h>
#include <asm/cacheflush.h>
+/* verify runtime detection extension status */
+#define rv_ext_enabled(ext) \
+ (IS_ENABLED(CONFIG_RISCV_ISA_##ext) && riscv_has_extension_likely(RISCV_ISA_EXT_##ext))
+
static inline bool rvc_enabled(void)
{
return IS_ENABLED(CONFIG_RISCV_ISA_C);
}
-static inline bool rvzba_enabled(void)
-{
- return IS_ENABLED(CONFIG_RISCV_ISA_ZBA) && riscv_has_extension_likely(RISCV_ISA_EXT_ZBA);
-}
-
-static inline bool rvzbb_enabled(void)
-{
- return IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && riscv_has_extension_likely(RISCV_ISA_EXT_ZBB);
-}
-
enum {
RV_REG_ZERO = 0, /* The constant value 0 */
RV_REG_RA = 1, /* Return address */
@@ -84,6 +78,8 @@ struct rv_jit_context {
int epilogue_offset;
int *offset; /* BPF to RV */
int nexentries;
+ int ex_insn_off;
+ int ex_jmp_off;
unsigned long flags;
int stack_size;
u64 arena_vm_start;
@@ -757,6 +753,17 @@ static inline u16 rvc_swsp(u32 imm8, u8 rs2)
return rv_css_insn(0x6, imm, rs2, 0x2);
}
+/* RVZACAS instructions. */
+static inline u32 rvzacas_amocas_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x5, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+static inline u32 rvzacas_amocas_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+ return rv_amo_insn(0x5, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
/* RVZBA instructions. */
static inline u32 rvzba_sh2add(u8 rd, u8 rs1, u8 rs2)
{
@@ -1123,7 +1130,7 @@ static inline void emit_sw(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx)
static inline void emit_sh2add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
{
- if (rvzba_enabled()) {
+ if (rv_ext_enabled(ZBA)) {
emit(rvzba_sh2add(rd, rs1, rs2), ctx);
return;
}
@@ -1134,7 +1141,7 @@ static inline void emit_sh2add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx
static inline void emit_sh3add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
{
- if (rvzba_enabled()) {
+ if (rv_ext_enabled(ZBA)) {
emit(rvzba_sh3add(rd, rs1, rs2), ctx);
return;
}
@@ -1184,7 +1191,7 @@ static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx)
static inline void emit_sextb(u8 rd, u8 rs, struct rv_jit_context *ctx)
{
- if (rvzbb_enabled()) {
+ if (rv_ext_enabled(ZBB)) {
emit(rvzbb_sextb(rd, rs), ctx);
return;
}
@@ -1195,7 +1202,7 @@ static inline void emit_sextb(u8 rd, u8 rs, struct rv_jit_context *ctx)
static inline void emit_sexth(u8 rd, u8 rs, struct rv_jit_context *ctx)
{
- if (rvzbb_enabled()) {
+ if (rv_ext_enabled(ZBB)) {
emit(rvzbb_sexth(rd, rs), ctx);
return;
}
@@ -1211,7 +1218,7 @@ static inline void emit_sextw(u8 rd, u8 rs, struct rv_jit_context *ctx)
static inline void emit_zexth(u8 rd, u8 rs, struct rv_jit_context *ctx)
{
- if (rvzbb_enabled()) {
+ if (rv_ext_enabled(ZBB)) {
emit(rvzbb_zexth(rd, rs), ctx);
return;
}
@@ -1222,7 +1229,7 @@ static inline void emit_zexth(u8 rd, u8 rs, struct rv_jit_context *ctx)
static inline void emit_zextw(u8 rd, u8 rs, struct rv_jit_context *ctx)
{
- if (rvzba_enabled()) {
+ if (rv_ext_enabled(ZBA)) {
emit(rvzba_zextw(rd, rs), ctx);
return;
}
@@ -1233,7 +1240,7 @@ static inline void emit_zextw(u8 rd, u8 rs, struct rv_jit_context *ctx)
static inline void emit_bswap(u8 rd, s32 imm, struct rv_jit_context *ctx)
{
- if (rvzbb_enabled()) {
+ if (rv_ext_enabled(ZBB)) {
int bits = 64 - imm;
emit(rvzbb_rev8(rd, rd), ctx);
@@ -1289,6 +1296,35 @@ out_be:
emit_mv(rd, RV_REG_T2, ctx);
}
+static inline void emit_cmpxchg(u8 rd, u8 rs, u8 r0, bool is64, struct rv_jit_context *ctx)
+{
+ int jmp_offset;
+
+ if (rv_ext_enabled(ZACAS)) {
+ ctx->ex_insn_off = ctx->ninsns;
+ emit(is64 ? rvzacas_amocas_d(r0, rs, rd, 1, 1) :
+ rvzacas_amocas_w(r0, rs, rd, 1, 1), ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
+ if (!is64)
+ emit_zextw(r0, r0, ctx);
+ return;
+ }
+
+ if (is64)
+ emit_mv(RV_REG_T2, r0, ctx);
+ else
+ emit_addiw(RV_REG_T2, r0, 0, ctx);
+ emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) :
+ rv_lr_w(r0, 0, rd, 0, 0), ctx);
+ jmp_offset = ninsns_rvoff(8);
+ emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx);
+ emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 1) :
+ rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx);
+ jmp_offset = ninsns_rvoff(-6);
+ emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
+ emit_fence_rw_rw(ctx);
+}
+
#endif /* __riscv_xlen == 64 */
void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog);
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 206b2b8552a7..45cbc7c6fe49 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -473,138 +473,92 @@ static inline void emit_kcfi(u32 hash, struct rv_jit_context *ctx)
emit(hash, ctx);
}
-static int emit_load_8(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+static void emit_ldx_insn(u8 rd, s16 off, u8 rs, u8 size, bool sign_ext,
+ struct rv_jit_context *ctx)
{
- int insns_start;
-
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lb(rd, off, rs), ctx);
- else
- emit(rv_lbu(rd, off, rs), ctx);
- return ctx->ninsns - insns_start;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lb(rd, 0, RV_REG_T1), ctx);
- else
- emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
- return ctx->ninsns - insns_start;
-}
-
-static int emit_load_16(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
-{
- int insns_start;
-
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lh(rd, off, rs), ctx);
- else
- emit(rv_lhu(rd, off, rs), ctx);
- return ctx->ninsns - insns_start;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lh(rd, 0, RV_REG_T1), ctx);
- else
- emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
- return ctx->ninsns - insns_start;
-}
-
-static int emit_load_32(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
-{
- int insns_start;
-
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lw(rd, off, rs), ctx);
- else
- emit(rv_lwu(rd, off, rs), ctx);
- return ctx->ninsns - insns_start;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- if (sign_ext)
- emit(rv_lw(rd, 0, RV_REG_T1), ctx);
- else
- emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
- return ctx->ninsns - insns_start;
-}
-
-static int emit_load_64(bool sign_ext, u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
-{
- int insns_start;
-
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
+ switch (size) {
+ case BPF_B:
+ emit(sign_ext ? rv_lb(rd, off, rs) : rv_lbu(rd, off, rs), ctx);
+ break;
+ case BPF_H:
+ emit(sign_ext ? rv_lh(rd, off, rs) : rv_lhu(rd, off, rs), ctx);
+ break;
+ case BPF_W:
+ emit(sign_ext ? rv_lw(rd, off, rs) : rv_lwu(rd, off, rs), ctx);
+ break;
+ case BPF_DW:
emit_ld(rd, off, rs, ctx);
- return ctx->ninsns - insns_start;
+ break;
}
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
- insns_start = ctx->ninsns;
- emit_ld(rd, 0, RV_REG_T1, ctx);
- return ctx->ninsns - insns_start;
}
-static void emit_store_8(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+static void emit_stx_insn(u8 rd, s16 off, u8 rs, u8 size, struct rv_jit_context *ctx)
{
- if (is_12b_int(off)) {
+ switch (size) {
+ case BPF_B:
emit(rv_sb(rd, off, rs), ctx);
- return;
+ break;
+ case BPF_H:
+ emit(rv_sh(rd, off, rs), ctx);
+ break;
+ case BPF_W:
+ emit_sw(rd, off, rs, ctx);
+ break;
+ case BPF_DW:
+ emit_sd(rd, off, rs, ctx);
+ break;
}
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit(rv_sb(RV_REG_T1, 0, rs), ctx);
}
-static void emit_store_16(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+static void emit_ldx(u8 rd, s16 off, u8 rs, u8 size, bool sign_ext,
+ struct rv_jit_context *ctx)
{
if (is_12b_int(off)) {
- emit(rv_sh(rd, off, rs), ctx);
+ ctx->ex_insn_off = ctx->ninsns;
+ emit_ldx_insn(rd, off, rs, size, sign_ext, ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
return;
}
emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit(rv_sh(RV_REG_T1, 0, rs), ctx);
+ emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
+ ctx->ex_insn_off = ctx->ninsns;
+ emit_ldx_insn(rd, 0, RV_REG_T1, size, sign_ext, ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
}
-static void emit_store_32(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+static void emit_st(u8 rd, s16 off, s32 imm, u8 size, struct rv_jit_context *ctx)
{
+ emit_imm(RV_REG_T1, imm, ctx);
if (is_12b_int(off)) {
- emit_sw(rd, off, rs, ctx);
+ ctx->ex_insn_off = ctx->ninsns;
+ emit_stx_insn(rd, off, RV_REG_T1, size, ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
return;
}
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit_sw(RV_REG_T1, 0, rs, ctx);
+ emit_imm(RV_REG_T2, off, ctx);
+ emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
+ ctx->ex_insn_off = ctx->ninsns;
+ emit_stx_insn(RV_REG_T2, 0, RV_REG_T1, size, ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
}
-static void emit_store_64(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
+static void emit_stx(u8 rd, s16 off, u8 rs, u8 size, struct rv_jit_context *ctx)
{
if (is_12b_int(off)) {
- emit_sd(rd, off, rs, ctx);
+ ctx->ex_insn_off = ctx->ninsns;
+ emit_stx_insn(rd, off, rs, size, ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
return;
}
emit_imm(RV_REG_T1, off, ctx);
emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- emit_sd(RV_REG_T1, 0, rs, ctx);
+ ctx->ex_insn_off = ctx->ninsns;
+ emit_stx_insn(RV_REG_T1, 0, rs, size, ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
}
static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn,
@@ -617,20 +571,12 @@ static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn,
switch (imm) {
/* dst_reg = load_acquire(src_reg + off16) */
case BPF_LOAD_ACQ:
- switch (BPF_SIZE(code)) {
- case BPF_B:
- emit_load_8(false, rd, off, rs, ctx);
- break;
- case BPF_H:
- emit_load_16(false, rd, off, rs, ctx);
- break;
- case BPF_W:
- emit_load_32(false, rd, off, rs, ctx);
- break;
- case BPF_DW:
- emit_load_64(false, rd, off, rs, ctx);
- break;
+ if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
+ emit_add(RV_REG_T2, rs, RV_REG_ARENA, ctx);
+ rs = RV_REG_T2;
}
+
+ emit_ldx(rd, off, rs, BPF_SIZE(code), false, ctx);
emit_fence_r_rw(ctx);
/* If our next insn is a redundant zext, return 1 to tell
@@ -641,21 +587,13 @@ static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn,
break;
/* store_release(dst_reg + off16, src_reg) */
case BPF_STORE_REL:
- emit_fence_rw_w(ctx);
- switch (BPF_SIZE(code)) {
- case BPF_B:
- emit_store_8(rd, off, rs, ctx);
- break;
- case BPF_H:
- emit_store_16(rd, off, rs, ctx);
- break;
- case BPF_W:
- emit_store_32(rd, off, rs, ctx);
- break;
- case BPF_DW:
- emit_store_64(rd, off, rs, ctx);
- break;
+ if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
+ emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx);
+ rd = RV_REG_T2;
}
+
+ emit_fence_rw_w(ctx);
+ emit_stx(rd, off, rs, BPF_SIZE(code), ctx);
break;
default:
pr_err_once("bpf-jit: invalid atomic load/store opcode %02x\n", imm);
@@ -668,17 +606,15 @@ static int emit_atomic_ld_st(u8 rd, u8 rs, const struct bpf_insn *insn,
static int emit_atomic_rmw(u8 rd, u8 rs, const struct bpf_insn *insn,
struct rv_jit_context *ctx)
{
- u8 r0, code = insn->code;
+ u8 code = insn->code;
s16 off = insn->off;
s32 imm = insn->imm;
- int jmp_offset;
- bool is64;
+ bool is64 = BPF_SIZE(code) == BPF_DW;
if (BPF_SIZE(code) != BPF_W && BPF_SIZE(code) != BPF_DW) {
pr_err_once("bpf-jit: 1- and 2-byte RMW atomics are not supported\n");
return -EINVAL;
}
- is64 = BPF_SIZE(code) == BPF_DW;
if (off) {
if (is_12b_int(off)) {
@@ -690,72 +626,82 @@ static int emit_atomic_rmw(u8 rd, u8 rs, const struct bpf_insn *insn,
rd = RV_REG_T1;
}
+ if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
+ emit_add(RV_REG_T1, rd, RV_REG_ARENA, ctx);
+ rd = RV_REG_T1;
+ }
+
switch (imm) {
/* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */
case BPF_ADD:
+ ctx->ex_insn_off = ctx->ninsns;
emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) :
rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
break;
case BPF_AND:
+ ctx->ex_insn_off = ctx->ninsns;
emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) :
rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
break;
case BPF_OR:
+ ctx->ex_insn_off = ctx->ninsns;
emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) :
rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
break;
case BPF_XOR:
+ ctx->ex_insn_off = ctx->ninsns;
emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) :
rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
break;
/* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */
case BPF_ADD | BPF_FETCH:
+ ctx->ex_insn_off = ctx->ninsns;
emit(is64 ? rv_amoadd_d(rs, rs, rd, 1, 1) :
rv_amoadd_w(rs, rs, rd, 1, 1), ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
if (!is64)
emit_zextw(rs, rs, ctx);
break;
case BPF_AND | BPF_FETCH:
+ ctx->ex_insn_off = ctx->ninsns;
emit(is64 ? rv_amoand_d(rs, rs, rd, 1, 1) :
rv_amoand_w(rs, rs, rd, 1, 1), ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
if (!is64)
emit_zextw(rs, rs, ctx);
break;
case BPF_OR | BPF_FETCH:
+ ctx->ex_insn_off = ctx->ninsns;
emit(is64 ? rv_amoor_d(rs, rs, rd, 1, 1) :
rv_amoor_w(rs, rs, rd, 1, 1), ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
if (!is64)
emit_zextw(rs, rs, ctx);
break;
case BPF_XOR | BPF_FETCH:
+ ctx->ex_insn_off = ctx->ninsns;
emit(is64 ? rv_amoxor_d(rs, rs, rd, 1, 1) :
rv_amoxor_w(rs, rs, rd, 1, 1), ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
if (!is64)
emit_zextw(rs, rs, ctx);
break;
/* src_reg = atomic_xchg(dst_reg + off16, src_reg); */
case BPF_XCHG:
+ ctx->ex_insn_off = ctx->ninsns;
emit(is64 ? rv_amoswap_d(rs, rs, rd, 1, 1) :
rv_amoswap_w(rs, rs, rd, 1, 1), ctx);
+ ctx->ex_jmp_off = ctx->ninsns;
if (!is64)
emit_zextw(rs, rs, ctx);
break;
/* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */
case BPF_CMPXCHG:
- r0 = bpf_to_rv_reg(BPF_REG_0, ctx);
- if (is64)
- emit_mv(RV_REG_T2, r0, ctx);
- else
- emit_addiw(RV_REG_T2, r0, 0, ctx);
- emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) :
- rv_lr_w(r0, 0, rd, 0, 0), ctx);
- jmp_offset = ninsns_rvoff(8);
- emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx);
- emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 1) :
- rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx);
- jmp_offset = ninsns_rvoff(-6);
- emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
- emit_fence_rw_rw(ctx);
+ emit_cmpxchg(rd, rs, regmap[BPF_REG_0], is64, ctx);
break;
default:
pr_err_once("bpf-jit: invalid atomic RMW opcode %02x\n", imm);
@@ -765,6 +711,39 @@ static int emit_atomic_rmw(u8 rd, u8 rs, const struct bpf_insn *insn,
return 0;
}
+/*
+ * Sign-extend the register if necessary
+ */
+static int sign_extend(u8 rd, u8 rs, u8 sz, bool sign, struct rv_jit_context *ctx)
+{
+ if (!sign && (sz == 1 || sz == 2)) {
+ if (rd != rs)
+ emit_mv(rd, rs, ctx);
+ return 0;
+ }
+
+ switch (sz) {
+ case 1:
+ emit_sextb(rd, rs, ctx);
+ break;
+ case 2:
+ emit_sexth(rd, rs, ctx);
+ break;
+ case 4:
+ emit_sextw(rd, rs, ctx);
+ break;
+ case 8:
+ if (rd != rs)
+ emit_mv(rd, rs, ctx);
+ break;
+ default:
+ pr_err("bpf-jit: invalid size %d for sign_extend\n", sz);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
#define REG_DONT_CLEAR_MARKER 0 /* RV_REG_ZERO unused in pt_regmap */
@@ -783,9 +762,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
}
/* For accesses to BTF pointers, add an entry to the exception table */
-static int add_exception_handler(const struct bpf_insn *insn,
- struct rv_jit_context *ctx,
- int dst_reg, int insn_len)
+static int add_exception_handler(const struct bpf_insn *insn, int dst_reg,
+ struct rv_jit_context *ctx)
{
struct exception_table_entry *ex;
unsigned long pc;
@@ -793,21 +771,23 @@ static int add_exception_handler(const struct bpf_insn *insn,
off_t fixup_offset;
if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable ||
- (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
- BPF_MODE(insn->code) != BPF_PROBE_MEM32))
+ ctx->ex_insn_off <= 0 || ctx->ex_jmp_off <= 0)
return 0;
- if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
- return -EINVAL;
+ if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
+ BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
+ BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
+ BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
+ return 0;
- if (WARN_ON_ONCE(insn_len > ctx->ninsns))
+ if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries))
return -EINVAL;
- if (WARN_ON_ONCE(!rvc_enabled() && insn_len == 1))
+ if (WARN_ON_ONCE(ctx->ex_insn_off > ctx->ninsns || ctx->ex_jmp_off > ctx->ninsns))
return -EINVAL;
ex = &ctx->prog->aux->extable[ctx->nexentries];
- pc = (unsigned long)&ctx->ro_insns[ctx->ninsns - insn_len];
+ pc = (unsigned long)&ctx->ro_insns[ctx->ex_insn_off];
/*
* This is the relative offset of the instruction that may fault from
@@ -831,7 +811,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
* that may fault. The execution will jump to this after handling the
* fault.
*/
- fixup_offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
+ fixup_offset = (long)&ex->fixup - (long)&ctx->ro_insns[ctx->ex_jmp_off];
if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
return -ERANGE;
@@ -848,6 +828,8 @@ static int add_exception_handler(const struct bpf_insn *insn,
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
ex->type = EX_TYPE_BPF;
+ ctx->ex_insn_off = 0;
+ ctx->ex_jmp_off = 0;
ctx->nexentries++;
return 0;
}
@@ -1079,10 +1061,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
stack_size += 16;
save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
- if (save_ret) {
+ if (save_ret)
stack_size += 16; /* Save both A5 (BPF R0) and A0 */
- retval_off = stack_size;
- }
+ retval_off = stack_size;
stack_size += nr_arg_slots * 8;
args_off = stack_size;
@@ -1226,8 +1207,15 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
restore_args(min_t(int, nr_arg_slots, RV_MAX_REG_ARGS), args_off, ctx);
if (save_ret) {
- emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
emit_ld(regmap[BPF_REG_0], -(retval_off - 8), RV_REG_FP, ctx);
+ if (is_struct_ops) {
+ ret = sign_extend(RV_REG_A0, regmap[BPF_REG_0], m->ret_size,
+ m->ret_flags & BTF_FMODEL_SIGNED_ARG, ctx);
+ if (ret)
+ goto out;
+ } else {
+ emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
+ }
}
emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
@@ -1320,7 +1308,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
goto out;
}
- bpf_flush_icache(ro_image, ro_image_end);
out:
kvfree(image);
return ret < 0 ? ret : size;
@@ -1857,7 +1844,6 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
{
bool sign_ext;
- int insn_len;
sign_ext = BPF_MODE(insn->code) == BPF_MEMSX ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
@@ -1867,22 +1853,9 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
rs = RV_REG_T2;
}
- switch (BPF_SIZE(code)) {
- case BPF_B:
- insn_len = emit_load_8(sign_ext, rd, off, rs, ctx);
- break;
- case BPF_H:
- insn_len = emit_load_16(sign_ext, rd, off, rs, ctx);
- break;
- case BPF_W:
- insn_len = emit_load_32(sign_ext, rd, off, rs, ctx);
- break;
- case BPF_DW:
- insn_len = emit_load_64(sign_ext, rd, off, rs, ctx);
- break;
- }
+ emit_ldx(rd, off, rs, BPF_SIZE(code), sign_ext, ctx);
- ret = add_exception_handler(insn, ctx, rd, insn_len);
+ ret = add_exception_handler(insn, rd, ctx);
if (ret)
return ret;
@@ -1890,238 +1863,73 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
return 1;
break;
}
+
/* speculation barrier */
case BPF_ST | BPF_NOSPEC:
break;
/* ST: *(size *)(dst + off) = imm */
case BPF_ST | BPF_MEM | BPF_B:
- emit_imm(RV_REG_T1, imm, ctx);
- if (is_12b_int(off)) {
- emit(rv_sb(rd, off, RV_REG_T1), ctx);
- break;
- }
-
- emit_imm(RV_REG_T2, off, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
- emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
- break;
-
case BPF_ST | BPF_MEM | BPF_H:
- emit_imm(RV_REG_T1, imm, ctx);
- if (is_12b_int(off)) {
- emit(rv_sh(rd, off, RV_REG_T1), ctx);
- break;
- }
-
- emit_imm(RV_REG_T2, off, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
- emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
- break;
case BPF_ST | BPF_MEM | BPF_W:
- emit_imm(RV_REG_T1, imm, ctx);
- if (is_12b_int(off)) {
- emit_sw(rd, off, RV_REG_T1, ctx);
- break;
- }
-
- emit_imm(RV_REG_T2, off, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
- emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
- break;
case BPF_ST | BPF_MEM | BPF_DW:
- emit_imm(RV_REG_T1, imm, ctx);
- if (is_12b_int(off)) {
- emit_sd(rd, off, RV_REG_T1, ctx);
- break;
- }
-
- emit_imm(RV_REG_T2, off, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
- emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
- break;
-
+ /* ST | PROBE_MEM32: *(size *)(dst + RV_REG_ARENA + off) = imm */
case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
- {
- int insn_len, insns_start;
-
- emit_add(RV_REG_T3, rd, RV_REG_ARENA, ctx);
- rd = RV_REG_T3;
-
- /* Load imm to a register then store it */
- emit_imm(RV_REG_T1, imm, ctx);
-
- switch (BPF_SIZE(code)) {
- case BPF_B:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- emit(rv_sb(rd, off, RV_REG_T1), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T2, off, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
- insns_start = ctx->ninsns;
- emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- case BPF_H:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- emit(rv_sh(rd, off, RV_REG_T1), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T2, off, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
- insns_start = ctx->ninsns;
- emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- case BPF_W:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- emit_sw(rd, off, RV_REG_T1, ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T2, off, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
- insns_start = ctx->ninsns;
- emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- case BPF_DW:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- emit_sd(rd, off, RV_REG_T1, ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T2, off, ctx);
- emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
- insns_start = ctx->ninsns;
- emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+ emit_add(RV_REG_T3, rd, RV_REG_ARENA, ctx);
+ rd = RV_REG_T3;
}
- ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER,
- insn_len);
+ emit_st(rd, off, imm, BPF_SIZE(code), ctx);
+
+ ret = add_exception_handler(insn, REG_DONT_CLEAR_MARKER, ctx);
if (ret)
return ret;
-
break;
- }
/* STX: *(size *)(dst + off) = src */
case BPF_STX | BPF_MEM | BPF_B:
- emit_store_8(rd, off, rs, ctx);
- break;
case BPF_STX | BPF_MEM | BPF_H:
- emit_store_16(rd, off, rs, ctx);
- break;
case BPF_STX | BPF_MEM | BPF_W:
- emit_store_32(rd, off, rs, ctx);
- break;
case BPF_STX | BPF_MEM | BPF_DW:
- emit_store_64(rd, off, rs, ctx);
+ /* STX | PROBE_MEM32: *(size *)(dst + RV_REG_ARENA + off) = src */
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+ emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx);
+ rd = RV_REG_T2;
+ }
+
+ emit_stx(rd, off, rs, BPF_SIZE(code), ctx);
+
+ ret = add_exception_handler(insn, REG_DONT_CLEAR_MARKER, ctx);
+ if (ret)
+ return ret;
break;
+
+ /* Atomics */
case BPF_STX | BPF_ATOMIC | BPF_B:
case BPF_STX | BPF_ATOMIC | BPF_H:
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
if (bpf_atomic_is_load_store(insn))
ret = emit_atomic_ld_st(rd, rs, insn, ctx);
else
ret = emit_atomic_rmw(rd, rs, insn, ctx);
- if (ret)
- return ret;
- break;
- case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
- case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
- case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
- case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
- {
- int insn_len, insns_start;
-
- emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx);
- rd = RV_REG_T2;
-
- switch (BPF_SIZE(code)) {
- case BPF_B:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- emit(rv_sb(rd, off, rs), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- insns_start = ctx->ninsns;
- emit(rv_sb(RV_REG_T1, 0, rs), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- case BPF_H:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- emit(rv_sh(rd, off, rs), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- insns_start = ctx->ninsns;
- emit(rv_sh(RV_REG_T1, 0, rs), ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- case BPF_W:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- emit_sw(rd, off, rs, ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- insns_start = ctx->ninsns;
- emit_sw(RV_REG_T1, 0, rs, ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- case BPF_DW:
- if (is_12b_int(off)) {
- insns_start = ctx->ninsns;
- emit_sd(rd, off, rs, ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- emit_imm(RV_REG_T1, off, ctx);
- emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
- insns_start = ctx->ninsns;
- emit_sd(RV_REG_T1, 0, rs, ctx);
- insn_len = ctx->ninsns - insns_start;
- break;
- }
-
- ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER,
- insn_len);
+ ret = ret ?: add_exception_handler(insn, REG_DONT_CLEAR_MARKER, ctx);
if (ret)
return ret;
-
break;
- }
default:
pr_err("bpf-jit: unknown opcode %02x\n", code);
@@ -2249,6 +2057,25 @@ bool bpf_jit_supports_arena(void)
return true;
}
+bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
+{
+ if (in_arena) {
+ switch (insn->code) {
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_ATOMIC | BPF_DW:
+ if (insn->imm == BPF_CMPXCHG)
+ return rv_ext_enabled(ZACAS);
+ break;
+ case BPF_LDX | BPF_MEMSX | BPF_B:
+ case BPF_LDX | BPF_MEMSX | BPF_H:
+ case BPF_LDX | BPF_MEMSX | BPF_W:
+ return false;
+ }
+ }
+
+ return true;
+}
+
bool bpf_jit_supports_percpu_insn(void)
{
return true;
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
index 8cab6deb0403..9275cf63192a 100644
--- a/arch/s390/net/Makefile
+++ b/arch/s390/net/Makefile
@@ -2,5 +2,5 @@
#
# Arch-specific network modules
#
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_timed_may_goto.o
obj-$(CONFIG_HAVE_PNETID) += pnet.o
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index bb17efe29d65..cf461d76e9da 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -675,20 +675,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp)
} while (0)
/*
- * Call r1 either directly or via __s390_indirect_jump_r1 thunk
- */
-static void call_r1(struct bpf_jit *jit)
-{
- if (nospec_uses_trampoline())
- /* brasl %r14,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
- __s390_indirect_jump_r1);
- else
- /* basr %r14,%r1 */
- EMIT2(0x0d00, REG_14, REG_1);
-}
-
-/*
* Function epilogue
*/
static void bpf_jit_epilogue(struct bpf_jit *jit)
@@ -1790,20 +1776,21 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
+
/*
* Copy the tail call counter to where the callee expects it.
- *
- * Note 1: The callee can increment the tail call counter, but
- * we do not load it back, since the x86 JIT does not do this
- * either.
- *
- * Note 2: We assume that the verifier does not let us call the
- * main program, which clears the tail call counter on entry.
*/
- /* mvc tail_call_cnt(4,%r15),frame_off+tail_call_cnt(%r15) */
- _EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
- 0xf000 | (jit->frame_off +
- offsetof(struct prog_frame, tail_call_cnt)));
+
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ /*
+ * mvc tail_call_cnt(4,%r15),
+ * frame_off+tail_call_cnt(%r15)
+ */
+ _EMIT6(0xd203f000 | offsetof(struct prog_frame,
+ tail_call_cnt),
+ 0xf000 | (jit->frame_off +
+ offsetof(struct prog_frame,
+ tail_call_cnt)));
/* Sign-extend the kfunc arguments. */
if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
@@ -1819,12 +1806,38 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
}
}
- /* lgrl %w1,func */
- EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
- /* %r1() */
- call_r1(jit);
- /* lgr %b0,%r2: load return value into %b0 */
- EMIT4(0xb9040000, BPF_REG_0, REG_2);
+ if ((void *)func == arch_bpf_timed_may_goto) {
+ /*
+ * arch_bpf_timed_may_goto() has a special ABI: the
+ * parameters are in BPF_REG_AX and BPF_REG_10; the
+ * return value is in BPF_REG_AX; and all GPRs except
+ * REG_W0, REG_W1, and BPF_REG_AX are callee-saved.
+ */
+
+ /* brasl %r0,func */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_0, (void *)func);
+ } else {
+ /* brasl %r14,func */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, (void *)func);
+ /* lgr %b0,%r2: load return value into %b0 */
+ EMIT4(0xb9040000, BPF_REG_0, REG_2);
+ }
+
+ /*
+ * Copy the potentially updated tail call counter back.
+ */
+
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ /*
+ * mvc frame_off+tail_call_cnt(%r15),
+ * tail_call_cnt(4,%r15)
+ */
+ _EMIT6(0xd203f000 | (jit->frame_off +
+ offsetof(struct prog_frame,
+ tail_call_cnt)),
+ 0xf000 | offsetof(struct prog_frame,
+ tail_call_cnt));
+
break;
}
case BPF_JMP | BPF_TAIL_CALL: {
@@ -2517,14 +2530,12 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
* goto skip;
*/
- /* %r1 = __bpf_prog_enter */
- load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
/* %r2 = p */
load_imm64(jit, REG_2, (u64)p);
/* la %r3,run_ctx_off(%r15) */
EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
- /* %r1() */
- call_r1(jit);
+ /* brasl %r14,__bpf_prog_enter */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_enter(p));
/* ltgr %r7,%r2 */
EMIT4(0xb9020000, REG_7, REG_2);
/* brcl 8,skip */
@@ -2535,15 +2546,13 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
* retval = bpf_func(args, p->insnsi);
*/
- /* %r1 = p->bpf_func */
- load_imm64(jit, REG_1, (u64)p->bpf_func);
/* la %r2,bpf_args_off(%r15) */
EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
/* %r3 = p->insnsi */
if (!p->jited)
load_imm64(jit, REG_3, (u64)p->insnsi);
- /* %r1() */
- call_r1(jit);
+ /* brasl %r14,p->bpf_func */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, p->bpf_func);
/* stg %r2,retval_off(%r15) */
if (save_ret) {
if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
@@ -2560,16 +2569,14 @@ static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
* __bpf_prog_exit(p, start, &run_ctx);
*/
- /* %r1 = __bpf_prog_exit */
- load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
/* %r2 = p */
load_imm64(jit, REG_2, (u64)p);
/* lgr %r3,%r7 */
EMIT4(0xb9040000, REG_3, REG_7);
/* la %r4,run_ctx_off(%r15) */
EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
- /* %r1() */
- call_r1(jit);
+ /* brasl %r14,__bpf_prog_exit */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, bpf_trampoline_exit(p));
return 0;
}
@@ -2729,9 +2736,6 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
/* lgr %r8,%r0 */
EMIT4(0xb9040000, REG_8, REG_0);
- } else {
- /* %r8 = func_addr + S390X_PATCH_SIZE */
- load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
}
/*
@@ -2757,12 +2761,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
* __bpf_tramp_enter(im);
*/
- /* %r1 = __bpf_tramp_enter */
- load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
/* %r2 = im */
load_imm64(jit, REG_2, (u64)im);
- /* %r1() */
- call_r1(jit);
+ /* brasl %r14,__bpf_tramp_enter */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_enter);
}
for (i = 0; i < fentry->nr_links; i++)
@@ -2815,13 +2817,25 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
/* mvc tail_call_cnt(4,%r15),tccnt_off(%r15) */
_EMIT6(0xd203f000 | offsetof(struct prog_frame, tail_call_cnt),
0xf000 | tjit->tccnt_off);
- /* lgr %r1,%r8 */
- EMIT4(0xb9040000, REG_1, REG_8);
- /* %r1() */
- call_r1(jit);
+ if (flags & BPF_TRAMP_F_ORIG_STACK) {
+ if (nospec_uses_trampoline())
+ /* brasl %r14,__s390_indirect_jump_r8 */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+ __s390_indirect_jump_r8);
+ else
+ /* basr %r14,%r8 */
+ EMIT2(0x0d00, REG_14, REG_8);
+ } else {
+ /* brasl %r14,func_addr+S390X_PATCH_SIZE */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14,
+ func_addr + S390X_PATCH_SIZE);
+ }
/* stg %r2,retval_off(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
tjit->retval_off);
+ /* mvc tccnt_off(%r15),tail_call_cnt(4,%r15) */
+ _EMIT6(0xd203f000 | tjit->tccnt_off,
+ 0xf000 | offsetof(struct prog_frame, tail_call_cnt));
im->ip_after_call = jit->prg_buf + jit->prg;
@@ -2846,12 +2860,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
* __bpf_tramp_exit(im);
*/
- /* %r1 = __bpf_tramp_exit */
- load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
/* %r2 = im */
load_imm64(jit, REG_2, (u64)im);
- /* %r1() */
- call_r1(jit);
+ /* brasl %r14,__bpf_tramp_exit */
+ EMIT6_PCREL_RILB_PTR(0xc0050000, REG_14, __bpf_tramp_exit);
}
/* lmg %r2,%rN,reg_args_off(%r15) */
@@ -2860,7 +2872,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
REG_2 + (nr_reg_args - 1), REG_15,
tjit->reg_args_off);
/* lgr %r1,%r8 */
- if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
+ if (!(flags & BPF_TRAMP_F_SKIP_FRAME) &&
+ (flags & BPF_TRAMP_F_ORIG_STACK))
EMIT4(0xb9040000, REG_1, REG_8);
/* lmg %r7,%r8,r7_r8_off(%r15) */
EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
@@ -2879,9 +2892,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
if (flags & BPF_TRAMP_F_SKIP_FRAME)
EMIT_JUMP_REG(14);
- else
+ else if (flags & BPF_TRAMP_F_ORIG_STACK)
EMIT_JUMP_REG(1);
-
+ else
+ /* brcl 0xf,func_addr+S390X_PATCH_SIZE */
+ EMIT6_PCREL_RILC_PTR(0xc0040000, 0xf,
+ func_addr + S390X_PATCH_SIZE);
return 0;
}
@@ -2951,6 +2967,11 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
case BPF_STX | BPF_ATOMIC | BPF_DW:
if (bpf_atomic_is_load_store(insn))
return false;
+ break;
+ case BPF_LDX | BPF_MEMSX | BPF_B:
+ case BPF_LDX | BPF_MEMSX | BPF_H:
+ case BPF_LDX | BPF_MEMSX | BPF_W:
+ return false;
}
return true;
}
@@ -2989,3 +3010,8 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *, u64, u64, u64),
prev_addr = addr;
}
}
+
+bool bpf_jit_supports_timed_may_goto(void)
+{
+ return true;
+}
diff --git a/arch/s390/net/bpf_timed_may_goto.S b/arch/s390/net/bpf_timed_may_goto.S
new file mode 100644
index 000000000000..06f567a460d7
--- /dev/null
+++ b/arch/s390/net/bpf_timed_may_goto.S
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
+
+#define R2_OFF 0
+#define R5_OFF (R2_OFF + (5 - 2 + 1) * 8)
+#define R14_OFF (R5_OFF + 8)
+#define RETADDR_OFF (R14_OFF + 8)
+#define R15_OFF (RETADDR_OFF + 8)
+#define BACKCHAIN_OFF (R15_OFF + 8)
+#define FRAME_SIZE (BACKCHAIN_OFF + 8)
+#define FRAME_OFF (STACK_FRAME_OVERHEAD - FRAME_SIZE)
+#if (FRAME_OFF + BACKCHAIN_OFF) != __SF_BACKCHAIN
+#error Stack frame layout calculation is broken
+#endif
+
+ GEN_BR_THUNK %r1
+
+SYM_FUNC_START(arch_bpf_timed_may_goto)
+ /*
+ * This function has a special ABI: the parameters are in %r12 and
+ * %r13; the return value is in %r12; all GPRs except %r0, %r1, and
+ * %r12 are callee-saved; and the return address is in %r0.
+ */
+ stmg %r2,%r5,FRAME_OFF+R2_OFF(%r15)
+ stg %r14,FRAME_OFF+R14_OFF(%r15)
+ stg %r0,FRAME_OFF+RETADDR_OFF(%r15)
+ stg %r15,FRAME_OFF+R15_OFF(%r15)
+ lgr %r1,%r15
+ lay %r15,-FRAME_SIZE(%r15)
+ stg %r1,__SF_BACKCHAIN(%r15)
+
+ lay %r2,0(%r12,%r13)
+ brasl %r14,bpf_check_timed_may_goto
+ lgr %r12,%r2
+
+ lg %r15,FRAME_SIZE+FRAME_OFF+R15_OFF(%r15)
+ lmg %r2,%r5,FRAME_OFF+R2_OFF(%r15)
+ lg %r14,FRAME_OFF+R14_OFF(%r15)
+ lg %r1,FRAME_OFF+RETADDR_OFF(%r15)
+ BR_EX %r1
+SYM_FUNC_END(arch_bpf_timed_may_goto)
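
For orientation, an editorial C-level paraphrase of the thunk above (not part of the patch); it assumes bpf_check_timed_may_goto() takes the address formed from %r12 + %r13 as its only argument and returns the updated count:

	/*
	 * Sketch: the real code must stay in assembly because it saves and
	 * restores the clobbered GPRs itself and returns through the address
	 * passed in %r0 rather than %r14.
	 */
	static u64 timed_may_goto_sketch(u64 r12, u64 r13)
	{
		/* stmg/stg: spill %r2-%r5, %r14, %r0 and %r15, build a frame */
		u64 ret = bpf_check_timed_may_goto((void *)(r12 + r13));
		/* lmg/lg: restore the spilled registers, then BR_EX via %r1 */
		return ret;	/* handed back to the BPF program in %r12 */
	}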
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7e3fca164620..fc13306af15f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -8,6 +8,7 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
+#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/memory.h>
#include <linux/sort.h>
@@ -1151,11 +1152,38 @@ static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 i
*pprog = prog;
}
+static void emit_ldsx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* movsx rax, byte ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x0F, 0xBE);
+ break;
+ case BPF_H:
+ /* movsx rax, word ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x0F, 0xBF);
+ break;
+ case BPF_W:
+ /* movsx rax, dword ptr [rax + r12 + off] */
+ EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x63);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
+ *pprog = prog;
+}
+
static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
}
+static void emit_ldsx_r12(u8 **prog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ emit_ldsx_index(prog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
/* STX: *(u8*)(dst_reg + off) = src_reg */
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
@@ -1388,16 +1416,67 @@ static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size,
return 0;
}
+/*
+ * Metadata encoding for exception handling in JITed code.
+ *
+ * Format of `fixup` and `data` fields in `struct exception_table_entry`:
+ *
+ * Bit layout of `fixup` (32-bit):
+ *
+ * +-----------+--------+-----------+---------+----------+
+ * | 31 | 30-24 | 23-16 | 15-8 | 7-0 |
+ * | | | | | |
+ * | ARENA_ACC | Unused | ARENA_REG | DST_REG | INSN_LEN |
+ * +-----------+--------+-----------+---------+----------+
+ *
+ * - INSN_LEN (8 bits): Length of the faulting insn; the longest x86 insn is 15 bytes, so it fits in 8 bits.
+ * - DST_REG (8 bits): Offset of dst_reg within struct pt_regs (taken from reg2pt_regs[]); the maximum offset is 112, so it fits in 8 bits.
+ * This is set to DONT_CLEAR if the insn is a store.
+ * - ARENA_REG (8 bits): Offset of the register that is used to calculate the
+ * address for load/store when accessing the arena region.
+ * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
+ *
+ * Bit layout of `data` (32-bit):
+ *
+ * +--------------+--------+--------------+
+ * | 31-16 | 15-8 | 7-0 |
+ * | | | |
+ * | ARENA_OFFSET | Unused | EX_TYPE_BPF |
+ * +--------------+--------+--------------+
+ *
+ * - ARENA_OFFSET (16 bits): Offset used to calculate the address for load/store when
+ * accessing the arena region.
+ */
+
#define DONT_CLEAR 1
+#define FIXUP_INSN_LEN_MASK GENMASK(7, 0)
+#define FIXUP_REG_MASK GENMASK(15, 8)
+#define FIXUP_ARENA_REG_MASK GENMASK(23, 16)
+#define FIXUP_ARENA_ACCESS BIT(31)
+#define DATA_ARENA_OFFSET_MASK GENMASK(31, 16)
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
- u32 reg = x->fixup >> 8;
+ u32 reg = FIELD_GET(FIXUP_REG_MASK, x->fixup);
+ u32 insn_len = FIELD_GET(FIXUP_INSN_LEN_MASK, x->fixup);
+ bool is_arena = !!(x->fixup & FIXUP_ARENA_ACCESS);
+ bool is_write = (reg == DONT_CLEAR);
+ unsigned long addr;
+ s16 off;
+ u32 arena_reg;
+
+ if (is_arena) {
+ arena_reg = FIELD_GET(FIXUP_ARENA_REG_MASK, x->fixup);
+ off = FIELD_GET(DATA_ARENA_OFFSET_MASK, x->data);
+ addr = *(unsigned long *)((void *)regs + arena_reg) + off;
+ bpf_prog_report_arena_violation(is_write, addr, regs->ip);
+ }
/* jump over faulting load and clear dest register */
if (reg != DONT_CLEAR)
*(unsigned long *)((void *)regs + reg) = 0;
- regs->ip += x->fixup & 0xff;
+ regs->ip += insn_len;
+
return true;
}
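
A short sketch of the JIT-side counterpart, assuming the FIELD_PREP() semantics from <linux/bitfield.h>; the real packing for arena accesses appears further down in this file, and this helper is only illustrative:

	/* Illustrative only: encode one arena-access extable entry. */
	static void bpf_pack_arena_ex_sketch(struct exception_table_entry *ex,
					     u32 insn_len, u32 dst_pt_off,
					     u32 arena_pt_off, u16 arena_off)
	{
		ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, insn_len) |
			    FIELD_PREP(FIXUP_REG_MASK, dst_pt_off) |	/* or DONT_CLEAR for stores */
			    FIELD_PREP(FIXUP_ARENA_REG_MASK, arena_pt_off) |
			    FIXUP_ARENA_ACCESS;
		ex->data  = EX_TYPE_BPF |
			    FIELD_PREP(DATA_ARENA_OFFSET_MASK, arena_off);
	}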
@@ -2057,19 +2136,27 @@ st: if (is_imm8(insn->off))
case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_W:
case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
start_of_ldx = prog;
- if (BPF_CLASS(insn->code) == BPF_LDX)
- emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
- else
+ if (BPF_CLASS(insn->code) == BPF_LDX) {
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32SX)
+ emit_ldsx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ else
+ emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ } else {
emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ }
populate_extable:
{
struct exception_table_entry *ex;
u8 *_insn = image + proglen + (start_of_ldx - temp);
+ u32 arena_reg, fixup_reg;
s64 delta;
if (!bpf_prog->aux->extable)
@@ -2089,8 +2176,29 @@ populate_extable:
ex->data = EX_TYPE_BPF;
- ex->fixup = (prog - start_of_ldx) |
- ((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
+ /*
+ * src_reg/dst_reg holds the address in the arena region with upper
+ * 32-bits being zero because of a preceding addr_space_cast(r<n>,
+ * 0x0, 0x1) instruction. This address is adjusted with the addition
+ * of arena_vm_start (see the implementation of BPF_PROBE_MEM32 and
+ * BPF_PROBE_ATOMIC) before being used for the memory access. Pass
+ * the reg holding the unmodified 32-bit address to
+ * ex_handler_bpf().
+ */
+ if (BPF_CLASS(insn->code) == BPF_LDX) {
+ arena_reg = reg2pt_regs[src_reg];
+ fixup_reg = reg2pt_regs[dst_reg];
+ } else {
+ arena_reg = reg2pt_regs[dst_reg];
+ fixup_reg = DONT_CLEAR;
+ }
+
+ ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
+ FIELD_PREP(FIXUP_ARENA_REG_MASK, arena_reg) |
+ FIELD_PREP(FIXUP_REG_MASK, fixup_reg);
+ ex->fixup |= FIXUP_ARENA_ACCESS;
+
+ ex->data |= FIELD_PREP(DATA_ARENA_OFFSET_MASK, insn->off);
}
break;
@@ -2208,7 +2316,8 @@ populate_extable:
* End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
* of 4 bytes will be ignored and rbx will be zero inited.
*/
- ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8);
+ ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
+ FIELD_PREP(FIXUP_REG_MASK, reg2pt_regs[dst_reg]);
}
break;
diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index f0d4ff3c20a8..6d6475e3a9bf 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -429,6 +429,7 @@ int pkcs7_verify(struct pkcs7_message *pkcs7,
/* Authattr presence checked in parser */
break;
case VERIFYING_UNSPECIFIED_SIGNATURE:
+ case VERIFYING_BPF_SIGNATURE:
if (pkcs7->data_type != OID_data) {
pr_warn("Invalid unspecified sig (not pkcs7-data)\n");
return -EKEYREJECTED;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cc700925b802..a98c83346134 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -7,6 +7,7 @@
#include <uapi/linux/bpf.h>
#include <uapi/linux/filter.h>
+#include <crypto/sha2.h>
#include <linux/workqueue.h>
#include <linux/file.h>
#include <linux/percpu.h>
@@ -109,6 +110,7 @@ struct bpf_map_ops {
long (*map_pop_elem)(struct bpf_map *map, void *value);
long (*map_peek_elem)(struct bpf_map *map, void *value);
void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu);
+ int (*map_get_hash)(struct bpf_map *map, u32 hash_buf_size, void *hash_buf);
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -206,6 +208,7 @@ enum btf_field_type {
BPF_WORKQUEUE = (1 << 10),
BPF_UPTR = (1 << 11),
BPF_RES_SPIN_LOCK = (1 << 12),
+ BPF_TASK_WORK = (1 << 13),
};
enum bpf_cgroup_storage_type {
@@ -259,6 +262,7 @@ struct btf_record {
int timer_off;
int wq_off;
int refcount_off;
+ int task_work_off;
struct btf_field fields[];
};
@@ -285,9 +289,11 @@ struct bpf_map_owner {
bool xdp_has_frags;
u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE];
const struct btf_type *attach_func_proto;
+ enum bpf_attach_type expected_attach_type;
};
struct bpf_map {
+ u8 sha[SHA256_DIGEST_SIZE];
const struct bpf_map_ops *ops;
struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
@@ -328,6 +334,7 @@ struct bpf_map {
atomic64_t sleepable_refcnt;
s64 __percpu *elem_count;
u64 cookie; /* write-once */
+ char *excl_prog_sha;
};
static inline const char *btf_field_type_name(enum btf_field_type type)
@@ -358,6 +365,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
return "bpf_rb_node";
case BPF_REFCOUNT:
return "bpf_refcount";
+ case BPF_TASK_WORK:
+ return "bpf_task_work";
default:
WARN_ON_ONCE(1);
return "unknown";
@@ -396,6 +405,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
return sizeof(struct bpf_rb_node);
case BPF_REFCOUNT:
return sizeof(struct bpf_refcount);
+ case BPF_TASK_WORK:
+ return sizeof(struct bpf_task_work);
default:
WARN_ON_ONCE(1);
return 0;
@@ -428,6 +439,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
return __alignof__(struct bpf_rb_node);
case BPF_REFCOUNT:
return __alignof__(struct bpf_refcount);
+ case BPF_TASK_WORK:
+ return __alignof__(struct bpf_task_work);
default:
WARN_ON_ONCE(1);
return 0;
@@ -459,6 +472,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
case BPF_UPTR:
+ case BPF_TASK_WORK:
break;
default:
WARN_ON_ONCE(1);
@@ -595,6 +609,7 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
bool lock_src);
void bpf_timer_cancel_and_free(void *timer);
void bpf_wq_cancel_and_free(void *timer);
+void bpf_task_work_cancel_and_free(void *timer);
void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock);
void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
@@ -767,12 +782,15 @@ enum bpf_type_flag {
*/
MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS),
+ /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */
+ DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
- | DYNPTR_TYPE_XDP)
+ | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -1110,7 +1128,7 @@ struct bpf_prog_offload {
*/
#define MAX_BPF_FUNC_REG_ARGS 5
-/* The argument is a structure. */
+/* The argument is a structure or a union. */
#define BTF_FMODEL_STRUCT_ARG BIT(0)
/* The argument is signed. */
@@ -1358,6 +1376,8 @@ enum bpf_dynptr_type {
BPF_DYNPTR_TYPE_SKB,
/* Underlying data is a xdp_buff */
BPF_DYNPTR_TYPE_XDP,
+ /* Points to skb_metadata_end()-skb_metadata_len() */
+ BPF_DYNPTR_TYPE_SKB_META,
};
int bpf_dynptr_check_size(u32 size);
@@ -1619,6 +1639,7 @@ struct bpf_prog_aux {
bool priv_stack_requested;
bool changes_pkt_data;
bool might_sleep;
+ bool kprobe_write_ctx;
u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
struct bpf_arena *arena;
@@ -1628,6 +1649,7 @@ struct bpf_prog_aux {
/* function name for valid attach_btf_id */
const char *attach_func_name;
struct bpf_prog **func;
+ struct bpf_prog_aux *main_prog_aux;
void *jit_data; /* JIT specific data. arch dependent */
struct bpf_jit_poke_descriptor *poke_tab;
struct bpf_kfunc_desc_tab *kfunc_tab;
@@ -1711,7 +1733,10 @@ struct bpf_prog {
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */
- u8 tag[BPF_TAG_SIZE];
+ union {
+ u8 digest[SHA256_DIGEST_SIZE];
+ u8 tag[BPF_TAG_SIZE];
+ };
struct bpf_prog_stats __percpu *stats;
int __percpu *active;
unsigned int (*bpf_func)(const void *ctx,
@@ -1985,6 +2010,7 @@ static inline void bpf_module_put(const void *data, struct module *owner)
module_put(owner);
}
int bpf_struct_ops_link_create(union bpf_attr *attr);
+u32 bpf_struct_ops_id(const void *kdata);
#ifdef CONFIG_NET
/* Define it here to avoid the use of forward declaration */
@@ -2411,6 +2437,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec);
bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b);
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
+void bpf_obj_free_task_work(const struct btf_record *rec, void *obj);
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
@@ -2697,7 +2724,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
u64 flags);
-int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
+int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, bool delete);
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
@@ -2874,6 +2901,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
+void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
@@ -3161,6 +3189,11 @@ static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
}
+
+static inline void bpf_prog_report_arena_violation(bool write, unsigned long addr,
+ unsigned long fault_ip)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
static __always_inline int
@@ -3403,6 +3436,38 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
#endif /* CONFIG_BPF_SYSCALL */
#endif /* defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) */
+#if defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL)
+
+struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags);
+struct bpf_key *bpf_lookup_system_key(u64 id);
+void bpf_key_put(struct bpf_key *bkey);
+int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+ struct bpf_dynptr *sig_p,
+ struct bpf_key *trusted_keyring);
+
+#else
+static inline struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
+{
+ return NULL;
+}
+
+static inline struct bpf_key *bpf_lookup_system_key(u64 id)
+{
+ return NULL;
+}
+
+static inline void bpf_key_put(struct bpf_key *bkey)
+{
+}
+
+static inline int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+ struct bpf_dynptr *sig_p,
+ struct bpf_key *trusted_keyring)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* defined(CONFIG_KEYS) && defined(CONFIG_BPF_SYSCALL) */
+
/* verifier prototypes for helper functions called from eBPF programs */
extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
extern const struct bpf_func_proto bpf_map_update_elem_proto;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 94defa405c85..4c497e839526 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -26,28 +26,6 @@
/* Patch buffer size */
#define INSN_BUF_SIZE 32
-/* Liveness marks, used for registers and spilled-regs (in stack slots).
- * Read marks propagate upwards until they find a write mark; they record that
- * "one of this state's descendants read this reg" (and therefore the reg is
- * relevant for states_equal() checks).
- * Write marks collect downwards and do not propagate; they record that "the
- * straight-line code that reached this state (from its parent) wrote this reg"
- * (and therefore that reads propagated from this state or its descendants
- * should not propagate to its parent).
- * A state with a write mark can receive read marks; it just won't propagate
- * them to its parent, since the write mark is a property, not of the state,
- * but of the link between it and its parent. See mark_reg_read() and
- * mark_stack_slot_read() in kernel/bpf/verifier.c.
- */
-enum bpf_reg_liveness {
- REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
- REG_LIVE_READ32 = 0x1, /* reg was read, so we're sensitive to initial value */
- REG_LIVE_READ64 = 0x2, /* likewise, but full 64-bit content matters */
- REG_LIVE_READ = REG_LIVE_READ32 | REG_LIVE_READ64,
- REG_LIVE_WRITTEN = 0x4, /* reg was written first, screening off later reads */
- REG_LIVE_DONE = 0x8, /* liveness won't be updating this register anymore */
-};
-
#define ITER_PREFIX "bpf_iter_"
enum bpf_iter_state {
@@ -212,8 +190,6 @@ struct bpf_reg_state {
* allowed and has the same effect as bpf_sk_release(sk).
*/
u32 ref_obj_id;
- /* parentage chain for liveness checking */
- struct bpf_reg_state *parent;
/* Inside the callee two registers can be both PTR_TO_STACK like
* R1=fp-8 and R2=fp-8, but one of them points to this function stack
* while another to the caller's stack. To differentiate them 'frameno'
@@ -226,7 +202,6 @@ struct bpf_reg_state {
* patching which only happens after main verification finished.
*/
s32 subreg_def;
- enum bpf_reg_liveness live;
/* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */
bool precise;
};
@@ -445,6 +420,7 @@ struct bpf_verifier_state {
bool speculative;
bool in_sleepable;
+ bool cleaned;
/* first and last insn idx of this verifier state */
u32 first_insn_idx;
@@ -665,6 +641,7 @@ struct bpf_subprog_info {
/* 'start' has to be the first field otherwise find_subprog() won't work */
u32 start; /* insn idx of function entry point */
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
+ u32 postorder_start; /* The idx to the env->cfg.insn_postorder */
u16 stack_depth; /* max. stack depth used by this function */
u16 stack_extra;
/* offsets in range [stack_depth .. fastcall_stack_off)
@@ -744,6 +721,8 @@ struct bpf_scc_info {
struct bpf_scc_visit visits[];
};
+struct bpf_liveness;
+
/* single container for all structs
* one verifier_env per bpf_check() call
*/
@@ -794,7 +773,10 @@ struct bpf_verifier_env {
struct {
int *insn_state;
int *insn_stack;
- /* vector of instruction indexes sorted in post-order */
+ /*
+ * vector of instruction indexes sorted in post-order, grouped by subprogram,
+ * see bpf_subprog_info->postorder_start.
+ */
int *insn_postorder;
int cur_stack;
/* current position in the insn_postorder vector */
@@ -842,6 +824,7 @@ struct bpf_verifier_env {
struct bpf_insn insn_buf[INSN_BUF_SIZE];
struct bpf_insn epilogue_buf[INSN_BUF_SIZE];
struct bpf_scc_callchain callchain_buf;
+ struct bpf_liveness *liveness;
/* array of pointers to bpf_scc_info indexed by SCC id */
struct bpf_scc_info **scc_info;
u32 scc_cnt;
@@ -875,13 +858,15 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env,
#define verifier_bug_if(cond, env, fmt, args...) \
({ \
bool __cond = (cond); \
- if (unlikely(__cond)) { \
- BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args); \
- bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args); \
- } \
+ if (unlikely(__cond)) \
+ verifier_bug(env, fmt " (" #cond ")", ##args); \
(__cond); \
})
-#define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args)
+#define verifier_bug(env, fmt, args...) \
+ ({ \
+ BPF_WARN_ONCE(1, "verifier bug: " fmt "\n", ##args); \
+ bpf_log(&env->log, "verifier bug: " fmt "\n", ##args); \
+ })
static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
@@ -962,6 +947,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
case BPF_PROG_TYPE_STRUCT_OPS:
return prog->aux->jits_use_priv_stack;
case BPF_PROG_TYPE_LSM:
+ case BPF_PROG_TYPE_SYSCALL:
return false;
default:
return true;
@@ -1062,4 +1048,21 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate,
u32 frameno);
+struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off);
+int bpf_jmp_offset(struct bpf_insn *insn);
+int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]);
+void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask);
+bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx);
+
+int bpf_stack_liveness_init(struct bpf_verifier_env *env);
+void bpf_stack_liveness_free(struct bpf_verifier_env *env);
+int bpf_update_live_stack(struct bpf_verifier_env *env);
+int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frameno, u32 insn_idx, u64 mask);
+void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frameno, u64 mask);
+int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx);
+int bpf_commit_stack_write_marks(struct bpf_verifier_env *env);
+int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st);
+bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi);
+void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env);
+
#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 9eda6b113f9b..f06976ffb63f 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -86,7 +86,7 @@
* as to avoid issues such as the compiler inlining or eliding either a static
* kfunc, or a global kfunc in an LTO build.
*/
-#define __bpf_kfunc __used __retain noinline
+#define __bpf_kfunc __used __retain __noclone noinline
#define __bpf_kfunc_start_defs() \
__diag_push(); \
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7e292bdff2c1..6ed477338b16 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -656,6 +656,7 @@ static inline void cgroup_kthread_ready(void)
}
void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen);
+struct cgroup *__cgroup_get_from_id(u64 id);
struct cgroup *cgroup_get_from_id(u64 id);
#else /* !CONFIG_CGROUPS */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1e7fd3ee759e..f5c859b8131a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -78,6 +78,9 @@ struct ctl_table_header;
/* unused opcode to mark special atomic instruction */
#define BPF_PROBE_ATOMIC 0xe0
+/* unused opcode to mark special ldsx instruction. Same as BPF_NOSPEC */
+#define BPF_PROBE_MEM32SX 0xc0
+
/* unused opcode to mark call to interpreter with arguments */
#define BPF_CALL_ARGS 0xe0
@@ -997,12 +1000,6 @@ static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
return prog->len * sizeof(struct bpf_insn);
}
-static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog)
-{
- return round_up(bpf_prog_insn_size(prog) +
- sizeof(__be64) + 1, SHA1_BLOCK_SIZE);
-}
-
static inline unsigned int bpf_prog_size(unsigned int proglen)
{
return max(sizeof(struct bpf_prog),
@@ -1296,7 +1293,7 @@ void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
u32 pass, void *image)
{
- pr_err("flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n", flen,
+ pr_err("flen=%u proglen=%u pass=%u image=%p from=%s pid=%d\n", flen,
proglen, pass, image, current->comm, task_pid_nr(current));
if (image)
@@ -1784,6 +1781,7 @@ int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
void *buf, unsigned long len, bool flush);
+void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset);
#else /* CONFIG_NET */
static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
void *to, u32 len)
@@ -1818,6 +1816,11 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi
unsigned long len, bool flush)
{
}
+
+static inline void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
#endif /* CONFIG_NET */
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8f346c847ee5..f67f96711f0d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -962,6 +962,20 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
preempt_enable_notrace();
}
+static __always_inline void rcu_read_lock_dont_migrate(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RCU))
+ migrate_disable();
+ rcu_read_lock();
+}
+
+static inline void rcu_read_unlock_migrate(void)
+{
+ rcu_read_unlock();
+ if (IS_ENABLED(CONFIG_PREEMPT_RCU))
+ migrate_enable();
+}
+
/**
* RCU_INIT_POINTER() - initialize an RCU protected pointer
* @p: The pointer to be initialized.
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index 57ed3035cc30..c52b862dad45 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -51,9 +51,15 @@ struct tnum tnum_xor(struct tnum a, struct tnum b);
/* Multiply two tnums, return @a * @b */
struct tnum tnum_mul(struct tnum a, struct tnum b);
+/* Return true if @a and @b agree on all bits known in both, i.e. they can represent a common value */
+bool tnum_overlap(struct tnum a, struct tnum b);
+
/* Return a tnum representing numbers satisfying both @a and @b */
struct tnum tnum_intersect(struct tnum a, struct tnum b);
+/* Return a tnum representing numbers satisfying either @a or @b */
+struct tnum tnum_union(struct tnum a, struct tnum b);
+
/* Return @a with all but the lowest @size bytes cleared */
struct tnum tnum_cast(struct tnum a, u8 size);
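
Only the prototypes are added here; as a hedged illustration of what the two new operations compute (consistent with the comments above, though the in-tree kernel/bpf/tnum.c implementation may differ in detail):

	/* Sketch: a result bit is known only if it is known and equal in both inputs. */
	static struct tnum tnum_union_sketch(struct tnum a, struct tnum b)
	{
		u64 mu = a.mask | b.mask | (a.value ^ b.value);

		return (struct tnum){ .value = a.value & b.value & ~mu, .mask = mu };
	}

	/* Sketch: true iff the bits known in both inputs agree, i.e. some value satisfies both. */
	static bool tnum_overlap_sketch(struct tnum a, struct tnum b)
	{
		return ((a.value ^ b.value) & ~(a.mask | b.mask)) == 0;
	}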
diff --git a/include/linux/verification.h b/include/linux/verification.h
index 4f3022d081c3..dec7f2beabfd 100644
--- a/include/linux/verification.h
+++ b/include/linux/verification.h
@@ -36,6 +36,7 @@ enum key_being_used_for {
VERIFYING_KEY_SIGNATURE,
VERIFYING_KEY_SELF_SIGNATURE,
VERIFYING_UNSPECIFIED_SIGNATURE,
+ VERIFYING_BPF_SIGNATURE,
NR__KEY_BEING_USED_FOR
};
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
diff --git a/include/net/xdp.h b/include/net/xdp.h
index b40f1f96cb11..f288c348a6c1 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -115,6 +115,11 @@ static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp)
xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
}
+static __always_inline void xdp_buff_clear_frag_pfmemalloc(struct xdp_buff *xdp)
+{
+ xdp->flags &= ~XDP_FLAGS_FRAGS_PF_MEMALLOC;
+}
+
static __always_inline void
xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq)
{
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 513c8e9704f6..4f2d3268a676 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -160,13 +160,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
return ret;
}
-static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
{
- struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
+ struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
list_del(&xskb->list_node);
}
+static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
+{
+ struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
+ struct xdp_buff_xsk *frag;
+
+ frag = list_first_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
+ list_node);
+ return &frag->xdp;
+}
+
static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
{
struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
@@ -389,8 +399,13 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
return NULL;
}
-static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
+{
+}
+
+static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
{
+ return NULL;
}
static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 233de8677382..ae83d8649ef1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1522,6 +1522,12 @@ union bpf_attr {
* If provided, map_flags should have BPF_F_TOKEN_FD flag set.
*/
__s32 map_token_fd;
+
+ /* Hash of the program that has exclusive access to the map.
+ */
+ __aligned_u64 excl_prog_hash;
+ /* Size of the passed excl_prog_hash. */
+ __u32 excl_prog_hash_size;
};
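
A hedged userspace sketch of how these two fields might be filled in through the raw bpf(2) syscall when creating a map that only one program (identified by its SHA-256) may use; the hash itself would normally come from the signing tooling, and the map parameters below are placeholders:

	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	static int map_create_exclusive(const unsigned char sha256[32])
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.map_type    = BPF_MAP_TYPE_ARRAY;
		attr.key_size    = 4;
		attr.value_size  = 64;
		attr.max_entries = 1;
		attr.excl_prog_hash      = (__u64)(unsigned long)sha256;
		attr.excl_prog_hash_size = 32;	/* SHA-256 digest size */

		return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	}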
struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
@@ -1605,6 +1611,16 @@ union bpf_attr {
* continuous.
*/
__u32 fd_array_cnt;
+ /* Pointer to a buffer containing the signature of the BPF
+ * program.
+ */
+ __aligned_u64 signature;
+ /* Size of the signature buffer in bytes. */
+ __u32 signature_size;
+ /* ID of the kernel keyring to be used for signature
+ * verification.
+ */
+ __s32 keyring_id;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -6666,6 +6682,8 @@ struct bpf_map_info {
__u32 btf_value_type_id;
__u32 btf_vmlinux_id;
__u64 map_extra;
+ __aligned_u64 hash;
+ __u32 hash_size;
} __attribute__((aligned(8)));
struct bpf_btf_info {
@@ -7418,6 +7436,10 @@ struct bpf_timer {
__u64 __opaque[2];
} __attribute__((aligned(8)));
+struct bpf_task_work {
+ __u64 __opaque;
+} __attribute__((aligned(8)));
+
struct bpf_wq {
__u64 __opaque[2];
} __attribute__((aligned(8)));
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index 17067dcb4386..eb3de35734f0 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -3,7 +3,7 @@
# BPF interpreter that, for example, classic socket filters depend on.
config BPF
bool
- select CRYPTO_LIB_SHA1
+ select CRYPTO_LIB_SHA256
# Used by archs to tell that they support BPF JIT compiler plus which
# flavour. Only one of the two can be selected for a specific arch since
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index f6cf8c2af5f7..7fd0badfacb1 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 5b37753799d2..1074ac4459f2 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -633,3 +633,33 @@ static int __init kfunc_init(void)
return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
late_initcall(kfunc_init);
+
+void bpf_prog_report_arena_violation(bool write, unsigned long addr, unsigned long fault_ip)
+{
+ struct bpf_stream_stage ss;
+ struct bpf_prog *prog;
+ u64 user_vm_start;
+
+ /*
+ * The RCU read lock is held to safely traverse the latch tree, but we
+ * don't need its protection when accessing the prog, since it will not
+ * disappear while we are handling the fault.
+ */
+ rcu_read_lock();
+ prog = bpf_prog_ksym_find(fault_ip);
+ rcu_read_unlock();
+ if (!prog)
+ return;
+
+ /* Use main prog for stream access */
+ prog = prog->aux->main_prog_aux->prog;
+
+ user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
+ addr += clear_lo32(user_vm_start);
+
+ bpf_stream_stage(ss, prog, BPF_STDERR, ({
+ bpf_stream_printk(ss, "ERROR: Arena %s access at unmapped address 0x%lx\n",
+ write ? "WRITE" : "READ", addr);
+ bpf_stream_dump_stack(ss);
+ }));
+}
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 3d080916faf9..80b1765a3159 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -12,6 +12,7 @@
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
+#include <crypto/sha2.h>
#include "map_in_map.h"
@@ -174,6 +175,17 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
return array->value + (u64)array->elem_size * (index & array->index_mask);
}
+static int array_map_get_hash(struct bpf_map *map, u32 hash_buf_size,
+ void *hash_buf)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+ sha256(array->value, (u64)array->elem_size * array->map.max_entries,
+ hash_buf);
+ memcpy(array->map.sha, hash_buf, sizeof(array->map.sha));
+ return 0;
+}
+
static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
u32 off)
{
@@ -431,7 +443,7 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
return (void *)round_down((unsigned long)array, PAGE_SIZE);
}
-static void array_map_free_timers_wq(struct bpf_map *map)
+static void array_map_free_internal_structs(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
@@ -439,12 +451,14 @@ static void array_map_free_timers_wq(struct bpf_map *map)
/* We don't reset or free fields other than timer and workqueue
* on uref dropping to zero.
*/
- if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) {
+ if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
for (i = 0; i < array->map.max_entries; i++) {
if (btf_record_has_field(map->record, BPF_TIMER))
bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
if (btf_record_has_field(map->record, BPF_WORKQUEUE))
bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i));
+ if (btf_record_has_field(map->record, BPF_TASK_WORK))
+ bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i));
}
}
}
@@ -783,7 +797,7 @@ const struct bpf_map_ops array_map_ops = {
.map_alloc = array_map_alloc,
.map_free = array_map_free,
.map_get_next_key = array_map_get_next_key,
- .map_release_uref = array_map_free_timers_wq,
+ .map_release_uref = array_map_free_internal_structs,
.map_lookup_elem = array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
@@ -800,6 +814,7 @@ const struct bpf_map_ops array_map_ops = {
.map_mem_usage = array_map_mem_usage,
.map_btf_id = &array_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
+ .map_get_hash = &array_map_get_hash,
};
const struct bpf_map_ops percpu_array_map_ops = {
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index 148da8f7ff36..0687a760974a 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -45,8 +45,7 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
struct bpf_local_storage *local_storage;
- migrate_disable();
- rcu_read_lock();
+ rcu_read_lock_dont_migrate();
local_storage = rcu_dereference(cgroup->bpf_cgrp_storage);
if (!local_storage)
goto out;
@@ -55,8 +54,7 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
bpf_local_storage_destroy(local_storage);
bpf_cgrp_storage_unlock();
out:
- rcu_read_unlock();
- migrate_enable();
+ rcu_read_unlock_migrate();
}
static struct bpf_local_storage_data *
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 15a3eb9b02d9..e54cce2b9175 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -62,8 +62,7 @@ void bpf_inode_storage_free(struct inode *inode)
if (!bsb)
return;
- migrate_disable();
- rcu_read_lock();
+ rcu_read_lock_dont_migrate();
local_storage = rcu_dereference(bsb->storage);
if (!local_storage)
@@ -71,8 +70,7 @@ void bpf_inode_storage_free(struct inode *inode)
bpf_local_storage_destroy(local_storage);
out:
- rcu_read_unlock();
- migrate_enable();
+ rcu_read_unlock_migrate();
}
static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 0cbcae727079..6ac35430c573 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -705,13 +705,11 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
migrate_enable();
rcu_read_unlock_trace();
} else {
- rcu_read_lock();
- migrate_disable();
+ rcu_read_lock_dont_migrate();
old_run_ctx = bpf_set_run_ctx(&run_ctx);
ret = bpf_prog_run(prog, ctx);
bpf_reset_run_ctx(old_run_ctx);
- migrate_enable();
- rcu_read_unlock();
+ rcu_read_unlock_migrate();
}
/* bpf program can only return 0 or 1:
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index 2d6e1c98d8ad..e7a2fc60523f 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -19,14 +19,6 @@
#define LOCAL_PENDING_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING)
#define IS_LOCAL_LIST_TYPE(t) ((t) >= BPF_LOCAL_LIST_T_OFFSET)
-static int get_next_cpu(int cpu)
-{
- cpu = cpumask_next(cpu, cpu_possible_mask);
- if (cpu >= nr_cpu_ids)
- cpu = cpumask_first(cpu_possible_mask);
- return cpu;
-}
-
/* Local list helpers */
static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l)
{
@@ -482,7 +474,7 @@ static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
raw_spin_unlock_irqrestore(&steal_loc_l->lock, flags);
- steal = get_next_cpu(steal);
+ steal = cpumask_next_wrap(steal, cpu_possible_mask);
} while (!node && steal != first_steal);
loc_l->next_steal = steal;
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 687a3e9c76f5..a41e6730edcf 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -1174,6 +1174,18 @@ void bpf_struct_ops_put(const void *kdata)
bpf_map_put(&st_map->map);
}
+u32 bpf_struct_ops_id(const void *kdata)
+{
+ struct bpf_struct_ops_value *kvalue;
+ struct bpf_struct_ops_map *st_map;
+
+ kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
+ st_map = container_of(kvalue, struct bpf_struct_ops_map, kvalue);
+
+ return st_map->map.id;
+}
+EXPORT_SYMBOL_GPL(bpf_struct_ops_id);
+
static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 1109475953c0..a1dc1bf0848a 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -70,8 +70,7 @@ void bpf_task_storage_free(struct task_struct *task)
{
struct bpf_local_storage *local_storage;
- migrate_disable();
- rcu_read_lock();
+ rcu_read_lock_dont_migrate();
local_storage = rcu_dereference(task->bpf_storage);
if (!local_storage)
@@ -81,8 +80,7 @@ void bpf_task_storage_free(struct task_struct *task)
bpf_local_storage_destroy(local_storage);
bpf_task_storage_unlock();
out:
- rcu_read_unlock();
- migrate_enable();
+ rcu_read_unlock_migrate();
}
static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 64739308902f..0de8fc8a0e0b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3478,60 +3478,45 @@ btf_find_graph_root(const struct btf *btf, const struct btf_type *pt,
return BTF_FIELD_FOUND;
}
-#define field_mask_test_name(field_type, field_type_str) \
- if (field_mask & field_type && !strcmp(name, field_type_str)) { \
- type = field_type; \
- goto end; \
- }
-
static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_type,
- u32 field_mask, u32 *seen_mask,
- int *align, int *sz)
-{
- int type = 0;
+ u32 field_mask, u32 *seen_mask, int *align, int *sz)
+{
+ const struct {
+ enum btf_field_type type;
+ const char *const name;
+ const bool is_unique;
+ } field_types[] = {
+ { BPF_SPIN_LOCK, "bpf_spin_lock", true },
+ { BPF_RES_SPIN_LOCK, "bpf_res_spin_lock", true },
+ { BPF_TIMER, "bpf_timer", true },
+ { BPF_WORKQUEUE, "bpf_wq", true },
+ { BPF_TASK_WORK, "bpf_task_work", true },
+ { BPF_LIST_HEAD, "bpf_list_head", false },
+ { BPF_LIST_NODE, "bpf_list_node", false },
+ { BPF_RB_ROOT, "bpf_rb_root", false },
+ { BPF_RB_NODE, "bpf_rb_node", false },
+ { BPF_REFCOUNT, "bpf_refcount", false },
+ };
+ int type = 0, i;
const char *name = __btf_name_by_offset(btf, var_type->name_off);
-
- if (field_mask & BPF_SPIN_LOCK) {
- if (!strcmp(name, "bpf_spin_lock")) {
- if (*seen_mask & BPF_SPIN_LOCK)
- return -E2BIG;
- *seen_mask |= BPF_SPIN_LOCK;
- type = BPF_SPIN_LOCK;
- goto end;
- }
- }
- if (field_mask & BPF_RES_SPIN_LOCK) {
- if (!strcmp(name, "bpf_res_spin_lock")) {
- if (*seen_mask & BPF_RES_SPIN_LOCK)
- return -E2BIG;
- *seen_mask |= BPF_RES_SPIN_LOCK;
- type = BPF_RES_SPIN_LOCK;
- goto end;
- }
- }
- if (field_mask & BPF_TIMER) {
- if (!strcmp(name, "bpf_timer")) {
- if (*seen_mask & BPF_TIMER)
- return -E2BIG;
- *seen_mask |= BPF_TIMER;
- type = BPF_TIMER;
- goto end;
- }
- }
- if (field_mask & BPF_WORKQUEUE) {
- if (!strcmp(name, "bpf_wq")) {
- if (*seen_mask & BPF_WORKQUEUE)
+ const char *field_type_name;
+ enum btf_field_type field_type;
+ bool is_unique;
+
+ for (i = 0; i < ARRAY_SIZE(field_types); ++i) {
+ field_type = field_types[i].type;
+ field_type_name = field_types[i].name;
+ is_unique = field_types[i].is_unique;
+ if (!(field_mask & field_type) || strcmp(name, field_type_name))
+ continue;
+ if (is_unique) {
+ if (*seen_mask & field_type)
return -E2BIG;
- *seen_mask |= BPF_WORKQUEUE;
- type = BPF_WORKQUEUE;
- goto end;
+ *seen_mask |= field_type;
}
+ type = field_type;
+ goto end;
}
- field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
- field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
- field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root");
- field_mask_test_name(BPF_RB_NODE, "bpf_rb_node");
- field_mask_test_name(BPF_REFCOUNT, "bpf_refcount");
/* Only return BPF_KPTR when all other types with matchable names fail */
if (field_mask & (BPF_KPTR | BPF_UPTR) && !__btf_type_is_struct(var_type)) {
@@ -3545,8 +3530,6 @@ end:
return type;
}
-#undef field_mask_test_name
-
/* Repeat a number of fields for a specified number of times.
*
* Copy the fields starting from the first field and repeat them for
@@ -3693,6 +3676,7 @@ static int btf_find_field_one(const struct btf *btf,
case BPF_LIST_NODE:
case BPF_RB_NODE:
case BPF_REFCOUNT:
+ case BPF_TASK_WORK:
ret = btf_find_struct(btf, var_type, off, sz, field_type,
info_cnt ? &info[0] : &tmp);
if (ret < 0)
@@ -3985,6 +3969,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
rec->timer_off = -EINVAL;
rec->wq_off = -EINVAL;
rec->refcount_off = -EINVAL;
+ rec->task_work_off = -EINVAL;
for (i = 0; i < cnt; i++) {
field_type_size = btf_field_type_size(info_arr[i].type);
if (info_arr[i].off + field_type_size > value_size) {
@@ -4024,6 +4009,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
/* Cache offset for faster lookup at runtime */
rec->wq_off = rec->fields[i].offset;
break;
+ case BPF_TASK_WORK:
+ WARN_ON_ONCE(rec->task_work_off >= 0);
+ rec->task_work_off = rec->fields[i].offset;
+ break;
case BPF_REFCOUNT:
WARN_ON_ONCE(rec->refcount_off >= 0);
/* Cache offset for faster lookup at runtime */
@@ -6762,7 +6751,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
/* skip modifiers */
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
+ if (btf_type_is_small_int(t) || btf_is_any_enum(t) || btf_type_is_struct(t))
/* accessing a scalar */
return true;
if (!btf_type_is_ptr(t)) {
@@ -7334,7 +7323,7 @@ static int __get_type_size(struct btf *btf, u32 btf_id,
if (btf_type_is_ptr(t))
/* kernel size of pointer. Not BPF's size of pointer*/
return sizeof(void *);
- if (btf_type_is_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t))
+ if (btf_type_is_int(t) || btf_is_any_enum(t) || btf_type_is_struct(t))
return t->size;
return -EINVAL;
}
@@ -7343,7 +7332,7 @@ static u8 __get_type_fmodel_flags(const struct btf_type *t)
{
u8 flags = 0;
- if (__btf_type_is_struct(t))
+ if (btf_type_is_struct(t))
flags |= BTF_FMODEL_STRUCT_ARG;
if (btf_type_is_signed_int(t))
flags |= BTF_FMODEL_SIGNED_ARG;
@@ -7384,7 +7373,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
return -EINVAL;
}
ret = __get_type_size(btf, func->type, &t);
- if (ret < 0 || __btf_type_is_struct(t)) {
+ if (ret < 0 || btf_type_is_struct(t)) {
bpf_log(log,
"The function %s return type %s is unsupported.\n",
tname, btf_type_str(t));
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 180b630279b9..248f517d66d0 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -27,14 +27,15 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
/*
* cgroup bpf destruction makes heavy use of work items and there can be a lot
* of concurrent destructions. Use a separate workqueue so that cgroup bpf
- * destruction work items don't end up filling up max_active of system_wq
+ * destruction work items don't end up filling up max_active of system_percpu_wq
* which may lead to deadlock.
*/
static struct workqueue_struct *cgroup_bpf_destroy_wq;
static int __init cgroup_bpf_wq_init(void)
{
- cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1);
+ cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy",
+ WQ_PERCPU, 1);
if (!cgroup_bpf_destroy_wq)
panic("Failed to alloc workqueue for cgroup bpf destroy.\n");
return 0;
@@ -71,8 +72,7 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
u32 func_ret;
run_ctx.retval = retval;
- migrate_disable();
- rcu_read_lock();
+ rcu_read_lock_dont_migrate();
array = rcu_dereference(cgrp->effective[atype]);
item = &array->items[0];
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
@@ -88,8 +88,7 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
item++;
}
bpf_reset_run_ctx(old_run_ctx);
- rcu_read_unlock();
- migrate_enable();
+ rcu_read_unlock_migrate();
return run_ctx.retval;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index e4568d44e827..d595fe512498 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -18,6 +18,7 @@
*/
#include <uapi/linux/btf.h>
+#include <crypto/sha1.h>
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
@@ -38,6 +39,7 @@
#include <linux/bpf_mem_alloc.h>
#include <linux/memcontrol.h>
#include <linux/execmem.h>
+#include <crypto/sha2.h>
#include <asm/barrier.h>
#include <linux/unaligned.h>
@@ -119,6 +121,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
fp->pages = size / PAGE_SIZE;
fp->aux = aux;
+ fp->aux->main_prog_aux = aux;
fp->aux->prog = fp;
fp->jit_requested = ebpf_jit_enabled();
fp->blinding_requested = bpf_jit_blinding_enabled(fp);
@@ -293,28 +296,18 @@ void __bpf_prog_free(struct bpf_prog *fp)
int bpf_prog_calc_tag(struct bpf_prog *fp)
{
- const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
- u32 raw_size = bpf_prog_tag_scratch_size(fp);
- u32 digest[SHA1_DIGEST_WORDS];
- u32 ws[SHA1_WORKSPACE_WORDS];
- u32 i, bsize, psize, blocks;
+ size_t size = bpf_prog_insn_size(fp);
struct bpf_insn *dst;
bool was_ld_map;
- u8 *raw, *todo;
- __be32 *result;
- __be64 *bits;
+ u32 i;
- raw = vmalloc(raw_size);
- if (!raw)
+ dst = vmalloc(size);
+ if (!dst)
return -ENOMEM;
- sha1_init_raw(digest);
- memset(ws, 0, sizeof(ws));
-
/* We need to take out the map fd for the digest calculation
* since they are unstable from user space side.
*/
- dst = (void *)raw;
for (i = 0, was_ld_map = false; i < fp->len; i++) {
dst[i] = fp->insnsi[i];
if (!was_ld_map &&
@@ -334,33 +327,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp)
was_ld_map = false;
}
}
-
- psize = bpf_prog_insn_size(fp);
- memset(&raw[psize], 0, raw_size - psize);
- raw[psize++] = 0x80;
-
- bsize = round_up(psize, SHA1_BLOCK_SIZE);
- blocks = bsize / SHA1_BLOCK_SIZE;
- todo = raw;
- if (bsize - psize >= sizeof(__be64)) {
- bits = (__be64 *)(todo + bsize - sizeof(__be64));
- } else {
- bits = (__be64 *)(todo + bsize + bits_offset);
- blocks++;
- }
- *bits = cpu_to_be64((psize - 1) << 3);
-
- while (blocks--) {
- sha1_transform(digest, todo, ws);
- todo += SHA1_BLOCK_SIZE;
- }
-
- result = (__force __be32 *)digest;
- for (i = 0; i < SHA1_DIGEST_WORDS; i++)
- result[i] = cpu_to_be32(digest[i]);
- memcpy(fp->tag, result, sizeof(fp->tag));
-
- vfree(raw);
+ sha256((u8 *)dst, size, fp->digest);
+ vfree(dst);
return 0;
}
@@ -2393,6 +2361,7 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map,
map->owner->type = prog_type;
map->owner->jited = fp->jited;
map->owner->xdp_has_frags = aux->xdp_has_frags;
+ map->owner->expected_attach_type = fp->expected_attach_type;
map->owner->attach_func_proto = aux->attach_func_proto;
for_each_cgroup_storage_type(i) {
map->owner->storage_cookie[i] =
@@ -2404,6 +2373,10 @@ static bool __bpf_prog_map_compatible(struct bpf_map *map,
ret = map->owner->type == prog_type &&
map->owner->jited == fp->jited &&
map->owner->xdp_has_frags == aux->xdp_has_frags;
+ if (ret &&
+ map->map_type == BPF_MAP_TYPE_PROG_ARRAY &&
+ map->owner->expected_attach_type != fp->expected_attach_type)
+ ret = false;
for_each_cgroup_storage_type(i) {
if (!ret)
break;
@@ -3329,9 +3302,8 @@ static bool find_from_stack_cb(void *cookie, u64 ip, u64 sp, u64 bp)
rcu_read_unlock();
if (!prog)
return true;
- if (bpf_is_subprog(prog))
- return true;
- ctxp->prog = prog;
+ /* Make sure we return the main prog if we found a subprog */
+ ctxp->prog = prog->aux->main_prog_aux->prog;
return false;
}
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index c46360b27871..703e5df1f4ef 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -550,7 +550,7 @@ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
old_rcpu = unrcu_pointer(xchg(&cmap->cpu_map[key_cpu], RCU_INITIALIZER(rcpu)));
if (old_rcpu) {
INIT_RCU_WORK(&old_rcpu->free_work, __cpu_map_entry_free);
- queue_rcu_work(system_wq, &old_rcpu->free_work);
+ queue_rcu_work(system_percpu_wq, &old_rcpu->free_work);
}
}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 482d284a1553..2625601de76e 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -865,7 +865,7 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
struct bpf_dtab_netdev *dev;
dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev),
- GFP_NOWAIT | __GFP_NOWARN,
+ GFP_NOWAIT,
dtab->map.numa_node);
if (!dev)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 71f9931ac64c..c2fcd0cd51e5 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -215,7 +215,20 @@ static bool htab_has_extra_elems(struct bpf_htab *htab)
return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
}
-static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
+static void htab_free_internal_structs(struct bpf_htab *htab, struct htab_elem *elem)
+{
+ if (btf_record_has_field(htab->map.record, BPF_TIMER))
+ bpf_obj_free_timer(htab->map.record,
+ htab_elem_value(elem, htab->map.key_size));
+ if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
+ bpf_obj_free_workqueue(htab->map.record,
+ htab_elem_value(elem, htab->map.key_size));
+ if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
+ bpf_obj_free_task_work(htab->map.record,
+ htab_elem_value(elem, htab->map.key_size));
+}
+
+static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
{
u32 num_entries = htab->map.max_entries;
int i;
@@ -227,12 +240,7 @@ static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab)
struct htab_elem *elem;
elem = get_htab_elem(htab, i);
- if (btf_record_has_field(htab->map.record, BPF_TIMER))
- bpf_obj_free_timer(htab->map.record,
- htab_elem_value(elem, htab->map.key_size));
- if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
- bpf_obj_free_workqueue(htab->map.record,
- htab_elem_value(elem, htab->map.key_size));
+ htab_free_internal_structs(htab, elem);
cond_resched();
}
}
@@ -1490,7 +1498,7 @@ static void delete_all_elements(struct bpf_htab *htab)
}
}
-static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
+static void htab_free_malloced_internal_structs(struct bpf_htab *htab)
{
int i;
@@ -1502,28 +1510,23 @@ static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab)
hlist_nulls_for_each_entry(l, n, head, hash_node) {
/* We only free timer on uref dropping to zero */
- if (btf_record_has_field(htab->map.record, BPF_TIMER))
- bpf_obj_free_timer(htab->map.record,
- htab_elem_value(l, htab->map.key_size));
- if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
- bpf_obj_free_workqueue(htab->map.record,
- htab_elem_value(l, htab->map.key_size));
+ htab_free_internal_structs(htab, l);
}
cond_resched_rcu();
}
rcu_read_unlock();
}
-static void htab_map_free_timers_and_wq(struct bpf_map *map)
+static void htab_map_free_internal_structs(struct bpf_map *map)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
/* We only free timer and workqueue on uref dropping to zero */
- if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) {
+ if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
if (!htab_is_prealloc(htab))
- htab_free_malloced_timers_and_wq(htab);
+ htab_free_malloced_internal_structs(htab);
else
- htab_free_prealloced_timers_and_wq(htab);
+ htab_free_prealloced_internal_structs(htab);
}
}
@@ -2255,7 +2258,7 @@ const struct bpf_map_ops htab_map_ops = {
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
- .map_release_uref = htab_map_free_timers_and_wq,
+ .map_release_uref = htab_map_free_internal_structs,
.map_lookup_elem = htab_map_lookup_elem,
.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
.map_update_elem = htab_map_update_elem,
@@ -2276,7 +2279,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_alloc = htab_map_alloc,
.map_free = htab_map_free,
.map_get_next_key = htab_map_get_next_key,
- .map_release_uref = htab_map_free_timers_and_wq,
+ .map_release_uref = htab_map_free_internal_structs,
.map_lookup_elem = htab_lru_map_lookup_elem,
.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 8af62cb243d9..c9fab9a356df 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -25,6 +25,9 @@
#include <linux/kasan.h>
#include <linux/bpf_verifier.h>
#include <linux/uaccess.h>
+#include <linux/verification.h>
+#include <linux/task_work.h>
+#include <linux/irq_work.h>
#include "../../lib/kstrtox.h"
@@ -774,11 +777,9 @@ int bpf_try_get_buffers(struct bpf_bprintf_buffers **bufs)
{
int nest_level;
- preempt_disable();
nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
this_cpu_dec(bpf_bprintf_nest_level);
- preempt_enable();
return -EBUSY;
}
*bufs = this_cpu_ptr(&bpf_bprintf_bufs[nest_level - 1]);
@@ -791,7 +792,6 @@ void bpf_put_buffers(void)
if (WARN_ON_ONCE(this_cpu_read(bpf_bprintf_nest_level) == 0))
return;
this_cpu_dec(bpf_bprintf_nest_level);
- preempt_enable();
}
void bpf_bprintf_cleanup(struct bpf_bprintf_data *data)
@@ -1084,6 +1084,17 @@ const struct bpf_func_proto bpf_snprintf_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
+static void *map_key_from_value(struct bpf_map *map, void *value, u32 *arr_idx)
+{
+ if (map->map_type == BPF_MAP_TYPE_ARRAY) {
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+ *arr_idx = ((char *)value - array->value) / array->elem_size;
+ return arr_idx;
+ }
+ return (void *)value - round_up(map->key_size, 8);
+}
+
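The helper above recovers a map key from a value pointer: for arrays the key is the element index recomputed from the pointer offset, for hash/LRU maps the key is stored directly in front of the value, padded up to 8 bytes. A minimal sketch of that layout assumption (field sizes and names are made up for illustration, this is not kernel code):

#include <stdint.h>

/* Toy model of a hash-map element as map_key_from_value() assumes it:
 * key area first, rounded up to an 8-byte boundary, then the value. */
struct toy_elem {
	uint32_t key;	/* key_size == 4 */
	uint32_t pad;	/* round_up(4, 8) adds 4 bytes of padding */
	uint64_t value;
};

static void *toy_key_from_value(void *value, uint32_t key_size)
{
	uint32_t key_area = (key_size + 7) & ~7u;	/* round_up(key_size, 8) */

	return (char *)value - key_area;
}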
struct bpf_async_cb {
struct bpf_map *map;
struct bpf_prog *prog;
@@ -1166,15 +1177,8 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
* bpf_map_delete_elem() on the same timer.
*/
this_cpu_write(hrtimer_running, t);
- if (map->map_type == BPF_MAP_TYPE_ARRAY) {
- struct bpf_array *array = container_of(map, struct bpf_array, map);
- /* compute the key */
- idx = ((char *)value - array->value) / array->elem_size;
- key = &idx;
- } else { /* hash or lru */
- key = value - round_up(map->key_size, 8);
- }
+ key = map_key_from_value(map, value, &idx);
callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
/* The verifier checked that return value is zero. */
@@ -1200,15 +1204,7 @@ static void bpf_wq_work(struct work_struct *work)
if (!callback_fn)
return;
- if (map->map_type == BPF_MAP_TYPE_ARRAY) {
- struct bpf_array *array = container_of(map, struct bpf_array, map);
-
- /* compute the key */
- idx = ((char *)value - array->value) / array->elem_size;
- key = &idx;
- } else { /* hash or lru */
- key = value - round_up(map->key_size, 8);
- }
+ key = map_key_from_value(map, value, &idx);
rcu_read_lock_trace();
migrate_disable();
@@ -1600,7 +1596,7 @@ void bpf_timer_cancel_and_free(void *val)
* timer callback.
*/
if (this_cpu_read(hrtimer_running)) {
- queue_work(system_unbound_wq, &t->cb.delete_work);
+ queue_work(system_dfl_wq, &t->cb.delete_work);
return;
}
@@ -1613,7 +1609,7 @@ void bpf_timer_cancel_and_free(void *val)
if (hrtimer_try_to_cancel(&t->timer) >= 0)
kfree_rcu(t, cb.rcu);
else
- queue_work(system_unbound_wq, &t->cb.delete_work);
+ queue_work(system_dfl_wq, &t->cb.delete_work);
} else {
bpf_timer_delete_work(&t->cb.delete_work);
}
@@ -1783,6 +1779,9 @@ static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *s
return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
case BPF_DYNPTR_TYPE_XDP:
return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
+ case BPF_DYNPTR_TYPE_SKB_META:
+ memmove(dst, bpf_skb_meta_pointer(src->data, src->offset + offset), len);
+ return 0;
default:
WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
return -EFAULT;
@@ -1839,6 +1838,11 @@ int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
if (flags)
return -EINVAL;
return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
+ case BPF_DYNPTR_TYPE_SKB_META:
+ if (flags)
+ return -EINVAL;
+ memmove(bpf_skb_meta_pointer(dst->data, dst->offset + offset), src, len);
+ return 0;
default:
WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
return -EFAULT;
@@ -1885,6 +1889,7 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3
return (unsigned long)(ptr->data + ptr->offset + offset);
case BPF_DYNPTR_TYPE_SKB:
case BPF_DYNPTR_TYPE_XDP:
+ case BPF_DYNPTR_TYPE_SKB_META:
/* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
return 0;
default:
@@ -2540,7 +2545,7 @@ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
{
struct cgroup *cgrp;
- cgrp = cgroup_get_from_id(cgid);
+ cgrp = __cgroup_get_from_id(cgid);
if (IS_ERR(cgrp))
return NULL;
return cgrp;
@@ -2713,6 +2718,8 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset,
bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false);
return buffer__opt;
}
+ case BPF_DYNPTR_TYPE_SKB_META:
+ return bpf_skb_meta_pointer(ptr->data, ptr->offset + offset);
default:
WARN_ONCE(true, "unknown dynptr type %d\n", type);
return NULL;
@@ -3344,39 +3351,30 @@ __bpf_kfunc void __bpf_trap(void)
* __get_kernel_nofault instead of plain dereference to make them safe.
*/
-/**
- * bpf_strcmp - Compare two strings
- * @s1__ign: One string
- * @s2__ign: Another string
- *
- * Return:
- * * %0 - Strings are equal
- * * %-1 - @s1__ign is smaller
- * * %1 - @s2__ign is smaller
- * * %-EFAULT - Cannot read one of the strings
- * * %-E2BIG - One of strings is too large
- * * %-ERANGE - One of strings is outside of kernel address space
- */
-__bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign)
+static int __bpf_strcasecmp(const char *s1, const char *s2, bool ignore_case)
{
char c1, c2;
int i;
- if (!copy_from_kernel_nofault_allowed(s1__ign, 1) ||
- !copy_from_kernel_nofault_allowed(s2__ign, 1)) {
+ if (!copy_from_kernel_nofault_allowed(s1, 1) ||
+ !copy_from_kernel_nofault_allowed(s2, 1)) {
return -ERANGE;
}
guard(pagefault)();
for (i = 0; i < XATTR_SIZE_MAX; i++) {
- __get_kernel_nofault(&c1, s1__ign, char, err_out);
- __get_kernel_nofault(&c2, s2__ign, char, err_out);
+ __get_kernel_nofault(&c1, s1, char, err_out);
+ __get_kernel_nofault(&c2, s2, char, err_out);
+ if (ignore_case) {
+ c1 = tolower(c1);
+ c2 = tolower(c2);
+ }
if (c1 != c2)
return c1 < c2 ? -1 : 1;
if (c1 == '\0')
return 0;
- s1__ign++;
- s2__ign++;
+ s1++;
+ s2++;
}
return -E2BIG;
err_out:
@@ -3384,6 +3382,42 @@ err_out:
}
/**
+ * bpf_strcmp - Compare two strings
+ * @s1__ign: One string
+ * @s2__ign: Another string
+ *
+ * Return:
+ * * %0 - Strings are equal
+ * * %-1 - @s1__ign is smaller
+ * * %1 - @s2__ign is smaller
+ * * %-EFAULT - Cannot read one of the strings
+ * * %-E2BIG - One of the strings is too large
+ * * %-ERANGE - One of the strings is outside of the kernel address space
+ */
+__bpf_kfunc int bpf_strcmp(const char *s1__ign, const char *s2__ign)
+{
+ return __bpf_strcasecmp(s1__ign, s2__ign, false);
+}
+
+/**
+ * bpf_strcasecmp - Compare two strings, ignoring the case of the characters
+ * @s1__ign: One string
+ * @s2__ign: Another string
+ *
+ * Return:
+ * * %0 - Strings are equal
+ * * %-1 - @s1__ign is smaller
+ * * %1 - @s2__ign is smaller
+ * * %-EFAULT - Cannot read one of the strings
+ * * %-E2BIG - One of the strings is too large
+ * * %-ERANGE - One of the strings is outside of the kernel address space
+ */
+__bpf_kfunc int bpf_strcasecmp(const char *s1__ign, const char *s2__ign)
+{
+ return __bpf_strcasecmp(s1__ign, s2__ign, true);
+}
+
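As a rough idea of how the new kfunc might be called from BPF C (hedged sketch: the extern declaration, attach point, and comparison string are assumptions for illustration, not taken from this patch):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* Declaration assumed here for brevity; real programs would pick it up
 * from vmlinux.h/BTF once the kernel exports the kfunc. */
extern int bpf_strcasecmp(const char *s1__ign, const char *s2__ign) __ksym;

char LICENSE[] SEC("license") = "GPL";

SEC("tp_btf/task_newtask")	/* attach point chosen only for illustration */
int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags)
{
	/* case-insensitive match against the new task's comm */
	if (bpf_strcasecmp(task->comm, "BASH") == 0)
		bpf_printk("bash created a task");
	return 0;
}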
+/**
* bpf_strnchr - Find a character in a length limited string
* @s__ign: The string to be searched
* @count: The number of characters to be searched
@@ -3712,9 +3746,490 @@ __bpf_kfunc int bpf_strstr(const char *s1__ign, const char *s2__ign)
{
return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX);
}
+#ifdef CONFIG_KEYS
+/**
+ * bpf_lookup_user_key - lookup a key by its serial
+ * @serial: key handle serial number
+ * @flags: lookup-specific flags
+ *
+ * Search a key with a given *serial* and the provided *flags*.
+ * If found, increment the reference count of the key by one, and
+ * return it in the bpf_key structure.
+ *
+ * The bpf_key structure must be passed to bpf_key_put() when done
+ * with it, so that the key reference count is decremented and the
+ * bpf_key structure is freed.
+ *
+ * Permission checks are deferred to the time the key is used by
+ * one of the available key-specific kfuncs.
+ *
+ * Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested
+ * special keyring (e.g. session keyring), if it doesn't yet exist.
+ * Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting
+ * for the key construction, and to retrieve uninstantiated keys (keys
+ * without data attached to them).
+ *
+ * Return: a bpf_key pointer with a valid key pointer if the key is found, a
+ * NULL pointer otherwise.
+ */
+__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
+{
+ key_ref_t key_ref;
+ struct bpf_key *bkey;
+
+ if (flags & ~KEY_LOOKUP_ALL)
+ return NULL;
+
+ /*
+ * Permission check is deferred until the key is used, as the
+ * intent of the caller is unknown here.
+ */
+ key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
+ if (IS_ERR(key_ref))
+ return NULL;
+
+ bkey = kmalloc(sizeof(*bkey), GFP_KERNEL);
+ if (!bkey) {
+ key_put(key_ref_to_ptr(key_ref));
+ return NULL;
+ }
+
+ bkey->key = key_ref_to_ptr(key_ref);
+ bkey->has_ref = true;
+
+ return bkey;
+}
+
+/**
+ * bpf_lookup_system_key - lookup a key by a system-defined ID
+ * @id: key ID
+ *
+ * Obtain a bpf_key structure with a key pointer set to the passed key ID.
+ * The key pointer is marked as invalid, to prevent bpf_key_put() from
+ * attempting to decrement the key reference count on that pointer. The key
+ * pointer set in such way is currently understood only by
+ * verify_pkcs7_signature().
+ *
+ * Set *id* to one of the values defined in include/linux/verification.h:
+ * 0 for the primary keyring (immutable keyring of system keys);
+ * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
+ * (where keys can be added only if they are vouched for by existing keys
+ * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
+ * keyring (primarily used by the integrity subsystem to verify a kexec'ed
+ * kernel image and, possibly, the initramfs signature).
+ *
+ * Return: a bpf_key pointer with an invalid key pointer set from the
+ * pre-determined ID on success, a NULL pointer otherwise
+ */
+__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
+{
+ struct bpf_key *bkey;
+
+ if (system_keyring_id_check(id) < 0)
+ return NULL;
+
+ bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
+ if (!bkey)
+ return NULL;
+
+ bkey->key = (struct key *)(unsigned long)id;
+ bkey->has_ref = false;
+
+ return bkey;
+}
+
+/**
+ * bpf_key_put - decrement key reference count if key is valid and free bpf_key
+ * @bkey: bpf_key structure
+ *
+ * Decrement the reference count of the key inside *bkey*, if the pointer
+ * is valid, and free *bkey*.
+ */
+__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
+{
+ if (bkey->has_ref)
+ key_put(bkey->key);
+
+ kfree(bkey);
+}
+
+/**
+ * bpf_verify_pkcs7_signature - verify a PKCS#7 signature
+ * @data_p: data to verify
+ * @sig_p: signature of the data
+ * @trusted_keyring: keyring with keys trusted for signature verification
+ *
+ * Verify the PKCS#7 signature *sig_p* against the supplied *data_p*
+ * with keys in a keyring referenced by *trusted_keyring*.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+ struct bpf_dynptr *sig_p,
+ struct bpf_key *trusted_keyring)
+{
+#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
+ struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
+ struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
+ const void *data, *sig;
+ u32 data_len, sig_len;
+ int ret;
+
+ if (trusted_keyring->has_ref) {
+ /*
+ * Do the permission check deferred in bpf_lookup_user_key().
+ * See bpf_lookup_user_key() for more details.
+ *
+ * A call to key_task_permission() here would be redundant, as
+ * it is already done by keyring_search() called by
+ * find_asymmetric_key().
+ */
+ ret = key_validate(trusted_keyring->key);
+ if (ret < 0)
+ return ret;
+ }
+
+ data_len = __bpf_dynptr_size(data_ptr);
+ data = __bpf_dynptr_data(data_ptr, data_len);
+ sig_len = __bpf_dynptr_size(sig_ptr);
+ sig = __bpf_dynptr_data(sig_ptr, sig_len);
+
+ return verify_pkcs7_signature(data, data_len, sig, sig_len,
+ trusted_keyring->key,
+ VERIFYING_BPF_SIGNATURE, NULL,
+ NULL);
+#else
+ return -EOPNOTSUPP;
+#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
+}
+#endif /* CONFIG_KEYS */
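A hedged sketch of how a sleepable BPF LSM program could combine these kfuncs. The program type, map layout, and dynptr setup are illustrative assumptions, not taken from this diff; the kfunc externs mirror the signatures above:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
extern void bpf_key_put(struct bpf_key *bkey) __ksym;
extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
				      struct bpf_dynptr *sig_p,
				      struct bpf_key *trusted_keyring) __ksym;

char LICENSE[] SEC("license") = "GPL";

__s32 user_keyring_serial;	/* filled in by user space (e.g. via the skeleton) before load */

struct blob {
	__u8 data[256];
	__u8 sig[256];
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, struct blob);
} input SEC(".maps");

SEC("lsm.s/bpf")	/* sleepable hook, chosen only for illustration */
int BPF_PROG(check, int cmd, union bpf_attr *attr, unsigned int size)
{
	struct bpf_dynptr data_ptr, sig_ptr;
	struct bpf_key *trusted;
	struct blob *b;
	__u32 zero = 0;
	int ret;

	b = bpf_map_lookup_elem(&input, &zero);
	if (!b)
		return 0;

	/* wrap the map-held data and signature in dynptrs */
	bpf_dynptr_from_mem(b->data, sizeof(b->data), 0, &data_ptr);
	bpf_dynptr_from_mem(b->sig, sizeof(b->sig), 0, &sig_ptr);

	trusted = bpf_lookup_user_key(user_keyring_serial, 0);
	if (!trusted)
		return 0;

	ret = bpf_verify_pkcs7_signature(&data_ptr, &sig_ptr, trusted);
	bpf_key_put(trusted);
	return ret;
}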
+
+typedef int (*bpf_task_work_callback_t)(struct bpf_map *map, void *key, void *value);
+
+enum bpf_task_work_state {
+ /* bpf_task_work is ready to be used */
+ BPF_TW_STANDBY = 0,
+ /* irq work scheduling in progress */
+ BPF_TW_PENDING,
+ /* task work scheduling in progress */
+ BPF_TW_SCHEDULING,
+ /* task work is scheduled successfully */
+ BPF_TW_SCHEDULED,
+ /* callback is running */
+ BPF_TW_RUNNING,
+ /* associated BPF map value is deleted */
+ BPF_TW_FREED,
+};
+
+struct bpf_task_work_ctx {
+ enum bpf_task_work_state state;
+ refcount_t refcnt;
+ struct callback_head work;
+ struct irq_work irq_work;
+ /* bpf_prog that schedules task work */
+ struct bpf_prog *prog;
+ /* task for which callback is scheduled */
+ struct task_struct *task;
+ /* the map and map value associated with this context */
+ struct bpf_map *map;
+ void *map_val;
+ enum task_work_notify_mode mode;
+ bpf_task_work_callback_t callback_fn;
+ struct rcu_head rcu;
+} __aligned(8);
+
+/* Actual type for struct bpf_task_work */
+struct bpf_task_work_kern {
+ struct bpf_task_work_ctx *ctx;
+};
+
+static void bpf_task_work_ctx_reset(struct bpf_task_work_ctx *ctx)
+{
+ if (ctx->prog) {
+ bpf_prog_put(ctx->prog);
+ ctx->prog = NULL;
+ }
+ if (ctx->task) {
+ bpf_task_release(ctx->task);
+ ctx->task = NULL;
+ }
+}
+
+static bool bpf_task_work_ctx_tryget(struct bpf_task_work_ctx *ctx)
+{
+ return refcount_inc_not_zero(&ctx->refcnt);
+}
+
+static void bpf_task_work_ctx_put(struct bpf_task_work_ctx *ctx)
+{
+ if (!refcount_dec_and_test(&ctx->refcnt))
+ return;
+
+ bpf_task_work_ctx_reset(ctx);
+
+ /* bpf_mem_free expects migration to be disabled */
+ migrate_disable();
+ bpf_mem_free(&bpf_global_ma, ctx);
+ migrate_enable();
+}
+
+static void bpf_task_work_cancel(struct bpf_task_work_ctx *ctx)
+{
+ /*
+ * Scheduled task_work callback holds ctx ref, so if we successfully
+ * cancelled, we put that ref on callback's behalf. If we couldn't
+ * cancel, callback will inevitably run or has already completed
+ * running, and it would have taken care of its ctx ref itself.
+ */
+ if (task_work_cancel(ctx->task, &ctx->work))
+ bpf_task_work_ctx_put(ctx);
+}
+
+static void bpf_task_work_callback(struct callback_head *cb)
+{
+ struct bpf_task_work_ctx *ctx = container_of(cb, struct bpf_task_work_ctx, work);
+ enum bpf_task_work_state state;
+ u32 idx;
+ void *key;
+
+ /* Read lock is needed to protect ctx and map key/value access */
+ guard(rcu_tasks_trace)();
+ /*
+ * This callback may start running before bpf_task_work_irq() switched to
+ * SCHEDULED state, so handle both transition variants SCHEDULING|SCHEDULED -> RUNNING.
+ */
+ state = cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_RUNNING);
+ if (state == BPF_TW_SCHEDULED)
+ state = cmpxchg(&ctx->state, BPF_TW_SCHEDULED, BPF_TW_RUNNING);
+ if (state == BPF_TW_FREED) {
+ bpf_task_work_ctx_put(ctx);
+ return;
+ }
+
+ key = (void *)map_key_from_value(ctx->map, ctx->map_val, &idx);
+
+ migrate_disable();
+ ctx->callback_fn(ctx->map, key, ctx->map_val);
+ migrate_enable();
+
+ bpf_task_work_ctx_reset(ctx);
+ (void)cmpxchg(&ctx->state, BPF_TW_RUNNING, BPF_TW_STANDBY);
+
+ bpf_task_work_ctx_put(ctx);
+}
+
+static void bpf_task_work_irq(struct irq_work *irq_work)
+{
+ struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work);
+ enum bpf_task_work_state state;
+ int err;
+
+ guard(rcu_tasks_trace)();
+
+ if (cmpxchg(&ctx->state, BPF_TW_PENDING, BPF_TW_SCHEDULING) != BPF_TW_PENDING) {
+ bpf_task_work_ctx_put(ctx);
+ return;
+ }
+
+ err = task_work_add(ctx->task, &ctx->work, ctx->mode);
+ if (err) {
+ bpf_task_work_ctx_reset(ctx);
+ /*
+ * try to switch back to STANDBY for another task_work reuse, but we might have
+ * gone to FREED already, which is fine as we already cleaned up after ourselves
+ */
+ (void)cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_STANDBY);
+ bpf_task_work_ctx_put(ctx);
+ return;
+ }
+
+ /*
+	 * It's technically possible for the just-scheduled task_work callback to
+	 * have completed running by now, going SCHEDULING -> RUNNING and then
+	 * dropping its ctx refcount. Instead of capturing an extra ref just to
+	 * protect the ctx->state access below, we rely on RCU protection to
+	 * perform the SCHEDULING -> SCHEDULED attempt below.
+ */
+ state = cmpxchg(&ctx->state, BPF_TW_SCHEDULING, BPF_TW_SCHEDULED);
+ if (state == BPF_TW_FREED)
+ bpf_task_work_cancel(ctx); /* clean up if we switched into FREED state */
+}
+
+static struct bpf_task_work_ctx *bpf_task_work_fetch_ctx(struct bpf_task_work *tw,
+ struct bpf_map *map)
+{
+ struct bpf_task_work_kern *twk = (void *)tw;
+ struct bpf_task_work_ctx *ctx, *old_ctx;
+
+ ctx = READ_ONCE(twk->ctx);
+ if (ctx)
+ return ctx;
+
+ ctx = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_task_work_ctx));
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ memset(ctx, 0, sizeof(*ctx));
+ refcount_set(&ctx->refcnt, 1); /* map's own ref */
+ ctx->state = BPF_TW_STANDBY;
+
+ old_ctx = cmpxchg(&twk->ctx, NULL, ctx);
+ if (old_ctx) {
+ /*
+		 * tw->ctx was set by a concurrent BPF program; release the allocated
+		 * memory and reuse the context that is already set.
+ */
+ bpf_mem_free(&bpf_global_ma, ctx);
+ return old_ctx;
+ }
+
+ return ctx; /* Success */
+}
+
+static struct bpf_task_work_ctx *bpf_task_work_acquire_ctx(struct bpf_task_work *tw,
+ struct bpf_map *map)
+{
+ struct bpf_task_work_ctx *ctx;
+
+ ctx = bpf_task_work_fetch_ctx(tw, map);
+ if (IS_ERR(ctx))
+ return ctx;
+
+ /* try to get ref for task_work callback to hold */
+ if (!bpf_task_work_ctx_tryget(ctx))
+ return ERR_PTR(-EBUSY);
+
+ if (cmpxchg(&ctx->state, BPF_TW_STANDBY, BPF_TW_PENDING) != BPF_TW_STANDBY) {
+ /* lost acquiring race or map_release_uref() stole it from us, put ref and bail */
+ bpf_task_work_ctx_put(ctx);
+ return ERR_PTR(-EBUSY);
+ }
+
+ /*
+ * If no process or bpffs is holding a reference to the map, no new callbacks should be
+ * scheduled. This does not address any race or correctness issue, but rather is a policy
+ * choice: dropping user references should stop everything.
+ */
+ if (!atomic64_read(&map->usercnt)) {
+ /* drop ref we just got for task_work callback itself */
+ bpf_task_work_ctx_put(ctx);
+ /* transfer map's ref into cancel_and_free() */
+ bpf_task_work_cancel_and_free(tw);
+ return ERR_PTR(-EBUSY);
+ }
+
+ return ctx;
+}
+
+static int bpf_task_work_schedule(struct task_struct *task, struct bpf_task_work *tw,
+ struct bpf_map *map, bpf_task_work_callback_t callback_fn,
+ struct bpf_prog_aux *aux, enum task_work_notify_mode mode)
+{
+ struct bpf_prog *prog;
+ struct bpf_task_work_ctx *ctx;
+ int err;
+
+ BTF_TYPE_EMIT(struct bpf_task_work);
+
+ prog = bpf_prog_inc_not_zero(aux->prog);
+ if (IS_ERR(prog))
+ return -EBADF;
+ task = bpf_task_acquire(task);
+ if (!task) {
+ err = -EBADF;
+ goto release_prog;
+ }
+
+ ctx = bpf_task_work_acquire_ctx(tw, map);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto release_all;
+ }
+
+ ctx->task = task;
+ ctx->callback_fn = callback_fn;
+ ctx->prog = prog;
+ ctx->mode = mode;
+ ctx->map = map;
+ ctx->map_val = (void *)tw - map->record->task_work_off;
+ init_task_work(&ctx->work, bpf_task_work_callback);
+ init_irq_work(&ctx->irq_work, bpf_task_work_irq);
+
+ irq_work_queue(&ctx->irq_work);
+ return 0;
+
+release_all:
+ bpf_task_release(task);
+release_prog:
+ bpf_prog_put(prog);
+ return err;
+}
+
+/**
+ * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode
+ * @task: Task struct for which callback should be scheduled
+ * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
+ * @map__map: bpf_map that embeds struct bpf_task_work in the values
+ * @callback: pointer to BPF subprogram to call
+ * @aux__prog: user should pass NULL
+ *
+ * Return: 0 if task work has been scheduled successfully, negative error code otherwise
+ */
+__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw,
+ void *map__map, bpf_task_work_callback_t callback,
+ void *aux__prog)
+{
+ return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_SIGNAL);
+}
+
+/**
+ * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode
+ * @task: Task struct for which callback should be scheduled
+ * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping
+ * @map__map: bpf_map that embeds struct bpf_task_work in the values
+ * @callback: pointer to BPF subprogram to call
+ * @aux__prog: user should pass NULL
+ *
+ * Return: 0 if task work has been scheduled successfully, negative error code otherwise
+ */
+__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw,
+ void *map__map, bpf_task_work_callback_t callback,
+ void *aux__prog)
+{
+ return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME);
+}
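For orientation, a hedged sketch of what a BPF-side user of these kfuncs could look like. Map layout, attach point, and the kfunc extern are assumptions for illustration; on a patched kernel the declarations and struct bpf_task_work would come from vmlinux.h/BTF:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* Declaration assumed here; it would normally come from vmlinux.h/BTF. */
extern int bpf_task_work_schedule_resume(struct task_struct *task,
					 struct bpf_task_work *tw, void *map__map,
					 int (*callback)(struct bpf_map *map, void *key, void *value),
					 void *aux__prog) __ksym;

char LICENSE[] SEC("license") = "GPL";

struct elem {
	struct bpf_task_work tw;
	__u64 counter;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 128);
	__type(key, __u32);
	__type(value, struct elem);
} tw_map SEC(".maps");

/* Runs later, in the target task's context, not in the tracing hook itself. */
static __noinline int process_in_task(struct bpf_map *map, void *key, void *value)
{
	struct elem *e = value;

	e->counter++;
	return 0;
}

SEC("tp_btf/task_newtask")	/* illustrative attach point */
int BPF_PROG(schedule_work, struct task_struct *task, __u64 clone_flags)
{
	struct elem zero = {}, *e;
	__u32 pid = task->pid;

	bpf_map_update_elem(&tw_map, &pid, &zero, BPF_ANY);
	e = bpf_map_lookup_elem(&tw_map, &pid);
	if (!e)
		return 0;

	/* defer the callback until @task returns to user space */
	bpf_task_work_schedule_resume(task, &e->tw, &tw_map, process_in_task, NULL);
	return 0;
}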
__bpf_kfunc_end_defs();
+static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work)
+{
+ struct bpf_task_work_ctx *ctx = container_of(irq_work, struct bpf_task_work_ctx, irq_work);
+
+ bpf_task_work_cancel(ctx); /* this might put task_work callback's ref */
+ bpf_task_work_ctx_put(ctx); /* and here we put map's own ref that was transferred to us */
+}
+
+void bpf_task_work_cancel_and_free(void *val)
+{
+ struct bpf_task_work_kern *twk = val;
+ struct bpf_task_work_ctx *ctx;
+ enum bpf_task_work_state state;
+
+ ctx = xchg(&twk->ctx, NULL);
+ if (!ctx)
+ return;
+
+ state = xchg(&ctx->state, BPF_TW_FREED);
+ if (state == BPF_TW_SCHEDULED) {
+ /* run in irq_work to avoid locks in NMI */
+ init_irq_work(&ctx->irq_work, bpf_task_work_cancel_scheduled);
+ irq_work_queue(&ctx->irq_work);
+ return;
+ }
+
+ bpf_task_work_ctx_put(ctx); /* put bpf map's ref */
+}
+
BTF_KFUNCS_START(generic_btf_ids)
#ifdef CONFIG_CRASH_DUMP
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
@@ -3753,6 +4268,14 @@ BTF_ID_FLAGS(func, bpf_throw)
#ifdef CONFIG_BPF_EVENTS
BTF_ID_FLAGS(func, bpf_send_signal_task, KF_TRUSTED_ARGS)
#endif
+#ifdef CONFIG_KEYS
+BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
+#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
+BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
+#endif
+#endif
BTF_KFUNCS_END(generic_btf_ids)
static const struct btf_kfunc_id_set generic_kfunc_set = {
@@ -3834,6 +4357,7 @@ BTF_ID_FLAGS(func, bpf_iter_dmabuf_destroy, KF_ITER_DESTROY | KF_SLEEPABLE)
#endif
BTF_ID_FLAGS(func, __bpf_trap)
BTF_ID_FLAGS(func, bpf_strcmp);
+BTF_ID_FLAGS(func, bpf_strcasecmp);
BTF_ID_FLAGS(func, bpf_strchr);
BTF_ID_FLAGS(func, bpf_strchrnul);
BTF_ID_FLAGS(func, bpf_strnchr);
@@ -3848,6 +4372,8 @@ BTF_ID_FLAGS(func, bpf_strnstr);
BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU)
#endif
BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/liveness.c b/kernel/bpf/liveness.c
new file mode 100644
index 000000000000..3c611aba7f52
--- /dev/null
+++ b/kernel/bpf/liveness.c
@@ -0,0 +1,733 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf_verifier.h>
+#include <linux/hashtable.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+
+/*
+ * This file implements live stack slots analysis. After accumulating
+ * stack usage data, the analysis answers queries about whether a
+ * particular stack slot may be read by an instruction or any of its
+ * successors. This data is consumed by the verifier states caching
+ * mechanism to decide which stack slots are important when looking for a
+ * visited state corresponding to the current state.
+ *
+ * The analysis is call chain sensitive, meaning that data is collected
+ * and queried for tuples (call chain, subprogram instruction index).
+ * Such sensitivity allows identifying if some subprogram call always
+ * leads to writes in the caller's stack.
+ *
+ * The basic idea is as follows:
+ * - As the verifier accumulates a set of visited states, the analysis instance
+ * accumulates a conservative estimate of stack slots that can be read
+ * or must be written for each visited tuple (call chain, instruction index).
+ * - If several states happen to visit the same instruction with the same
+ * call chain, stack usage information for the corresponding tuple is joined:
+ * - "may_read" set represents a union of all possibly read slots
+ * (any slot in "may_read" set might be read at or after the instruction);
+ * - "must_write" set represents an intersection of all possibly written slots
+ * (any slot in "must_write" set is guaranteed to be written by the instruction).
+ * - The analysis is split into two phases:
+ * - read and write marks accumulation;
+ * - read and write marks propagation.
+ * - The propagation phase is a textbook live variable data flow analysis:
+ *
+ * state[cc, i].live_after = U [state[cc, s].live_before for s in insn_successors(i)]
+ * state[cc, i].live_before =
+ *   (state[cc, i].live_after / state[cc, i].must_write) U state[cc, i].may_read
+ *
+ * Where:
+ * - `U` stands for set union
+ * - `/` stands for set difference;
+ * - `cc` stands for a call chain;
+ * - `i` and `s` are instruction indexes;
+ *
+ * The above equations are computed for each call chain and instruction
+ * index until state stops changing.
+ * - Additionally, in order to transfer "must_write" information from a
+ * subprogram to call instructions invoking this subprogram,
+ * the "must_write_acc" set is tracked for each (cc, i) tuple.
+ *   This is the set of stack slots that are guaranteed to be written by this
+ *   instruction or any of its successors (within the subprogram).
+ * The equation for "must_write_acc" propagation looks as follows:
+ *
+ * state[cc, i].must_write_acc =
+ * ∩ [state[cc, s].must_write_acc for s in insn_successors(i)]
+ * U state[cc, i].must_write
+ *
+ * (An intersection of all "must_write_acc" for instruction successors
+ * plus all "must_write" slots for the instruction itself).
+ * - After the propagation phase completes for a subprogram, information from
+ * (cc, 0) tuple (subprogram entry) is transferred to the caller's call chain:
+ * - "must_write_acc" set is intersected with the call site's "must_write" set;
+ * - "may_read" set is added to the call site's "may_read" set.
+ * - Any live stack queries must be taken after the propagation phase.
+ * - Accumulation and propagation phases can be entered multiple times,
+ * at any point in time:
+ * - "may_read" set only grows;
+ * - "must_write" set only shrinks;
+ * - for each visited verifier state with zero branches, all relevant
+ * read and write marks are already recorded by the analysis instance.
+ *
+ * Technically, the analysis is facilitated by the following data structures:
+ * - Call chain: for given verifier state, the call chain is a tuple of call
+ * instruction indexes leading to the current subprogram plus the subprogram
+ * entry point index.
+ * - Function instance: for a given call chain, for each instruction in
+ * the current subprogram, a mapping between instruction index and a
+ * set of "may_read", "must_write" and other marks accumulated for this
+ * instruction.
+ * - A hash table mapping call chains to function instances.
+ */
+
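To make the equations above concrete, here is a tiny worked example over u64 slot masks (self-contained sketch mirroring the per-instruction masks used below, not calling into verifier code):

#include <stdint.h>
#include <stdio.h>

/* Two-instruction toy program within one call chain:
 *   insn 0: writes slot 0            (must_write = 0b01)
 *   insn 1: reads  slots 0 and 1     (may_read   = 0b11)
 * insn 1 is the only successor of insn 0 and has no successors itself. */
int main(void)
{
	uint64_t must_write0 = 0x1, may_read0 = 0x0;
	uint64_t must_write1 = 0x0, may_read1 = 0x3;

	/* insn 1 has no successors, so live_after(1) = 0 */
	uint64_t live_before1 = (0 & ~must_write1) | may_read1;            /* = 0b11 */
	/* live_after(0) = union of successors' live_before = live_before(1) */
	uint64_t live_before0 = (live_before1 & ~must_write0) | may_read0; /* = 0b10 */

	/* Slot 0 is not live before insn 0 (it is overwritten first),
	 * slot 1 is live before both instructions. */
	printf("live_before(0)=%#llx live_before(1)=%#llx\n",
	       (unsigned long long)live_before0, (unsigned long long)live_before1);
	return 0;
}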
+struct callchain {
+ u32 callsites[MAX_CALL_FRAMES]; /* instruction pointer for each frame */
+ /* cached subprog_info[*].start for functions owning the frames:
+ * - sp_starts[curframe] used to get insn relative index within current function;
+ * - sp_starts[0..current-1] used for fast callchain_frame_up().
+ */
+ u32 sp_starts[MAX_CALL_FRAMES];
+ u32 curframe; /* depth of callsites and sp_starts arrays */
+};
+
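For example (instruction numbers invented for illustration): if the main program calls subprogram A at insn 12, and A calls subprogram B at insn 40, then while verifying B the callchain would be encoded roughly as:

/* main (starts at 0) --call@12--> A (starts at 30) --call@40--> B (starts at 55) */
struct callchain example = {
	.callsites = { 12, 40, 55 },	/* last entry duplicates B's entry point */
	.sp_starts = {  0, 30, 55 },
	.curframe  = 2,
};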
+struct per_frame_masks {
+ u64 may_read; /* stack slots that may be read by this instruction */
+ u64 must_write; /* stack slots written by this instruction */
+ u64 must_write_acc; /* stack slots written by this instruction and its successors */
+ u64 live_before; /* stack slots that may be read by this insn and its successors */
+};
+
+/*
+ * A function instance created for a specific callchain.
+ * Encapsulates read and write marks for each instruction in the function.
+ * Marks are tracked for each frame in the callchain.
+ */
+struct func_instance {
+ struct hlist_node hl_node;
+ struct callchain callchain;
+ u32 insn_cnt; /* cached number of insns in the function */
+ bool updated;
+ bool must_write_dropped;
+ /* Per frame, per instruction masks, frames allocated lazily. */
+ struct per_frame_masks *frames[MAX_CALL_FRAMES];
+	/* For each instruction, a flag telling whether "must_write" has been initialized for it. */
+ bool *must_write_set;
+};
+
+struct live_stack_query {
+ struct func_instance *instances[MAX_CALL_FRAMES]; /* valid in range [0..curframe] */
+ u32 curframe;
+ u32 insn_idx;
+};
+
+struct bpf_liveness {
+ DECLARE_HASHTABLE(func_instances, 8); /* maps callchain to func_instance */
+ struct live_stack_query live_stack_query; /* cache to avoid repetitive ht lookups */
+ /* Cached instance corresponding to env->cur_state, avoids per-instruction ht lookup */
+ struct func_instance *cur_instance;
+ /*
+ * Below fields are used to accumulate stack write marks for instruction at
+ * @write_insn_idx before submitting the marks to @cur_instance.
+ */
+ u64 write_masks_acc[MAX_CALL_FRAMES];
+ u32 write_insn_idx;
+};
+
+/* Compute callchain corresponding to state @st at depth @frameno */
+static void compute_callchain(struct bpf_verifier_env *env, struct bpf_verifier_state *st,
+ struct callchain *callchain, u32 frameno)
+{
+ struct bpf_subprog_info *subprog_info = env->subprog_info;
+ u32 i;
+
+ memset(callchain, 0, sizeof(*callchain));
+ for (i = 0; i <= frameno; i++) {
+ callchain->sp_starts[i] = subprog_info[st->frame[i]->subprogno].start;
+ if (i < st->curframe)
+ callchain->callsites[i] = st->frame[i + 1]->callsite;
+ }
+ callchain->curframe = frameno;
+ callchain->callsites[callchain->curframe] = callchain->sp_starts[callchain->curframe];
+}
+
+static u32 hash_callchain(struct callchain *callchain)
+{
+ return jhash2(callchain->callsites, callchain->curframe, 0);
+}
+
+static bool same_callsites(struct callchain *a, struct callchain *b)
+{
+ int i;
+
+ if (a->curframe != b->curframe)
+ return false;
+ for (i = a->curframe; i >= 0; i--)
+ if (a->callsites[i] != b->callsites[i])
+ return false;
+ return true;
+}
+
+/*
+ * Find existing or allocate new function instance corresponding to @callchain.
+ * Instances are accumulated in env->liveness->func_instances and persist
+ * until the end of the verification process.
+ */
+static struct func_instance *__lookup_instance(struct bpf_verifier_env *env,
+ struct callchain *callchain)
+{
+ struct bpf_liveness *liveness = env->liveness;
+ struct bpf_subprog_info *subprog;
+ struct func_instance *result;
+ u32 subprog_sz, size, key;
+
+ key = hash_callchain(callchain);
+ hash_for_each_possible(liveness->func_instances, result, hl_node, key)
+ if (same_callsites(&result->callchain, callchain))
+ return result;
+
+ subprog = bpf_find_containing_subprog(env, callchain->sp_starts[callchain->curframe]);
+ subprog_sz = (subprog + 1)->start - subprog->start;
+ size = sizeof(struct func_instance);
+ result = kvzalloc(size, GFP_KERNEL_ACCOUNT);
+ if (!result)
+ return ERR_PTR(-ENOMEM);
+ result->must_write_set = kvcalloc(subprog_sz, sizeof(*result->must_write_set),
+ GFP_KERNEL_ACCOUNT);
+ if (!result->must_write_set)
+ return ERR_PTR(-ENOMEM);
+ memcpy(&result->callchain, callchain, sizeof(*callchain));
+ result->insn_cnt = subprog_sz;
+ hash_add(liveness->func_instances, &result->hl_node, key);
+ return result;
+}
+
+static struct func_instance *lookup_instance(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st,
+ u32 frameno)
+{
+ struct callchain callchain;
+
+ compute_callchain(env, st, &callchain, frameno);
+ return __lookup_instance(env, &callchain);
+}
+
+int bpf_stack_liveness_init(struct bpf_verifier_env *env)
+{
+ env->liveness = kvzalloc(sizeof(*env->liveness), GFP_KERNEL_ACCOUNT);
+ if (!env->liveness)
+ return -ENOMEM;
+ hash_init(env->liveness->func_instances);
+ return 0;
+}
+
+void bpf_stack_liveness_free(struct bpf_verifier_env *env)
+{
+ struct func_instance *instance;
+ struct hlist_node *tmp;
+ int bkt, i;
+
+ if (!env->liveness)
+ return;
+ hash_for_each_safe(env->liveness->func_instances, bkt, tmp, instance, hl_node) {
+ for (i = 0; i <= instance->callchain.curframe; i++)
+ kvfree(instance->frames[i]);
+ kvfree(instance->must_write_set);
+ kvfree(instance);
+ }
+ kvfree(env->liveness);
+}
+
+/*
+ * Convert absolute instruction index @insn_idx to an index relative
+ * to start of the function corresponding to @instance.
+ */
+static int relative_idx(struct func_instance *instance, u32 insn_idx)
+{
+ return insn_idx - instance->callchain.sp_starts[instance->callchain.curframe];
+}
+
+static struct per_frame_masks *get_frame_masks(struct func_instance *instance,
+ u32 frame, u32 insn_idx)
+{
+ if (!instance->frames[frame])
+ return NULL;
+
+ return &instance->frames[frame][relative_idx(instance, insn_idx)];
+}
+
+static struct per_frame_masks *alloc_frame_masks(struct bpf_verifier_env *env,
+ struct func_instance *instance,
+ u32 frame, u32 insn_idx)
+{
+ struct per_frame_masks *arr;
+
+ if (!instance->frames[frame]) {
+ arr = kvcalloc(instance->insn_cnt, sizeof(*arr), GFP_KERNEL_ACCOUNT);
+ instance->frames[frame] = arr;
+ if (!arr)
+ return ERR_PTR(-ENOMEM);
+ }
+ return get_frame_masks(instance, frame, insn_idx);
+}
+
+void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env)
+{
+ env->liveness->cur_instance = NULL;
+}
+
+/* If @env->liveness->cur_instance is null, set it to instance corresponding to @env->cur_state. */
+static int ensure_cur_instance(struct bpf_verifier_env *env)
+{
+ struct bpf_liveness *liveness = env->liveness;
+ struct func_instance *instance;
+
+ if (liveness->cur_instance)
+ return 0;
+
+ instance = lookup_instance(env, env->cur_state, env->cur_state->curframe);
+ if (IS_ERR(instance))
+ return PTR_ERR(instance);
+
+ liveness->cur_instance = instance;
+ return 0;
+}
+
+/* Accumulate may_read masks for @frame at @insn_idx */
+static int mark_stack_read(struct bpf_verifier_env *env,
+ struct func_instance *instance, u32 frame, u32 insn_idx, u64 mask)
+{
+ struct per_frame_masks *masks;
+ u64 new_may_read;
+
+ masks = alloc_frame_masks(env, instance, frame, insn_idx);
+ if (IS_ERR(masks))
+ return PTR_ERR(masks);
+ new_may_read = masks->may_read | mask;
+ if (new_may_read != masks->may_read &&
+ ((new_may_read | masks->live_before) != masks->live_before))
+ instance->updated = true;
+ masks->may_read |= mask;
+ return 0;
+}
+
+int bpf_mark_stack_read(struct bpf_verifier_env *env, u32 frame, u32 insn_idx, u64 mask)
+{
+ int err;
+
+ err = ensure_cur_instance(env);
+ err = err ?: mark_stack_read(env, env->liveness->cur_instance, frame, insn_idx, mask);
+ return err;
+}
+
+static void reset_stack_write_marks(struct bpf_verifier_env *env,
+ struct func_instance *instance, u32 insn_idx)
+{
+ struct bpf_liveness *liveness = env->liveness;
+ int i;
+
+ liveness->write_insn_idx = insn_idx;
+ for (i = 0; i <= instance->callchain.curframe; i++)
+ liveness->write_masks_acc[i] = 0;
+}
+
+int bpf_reset_stack_write_marks(struct bpf_verifier_env *env, u32 insn_idx)
+{
+ struct bpf_liveness *liveness = env->liveness;
+ int err;
+
+ err = ensure_cur_instance(env);
+ if (err)
+ return err;
+
+ reset_stack_write_marks(env, liveness->cur_instance, insn_idx);
+ return 0;
+}
+
+void bpf_mark_stack_write(struct bpf_verifier_env *env, u32 frame, u64 mask)
+{
+ env->liveness->write_masks_acc[frame] |= mask;
+}
+
+static int commit_stack_write_marks(struct bpf_verifier_env *env,
+ struct func_instance *instance)
+{
+ struct bpf_liveness *liveness = env->liveness;
+ u32 idx, frame, curframe, old_must_write;
+ struct per_frame_masks *masks;
+ u64 mask;
+
+ if (!instance)
+ return 0;
+
+ curframe = instance->callchain.curframe;
+ idx = relative_idx(instance, liveness->write_insn_idx);
+ for (frame = 0; frame <= curframe; frame++) {
+ mask = liveness->write_masks_acc[frame];
+ /* avoid allocating frames for zero masks */
+ if (mask == 0 && !instance->must_write_set[idx])
+ continue;
+ masks = alloc_frame_masks(env, instance, frame, liveness->write_insn_idx);
+ if (IS_ERR(masks))
+ return PTR_ERR(masks);
+ old_must_write = masks->must_write;
+ /*
+		 * If the instruction at this callchain is seen for the first time, set must_write
+		 * equal to @mask. Otherwise take the intersection with the previous value.
+ */
+ if (instance->must_write_set[idx])
+ mask &= old_must_write;
+ if (old_must_write != mask) {
+ masks->must_write = mask;
+ instance->updated = true;
+ }
+ if (old_must_write & ~mask)
+ instance->must_write_dropped = true;
+ }
+ instance->must_write_set[idx] = true;
+ liveness->write_insn_idx = 0;
+ return 0;
+}
+
+/*
+ * Merge stack write marks in @env->liveness->write_masks_acc
+ * with information already in @env->liveness->cur_instance.
+ */
+int bpf_commit_stack_write_marks(struct bpf_verifier_env *env)
+{
+ return commit_stack_write_marks(env, env->liveness->cur_instance);
+}
+
+static char *fmt_callchain(struct bpf_verifier_env *env, struct callchain *callchain)
+{
+ char *buf_end = env->tmp_str_buf + sizeof(env->tmp_str_buf);
+ char *buf = env->tmp_str_buf;
+ int i;
+
+ buf += snprintf(buf, buf_end - buf, "(");
+ for (i = 0; i <= callchain->curframe; i++)
+ buf += snprintf(buf, buf_end - buf, "%s%d", i ? "," : "", callchain->callsites[i]);
+ snprintf(buf, buf_end - buf, ")");
+ return env->tmp_str_buf;
+}
+
+static void log_mask_change(struct bpf_verifier_env *env, struct callchain *callchain,
+ char *pfx, u32 frame, u32 insn_idx, u64 old, u64 new)
+{
+ u64 changed_bits = old ^ new;
+ u64 new_ones = new & changed_bits;
+ u64 new_zeros = ~new & changed_bits;
+
+ if (!changed_bits)
+ return;
+ bpf_log(&env->log, "%s frame %d insn %d ", fmt_callchain(env, callchain), frame, insn_idx);
+ if (new_ones) {
+ bpf_fmt_stack_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_ones);
+ bpf_log(&env->log, "+%s %s ", pfx, env->tmp_str_buf);
+ }
+ if (new_zeros) {
+ bpf_fmt_stack_mask(env->tmp_str_buf, sizeof(env->tmp_str_buf), new_zeros);
+ bpf_log(&env->log, "-%s %s", pfx, env->tmp_str_buf);
+ }
+ bpf_log(&env->log, "\n");
+}
+
+int bpf_jmp_offset(struct bpf_insn *insn)
+{
+ u8 code = insn->code;
+
+ if (code == (BPF_JMP32 | BPF_JA))
+ return insn->imm;
+ return insn->off;
+}
+
+__diag_push();
+__diag_ignore_all("-Woverride-init", "Allow field initialization overrides for opcode_info_tbl");
+
+inline int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2])
+{
+ static const struct opcode_info {
+ bool can_jump;
+ bool can_fallthrough;
+ } opcode_info_tbl[256] = {
+ [0 ... 255] = {.can_jump = false, .can_fallthrough = true},
+ #define _J(code, ...) \
+ [BPF_JMP | code] = __VA_ARGS__, \
+ [BPF_JMP32 | code] = __VA_ARGS__
+
+ _J(BPF_EXIT, {.can_jump = false, .can_fallthrough = false}),
+ _J(BPF_JA, {.can_jump = true, .can_fallthrough = false}),
+ _J(BPF_JEQ, {.can_jump = true, .can_fallthrough = true}),
+ _J(BPF_JNE, {.can_jump = true, .can_fallthrough = true}),
+ _J(BPF_JLT, {.can_jump = true, .can_fallthrough = true}),
+ _J(BPF_JLE, {.can_jump = true, .can_fallthrough = true}),
+ _J(BPF_JGT, {.can_jump = true, .can_fallthrough = true}),
+ _J(BPF_JGE, {.can_jump = true, .can_fallthrough = true}),
+ _J(BPF_JSGT, {.can_jump = true, .can_fallthrough = true}),
+ _J(BPF_JSGE, {.can_jump = true, .can_fallthrough = true}),
+ _J(BPF_JSLT, {.can_jump = true, .can_fallthrough = true}),
+ _J(BPF_JSLE, {.can_jump = true, .can_fallthrough = true}),
+ _J(BPF_JCOND, {.can_jump = true, .can_fallthrough = true}),
+ _J(BPF_JSET, {.can_jump = true, .can_fallthrough = true}),
+ #undef _J
+ };
+ struct bpf_insn *insn = &prog->insnsi[idx];
+ const struct opcode_info *opcode_info;
+ int i = 0, insn_sz;
+
+ opcode_info = &opcode_info_tbl[BPF_CLASS(insn->code) | BPF_OP(insn->code)];
+ insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
+ if (opcode_info->can_fallthrough)
+ succ[i++] = idx + insn_sz;
+
+ if (opcode_info->can_jump)
+ succ[i++] = idx + bpf_jmp_offset(insn) + 1;
+
+ return i;
+}
+
+__diag_pop();
+
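A small sketch of how a caller might walk the control-flow edges this helper exposes (hypothetical function, for illustration; the propagation code below follows the same pattern):

/* Visit every control-flow successor of instruction @idx
 * (at most two: fall-through and jump target). */
static void visit_successors(struct bpf_prog *prog, u32 idx)
{
	u32 succ[2];
	int n, i;

	n = bpf_insn_successors(prog, idx, succ);
	for (i = 0; i < n; i++)
		pr_debug("insn %u -> insn %u\n", idx, succ[i]);
}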
+static struct func_instance *get_outer_instance(struct bpf_verifier_env *env,
+ struct func_instance *instance)
+{
+ struct callchain callchain = instance->callchain;
+
+ /* Adjust @callchain to represent callchain one frame up */
+ callchain.callsites[callchain.curframe] = 0;
+ callchain.sp_starts[callchain.curframe] = 0;
+ callchain.curframe--;
+ callchain.callsites[callchain.curframe] = callchain.sp_starts[callchain.curframe];
+ return __lookup_instance(env, &callchain);
+}
+
+static u32 callchain_subprog_start(struct callchain *callchain)
+{
+ return callchain->sp_starts[callchain->curframe];
+}
+
+/*
+ * Transfer @may_read and @must_write_acc marks from the first instruction of @instance
+ * to the call instruction in the function instance calling @instance.
+ */
+static int propagate_to_outer_instance(struct bpf_verifier_env *env,
+ struct func_instance *instance)
+{
+ struct callchain *callchain = &instance->callchain;
+ u32 this_subprog_start, callsite, frame;
+ struct func_instance *outer_instance;
+ struct per_frame_masks *insn;
+ int err;
+
+ this_subprog_start = callchain_subprog_start(callchain);
+ outer_instance = get_outer_instance(env, instance);
+ callsite = callchain->callsites[callchain->curframe - 1];
+
+ reset_stack_write_marks(env, outer_instance, callsite);
+ for (frame = 0; frame < callchain->curframe; frame++) {
+ insn = get_frame_masks(instance, frame, this_subprog_start);
+ if (!insn)
+ continue;
+ bpf_mark_stack_write(env, frame, insn->must_write_acc);
+ err = mark_stack_read(env, outer_instance, frame, callsite, insn->live_before);
+ if (err)
+ return err;
+ }
+ commit_stack_write_marks(env, outer_instance);
+ return 0;
+}
+
+static inline bool update_insn(struct bpf_verifier_env *env,
+ struct func_instance *instance, u32 frame, u32 insn_idx)
+{
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
+ u64 new_before, new_after, must_write_acc;
+ struct per_frame_masks *insn, *succ_insn;
+ u32 succ_num, s, succ[2];
+ bool changed;
+
+ succ_num = bpf_insn_successors(env->prog, insn_idx, succ);
+ if (unlikely(succ_num == 0))
+ return false;
+
+ changed = false;
+ insn = get_frame_masks(instance, frame, insn_idx);
+ new_before = 0;
+ new_after = 0;
+ /*
+ * New "must_write_acc" is an intersection of all "must_write_acc"
+ * of successors plus all "must_write" slots of the instruction itself.
+ */
+ must_write_acc = U64_MAX;
+ for (s = 0; s < succ_num; ++s) {
+ succ_insn = get_frame_masks(instance, frame, succ[s]);
+ new_after |= succ_insn->live_before;
+ must_write_acc &= succ_insn->must_write_acc;
+ }
+ must_write_acc |= insn->must_write;
+ /*
+ * New "live_before" is a union of all "live_before" of successors
+ * minus slots written by the instruction plus slots read by the instruction.
+ */
+ new_before = (new_after & ~insn->must_write) | insn->may_read;
+ changed |= new_before != insn->live_before;
+ changed |= must_write_acc != insn->must_write_acc;
+ if (unlikely(env->log.level & BPF_LOG_LEVEL2) &&
+ (insn->may_read || insn->must_write ||
+ insn_idx == callchain_subprog_start(&instance->callchain) ||
+ aux[insn_idx].prune_point)) {
+ log_mask_change(env, &instance->callchain, "live",
+ frame, insn_idx, insn->live_before, new_before);
+ log_mask_change(env, &instance->callchain, "written",
+ frame, insn_idx, insn->must_write_acc, must_write_acc);
+ }
+ insn->live_before = new_before;
+ insn->must_write_acc = must_write_acc;
+ return changed;
+}
+
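A standalone sketch of the two dataflow equations documented above, iterated to a fixed point over a toy five-instruction CFG (hypothetical masks and edges, not kernel code):

```c
/* liveness_toy.c — live_before/must_write_acc fixed point on a toy CFG */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy {
	uint64_t may_read, must_write;		/* per-insn inputs */
	uint64_t live_before, must_write_acc;	/* computed */
	int succ[2], nsucc;
};

int main(void)
{
	/* 0: write slot0; 1: branch to 2 or 4; 2: read slot0, write slot1;
	 * 3: read slot1; 4: exit
	 */
	struct toy in[5] = {
		{ .must_write = 1u << 0, .succ = { 1 }, .nsucc = 1 },
		{ .succ = { 2, 4 }, .nsucc = 2 },
		{ .may_read = 1u << 0, .must_write = 1u << 1, .succ = { 3 }, .nsucc = 1 },
		{ .may_read = 1u << 1, .succ = { 4 }, .nsucc = 1 },
		{ .nsucc = 0 },
	};
	bool changed;

	do {
		changed = false;
		for (int i = 4; i >= 0; i--) {	/* reverse index ~ postorder here */
			uint64_t after = 0, acc = UINT64_MAX, before;

			if (!in[i].nsucc)	/* exits keep their initial zero masks */
				continue;
			for (int s = 0; s < in[i].nsucc; s++) {
				after |= in[in[i].succ[s]].live_before;
				acc &= in[in[i].succ[s]].must_write_acc;
			}
			acc |= in[i].must_write;
			before = (after & ~in[i].must_write) | in[i].may_read;
			changed |= before != in[i].live_before || acc != in[i].must_write_acc;
			in[i].live_before = before;
			in[i].must_write_acc = acc;
		}
	} while (changed);

	for (int i = 0; i < 5; i++)
		printf("insn %d: live_before=%#llx must_write_acc=%#llx\n", i,
		       (unsigned long long)in[i].live_before,
		       (unsigned long long)in[i].must_write_acc);
	return 0;
}
```

It converges on the second pass: slot 0 is live before insns 1 and 2 but not before insn 0, which must-writes it, so insn 0's must_write_acc ends up as bit 0.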
+/* Fixed-point computation of @live_before and @must_write_acc marks */
+static int update_instance(struct bpf_verifier_env *env, struct func_instance *instance)
+{
+ u32 i, frame, po_start, po_end, cnt, this_subprog_start;
+ struct callchain *callchain = &instance->callchain;
+ int *insn_postorder = env->cfg.insn_postorder;
+ struct bpf_subprog_info *subprog;
+ struct per_frame_masks *insn;
+ bool changed;
+ int err;
+
+ this_subprog_start = callchain_subprog_start(callchain);
+ /*
+ * If must_write marks were updated, must_write_acc needs to be reset
+ * (to account for the case when new must_write sets became smaller).
+ */
+ if (instance->must_write_dropped) {
+ for (frame = 0; frame <= callchain->curframe; frame++) {
+ if (!instance->frames[frame])
+ continue;
+
+ for (i = 0; i < instance->insn_cnt; i++) {
+ insn = get_frame_masks(instance, frame, this_subprog_start + i);
+ insn->must_write_acc = 0;
+ }
+ }
+ }
+
+ subprog = bpf_find_containing_subprog(env, this_subprog_start);
+ po_start = subprog->postorder_start;
+ po_end = (subprog + 1)->postorder_start;
+ cnt = 0;
+ /* repeat until fixed point is reached */
+ do {
+ cnt++;
+ changed = false;
+ for (frame = 0; frame <= instance->callchain.curframe; frame++) {
+ if (!instance->frames[frame])
+ continue;
+
+ for (i = po_start; i < po_end; i++)
+ changed |= update_insn(env, instance, frame, insn_postorder[i]);
+ }
+ } while (changed);
+
+ if (env->log.level & BPF_LOG_LEVEL2)
+ bpf_log(&env->log, "%s live stack update done in %d iterations\n",
+ fmt_callchain(env, callchain), cnt);
+
+ /* transfer marks accumulated for outer frames to outer func instance (caller) */
+ if (callchain->curframe > 0) {
+ err = propagate_to_outer_instance(env, instance);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Prepare all callchains within @env->cur_state for querying.
+ * This function should be called after each verifier.c:pop_stack()
+ * and whenever verifier.c:do_check_insn() processes subprogram exit.
+ * This would guarantee that visited verifier states with zero branches
+ * have their bpf_mark_stack_{read,write}() effects propagated in
+ * @env->liveness.
+ */
+int bpf_update_live_stack(struct bpf_verifier_env *env)
+{
+ struct func_instance *instance;
+ int err, frame;
+
+ bpf_reset_live_stack_callchain(env);
+ for (frame = env->cur_state->curframe; frame >= 0; --frame) {
+ instance = lookup_instance(env, env->cur_state, frame);
+ if (IS_ERR(instance))
+ return PTR_ERR(instance);
+
+ if (instance->updated) {
+ err = update_instance(env, instance);
+ if (err)
+ return err;
+ instance->updated = false;
+ instance->must_write_dropped = false;
+ }
+ }
+ return 0;
+}
+
+static bool is_live_before(struct func_instance *instance, u32 insn_idx, u32 frameno, u32 spi)
+{
+ struct per_frame_masks *masks;
+
+ masks = get_frame_masks(instance, frameno, insn_idx);
+ return masks && (masks->live_before & BIT(spi));
+}
+
+int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ struct live_stack_query *q = &env->liveness->live_stack_query;
+ struct func_instance *instance;
+ u32 frame;
+
+ memset(q, 0, sizeof(*q));
+ for (frame = 0; frame <= st->curframe; frame++) {
+ instance = lookup_instance(env, st, frame);
+ if (IS_ERR(instance))
+ return PTR_ERR(instance);
+ q->instances[frame] = instance;
+ }
+ q->curframe = st->curframe;
+ q->insn_idx = st->insn_idx;
+ return 0;
+}
+
+bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi)
+{
+ /*
+ * Slot is alive if it is read before q->insn_idx in the current func instance,
+ * or if for some outer func instance it is:
+ * - alive before the callsite, if the callsite calls a callback, otherwise
+ * - alive after the callsite
+ */
+ struct live_stack_query *q = &env->liveness->live_stack_query;
+ struct func_instance *instance, *curframe_instance;
+ u32 i, callsite;
+ bool alive;
+
+ curframe_instance = q->instances[q->curframe];
+ if (is_live_before(curframe_instance, q->insn_idx, frameno, spi))
+ return true;
+
+ for (i = frameno; i < q->curframe; i++) {
+ callsite = curframe_instance->callchain.callsites[i];
+ instance = q->instances[i];
+ alive = bpf_calls_callback(env, callsite)
+ ? is_live_before(instance, callsite, frameno, spi)
+ : is_live_before(instance, callsite + 1, frameno, spi);
+ if (alive)
+ return true;
+ }
+
+ return false;
+}
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 632d51b05fe9..c93a756e035c 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -165,7 +165,7 @@ static long cgroup_storage_update_elem(struct bpf_map *map, void *key,
}
new = bpf_map_kmalloc_node(map, struct_size(new, data, map->value_size),
- __GFP_ZERO | GFP_NOWAIT | __GFP_NOWARN,
+ __GFP_ZERO | GFP_NOWAIT,
map->numa_node);
if (!new)
return -ENOMEM;
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 38050f4ee400..f50533169cc3 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -498,6 +498,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type)
return "skb";
case BPF_DYNPTR_TYPE_XDP:
return "xdp";
+ case BPF_DYNPTR_TYPE_SKB_META:
+ return "skb_meta";
case BPF_DYNPTR_TYPE_INVALID:
return "<invalid>";
default:
@@ -540,19 +542,6 @@ static char slot_type_char[] = {
[STACK_IRQ_FLAG] = 'f'
};
-static void print_liveness(struct bpf_verifier_env *env,
- enum bpf_reg_liveness live)
-{
- if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
- verbose(env, "_");
- if (live & REG_LIVE_READ)
- verbose(env, "r");
- if (live & REG_LIVE_WRITTEN)
- verbose(env, "w");
- if (live & REG_LIVE_DONE)
- verbose(env, "D");
-}
-
#define UNUM_MAX_DECIMAL U16_MAX
#define SNUM_MAX_DECIMAL S16_MAX
#define SNUM_MIN_DECIMAL S16_MIN
@@ -770,7 +759,6 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
if (!print_all && !reg_scratched(env, i))
continue;
verbose(env, " R%d", i);
- print_liveness(env, reg->live);
verbose(env, "=");
print_reg_state(env, state, reg);
}
@@ -803,9 +791,7 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
break;
types_buf[j] = '\0';
- verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
- print_liveness(env, reg->live);
- verbose(env, "=%s", types_buf);
+ verbose(env, " fp%d=%s", (-i - 1) * BPF_REG_SIZE, types_buf);
print_reg_state(env, state, reg);
break;
case STACK_DYNPTR:
@@ -814,7 +800,6 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
reg = &state->stack[i].spilled_ptr;
verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
- print_liveness(env, reg->live);
verbose(env, "=dynptr_%s(", dynptr_type_str(reg->dynptr.type));
if (reg->id)
verbose_a("id=%d", reg->id);
@@ -829,9 +814,8 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
if (!reg->ref_obj_id)
continue;
- verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
- print_liveness(env, reg->live);
- verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)",
+ verbose(env, " fp%d=iter_%s(ref_id=%d,state=%s,depth=%u)",
+ (-i - 1) * BPF_REG_SIZE,
iter_type_str(reg->iter.btf, reg->iter.btf_id),
reg->ref_obj_id, iter_state_str(reg->iter.state),
reg->iter.depth);
@@ -839,9 +823,7 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifie
case STACK_MISC:
case STACK_ZERO:
default:
- verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
- print_liveness(env, reg->live);
- verbose(env, "=%s", types_buf);
+ verbose(env, " fp%d=%s", (-i - 1) * BPF_REG_SIZE, types_buf);
break;
}
}
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 889374722d0a..bd45dda9dc35 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -736,7 +736,7 @@ static void destroy_mem_alloc(struct bpf_mem_alloc *ma, int rcu_in_progress)
/* Defer barriers into worker to let the rest of map memory to be freed */
memset(ma, 0, sizeof(*ma));
INIT_WORK(&copy->work, free_mem_alloc_deferred);
- queue_work(system_unbound_wq, &copy->work);
+ queue_work(system_dfl_wq, &copy->work);
}
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index ec3a57a5fba1..4d53cdd1374c 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -646,7 +646,15 @@ static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
}
/* Called from syscall */
-int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
+static int stack_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
+{
+ return bpf_stackmap_extract(map, key, value, true);
+}
+
+/* Called from syscall */
+int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value,
+ bool delete)
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct stack_map_bucket *bucket, *old_bucket;
@@ -663,7 +671,10 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
memcpy(value, bucket->data, trace_len);
memset(value + trace_len, 0, map->value_size - trace_len);
- old_bucket = xchg(&smap->buckets[id], bucket);
+ if (delete)
+ old_bucket = bucket;
+ else
+ old_bucket = xchg(&smap->buckets[id], bucket);
if (old_bucket)
pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
return 0;
@@ -754,6 +765,7 @@ const struct bpf_map_ops stack_trace_map_ops = {
.map_free = stack_map_free,
.map_get_next_key = stack_map_get_next_key,
.map_lookup_elem = stack_map_lookup_elem,
+ .map_lookup_and_delete_elem = stack_map_lookup_and_delete_elem,
.map_update_elem = stack_map_update_elem,
.map_delete_elem = stack_map_delete_elem,
.map_check_btf = map_check_no_btf,
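With stack trace maps now wired up for BPF_MAP_LOOKUP_AND_DELETE_ELEM, a hedged userspace sketch using libbpf's existing wrapper; MAX_DEPTH is an assumption that must match the value_size the map was created with:

```c
/* drain_stack.c — copy out one stack trace and release its bucket */
#include <bpf/bpf.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_DEPTH 127	/* assumption: value_size == MAX_DEPTH * sizeof(__u64) */

static int dump_and_delete_stack(int map_fd, uint32_t stack_id)
{
	uint64_t ips[MAX_DEPTH] = {};
	int err;

	/* issues BPF_MAP_LOOKUP_AND_DELETE_ELEM, now supported for stack trace maps */
	err = bpf_map_lookup_and_delete_elem(map_fd, &stack_id, ips);
	if (err)
		return err;
	for (int i = 0; i < MAX_DEPTH && ips[i]; i++)
		printf("  #%d %#llx\n", i, (unsigned long long)ips[i]);
	return 0;
}
```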
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0fbfa8532c39..a48fa86f82a7 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
*/
+#include <crypto/sha2.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_trace.h>
@@ -38,6 +39,7 @@
#include <linux/tracepoint.h>
#include <linux/overflow.h>
#include <linux/cookie.h>
+#include <linux/verification.h>
#include <net/netfilter/nf_bpf_link.h>
#include <net/netkit.h>
@@ -318,7 +320,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
err = bpf_percpu_cgroup_storage_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
- err = bpf_stackmap_copy(map, key, value);
+ err = bpf_stackmap_extract(map, key, value, false);
} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
err = bpf_fd_array_map_lookup_elem(map, key, value);
} else if (IS_FD_HASH(map)) {
@@ -672,6 +674,7 @@ void btf_record_free(struct btf_record *rec)
case BPF_TIMER:
case BPF_REFCOUNT:
case BPF_WORKQUEUE:
+ case BPF_TASK_WORK:
/* Nothing to release */
break;
default:
@@ -725,6 +728,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
case BPF_TIMER:
case BPF_REFCOUNT:
case BPF_WORKQUEUE:
+ case BPF_TASK_WORK:
/* Nothing to acquire */
break;
default:
@@ -783,6 +787,13 @@ void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj)
bpf_wq_cancel_and_free(obj + rec->wq_off);
}
+void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
+{
+ if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK)))
+ return;
+ bpf_task_work_cancel_and_free(obj + rec->task_work_off);
+}
+
void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
{
const struct btf_field *fields;
@@ -807,6 +818,9 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
case BPF_WORKQUEUE:
bpf_wq_cancel_and_free(field_ptr);
break;
+ case BPF_TASK_WORK:
+ bpf_task_work_cancel_and_free(field_ptr);
+ break;
case BPF_KPTR_UNREF:
WRITE_ONCE(*(u64 *)field_ptr, 0);
break;
@@ -860,6 +874,7 @@ static void bpf_map_free(struct bpf_map *map)
* the free of values or special fields allocated from bpf memory
* allocator.
*/
+ kfree(map->excl_prog_sha);
migrate_disable();
map->ops->map_free(map);
migrate_enable();
@@ -905,7 +920,7 @@ static void bpf_map_free_in_work(struct bpf_map *map)
/* Avoid spawning kworkers, since they all might contend
* for the same mutex like slab_mutex.
*/
- queue_work(system_unbound_wq, &map->work);
+ queue_work(system_dfl_wq, &map->work);
}
static void bpf_map_free_rcu_gp(struct rcu_head *rcu)
@@ -1237,7 +1252,8 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
map->record = btf_parse_fields(btf, value_type,
BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
- BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
+ BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR |
+ BPF_TASK_WORK,
map->value_size);
if (!IS_ERR_OR_NULL(map->record)) {
int i;
@@ -1269,6 +1285,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
break;
case BPF_TIMER:
case BPF_WORKQUEUE:
+ case BPF_TASK_WORK:
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY) {
@@ -1338,9 +1355,9 @@ static bool bpf_net_capable(void)
return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
}
-#define BPF_MAP_CREATE_LAST_FIELD map_token_fd
+#define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size
/* called via syscall */
-static int map_create(union bpf_attr *attr, bool kernel)
+static int map_create(union bpf_attr *attr, bpfptr_t uattr)
{
const struct bpf_map_ops *ops;
struct bpf_token *token = NULL;
@@ -1534,7 +1551,29 @@ static int map_create(union bpf_attr *attr, bool kernel)
attr->btf_vmlinux_value_type_id;
}
- err = security_bpf_map_create(map, attr, token, kernel);
+ if (attr->excl_prog_hash) {
+ bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel);
+
+ if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) {
+ err = -EINVAL;
+ goto free_map;
+ }
+
+ map->excl_prog_sha = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
+ if (!map->excl_prog_sha) {
+ err = -ENOMEM;
+ goto free_map;
+ }
+
+ if (copy_from_bpfptr(map->excl_prog_sha, uprog_hash, SHA256_DIGEST_SIZE)) {
+ err = -EFAULT;
+ goto free_map;
+ }
+ } else if (attr->excl_prog_hash_size) {
+ return -EINVAL;
+ }
+
+ err = security_bpf_map_create(map, attr, token, uattr.is_kernel);
if (err)
goto free_map_sec;
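A hedged userspace sketch of the exclusive-map flow handled above: pass a 32-byte program digest via the new excl_prog_hash/excl_prog_hash_size attributes. It assumes a uapi linux/bpf.h that already carries these fields from this series; computing the digest itself is out of scope here.

```c
/* exclusive_map.c — create a map usable only by the program with this digest */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int create_exclusive_array(const unsigned char digest[32])
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_ARRAY;
	attr.key_size = sizeof(__u32);
	attr.value_size = sizeof(__u64);
	attr.max_entries = 1;
	attr.excl_prog_hash = (__u64)(unsigned long)digest;
	attr.excl_prog_hash_size = 32;	/* must equal SHA256_DIGEST_SIZE */

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}
```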
@@ -1627,7 +1666,8 @@ struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
}
EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
-int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
+int __weak bpf_stackmap_extract(struct bpf_map *map, void *key, void *value,
+ bool delete)
{
return -ENOTSUPP;
}
@@ -2158,7 +2198,8 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
} else if (map->map_type == BPF_MAP_TYPE_HASH ||
map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_HASH ||
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+ map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+ map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
if (!bpf_map_is_offloaded(map)) {
bpf_disable_instrumentation();
rcu_read_lock();
@@ -2761,8 +2802,44 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
}
}
+static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr,
+ bool is_kernel)
+{
+ bpfptr_t usig = make_bpfptr(attr->signature, is_kernel);
+ struct bpf_dynptr_kern sig_ptr, insns_ptr;
+ struct bpf_key *key = NULL;
+ void *sig;
+ int err = 0;
+
+ if (system_keyring_id_check(attr->keyring_id) == 0)
+ key = bpf_lookup_system_key(attr->keyring_id);
+ else
+ key = bpf_lookup_user_key(attr->keyring_id, 0);
+
+ if (!key)
+ return -EINVAL;
+
+ sig = kvmemdup_bpfptr(usig, attr->signature_size);
+ if (IS_ERR(sig)) {
+ bpf_key_put(key);
+ return -ENOMEM;
+ }
+
+ bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0,
+ attr->signature_size);
+ bpf_dynptr_init(&insns_ptr, prog->insnsi, BPF_DYNPTR_TYPE_LOCAL, 0,
+ prog->len * sizeof(struct bpf_insn));
+
+ err = bpf_verify_pkcs7_signature((struct bpf_dynptr *)&insns_ptr,
+ (struct bpf_dynptr *)&sig_ptr, key);
+
+ bpf_key_put(key);
+ kvfree(sig);
+ return err;
+}
+
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt
+#define BPF_PROG_LOAD_LAST_FIELD keyring_id
static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
{
@@ -2926,6 +3003,12 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
/* eBPF programs must be GPL compatible to use GPL-ed functions */
prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0;
+ if (attr->signature) {
+ err = bpf_prog_verify_signature(prog, attr, uattr.is_kernel);
+ if (err)
+ goto free_prog;
+ }
+
prog->orig_prog = NULL;
prog->jited = 0;
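On the userspace side, the matching (hedged) step is to hand an externally produced PKCS#7 signature and a keyring id to an otherwise fully prepared BPF_PROG_LOAD attribute. The field names follow this series' uapi additions; how the signature blob is generated is not shown.

```c
/* signed_load.c — sketch: attach signature fields to a prepared prog-load attr */
#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

static int load_signed_prog(union bpf_attr *attr, const void *sig,
			    __u32 sig_len, int keyring_id)
{
	/* attr is assumed to already carry insns, license, prog_type, ... */
	attr->signature = (__u64)(unsigned long)sig;
	attr->signature_size = sig_len;
	attr->keyring_id = keyring_id;

	return syscall(__NR_bpf, BPF_PROG_LOAD, attr, sizeof(*attr));
}
```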
@@ -5161,6 +5244,9 @@ static int bpf_map_get_info_by_fd(struct file *file,
info_len = min_t(u32, sizeof(info), info_len);
memset(&info, 0, sizeof(info));
+ if (copy_from_user(&info, uinfo, info_len))
+ return -EFAULT;
+
info.type = map->map_type;
info.id = map->id;
info.key_size = map->key_size;
@@ -5185,6 +5271,25 @@ static int bpf_map_get_info_by_fd(struct file *file,
return err;
}
+ if (info.hash) {
+ char __user *uhash = u64_to_user_ptr(info.hash);
+
+ if (!map->ops->map_get_hash)
+ return -EINVAL;
+
+ if (info.hash_size != SHA256_DIGEST_SIZE)
+ return -EINVAL;
+
+ err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha);
+ if (err != 0)
+ return err;
+
+ if (copy_to_user(uhash, map->sha, SHA256_DIGEST_SIZE) != 0)
+ return -EFAULT;
+ } else if (info.hash_size) {
+ return -EINVAL;
+ }
+
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
return -EFAULT;
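A hedged sketch of querying a map's digest through the extended BPF_OBJ_GET_INFO_BY_FD path above; it assumes struct bpf_map_info carries the hash/hash_size fields added by this series:

```c
/* map_hash.c — fetch and print a map's SHA-256 digest */
#include <bpf/bpf.h>
#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>

static int get_map_sha256(int map_fd, unsigned char digest[32])
{
	struct bpf_map_info info;
	__u32 len = sizeof(info);
	int err;

	memset(&info, 0, sizeof(info));
	info.hash = (__u64)(unsigned long)digest;	/* kernel fills this buffer */
	info.hash_size = 32;				/* must be SHA256_DIGEST_SIZE */

	err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
	if (err)
		return err;
	for (int i = 0; i < 32; i++)
		printf("%02x", digest[i]);
	printf("\n");
	return 0;
}
```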
@@ -6008,7 +6113,7 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
switch (cmd) {
case BPF_MAP_CREATE:
- err = map_create(&attr, uattr.is_kernel);
+ err = map_create(&attr, uattr);
break;
case BPF_MAP_LOOKUP_ELEM:
err = map_lookup_elem(&attr);
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index fa353c5d550f..f8e70e9c3998 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -116,31 +116,55 @@ struct tnum tnum_xor(struct tnum a, struct tnum b)
return TNUM(v & ~mu, mu);
}
-/* Generate partial products by multiplying each bit in the multiplier (tnum a)
- * with the multiplicand (tnum b), and add the partial products after
- * appropriately bit-shifting them. Instead of directly performing tnum addition
- * on the generated partial products, equivalenty, decompose each partial
- * product into two tnums, consisting of the value-sum (acc_v) and the
- * mask-sum (acc_m) and then perform tnum addition on them. The following paper
- * explains the algorithm in more detail: https://arxiv.org/abs/2105.05398.
+/* Perform long multiplication, iterating through the bits in a using rshift:
+ * - if LSB(a) is a known 0, keep current accumulator
+ * - if LSB(a) is a known 1, add b to current accumulator
+ * - if LSB(a) is unknown, take a union of the above cases.
+ *
+ * For example:
+ *
+ * acc_0: acc_1:
+ *
+ * 11 * -> 11 * -> 11 * -> union(0011, 1001) == x0x1
+ * x1 01 11
+ * ------ ------ ------
+ * 11 11 11
+ * xx 00 11
+ * ------ ------ ------
+ * ???? 0011 1001
*/
struct tnum tnum_mul(struct tnum a, struct tnum b)
{
- u64 acc_v = a.value * b.value;
- struct tnum acc_m = TNUM(0, 0);
+ struct tnum acc = TNUM(0, 0);
while (a.value || a.mask) {
/* LSB of tnum a is a certain 1 */
if (a.value & 1)
- acc_m = tnum_add(acc_m, TNUM(0, b.mask));
+ acc = tnum_add(acc, b);
/* LSB of tnum a is uncertain */
- else if (a.mask & 1)
- acc_m = tnum_add(acc_m, TNUM(0, b.value | b.mask));
+ else if (a.mask & 1) {
+ /* acc = tnum_union(acc_0, acc_1), where acc_0 and
+ * acc_1 are partial accumulators for cases
+ * LSB(a) = certain 0 and LSB(a) = certain 1.
+ * acc_0 = acc + 0 * b = acc.
+ * acc_1 = acc + 1 * b = tnum_add(acc, b).
+ */
+
+ acc = tnum_union(acc, tnum_add(acc, b));
+ }
/* Note: no case for LSB is certain 0 */
a = tnum_rshift(a, 1);
b = tnum_lshift(b, 1);
}
- return tnum_add(TNUM(acc_v, 0), acc_m);
+ return acc;
+}
+
+bool tnum_overlap(struct tnum a, struct tnum b)
+{
+ u64 mu;
+
+ mu = ~a.mask & ~b.mask;
+ return (a.value & mu) == (b.value & mu);
}
/* Note that if a and b disagree - i.e. one has a 'known 1' where the other has
@@ -155,6 +179,19 @@ struct tnum tnum_intersect(struct tnum a, struct tnum b)
return TNUM(v & ~mu, mu);
}
+/* Returns a tnum with the uncertainty from both a and b, and in addition, new
+ * uncertainty at any position where a and b disagree. This represents a
+ * superset of the union of the concrete sets of both a and b. Despite the
+ * overapproximation, it is optimal.
+ */
+struct tnum tnum_union(struct tnum a, struct tnum b)
+{
+ u64 v = a.value & b.value;
+ u64 mu = (a.value ^ b.value) | a.mask | b.mask;
+
+ return TNUM(v & ~mu, mu);
+}
+
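A standalone toy model of the tnum changes above (value/mask pairs, a set mask bit meaning the bit is unknown): it mirrors the kernel formulas for add/shift, the new tnum_union/tnum_overlap, and the rewritten tnum_mul, then checks the worked example from the comment.

```c
/* tnum_toy.c — standalone model of the tristate-number operations */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct tnum { uint64_t value, mask; };
#define TNUM(v, m) ((struct tnum){ .value = (v), .mask = (m) })

static struct tnum tnum_lshift(struct tnum a, uint8_t s) { return TNUM(a.value << s, a.mask << s); }
static struct tnum tnum_rshift(struct tnum a, uint8_t s) { return TNUM(a.value >> s, a.mask >> s); }

static struct tnum tnum_add(struct tnum a, struct tnum b)
{
	uint64_t sm = a.mask + b.mask, sv = a.value + b.value;
	uint64_t sigma = sm + sv, chi = sigma ^ sv, mu = chi | a.mask | b.mask;

	return TNUM(sv & ~mu, mu);
}

static struct tnum tnum_union(struct tnum a, struct tnum b)
{
	uint64_t v = a.value & b.value;
	uint64_t mu = (a.value ^ b.value) | a.mask | b.mask;

	return TNUM(v & ~mu, mu);
}

static bool tnum_overlap(struct tnum a, struct tnum b)
{
	uint64_t mu = ~a.mask & ~b.mask;	/* bits known in both a and b */

	return (a.value & mu) == (b.value & mu);
}

static struct tnum tnum_mul(struct tnum a, struct tnum b)
{
	struct tnum acc = TNUM(0, 0);

	while (a.value || a.mask) {
		if (a.value & 1)
			acc = tnum_add(acc, b);
		else if (a.mask & 1)
			acc = tnum_union(acc, tnum_add(acc, b));
		a = tnum_rshift(a, 1);
		b = tnum_lshift(b, 1);
	}
	return acc;
}

/* true if the concrete value x is representable by tnum t */
static bool tnum_contains(struct tnum t, uint64_t x)
{
	return (x & ~t.mask) == t.value;
}

int main(void)
{
	struct tnum a = TNUM(0x1, 0x2);	/* binary x1: either 1 or 3 */
	struct tnum b = TNUM(0x3, 0x0);	/* binary 11: exactly 3 */
	struct tnum p = tnum_mul(a, b);

	printf("mul: value=%#llx mask=%#llx contains 3:%d 9:%d\n",
	       (unsigned long long)p.value, (unsigned long long)p.mask,
	       tnum_contains(p, 3), tnum_contains(p, 9));
	printf("overlap(a, b) = %d\n", tnum_overlap(a, b));	/* 1: both may be 3 */
	return 0;
}
```

The multiply reproduces the x0x1 result from the comment (value=0x1, mask=0xa), and both concrete products 3 and 9 are members of it.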
struct tnum tnum_cast(struct tnum a, u8 size)
{
a.value &= (1ULL << (size * 8)) - 1;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 0e364614c3a2..5949095e51c3 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -899,8 +899,7 @@ static __always_inline u64 notrace bpf_prog_start_time(void)
static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
__acquires(RCU)
{
- rcu_read_lock();
- migrate_disable();
+ rcu_read_lock_dont_migrate();
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
@@ -949,8 +948,7 @@ static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
update_prog_stats(prog, start);
this_cpu_dec(*(prog->active));
- migrate_enable();
- rcu_read_unlock();
+ rcu_read_unlock_migrate();
}
static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
@@ -960,8 +958,7 @@ static u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
/* Runtime stats are exported via actual BPF_LSM_CGROUP
* programs, not the shims.
*/
- rcu_read_lock();
- migrate_disable();
+ rcu_read_lock_dont_migrate();
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
@@ -974,8 +971,7 @@ static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
{
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
- migrate_enable();
- rcu_read_unlock();
+ rcu_read_unlock_migrate();
}
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
@@ -1033,8 +1029,7 @@ static u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx)
__acquires(RCU)
{
- rcu_read_lock();
- migrate_disable();
+ rcu_read_lock_dont_migrate();
run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
@@ -1048,8 +1043,7 @@ static void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start,
bpf_reset_run_ctx(run_ctx->saved_run_ctx);
update_prog_stats(prog, start);
- migrate_enable();
- rcu_read_unlock();
+ rcu_read_unlock_migrate();
}
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8340cecd1b35..73bba397672a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -674,6 +674,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
return BPF_DYNPTR_TYPE_SKB;
case DYNPTR_TYPE_XDP:
return BPF_DYNPTR_TYPE_XDP;
+ case DYNPTR_TYPE_SKB_META:
+ return BPF_DYNPTR_TYPE_SKB_META;
default:
return BPF_DYNPTR_TYPE_INVALID;
}
@@ -690,6 +692,8 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
return DYNPTR_TYPE_SKB;
case BPF_DYNPTR_TYPE_XDP:
return DYNPTR_TYPE_XDP;
+ case BPF_DYNPTR_TYPE_SKB_META:
+ return DYNPTR_TYPE_SKB_META;
default:
return 0;
}
@@ -783,8 +787,7 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
}
- state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
- state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+ bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
return 0;
}
@@ -801,29 +804,7 @@ static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_stat
__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
- /* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
- *
- * While we don't allow reading STACK_INVALID, it is still possible to
- * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
- * helpers or insns can do partial read of that part without failing,
- * but check_stack_range_initialized, check_stack_read_var_off, and
- * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
- * the slot conservatively. Hence we need to prevent those liveness
- * marking walks.
- *
- * This was not a problem before because STACK_INVALID is only set by
- * default (where the default reg state has its reg->parent as NULL), or
- * in clean_live_states after REG_LIVE_DONE (at which point
- * mark_reg_read won't walk reg->parent chain), but not randomly during
- * verifier state exploration (like we did above). Hence, for our case
- * parentage chain will still be live (i.e. reg->parent may be
- * non-NULL), while earlier reg->parent was NULL, so we need
- * REG_LIVE_WRITTEN to screen off read marker propagation when it is
- * done later on reads or by mark_dynptr_read as well to unnecessary
- * mark registers in verifier state.
- */
- state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
- state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+ bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
}
static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
@@ -932,9 +913,7 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
- /* Same reason as unmark_stack_slots_dynptr above */
- state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
- state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+ bpf_mark_stack_write(env, state->frameno, BIT(spi - 1) | BIT(spi));
return 0;
}
@@ -1052,7 +1031,6 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
else
st->type |= PTR_UNTRUSTED;
}
- st->live |= REG_LIVE_WRITTEN;
st->ref_obj_id = i == 0 ? id : 0;
st->iter.btf = btf;
st->iter.btf_id = btf_id;
@@ -1062,6 +1040,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
for (j = 0; j < BPF_REG_SIZE; j++)
slot->slot_type[j] = STACK_ITER;
+ bpf_mark_stack_write(env, state->frameno, BIT(spi - i));
mark_stack_slot_scratched(env, spi - i);
}
@@ -1087,12 +1066,10 @@ static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
__mark_reg_not_init(env, st);
- /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
- st->live |= REG_LIVE_WRITTEN;
-
for (j = 0; j < BPF_REG_SIZE; j++)
slot->slot_type[j] = STACK_INVALID;
+ bpf_mark_stack_write(env, state->frameno, BIT(spi - i));
mark_stack_slot_scratched(env, spi - i);
}
@@ -1182,9 +1159,9 @@ static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
slot = &state->stack[spi];
st = &slot->spilled_ptr;
+ bpf_mark_stack_write(env, reg->frameno, BIT(spi));
__mark_reg_known_zero(st);
st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
- st->live |= REG_LIVE_WRITTEN;
st->ref_obj_id = id;
st->irq.kfunc_class = kfunc_class;
@@ -1238,8 +1215,7 @@ static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_r
__mark_reg_not_init(env, st);
- /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
- st->live |= REG_LIVE_WRITTEN;
+ bpf_mark_stack_write(env, reg->frameno, BIT(spi));
for (i = 0; i < BPF_REG_SIZE; i++)
slot->slot_type[i] = STACK_INVALID;
@@ -1754,6 +1730,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
return err;
dst_state->speculative = src->speculative;
dst_state->in_sleepable = src->in_sleepable;
+ dst_state->cleaned = src->cleaned;
dst_state->curframe = src->curframe;
dst_state->branches = src->branches;
dst_state->parent = src->parent;
@@ -1946,9 +1923,24 @@ static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_stat
return 0;
visit = scc_visit_lookup(env, callchain);
if (!visit) {
- verifier_bug(env, "scc exit: no visit info for call chain %s",
- format_callchain(env, callchain));
- return -EFAULT;
+ /*
+ * If path traversal stops inside an SCC, a corresponding bpf_scc_visit
+ * must exist for non-speculative paths. For non-speculative paths,
+ * traversal stops when:
+ * a. Verification error is found, maybe_exit_scc() is not called.
+ * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member
+ * of any SCC.
+ * c. A checkpoint is reached and matched. Checkpoints are created by
+ * is_state_visited(), which calls maybe_enter_scc(), which allocates
+ * bpf_scc_visit instances for checkpoints within SCCs.
+ * (c) is the only case that can reach this point.
+ */
+ if (!st->speculative) {
+ verifier_bug(env, "scc exit: no visit info for call chain %s",
+ format_callchain(env, callchain));
+ return -EFAULT;
+ }
+ return 0;
}
if (visit->entry_state != st)
return 0;
@@ -2017,7 +2009,7 @@ static void free_backedges(struct bpf_scc_visit *visit)
for (backedge = visit->backedges; backedge; backedge = next) {
free_verifier_state(&backedge->state, false);
next = backedge->next;
- kvfree(backedge);
+ kfree(backedge);
}
visit->backedges = NULL;
}
@@ -2232,10 +2224,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
/* transfer reg's id which is unique for every map_lookup_elem
* as UID of the inner map.
*/
- if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
- reg->map_uid = reg->id;
- if (btf_record_has_field(map->inner_map_meta->record, BPF_WORKQUEUE))
+ if (btf_record_has_field(map->inner_map_meta->record,
+ BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
reg->map_uid = reg->id;
+ }
} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
reg->type = PTR_TO_XDP_SOCK;
} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
@@ -2274,7 +2266,8 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
{
return base_type(reg->type) == PTR_TO_MEM &&
- (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
+ (reg->type &
+ (DYNPTR_TYPE_SKB | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META));
}
/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
@@ -2873,8 +2866,6 @@ static void init_reg_state(struct bpf_verifier_env *env,
for (i = 0; i < MAX_BPF_REG; i++) {
mark_reg_not_init(env, regs, i);
- regs[i].live = REG_LIVE_NONE;
- regs[i].parent = NULL;
regs[i].subreg_def = DEF_NOT_SUBREG;
}
@@ -2958,7 +2949,7 @@ static int cmp_subprogs(const void *a, const void *b)
}
/* Find subprogram that contains instruction at 'off' */
-static struct bpf_subprog_info *find_containing_subprog(struct bpf_verifier_env *env, int off)
+struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off)
{
struct bpf_subprog_info *vals = env->subprog_info;
int l, r, m;
@@ -2983,7 +2974,7 @@ static int find_subprog(struct bpf_verifier_env *env, int off)
{
struct bpf_subprog_info *p;
- p = find_containing_subprog(env, off);
+ p = bpf_find_containing_subprog(env, off);
if (!p || p->start != off)
return -ENOENT;
return p - env->subprog_info;
@@ -3494,15 +3485,6 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
return 0;
}
-static int jmp_offset(struct bpf_insn *insn)
-{
- u8 code = insn->code;
-
- if (code == (BPF_JMP32 | BPF_JA))
- return insn->imm;
- return insn->off;
-}
-
static int check_subprogs(struct bpf_verifier_env *env)
{
int i, subprog_start, subprog_end, off, cur_subprog = 0;
@@ -3529,7 +3511,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
goto next;
if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
goto next;
- off = i + jmp_offset(&insn[i]) + 1;
+ off = i + bpf_jmp_offset(&insn[i]) + 1;
if (off < subprog_start || off >= subprog_end) {
verbose(env, "jump out of range from insn %d to %d\n", i, off);
return -EINVAL;
@@ -3555,69 +3537,15 @@ next:
return 0;
}
-/* Parentage chain of this register (or stack slot) should take care of all
- * issues like callee-saved registers, stack slot allocation time, etc.
- */
-static int mark_reg_read(struct bpf_verifier_env *env,
- const struct bpf_reg_state *state,
- struct bpf_reg_state *parent, u8 flag)
-{
- bool writes = parent == state->parent; /* Observe write marks */
- int cnt = 0;
-
- while (parent) {
- /* if read wasn't screened by an earlier write ... */
- if (writes && state->live & REG_LIVE_WRITTEN)
- break;
- if (verifier_bug_if(parent->live & REG_LIVE_DONE, env,
- "type %s var_off %lld off %d",
- reg_type_str(env, parent->type),
- parent->var_off.value, parent->off))
- return -EFAULT;
- /* The first condition is more likely to be true than the
- * second, checked it first.
- */
- if ((parent->live & REG_LIVE_READ) == flag ||
- parent->live & REG_LIVE_READ64)
- /* The parentage chain never changes and
- * this parent was already marked as LIVE_READ.
- * There is no need to keep walking the chain again and
- * keep re-marking all parents as LIVE_READ.
- * This case happens when the same register is read
- * multiple times without writes into it in-between.
- * Also, if parent has the stronger REG_LIVE_READ64 set,
- * then no need to set the weak REG_LIVE_READ32.
- */
- break;
- /* ... then we depend on parent's value */
- parent->live |= flag;
- /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
- if (flag == REG_LIVE_READ64)
- parent->live &= ~REG_LIVE_READ32;
- state = parent;
- parent = state->parent;
- writes = true;
- cnt++;
- }
-
- if (env->longest_mark_read_walk < cnt)
- env->longest_mark_read_walk = cnt;
- return 0;
-}
-
static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
int spi, int nr_slots)
{
- struct bpf_func_state *state = func(env, reg);
int err, i;
for (i = 0; i < nr_slots; i++) {
- struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;
-
- err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64);
+ err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi - i));
if (err)
return err;
-
mark_stack_slot_scratched(env, spi - i);
}
return 0;
@@ -3663,7 +3591,7 @@ static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state
* code only. It returns TRUE if the source or destination register operates
* on 64-bit, otherwise return FALSE.
*/
-static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
+static bool is_reg64(struct bpf_insn *insn,
u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
{
u8 code, class, op;
@@ -3774,14 +3702,14 @@ static int insn_def_regno(const struct bpf_insn *insn)
}
/* Return TRUE if INSN has defined any 32-bit value explicitly. */
-static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
+static bool insn_has_def32(struct bpf_insn *insn)
{
int dst_reg = insn_def_regno(insn);
if (dst_reg == -1)
return false;
- return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
+ return !is_reg64(insn, dst_reg, NULL, DST_OP);
}
static void mark_insn_zext(struct bpf_verifier_env *env,
@@ -3812,7 +3740,7 @@ static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *r
mark_reg_scratched(env, regno);
reg = &regs[regno];
- rw64 = is_reg64(env, insn, regno, reg, t);
+ rw64 = is_reg64(insn, regno, reg, t);
if (t == SRC_OP) {
/* check whether register used as source operand can be read */
if (reg->type == NOT_INIT) {
@@ -3826,15 +3754,13 @@ static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *r
if (rw64)
mark_insn_zext(env, reg);
- return mark_reg_read(env, reg, reg->parent,
- rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
+ return 0;
} else {
/* check whether register used as dest operand can be written to */
if (regno == BPF_REG_FP) {
verbose(env, "frame pointer is read only\n");
return -EACCES;
}
- reg->live |= REG_LIVE_WRITTEN;
reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
if (t == DST_OP)
mark_reg_unknown(env, regs, regno);
@@ -4195,7 +4121,7 @@ static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
}
}
/* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
-static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
+void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
{
DECLARE_BITMAP(mask, 64);
bool first = true;
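The renamed bpf_fmt_stack_mask() turns a slot bitmask into frame offsets; a quick standalone equivalent, assuming the same bit i -> -(i + 1) * 8 mapping:

```c
/* stack_mask.c — print "-8,-24,-40" for mask 0x15 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static void print_stack_mask(uint64_t mask)
{
	bool first = true;

	for (int i = 0; i < 64; i++) {
		if (!(mask & (1ull << i)))
			continue;
		printf("%s-%d", first ? "" : ",", (i + 1) * 8);
		first = false;
	}
	printf("\n");
}

int main(void)
{
	print_stack_mask(0x15);	/* bits 0, 2, 4 -> -8,-24,-40 */
	return 0;
}
```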
@@ -4250,8 +4176,6 @@ static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_histo
}
}
-static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
-
/* For given verifier state backtrack_insn() is called from the last insn to
* the first insn. Its purpose is to compute a bitmask of registers and
* stack slots that needs precision in the parent verifier state.
@@ -4278,7 +4202,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
verbose(env, "mark_precise: frame%d: regs=%s ",
bt->frame, env->tmp_str_buf);
- fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
+ bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
verbose(env, "stack=%s before ", env->tmp_str_buf);
verbose(env, "%d: ", idx);
verbose_insn(env, insn);
@@ -4479,7 +4403,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* backtracking, as these registers are set by the function
* invoking callback.
*/
- if (subseq_idx >= 0 && calls_callback(env, subseq_idx))
+ if (subseq_idx >= 0 && bpf_calls_callback(env, subseq_idx))
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
bt_clear_reg(bt, i);
if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
@@ -4918,7 +4842,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env,
bt_frame_reg_mask(bt, fr));
verbose(env, "mark_precise: frame%d: parent state regs=%s ",
fr, env->tmp_str_buf);
- fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
+ bpf_fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
bt_frame_stack_mask(bt, fr));
verbose(env, "stack=%s: ", env->tmp_str_buf);
print_verifier_state(env, st, fr, true);
@@ -5041,12 +4965,7 @@ static void assign_scalar_id_before_mov(struct bpf_verifier_env *env,
/* Copy src state preserving dst->parent and dst->live fields */
static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
{
- struct bpf_reg_state *parent = dst->parent;
- enum bpf_reg_liveness live = dst->live;
-
*dst = *src;
- dst->parent = parent;
- dst->live = live;
}
static void save_register_state(struct bpf_verifier_env *env,
@@ -5057,8 +4976,6 @@ static void save_register_state(struct bpf_verifier_env *env,
int i;
copy_register_state(&state->stack[spi].spilled_ptr, reg);
- if (size == BPF_REG_SIZE)
- state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
state->stack[spi].slot_type[i - 1] = STACK_SPILL;
@@ -5152,6 +5069,18 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
if (err)
return err;
+ if (!(off % BPF_REG_SIZE) && size == BPF_REG_SIZE) {
+ /* only mark the slot as written if all 8 bytes were written;
+ * otherwise read propagation may incorrectly stop too soon
+ * when stack slots are partially written.
+ * This heuristic means that read propagation will be
+ * conservative, since it will add reg_live_read marks
+ * to stack slots all the way to the first state when a program
+ * writes+reads less than 8 bytes.
+ */
+ bpf_mark_stack_write(env, state->frameno, BIT(spi));
+ }
+
check_fastcall_stack_contract(env, state, insn_idx, off);
mark_stack_slot_scratched(env, spi);
if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
@@ -5195,17 +5124,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
for (i = 0; i < BPF_REG_SIZE; i++)
scrub_spilled_slot(&state->stack[spi].slot_type[i]);
- /* only mark the slot as written if all 8 bytes were written
- * otherwise read propagation may incorrectly stop too soon
- * when stack slots are partially written.
- * This heuristic means that read propagation will be
- * conservative, since it will add reg_live_read marks
- * to stack slots all the way to first state when programs
- * writes+reads less than 8 bytes
- */
- if (size == BPF_REG_SIZE)
- state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
-
/* when we zero initialize stack slots mark them as such */
if ((reg && register_is_null(reg)) ||
(!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
@@ -5398,7 +5316,6 @@ static void mark_reg_stack_read(struct bpf_verifier_env *env,
/* have read misc data from the stack */
mark_reg_unknown(env, state->regs, dst_regno);
}
- state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
}
/* Read the stack at 'off' and put the results into the register indicated by
@@ -5421,12 +5338,16 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
struct bpf_reg_state *reg;
u8 *stype, type;
int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);
+ int err;
stype = reg_state->stack[spi].slot_type;
reg = &reg_state->stack[spi].spilled_ptr;
mark_stack_slot_scratched(env, spi);
check_fastcall_stack_contract(env, state, env->insn_idx, off);
+ err = bpf_mark_stack_read(env, reg_state->frameno, env->insn_idx, BIT(spi));
+ if (err)
+ return err;
if (is_spilled_reg(&reg_state->stack[spi])) {
u8 spill_size = 1;
@@ -5441,7 +5362,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
return -EACCES;
}
- mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
if (dst_regno < 0)
return 0;
@@ -5495,7 +5415,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
insn_flags = 0; /* not restoring original register state */
}
}
- state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
} else if (dst_regno >= 0) {
/* restore register state from stack */
copy_register_state(&state->regs[dst_regno], reg);
@@ -5503,7 +5422,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
* has its liveness marks cleared by is_state_visited()
* which resets stack/reg liveness for state transitions
*/
- state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
/* If dst_regno==-1, the caller is asking us whether
* it is acceptable to use this value as a SCALAR_VALUE
@@ -5515,7 +5433,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
off);
return -EACCES;
}
- mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
} else {
for (i = 0; i < size; i++) {
type = stype[(slot - i) % BPF_REG_SIZE];
@@ -5529,7 +5446,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
off, i, size);
return -EACCES;
}
- mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
if (dst_regno >= 0)
mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
insn_flags = 0; /* we are not restoring spilled register */
@@ -8157,10 +8073,10 @@ mark:
/* reading any byte out of 8-byte 'spill_slot' will cause
* the whole slot to be marked as 'read'
*/
- mark_reg_read(env, &state->stack[spi].spilled_ptr,
- state->stack[spi].spilled_ptr.parent,
- REG_LIVE_READ64);
- /* We do not set REG_LIVE_WRITTEN for stack slot, as we can not
+ err = bpf_mark_stack_read(env, reg->frameno, env->insn_idx, BIT(spi));
+ if (err)
+ return err;
+ /* We do not call bpf_mark_stack_write(), as we can not
* be sure that whether stack slot is written to or not. Hence,
* we must still conservatively propagate reads upwards even if
* helper may write to the entire memory range.
@@ -8515,34 +8431,62 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, int flags)
return 0;
}
-static int process_timer_func(struct bpf_verifier_env *env, int regno,
- struct bpf_call_arg_meta *meta)
+/* Check if @regno is a pointer to a specific field in a map value */
+static int check_map_field_pointer(struct bpf_verifier_env *env, u32 regno,
+ enum btf_field_type field_type)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
bool is_const = tnum_is_const(reg->var_off);
struct bpf_map *map = reg->map_ptr;
u64 val = reg->var_off.value;
+ const char *struct_name = btf_field_type_name(field_type);
+ int field_off = -1;
if (!is_const) {
verbose(env,
- "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
- regno);
+ "R%d doesn't have constant offset. %s has to be at the constant offset\n",
+ regno, struct_name);
return -EINVAL;
}
if (!map->btf) {
- verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
- map->name);
+ verbose(env, "map '%s' has to have BTF in order to use %s\n", map->name,
+ struct_name);
return -EINVAL;
}
- if (!btf_record_has_field(map->record, BPF_TIMER)) {
- verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
+ if (!btf_record_has_field(map->record, field_type)) {
+ verbose(env, "map '%s' has no valid %s\n", map->name, struct_name);
+ return -EINVAL;
+ }
+ switch (field_type) {
+ case BPF_TIMER:
+ field_off = map->record->timer_off;
+ break;
+ case BPF_TASK_WORK:
+ field_off = map->record->task_work_off;
+ break;
+ default:
+ verifier_bug(env, "unsupported BTF field type: %s\n", struct_name);
return -EINVAL;
}
- if (map->record->timer_off != val + reg->off) {
- verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
- val + reg->off, map->record->timer_off);
+ if (field_off != val + reg->off) {
+ verbose(env, "off %lld doesn't point to 'struct %s' that is at %d\n",
+ val + reg->off, struct_name, field_off);
return -EINVAL;
}
+ return 0;
+}
+
+static int process_timer_func(struct bpf_verifier_env *env, int regno,
+ struct bpf_call_arg_meta *meta)
+{
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_map *map = reg->map_ptr;
+ int err;
+
+ err = check_map_field_pointer(env, regno, BPF_TIMER);
+ if (err)
+ return err;
+
if (meta->map_ptr) {
verifier_bug(env, "Two map pointers in a timer helper");
return -EFAULT;
@@ -8573,6 +8517,26 @@ static int process_wq_func(struct bpf_verifier_env *env, int regno,
return 0;
}
+static int process_task_work_func(struct bpf_verifier_env *env, int regno,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ struct bpf_map *map = reg->map_ptr;
+ int err;
+
+ err = check_map_field_pointer(env, regno, BPF_TASK_WORK);
+ if (err)
+ return err;
+
+ if (meta->map.ptr) {
+ verifier_bug(env, "Two map pointers in a bpf_task_work helper");
+ return -EFAULT;
+ }
+ meta->map.uid = reg->map_uid;
+ meta->map.ptr = map;
+ return 0;
+}
+
static int process_kptr_func(struct bpf_verifier_env *env, int regno,
struct bpf_call_arg_meta *meta)
{
@@ -10402,6 +10366,8 @@ typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
struct bpf_func_state *callee,
int insn_idx);
+static bool is_task_work_add_kfunc(u32 func_id);
+
static int set_callee_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee, int insn_idx);
@@ -10620,7 +10586,8 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
env->subprog_info[subprog].is_async_cb = true;
async_cb = push_async_cb(env, env->subprog_info[subprog].start,
insn_idx, subprog,
- is_bpf_wq_set_callback_impl_kfunc(insn->imm));
+ is_bpf_wq_set_callback_impl_kfunc(insn->imm) ||
+ is_task_work_add_kfunc(insn->imm));
if (!async_cb)
return -EFAULT;
callee = async_cb->frame[0];
@@ -10721,6 +10688,8 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* and go analyze first insn of the callee */
*insn_idx = env->subprog_info[subprog].start - 1;
+ bpf_reset_live_stack_callchain(env);
+
if (env->log.level & BPF_LOG_LEVEL) {
verbose(env, "caller:\n");
print_verifier_state(env, state, caller->frameno, true);
@@ -10846,7 +10815,7 @@ static int set_timer_callback_state(struct bpf_verifier_env *env,
__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_async_callback_fn = true;
- callee->callback_ret_range = retval_range(0, 1);
+ callee->callback_ret_range = retval_range(0, 0);
return 0;
}
@@ -10933,6 +10902,36 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
return 0;
}
+static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee,
+ int insn_idx)
+{
+ struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr;
+
+ /*
+ * callback_fn(struct bpf_map *map, void *key, void *value);
+ */
+ callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
+ __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
+ callee->regs[BPF_REG_1].map_ptr = map_ptr;
+
+ callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
+ __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
+ callee->regs[BPF_REG_2].map_ptr = map_ptr;
+
+ callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
+ __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
+ callee->regs[BPF_REG_3].map_ptr = map_ptr;
+
+ /* unused */
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+ callee->in_async_callback_fn = true;
+ callee->callback_ret_range = retval_range(S32_MIN, S32_MAX);
+ return 0;
+}
+
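A hedged BPF-side sketch of the task_work scheduling path wired up above: a map value embedding struct bpf_task_work and a callback with the callback_fn(map, key, value) signature documented in the comment. The kfunc prototype and the perf_event attach point below are assumptions and may differ from the final interface.

```c
/* task_work_sketch.bpf.c — illustrative only */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

struct elem {
	struct bpf_task_work tw;
	__u64 payload;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, struct elem);
} tw_map SEC(".maps");

/* assumed prototype: task, bpf_task_work field, owning map, callback, aux */
extern int bpf_task_work_schedule_resume(struct task_struct *task,
					 struct bpf_task_work *tw,
					 void *map,
					 int (*callback)(struct bpf_map *map, void *key, void *value),
					 void *aux) __ksym __weak;

static int tw_callback(struct bpf_map *map, void *key, void *value)
{
	struct elem *e = value;

	e->payload++;	/* runs later, in the target task's context */
	return 0;
}

SEC("perf_event")
int schedule_task_work(void *ctx)
{
	struct task_struct *task = bpf_get_current_task_btf();
	__u32 key = 0;
	struct elem *e;

	e = bpf_map_lookup_elem(&tw_map, &key);
	if (!e)
		return 0;
	bpf_task_work_schedule_resume(task, &e->tw, &tw_map, tw_callback, NULL);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
```

As with bpf_timer and bpf_wq, the verifier ties the bpf_task_work field to the map that owns it (hence the map_uid check above), which is why the owning map is passed alongside the field pointer.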
static bool is_rbtree_lock_required_kfunc(u32 btf_id);
/* Are we currently verifying the callback for a rbtree helper that must
@@ -10996,8 +10995,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
}
/* we are going to rely on register's precise value */
- err = mark_reg_read(env, r0, r0->parent, REG_LIVE_READ64);
- err = err ?: mark_chain_precision(env, BPF_REG_0);
+ err = mark_chain_precision(env, BPF_REG_0);
if (err)
return err;
@@ -11007,7 +11005,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
"At callback return", "R0");
return -EINVAL;
}
- if (!calls_callback(env, callee->callsite)) {
+ if (!bpf_calls_callback(env, callee->callsite)) {
verifier_bug(env, "in callback at %d, callsite %d !calls_callback",
*insn_idx, callee->callsite);
return -EFAULT;
@@ -11645,7 +11643,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
return -EFAULT;
- if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
+ if (dynptr_type == BPF_DYNPTR_TYPE_SKB ||
+ dynptr_type == BPF_DYNPTR_TYPE_SKB_META)
/* this will trigger clear_all_pkt_pointers(), which will
* invalidate all dynptr slices associated with the skb
*/
@@ -11900,17 +11899,11 @@ static void __mark_btf_func_reg_size(struct bpf_verifier_env *env, struct bpf_re
if (regno == BPF_REG_0) {
/* Function return value */
- reg->live |= REG_LIVE_WRITTEN;
reg->subreg_def = reg_size == sizeof(u64) ?
DEF_NOT_SUBREG : env->insn_idx + 1;
- } else {
+ } else if (reg_size == sizeof(u64)) {
/* Function argument */
- if (reg_size == sizeof(u64)) {
- mark_insn_zext(env, reg);
- mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
- } else {
- mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
- }
+ mark_insn_zext(env, reg);
}
}
@@ -12063,6 +12056,7 @@ enum {
KF_ARG_RB_NODE_ID,
KF_ARG_WORKQUEUE_ID,
KF_ARG_RES_SPIN_LOCK_ID,
+ KF_ARG_TASK_WORK_ID,
};
BTF_ID_LIST(kf_arg_btf_ids)
@@ -12073,6 +12067,7 @@ BTF_ID(struct, bpf_rb_root)
BTF_ID(struct, bpf_rb_node)
BTF_ID(struct, bpf_wq)
BTF_ID(struct, bpf_res_spin_lock)
+BTF_ID(struct, bpf_task_work)
static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
const struct btf_param *arg, int type)
@@ -12121,6 +12116,11 @@ static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg)
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID);
}
+static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg)
+{
+ return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID);
+}
+
static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg)
{
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
@@ -12208,6 +12208,7 @@ enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_WORKQUEUE,
KF_ARG_PTR_TO_IRQ_FLAG,
KF_ARG_PTR_TO_RES_SPIN_LOCK,
+ KF_ARG_PTR_TO_TASK_WORK,
};
enum special_kfunc_type {
@@ -12232,6 +12233,8 @@ enum special_kfunc_type {
KF_bpf_rbtree_right,
KF_bpf_dynptr_from_skb,
KF_bpf_dynptr_from_xdp,
+ KF_bpf_dynptr_from_skb_meta,
+ KF_bpf_xdp_pull_data,
KF_bpf_dynptr_slice,
KF_bpf_dynptr_slice_rdwr,
KF_bpf_dynptr_clone,
@@ -12256,6 +12259,8 @@ enum special_kfunc_type {
KF_bpf_res_spin_lock_irqsave,
KF_bpf_res_spin_unlock_irqrestore,
KF___bpf_trap,
+ KF_bpf_task_work_schedule_signal,
+ KF_bpf_task_work_schedule_resume,
};
BTF_ID_LIST(special_kfunc_list)
@@ -12281,9 +12286,13 @@ BTF_ID(func, bpf_rbtree_right)
#ifdef CONFIG_NET
BTF_ID(func, bpf_dynptr_from_skb)
BTF_ID(func, bpf_dynptr_from_xdp)
+BTF_ID(func, bpf_dynptr_from_skb_meta)
+BTF_ID(func, bpf_xdp_pull_data)
#else
BTF_ID_UNUSED
BTF_ID_UNUSED
+BTF_ID_UNUSED
+BTF_ID_UNUSED
#endif
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
@@ -12322,6 +12331,14 @@ BTF_ID(func, bpf_res_spin_unlock)
BTF_ID(func, bpf_res_spin_lock_irqsave)
BTF_ID(func, bpf_res_spin_unlock_irqrestore)
BTF_ID(func, __bpf_trap)
+BTF_ID(func, bpf_task_work_schedule_signal)
+BTF_ID(func, bpf_task_work_schedule_resume)
+
+static bool is_task_work_add_kfunc(u32 func_id)
+{
+ return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
+ func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume];
+}
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -12353,6 +12370,11 @@ static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable];
}
+static bool is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data];
+}
+
static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
struct bpf_kfunc_call_arg_meta *meta,
@@ -12412,6 +12434,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_wq(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_WORKQUEUE;
+ if (is_kfunc_arg_task_work(meta->btf, &args[argno]))
+ return KF_ARG_PTR_TO_TASK_WORK;
+
if (is_kfunc_arg_irq_flag(meta->btf, &args[argno]))
return KF_ARG_PTR_TO_IRQ_FLAG;
@@ -12755,7 +12780,8 @@ static bool is_sync_callback_calling_kfunc(u32 btf_id)
static bool is_async_callback_calling_kfunc(u32 btf_id)
{
- return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
+ return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl] ||
+ is_task_work_add_kfunc(btf_id);
}
static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
@@ -13136,7 +13162,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
verbose(env, "pointer in R%d isn't map pointer\n", regno);
return -EINVAL;
}
- if (meta->map.ptr && reg->map_ptr->record->wq_off >= 0) {
+ if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 ||
+ reg->map_ptr->record->task_work_off >= 0)) {
/* Use map_uid (which is unique id of inner map) to reject:
* inner_map1 = bpf_map_lookup_elem(outer_map, key1)
* inner_map2 = bpf_map_lookup_elem(outer_map, key2)
@@ -13151,6 +13178,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
*/
if (meta->map.ptr != reg->map_ptr ||
meta->map.uid != reg->map_uid) {
+ if (reg->map_ptr->record->task_work_off >= 0) {
+ verbose(env,
+ "bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n",
+ meta->map.uid, reg->map_uid);
+ return -EINVAL;
+ }
verbose(env,
"workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
meta->map.uid, reg->map_uid);
@@ -13189,6 +13222,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
case KF_ARG_PTR_TO_CONST_STR:
case KF_ARG_PTR_TO_WORKQUEUE:
+ case KF_ARG_PTR_TO_TASK_WORK:
case KF_ARG_PTR_TO_IRQ_FLAG:
case KF_ARG_PTR_TO_RES_SPIN_LOCK:
break;
@@ -13257,6 +13291,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
dynptr_arg_type |= DYNPTR_TYPE_SKB;
} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
dynptr_arg_type |= DYNPTR_TYPE_XDP;
+ } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) {
+ dynptr_arg_type |= DYNPTR_TYPE_SKB_META;
} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
(dynptr_arg_type & MEM_UNINIT)) {
enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
@@ -13480,6 +13516,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (ret < 0)
return ret;
break;
+ case KF_ARG_PTR_TO_TASK_WORK:
+ if (reg->type != PTR_TO_MAP_VALUE) {
+ verbose(env, "arg#%d doesn't point to a map value\n", i);
+ return -EINVAL;
+ }
+ ret = process_task_work_func(env, regno, meta);
+ if (ret < 0)
+ return ret;
+ break;
case KF_ARG_PTR_TO_IRQ_FLAG:
if (reg->type != PTR_TO_STACK) {
verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i);
@@ -13846,6 +13891,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
+ if (is_task_work_add_kfunc(meta.func_id)) {
+ err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+ set_task_work_schedule_callback_state);
+ if (err) {
+ verbose(env, "kfunc %s#%d failed callback verification\n",
+ func_name, meta.func_id);
+ return err;
+ }
+ }
+
rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
@@ -13905,6 +13960,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EACCES;
}
+ if (is_kfunc_rcu_protected(&meta) && !in_rcu_cs(env)) {
+ verbose(env, "kernel func %s requires RCU critical section protection\n", func_name);
+ return -EACCES;
+ }
+
/* In case of release function, we get register number of refcounted
* PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
*/
@@ -14018,6 +14078,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* Ensures we don't access the memory after a release_reference() */
if (meta.ref_obj_id)
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+
+ if (is_kfunc_rcu_protected(&meta))
+ regs[BPF_REG_0].type |= MEM_RCU;
} else {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].btf = desc_btf;
@@ -14026,6 +14089,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache])
regs[BPF_REG_0].type |= PTR_UNTRUSTED;
+ else if (is_kfunc_rcu_protected(&meta))
+ regs[BPF_REG_0].type |= MEM_RCU;
if (is_iter_next_kfunc(&meta)) {
struct bpf_reg_state *cur_iter;
@@ -14070,6 +14135,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
+ if (is_kfunc_pkt_changing(&meta))
+ clear_all_pkt_pointers(env);
+
nargs = btf_type_vlen(meta.func_proto);
args = (const struct btf_param *)(meta.func_proto + 1);
for (i = 0; i < nargs; i++) {
@@ -15649,7 +15717,6 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
*/
assign_scalar_id_before_mov(env, src_reg);
copy_register_state(dst_reg, src_reg);
- dst_reg->live |= REG_LIVE_WRITTEN;
dst_reg->subreg_def = DEF_NOT_SUBREG;
} else {
/* case: R1 = (s8, s16 s32)R2 */
@@ -15668,7 +15735,6 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (!no_sext)
dst_reg->id = 0;
coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
- dst_reg->live |= REG_LIVE_WRITTEN;
dst_reg->subreg_def = DEF_NOT_SUBREG;
} else {
mark_reg_unknown(env, regs, insn->dst_reg);
@@ -15694,7 +15760,6 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
*/
if (!is_src_reg_u32)
dst_reg->id = 0;
- dst_reg->live |= REG_LIVE_WRITTEN;
dst_reg->subreg_def = env->insn_idx + 1;
} else {
/* case: W1 = (s8, s16)W2 */
@@ -15705,7 +15770,6 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
copy_register_state(dst_reg, src_reg);
if (!no_sext)
dst_reg->id = 0;
- dst_reg->live |= REG_LIVE_WRITTEN;
dst_reg->subreg_def = env->insn_idx + 1;
coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
}
@@ -15890,6 +15954,8 @@ static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_sta
*/
if (tnum_is_const(t1) && tnum_is_const(t2))
return t1.value == t2.value;
+ if (!tnum_overlap(t1, t2))
+ return 0;
/* non-overlapping ranges */
if (umin1 > umax2 || umax1 < umin2)
return 0;
@@ -15914,6 +15980,8 @@ static int is_scalar_branch_taken(struct bpf_reg_state *reg1, struct bpf_reg_sta
*/
if (tnum_is_const(t1) && tnum_is_const(t2))
return t1.value != t2.value;
+ if (!tnum_overlap(t1, t2))
+ return 1;
/* non-overlapping ranges */
if (umin1 > umax2 || umax1 < umin2)
return 1;
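/*
 * Illustrative sketch, not part of the patch: the overlap test relied on by
 * the two hunks above, assuming the usual tnum representation where 'mask'
 * marks unknown bits and 'value' holds the known ones (the real helper is
 * defined elsewhere). Two tnums can describe a common concrete value iff
 * they agree on every bit that is known in both.
 */
static bool tnum_overlap_sketch(struct tnum a, struct tnum b)
{
	u64 known_in_both = ~a.mask & ~b.mask;

	return (a.value & known_in_both) == (b.value & known_in_both);
}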
@@ -17121,9 +17189,8 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
}
if (frame->in_async_callback_fn) {
- /* enforce return zero from async callbacks like timer */
exit_ctx = "At async callback return";
- range = retval_range(0, 0);
+ range = frame->callback_ret_range;
goto enforce_retval;
}
@@ -17262,7 +17329,7 @@ static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off)
{
struct bpf_subprog_info *subprog;
- subprog = find_containing_subprog(env, off);
+ subprog = bpf_find_containing_subprog(env, off);
subprog->changes_pkt_data = true;
}
@@ -17270,7 +17337,7 @@ static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off)
{
struct bpf_subprog_info *subprog;
- subprog = find_containing_subprog(env, off);
+ subprog = bpf_find_containing_subprog(env, off);
subprog->might_sleep = true;
}
@@ -17284,8 +17351,8 @@ static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w)
{
struct bpf_subprog_info *caller, *callee;
- caller = find_containing_subprog(env, t);
- callee = find_containing_subprog(env, w);
+ caller = bpf_find_containing_subprog(env, t);
+ callee = bpf_find_containing_subprog(env, w);
caller->changes_pkt_data |= callee->changes_pkt_data;
caller->might_sleep |= callee->might_sleep;
}
@@ -17355,7 +17422,7 @@ static void mark_calls_callback(struct bpf_verifier_env *env, int idx)
env->insn_aux_data[idx].calls_callback = true;
}
-static bool calls_callback(struct bpf_verifier_env *env, int insn_idx)
+bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx)
{
return env->insn_aux_data[insn_idx].calls_callback;
}
@@ -17787,6 +17854,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
*/
if (ret == 0 && is_kfunc_sleepable(&meta))
mark_subprog_might_sleep(env, t);
+ if (ret == 0 && is_kfunc_pkt_changing(&meta))
+ mark_subprog_changes_pkt_data(env, t);
}
return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
@@ -17829,7 +17898,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
static int check_cfg(struct bpf_verifier_env *env)
{
int insn_cnt = env->prog->len;
- int *insn_stack, *insn_state, *insn_postorder;
+ int *insn_stack, *insn_state;
int ex_insn_beg, i, ret = 0;
insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
@@ -17842,14 +17911,6 @@ static int check_cfg(struct bpf_verifier_env *env)
return -ENOMEM;
}
- insn_postorder = env->cfg.insn_postorder =
- kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
- if (!insn_postorder) {
- kvfree(insn_state);
- kvfree(insn_stack);
- return -ENOMEM;
- }
-
ex_insn_beg = env->exception_callback_subprog
? env->subprog_info[env->exception_callback_subprog].start
: 0;
@@ -17867,7 +17928,6 @@ walk_cfg:
case DONE_EXPLORING:
insn_state[t] = EXPLORED;
env->cfg.cur_stack--;
- insn_postorder[env->cfg.cur_postorder++] = t;
break;
case KEEP_EXPLORING:
break;
@@ -17921,6 +17981,56 @@ err_free:
return ret;
}
+/*
+ * For each subprogram 'i', fill the env->cfg.insn_postorder array sub-range
+ * [env->subprog_info[i].postorder_start, env->subprog_info[i+1].postorder_start)
+ * with the indices of subprogram 'i' instructions in postorder.
+ */
+static int compute_postorder(struct bpf_verifier_env *env)
+{
+ u32 cur_postorder, i, top, stack_sz, s, succ_cnt, succ[2];
+ int *stack = NULL, *postorder = NULL, *state = NULL;
+
+ postorder = kvcalloc(env->prog->len, sizeof(int), GFP_KERNEL_ACCOUNT);
+ state = kvcalloc(env->prog->len, sizeof(int), GFP_KERNEL_ACCOUNT);
+ stack = kvcalloc(env->prog->len, sizeof(int), GFP_KERNEL_ACCOUNT);
+ if (!postorder || !state || !stack) {
+ kvfree(postorder);
+ kvfree(state);
+ kvfree(stack);
+ return -ENOMEM;
+ }
+ cur_postorder = 0;
+ for (i = 0; i < env->subprog_cnt; i++) {
+ env->subprog_info[i].postorder_start = cur_postorder;
+ stack[0] = env->subprog_info[i].start;
+ stack_sz = 1;
+ do {
+ top = stack[stack_sz - 1];
+ state[top] |= DISCOVERED;
+ if (state[top] & EXPLORED) {
+ postorder[cur_postorder++] = top;
+ stack_sz--;
+ continue;
+ }
+ succ_cnt = bpf_insn_successors(env->prog, top, succ);
+ for (s = 0; s < succ_cnt; ++s) {
+ if (!state[succ[s]]) {
+ stack[stack_sz++] = succ[s];
+ state[succ[s]] |= DISCOVERED;
+ }
+ }
+ state[top] |= EXPLORED;
+ } while (stack_sz);
+ }
+ env->subprog_info[i].postorder_start = cur_postorder;
+ env->cfg.insn_postorder = postorder;
+ env->cfg.cur_postorder = cur_postorder;
+ kvfree(stack);
+ kvfree(state);
+ return 0;
+}
+
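/*
 * Illustrative sketch, not part of the patch: how a later pass can consume
 * the layout built by compute_postorder(), using subprog_info[subprog + 1]
 * as the sentinel filled in after the loop above.
 */
static void for_each_insn_in_postorder(struct bpf_verifier_env *env, u32 subprog)
{
	u32 i, insn_idx;

	for (i = env->subprog_info[subprog].postorder_start;
	     i < env->subprog_info[subprog + 1].postorder_start; i++) {
		insn_idx = env->cfg.insn_postorder[i];
		/* visit env->prog->insnsi[insn_idx]; its successors were already visited */
	}
}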
static int check_abnormal_return(struct bpf_verifier_env *env)
{
int i;
@@ -18453,16 +18563,15 @@ static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
}
static void clean_func_state(struct bpf_verifier_env *env,
- struct bpf_func_state *st)
+ struct bpf_func_state *st,
+ u32 ip)
{
- enum bpf_reg_liveness live;
+ u16 live_regs = env->insn_aux_data[ip].live_regs_before;
int i, j;
for (i = 0; i < BPF_REG_FP; i++) {
- live = st->regs[i].live;
/* liveness must not touch this register anymore */
- st->regs[i].live |= REG_LIVE_DONE;
- if (!(live & REG_LIVE_READ))
+ if (!(live_regs & BIT(i)))
/* since the register is unused, clear its state
* to make further comparison simpler
*/
@@ -18470,10 +18579,7 @@ static void clean_func_state(struct bpf_verifier_env *env,
}
for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
- live = st->stack[i].spilled_ptr.live;
- /* liveness must not touch this stack slot anymore */
- st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
- if (!(live & REG_LIVE_READ)) {
+ if (!bpf_stack_slot_alive(env, st->frameno, i)) {
__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
for (j = 0; j < BPF_REG_SIZE; j++)
st->stack[i].slot_type[j] = STACK_INVALID;
@@ -18484,10 +18590,14 @@ static void clean_func_state(struct bpf_verifier_env *env,
static void clean_verifier_state(struct bpf_verifier_env *env,
struct bpf_verifier_state *st)
{
- int i;
+ int i, ip;
- for (i = 0; i <= st->curframe; i++)
- clean_func_state(env, st->frame[i]);
+ bpf_live_stack_query_init(env, st);
+ st->cleaned = true;
+ for (i = 0; i <= st->curframe; i++) {
+ ip = frame_insn_idx(st, i);
+ clean_func_state(env, st->frame[i], ip);
+ }
}
/* the parentage chains form a tree.
@@ -18498,25 +18608,23 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
* but a lot of states will get revised from liveness point of view when
* the verifier explores other branches.
* Example:
- * 1: r0 = 1
+ * 1: *(u64)(r10 - 8) = 1
* 2: if r1 == 100 goto pc+1
- * 3: r0 = 2
- * 4: exit
- * when the verifier reaches exit insn the register r0 in the state list of
- * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
- * of insn 2 and goes exploring further. At the insn 4 it will walk the
- * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
+ * 3: *(u64)(r10 - 8) = 2
+ * 4: r0 = *(u64)(r10 - 8)
+ * 5: exit
+ * when the verifier reaches exit insn the stack slot -8 in the state list of
+ * insn 2 is not yet marked alive. Then the verifier pops the other_branch
+ * of insn 2 and goes exploring further. After the insn 4 read, liveness
+ * analysis would propagate read mark for -8 at insn 2.
*
* Since the verifier pushes the branch states as it sees them while exploring
* the program the condition of walking the branch instruction for the second
* time means that all states below this branch were already explored and
* their final liveness marks are already propagated.
* Hence when the verifier completes the search of state list in is_state_visited()
- * we can call this clean_live_states() function to mark all liveness states
- * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
- * will not be used.
- * This function also clears the registers and stack for states that !READ
- * to simplify state merging.
+ * we can call this clean_live_states() function to clear the dead registers and stack
+ * slots to simplify state merging.
*
* Important note here that walking the same branch instruction in the callee
* doesn't meant that the states are DONE. The verifier has to compare
@@ -18536,7 +18644,7 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
if (sl->state.insn_idx != insn ||
!same_callsites(&sl->state, cur))
continue;
- if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE)
+ if (sl->state.cleaned)
/* all regs in this state in all frames were already marked */
continue;
if (incomplete_read_marks(env, &sl->state))
@@ -18568,9 +18676,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
if (exact == EXACT)
return regs_exact(rold, rcur, idmap);
- if (!(rold->live & REG_LIVE_READ) && exact == NOT_EXACT)
- /* explored state didn't use this */
- return true;
if (rold->type == NOT_INIT) {
if (exact == NOT_EXACT || rcur->type == NOT_INIT)
/* explored state can't have used this */
@@ -18694,7 +18799,6 @@ static struct bpf_reg_state unbound_reg;
static __init int unbound_reg_init(void)
{
__mark_reg_unknown_imprecise(&unbound_reg);
- unbound_reg.live |= REG_LIVE_READ;
return 0;
}
late_initcall(unbound_reg_init);
@@ -18747,13 +18851,6 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
cur->stack[spi].slot_type[i % BPF_REG_SIZE]))
return false;
- if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)
- && exact == NOT_EXACT) {
- i += BPF_REG_SIZE - 1;
- /* explored state didn't use this */
- continue;
- }
-
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
continue;
@@ -18996,91 +19093,6 @@ static bool states_equal(struct bpf_verifier_env *env,
return true;
}
-/* Return 0 if no propagation happened. Return negative error code if error
- * happened. Otherwise, return the propagated bit.
- */
-static int propagate_liveness_reg(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg,
- struct bpf_reg_state *parent_reg)
-{
- u8 parent_flag = parent_reg->live & REG_LIVE_READ;
- u8 flag = reg->live & REG_LIVE_READ;
- int err;
-
- /* When comes here, read flags of PARENT_REG or REG could be any of
- * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
- * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
- */
- if (parent_flag == REG_LIVE_READ64 ||
- /* Or if there is no read flag from REG. */
- !flag ||
- /* Or if the read flag from REG is the same as PARENT_REG. */
- parent_flag == flag)
- return 0;
-
- err = mark_reg_read(env, reg, parent_reg, flag);
- if (err)
- return err;
-
- return flag;
-}
-
-/* A write screens off any subsequent reads; but write marks come from the
- * straight-line code between a state and its parent. When we arrive at an
- * equivalent state (jump target or such) we didn't arrive by the straight-line
- * code, so read marks in the state must propagate to the parent regardless
- * of the state's write marks. That's what 'parent == state->parent' comparison
- * in mark_reg_read() is for.
- */
-static int propagate_liveness(struct bpf_verifier_env *env,
- const struct bpf_verifier_state *vstate,
- struct bpf_verifier_state *vparent,
- bool *changed)
-{
- struct bpf_reg_state *state_reg, *parent_reg;
- struct bpf_func_state *state, *parent;
- int i, frame, err = 0;
- bool tmp = false;
-
- changed = changed ?: &tmp;
- if (vparent->curframe != vstate->curframe) {
- WARN(1, "propagate_live: parent frame %d current frame %d\n",
- vparent->curframe, vstate->curframe);
- return -EFAULT;
- }
- /* Propagate read liveness of registers... */
- BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
- for (frame = 0; frame <= vstate->curframe; frame++) {
- parent = vparent->frame[frame];
- state = vstate->frame[frame];
- parent_reg = parent->regs;
- state_reg = state->regs;
- /* We don't need to worry about FP liveness, it's read-only */
- for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
- err = propagate_liveness_reg(env, &state_reg[i],
- &parent_reg[i]);
- if (err < 0)
- return err;
- *changed |= err > 0;
- if (err == REG_LIVE_READ64)
- mark_insn_zext(env, &parent_reg[i]);
- }
-
- /* Propagate stack slots. */
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
- i < parent->allocated_stack / BPF_REG_SIZE; i++) {
- parent_reg = &parent->stack[i].spilled_ptr;
- state_reg = &state->stack[i].spilled_ptr;
- err = propagate_liveness_reg(env, state_reg,
- parent_reg);
- *changed |= err > 0;
- if (err < 0)
- return err;
- }
- }
- return 0;
-}
-
/* find precise scalars in the previous equivalent state and
* propagate them into the current state
*/
@@ -19100,8 +19112,7 @@ static int propagate_precision(struct bpf_verifier_env *env,
first = true;
for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
if (state_reg->type != SCALAR_VALUE ||
- !state_reg->precise ||
- !(state_reg->live & REG_LIVE_READ))
+ !state_reg->precise)
continue;
if (env->log.level & BPF_LOG_LEVEL2) {
if (first)
@@ -19118,8 +19129,7 @@ static int propagate_precision(struct bpf_verifier_env *env,
continue;
state_reg = &state->stack[i].spilled_ptr;
if (state_reg->type != SCALAR_VALUE ||
- !state_reg->precise ||
- !(state_reg->live & REG_LIVE_READ))
+ !state_reg->precise)
continue;
if (env->log.level & BPF_LOG_LEVEL2) {
if (first)
@@ -19169,9 +19179,6 @@ static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visi
changed = false;
for (backedge = visit->backedges; backedge; backedge = backedge->next) {
st = &backedge->state;
- err = propagate_liveness(env, st->equal_state, st, &changed);
- if (err)
- return err;
err = propagate_precision(env, st->equal_state, st, &changed);
if (err)
return err;
@@ -19195,7 +19202,7 @@ static bool states_maybe_looping(struct bpf_verifier_state *old,
fcur = cur->frame[fr];
for (i = 0; i < MAX_BPF_REG; i++)
if (memcmp(&fold->regs[i], &fcur->regs[i],
- offsetof(struct bpf_reg_state, parent)))
+ offsetof(struct bpf_reg_state, frameno)))
return false;
return true;
}
@@ -19293,7 +19300,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
struct bpf_verifier_state_list *sl;
struct bpf_verifier_state *cur = env->cur_state, *new;
bool force_new_state, add_new_state, loop;
- int i, j, n, err, states_cnt = 0;
+ int n, err, states_cnt = 0;
struct list_head *pos, *tmp, *head;
force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
@@ -19408,7 +19415,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
goto hit;
}
}
- if (calls_callback(env, insn_idx)) {
+ if (bpf_calls_callback(env, insn_idx)) {
if (states_equal(env, &sl->state, cur, RANGE_WITHIN))
goto hit;
goto skip_inf_loop_check;
@@ -19451,25 +19458,15 @@ skip_inf_loop_check:
if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
hit:
sl->hit_cnt++;
- /* reached equivalent register/stack state,
- * prune the search.
- * Registers read by the continuation are read by us.
- * If we have any write marks in env->cur_state, they
- * will prevent corresponding reads in the continuation
- * from reaching our parent (an explored_state). Our
- * own state will get the read marks recorded, but
- * they'll be immediately forgotten as we're pruning
- * this state and will pop a new one.
- */
- err = propagate_liveness(env, &sl->state, cur, NULL);
/* if previous state reached the exit with precision and
* current state is equivalent to it (except precision marks)
* the precision needs to be propagated back in
* the current state.
*/
+ err = 0;
if (is_jmp_point(env, env->insn_idx))
- err = err ? : push_jmp_history(env, cur, 0, 0);
+ err = push_jmp_history(env, cur, 0, 0);
err = err ? : propagate_precision(env, &sl->state, cur, NULL);
if (err)
return err;
@@ -19557,7 +19554,7 @@ hit:
err = err ?: add_scc_backedge(env, &sl->state, backedge);
if (err) {
free_verifier_state(&backedge->state, false);
- kvfree(backedge);
+ kfree(backedge);
return err;
}
}
@@ -19640,7 +19637,7 @@ miss:
err = maybe_enter_scc(env, new);
if (err) {
free_verifier_state(new, false);
- kvfree(new_sl);
+ kfree(new_sl);
return err;
}
@@ -19649,38 +19646,6 @@ miss:
cur->dfs_depth = new->dfs_depth + 1;
clear_jmp_history(cur);
list_add(&new_sl->node, head);
-
- /* connect new state to parentage chain. Current frame needs all
- * registers connected. Only r6 - r9 of the callers are alive (pushed
- * to the stack implicitly by JITs) so in callers' frames connect just
- * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
- * the state of the call instruction (with WRITTEN set), and r0 comes
- * from callee with its full parentage chain, anyway.
- */
- /* clear write marks in current state: the writes we did are not writes
- * our child did, so they don't screen off its reads from us.
- * (There are no read marks in current state, because reads always mark
- * their parent and current state never has children yet. Only
- * explored_states can get read marks.)
- */
- for (j = 0; j <= cur->curframe; j++) {
- for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
- cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
- for (i = 0; i < BPF_REG_FP; i++)
- cur->frame[j]->regs[i].live = REG_LIVE_NONE;
- }
-
- /* all stack frames are accessible from callee, clear them all */
- for (j = 0; j <= cur->curframe; j++) {
- struct bpf_func_state *frame = cur->frame[j];
- struct bpf_func_state *newframe = new->frame[j];
-
- for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
- frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
- frame->stack[i].spilled_ptr.parent =
- &newframe->stack[i].spilled_ptr;
- }
- }
return 0;
}
@@ -19816,6 +19781,9 @@ static int process_bpf_exit_full(struct bpf_verifier_env *env,
return PROCESS_BPF_EXIT;
if (env->cur_state->curframe) {
+ err = bpf_update_live_stack(env);
+ if (err)
+ return err;
/* exit from nested function */
err = prepare_func_exit(env, &env->insn_idx);
if (err)
@@ -20001,7 +19969,7 @@ static int do_check(struct bpf_verifier_env *env)
for (;;) {
struct bpf_insn *insn;
struct bpf_insn_aux_data *insn_aux;
- int err;
+ int err, marks_err;
/* reset current history entry on each new instruction */
env->cur_hist_ent = NULL;
@@ -20094,7 +20062,15 @@ static int do_check(struct bpf_verifier_env *env)
if (state->speculative && insn_aux->nospec)
goto process_bpf_exit;
+ err = bpf_reset_stack_write_marks(env, env->insn_idx);
+ if (err)
+ return err;
err = do_check_insn(env, &do_print_state);
+ if (err >= 0 || error_recoverable_with_nospec(err)) {
+ marks_err = bpf_commit_stack_write_marks(env);
+ if (marks_err)
+ return marks_err;
+ }
if (error_recoverable_with_nospec(err) && state->speculative) {
/* Prevent this speculative path from ever reaching the
* insn that would have been unsafe to execute.
@@ -20135,6 +20111,9 @@ process_bpf_exit:
err = update_branch_counts(env, env->cur_state);
if (err)
return err;
+ err = bpf_update_live_stack(env);
+ if (err)
+ return err;
err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
pop_log);
if (err < 0) {
@@ -20197,8 +20176,11 @@ static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf)
if (env->used_btfs[i].btf == btf)
return i;
- if (env->used_btf_cnt >= MAX_USED_BTFS)
+ if (env->used_btf_cnt >= MAX_USED_BTFS) {
+ verbose(env, "The total number of btfs per program has reached the limit of %u\n",
+ MAX_USED_BTFS);
return -E2BIG;
+ }
btf_get(btf);
@@ -20364,6 +20346,12 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
{
enum bpf_prog_type prog_type = resolve_prog_type(prog);
+ if (map->excl_prog_sha &&
+ memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) {
+ verbose(env, "program's hash doesn't match map's excl_prog_hash\n");
+ return -EACCES;
+ }
+
if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
btf_record_has_field(map->record, BPF_RB_ROOT)) {
if (is_tracing_prog_type(prog_type)) {
@@ -20703,12 +20691,11 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
* [0, off) and [off, end) to new locations, so the patched range stays zero
*/
static void adjust_insn_aux_data(struct bpf_verifier_env *env,
- struct bpf_insn_aux_data *new_data,
struct bpf_prog *new_prog, u32 off, u32 cnt)
{
- struct bpf_insn_aux_data *old_data = env->insn_aux_data;
+ struct bpf_insn_aux_data *data = env->insn_aux_data;
struct bpf_insn *insn = new_prog->insnsi;
- u32 old_seen = old_data[off].seen;
+ u32 old_seen = data[off].seen;
u32 prog_len;
int i;
@@ -20716,22 +20703,20 @@ static void adjust_insn_aux_data(struct bpf_verifier_env *env,
* (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
* original insn at old prog.
*/
- old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
+ data[off].zext_dst = insn_has_def32(insn + off + cnt - 1);
if (cnt == 1)
return;
prog_len = new_prog->len;
- memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
- memcpy(new_data + off + cnt - 1, old_data + off,
- sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+ memmove(data + off + cnt - 1, data + off,
+ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+ memset(data + off, 0, sizeof(struct bpf_insn_aux_data) * (cnt - 1));
for (i = off; i < off + cnt - 1; i++) {
/* Expand insni[off]'s seen count to the patched range. */
- new_data[i].seen = old_seen;
- new_data[i].zext_dst = insn_has_def32(env, insn + i);
+ data[i].seen = old_seen;
+ data[i].zext_dst = insn_has_def32(insn + i);
}
- env->insn_aux_data = new_data;
- vfree(old_data);
}
static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
@@ -20769,10 +20754,14 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
struct bpf_insn_aux_data *new_data = NULL;
if (len > 1) {
- new_data = vzalloc(array_size(env->prog->len + len - 1,
- sizeof(struct bpf_insn_aux_data)));
+ new_data = vrealloc(env->insn_aux_data,
+ array_size(env->prog->len + len - 1,
+ sizeof(struct bpf_insn_aux_data)),
+ GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!new_data)
return NULL;
+
+ env->insn_aux_data = new_data;
}
new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
@@ -20781,10 +20770,9 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
verbose(env,
"insn %d cannot be patched due to 16-bit range\n",
env->insn_aux_data[off].orig_idx);
- vfree(new_data);
return NULL;
}
- adjust_insn_aux_data(env, new_data, new_prog, off, len);
+ adjust_insn_aux_data(env, new_prog, off, len);
adjust_subprog_starts(env, off, len);
adjust_poke_descs(new_prog, off, len);
return new_prog;
@@ -21135,7 +21123,7 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
* BPF_STX + SRC_OP, so it is safe to pass NULL
* here.
*/
- if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
+ if (is_reg64(&insn, load_reg, NULL, DST_OP)) {
if (class == BPF_LD &&
BPF_MODE(code) == BPF_IMM)
i++;
@@ -21404,10 +21392,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
continue;
case PTR_TO_ARENA:
if (BPF_MODE(insn->code) == BPF_MEMSX) {
- verbose(env, "sign extending loads from arena are not supported yet\n");
- return -EOPNOTSUPP;
+ if (!bpf_jit_supports_insn(insn, true)) {
+ verbose(env, "sign extending loads from arena are not supported yet\n");
+ return -EOPNOTSUPP;
+ }
+ insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32SX | BPF_SIZE(insn->code);
+ } else {
+ insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
}
- insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
env->prog->aux->num_exentries++;
continue;
default:
@@ -21582,6 +21574,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
func[i]->aux->poke_tab = prog->aux->poke_tab;
func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
+ func[i]->aux->main_prog_aux = prog->aux;
for (j = 0; j < prog->aux->size_poke_tab; j++) {
struct bpf_jit_poke_descriptor *poke;
@@ -21612,6 +21605,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
if (BPF_CLASS(insn->code) == BPF_LDX &&
(BPF_MODE(insn->code) == BPF_PROBE_MEM ||
BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
+ BPF_MODE(insn->code) == BPF_PROBE_MEM32SX ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
num_exentries++;
if ((BPF_CLASS(insn->code) == BPF_STX ||
@@ -24089,67 +24083,6 @@ static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr,
return 0;
}
-static bool can_fallthrough(struct bpf_insn *insn)
-{
- u8 class = BPF_CLASS(insn->code);
- u8 opcode = BPF_OP(insn->code);
-
- if (class != BPF_JMP && class != BPF_JMP32)
- return true;
-
- if (opcode == BPF_EXIT || opcode == BPF_JA)
- return false;
-
- return true;
-}
-
-static bool can_jump(struct bpf_insn *insn)
-{
- u8 class = BPF_CLASS(insn->code);
- u8 opcode = BPF_OP(insn->code);
-
- if (class != BPF_JMP && class != BPF_JMP32)
- return false;
-
- switch (opcode) {
- case BPF_JA:
- case BPF_JEQ:
- case BPF_JNE:
- case BPF_JLT:
- case BPF_JLE:
- case BPF_JGT:
- case BPF_JGE:
- case BPF_JSGT:
- case BPF_JSGE:
- case BPF_JSLT:
- case BPF_JSLE:
- case BPF_JCOND:
- case BPF_JSET:
- return true;
- }
-
- return false;
-}
-
-static int insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2])
-{
- struct bpf_insn *insn = &prog->insnsi[idx];
- int i = 0, insn_sz;
- u32 dst;
-
- insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
- if (can_fallthrough(insn) && idx + 1 < prog->len)
- succ[i++] = idx + insn_sz;
-
- if (can_jump(insn)) {
- dst = idx + jmp_offset(insn) + 1;
- if (i == 0 || succ[0] != dst)
- succ[i++] = dst;
- }
-
- return i;
-}
-
/* Each field is a register bitmask */
struct insn_live_regs {
u16 use; /* registers read by instruction */
@@ -24347,7 +24280,7 @@ static int compute_live_registers(struct bpf_verifier_env *env)
u16 new_out = 0;
u16 new_in = 0;
- succ_num = insn_successors(env->prog, insn_idx, succ);
+ succ_num = bpf_insn_successors(env->prog, insn_idx, succ);
for (int s = 0; s < succ_num; ++s)
new_out |= state[succ[s]].in;
new_in = (new_out & ~live->def) | live->use;
@@ -24384,9 +24317,6 @@ static int compute_live_registers(struct bpf_verifier_env *env)
out:
kvfree(state);
- kvfree(env->cfg.insn_postorder);
- env->cfg.insn_postorder = NULL;
- env->cfg.cur_postorder = 0;
return err;
}
@@ -24516,7 +24446,7 @@ dfs_continue:
stack[stack_sz++] = w;
}
/* Visit 'w' successors */
- succ_cnt = insn_successors(env->prog, w, succ);
+ succ_cnt = bpf_insn_successors(env->prog, w, succ);
for (j = 0; j < succ_cnt; ++j) {
if (pre[succ[j]]) {
low[w] = min(low[w], low[succ[j]]);
@@ -24689,6 +24619,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (ret < 0)
goto skip_full_check;
+ ret = compute_postorder(env);
+ if (ret < 0)
+ goto skip_full_check;
+
+ ret = bpf_stack_liveness_init(env);
+ if (ret)
+ goto skip_full_check;
+
ret = check_attach_btf_id(env);
if (ret)
goto skip_full_check;
@@ -24838,6 +24776,7 @@ err_unlock:
mutex_unlock(&bpf_verifier_lock);
vfree(env->insn_aux_data);
err_free_env:
+ bpf_stack_liveness_free(env);
kvfree(env->cfg.insn_postorder);
kvfree(env->scc_info);
kvfree(env);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index da4eaee52178..6ae5f48cf64e 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6481,15 +6481,15 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
}
/*
- * cgroup_get_from_id : get the cgroup associated with cgroup id
+ * __cgroup_get_from_id : get the cgroup associated with cgroup id
* @id: cgroup id
* On success return the cgrp or ERR_PTR on failure
- * Only cgroups within current task's cgroup NS are valid.
+ * There are no cgroup NS restrictions.
*/
-struct cgroup *cgroup_get_from_id(u64 id)
+struct cgroup *__cgroup_get_from_id(u64 id)
{
struct kernfs_node *kn;
- struct cgroup *cgrp, *root_cgrp;
+ struct cgroup *cgrp;
kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
if (!kn)
@@ -6511,6 +6511,22 @@ struct cgroup *cgroup_get_from_id(u64 id)
if (!cgrp)
return ERR_PTR(-ENOENT);
+ return cgrp;
+}
+
+/*
+ * cgroup_get_from_id : get the cgroup associated with cgroup id
+ * @id: cgroup id
+ * On success return the cgrp or ERR_PTR on failure
+ * Only cgroups within current task's cgroup NS are valid.
+ */
+struct cgroup *cgroup_get_from_id(u64 id)
+{
+ struct cgroup *cgrp, *root_cgrp;
+
+ cgrp = __cgroup_get_from_id(id);
+ if (IS_ERR(cgrp))
+ return cgrp;
root_cgrp = current_cgns_cgroup_dfl();
if (!cgroup_is_descendant(cgrp, root_cgrp)) {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fb1eae762044..ef1beb9ea128 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11245,6 +11245,10 @@ static int __perf_event_set_bpf_prog(struct perf_event *event,
if (prog->kprobe_override && !is_kprobe)
return -EINVAL;
+ /* Writing to context allowed only for uprobes. */
+ if (prog->aux->kprobe_write_ctx && !is_uprobe)
+ return -EINVAL;
+
if (is_tracepoint || is_syscall_tp) {
int off = trace_event_get_offsets(event->tp_event);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index c8bfff023c6e..5dcf927310fd 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -2765,6 +2765,13 @@ static void handle_swbp(struct pt_regs *regs)
handler_chain(uprobe, regs);
+ /*
+	 * If the user decided to take execution elsewhere, it makes little sense
+ * to execute the original instruction, so let's skip it.
+ */
+ if (instruction_pointer(regs) != bp_vaddr)
+ goto out;
+
/* Try to optimize after first hit. */
arch_uprobe_optimize(&uprobe->arch, bp_vaddr);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3ae52978cae6..8f23f5273bab 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -22,7 +22,6 @@
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/key.h>
-#include <linux/verification.h>
#include <linux/namei.h>
#include <net/bpf_sk_storage.h>
@@ -1241,188 +1240,6 @@ static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-#ifdef CONFIG_KEYS
-__bpf_kfunc_start_defs();
-
-/**
- * bpf_lookup_user_key - lookup a key by its serial
- * @serial: key handle serial number
- * @flags: lookup-specific flags
- *
- * Search a key with a given *serial* and the provided *flags*.
- * If found, increment the reference count of the key by one, and
- * return it in the bpf_key structure.
- *
- * The bpf_key structure must be passed to bpf_key_put() when done
- * with it, so that the key reference count is decremented and the
- * bpf_key structure is freed.
- *
- * Permission checks are deferred to the time the key is used by
- * one of the available key-specific kfuncs.
- *
- * Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested
- * special keyring (e.g. session keyring), if it doesn't yet exist.
- * Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting
- * for the key construction, and to retrieve uninstantiated keys (keys
- * without data attached to them).
- *
- * Return: a bpf_key pointer with a valid key pointer if the key is found, a
- * NULL pointer otherwise.
- */
-__bpf_kfunc struct bpf_key *bpf_lookup_user_key(s32 serial, u64 flags)
-{
- key_ref_t key_ref;
- struct bpf_key *bkey;
-
- if (flags & ~KEY_LOOKUP_ALL)
- return NULL;
-
- /*
- * Permission check is deferred until the key is used, as the
- * intent of the caller is unknown here.
- */
- key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
- if (IS_ERR(key_ref))
- return NULL;
-
- bkey = kmalloc(sizeof(*bkey), GFP_KERNEL);
- if (!bkey) {
- key_put(key_ref_to_ptr(key_ref));
- return NULL;
- }
-
- bkey->key = key_ref_to_ptr(key_ref);
- bkey->has_ref = true;
-
- return bkey;
-}
-
-/**
- * bpf_lookup_system_key - lookup a key by a system-defined ID
- * @id: key ID
- *
- * Obtain a bpf_key structure with a key pointer set to the passed key ID.
- * The key pointer is marked as invalid, to prevent bpf_key_put() from
- * attempting to decrement the key reference count on that pointer. The key
- * pointer set in such way is currently understood only by
- * verify_pkcs7_signature().
- *
- * Set *id* to one of the values defined in include/linux/verification.h:
- * 0 for the primary keyring (immutable keyring of system keys);
- * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
- * (where keys can be added only if they are vouched for by existing keys
- * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
- * keyring (primarily used by the integrity subsystem to verify a kexec'ed
- * kerned image and, possibly, the initramfs signature).
- *
- * Return: a bpf_key pointer with an invalid key pointer set from the
- * pre-determined ID on success, a NULL pointer otherwise
- */
-__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id)
-{
- struct bpf_key *bkey;
-
- if (system_keyring_id_check(id) < 0)
- return NULL;
-
- bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
- if (!bkey)
- return NULL;
-
- bkey->key = (struct key *)(unsigned long)id;
- bkey->has_ref = false;
-
- return bkey;
-}
-
-/**
- * bpf_key_put - decrement key reference count if key is valid and free bpf_key
- * @bkey: bpf_key structure
- *
- * Decrement the reference count of the key inside *bkey*, if the pointer
- * is valid, and free *bkey*.
- */
-__bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
-{
- if (bkey->has_ref)
- key_put(bkey->key);
-
- kfree(bkey);
-}
-
-#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
-/**
- * bpf_verify_pkcs7_signature - verify a PKCS#7 signature
- * @data_p: data to verify
- * @sig_p: signature of the data
- * @trusted_keyring: keyring with keys trusted for signature verification
- *
- * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
- * with keys in a keyring referenced by *trusted_keyring*.
- *
- * Return: 0 on success, a negative value on error.
- */
-__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
- struct bpf_dynptr *sig_p,
- struct bpf_key *trusted_keyring)
-{
- struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
- struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
- const void *data, *sig;
- u32 data_len, sig_len;
- int ret;
-
- if (trusted_keyring->has_ref) {
- /*
- * Do the permission check deferred in bpf_lookup_user_key().
- * See bpf_lookup_user_key() for more details.
- *
- * A call to key_task_permission() here would be redundant, as
- * it is already done by keyring_search() called by
- * find_asymmetric_key().
- */
- ret = key_validate(trusted_keyring->key);
- if (ret < 0)
- return ret;
- }
-
- data_len = __bpf_dynptr_size(data_ptr);
- data = __bpf_dynptr_data(data_ptr, data_len);
- sig_len = __bpf_dynptr_size(sig_ptr);
- sig = __bpf_dynptr_data(sig_ptr, sig_len);
-
- return verify_pkcs7_signature(data, data_len, sig, sig_len,
- trusted_keyring->key,
- VERIFYING_UNSPECIFIED_SIGNATURE, NULL,
- NULL);
-}
-#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
-
-__bpf_kfunc_end_defs();
-
-BTF_KFUNCS_START(key_sig_kfunc_set)
-BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
-#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
-BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
-#endif
-BTF_KFUNCS_END(key_sig_kfunc_set)
-
-static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = {
- .owner = THIS_MODULE,
- .set = &key_sig_kfunc_set,
-};
-
-static int __init bpf_key_sig_kfuncs_init(void)
-{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
- &bpf_key_sig_kfunc_set);
-}
-
-late_initcall(bpf_key_sig_kfuncs_init);
-#endif /* CONFIG_KEYS */
-
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1521,8 +1338,6 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
{
if (off < 0 || off >= sizeof(struct pt_regs))
return false;
- if (type != BPF_READ)
- return false;
if (off % size != 0)
return false;
/*
@@ -1532,6 +1347,9 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
if (off + size > sizeof(struct pt_regs))
return false;
+ if (type == BPF_WRITE)
+ prog->aux->kprobe_write_ctx = true;
+
return true;
}
@@ -2728,20 +2546,25 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
struct pt_regs *regs;
int err;
+ /*
+ * graph tracer framework ensures we won't migrate, so there is no need
+	 * to use migrate_disable for bpf_prog_run again. The check here is just for
+ * __this_cpu_inc_return.
+ */
+ cant_sleep();
+
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
bpf_prog_inc_misses_counter(link->link.prog);
err = 1;
goto out;
}
- migrate_disable();
rcu_read_lock();
regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
- migrate_enable();
out:
__this_cpu_dec(bpf_prog_active);
@@ -2913,6 +2736,10 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (!is_kprobe_multi(prog))
return -EINVAL;
+ /* Writing to context is not allowed for kprobes. */
+ if (prog->aux->kprobe_write_ctx)
+ return -EINVAL;
+
flags = attr->link_create.kprobe_multi.flags;
if (flags & ~BPF_F_KPROBE_MULTI_RETURN)
return -EINVAL;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 9728dbd4c66c..dfb03ee0bb62 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -524,27 +524,27 @@ __bpf_kfunc int bpf_fentry_test1(int a)
}
EXPORT_SYMBOL_GPL(bpf_fentry_test1);
-int noinline bpf_fentry_test2(int a, u64 b)
+noinline int bpf_fentry_test2(int a, u64 b)
{
return a + b;
}
-int noinline bpf_fentry_test3(char a, int b, u64 c)
+noinline int bpf_fentry_test3(char a, int b, u64 c)
{
return a + b + c;
}
-int noinline bpf_fentry_test4(void *a, char b, int c, u64 d)
+noinline int bpf_fentry_test4(void *a, char b, int c, u64 d)
{
return (long)a + b + c + d;
}
-int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
+noinline int bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
{
return a + (long)b + c + d + e;
}
-int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
+noinline int bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
{
return a + (long)b + c + d + (long)e + f;
}
@@ -553,13 +553,13 @@ struct bpf_fentry_test_t {
struct bpf_fentry_test_t *a;
};
-int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
+noinline int bpf_fentry_test7(struct bpf_fentry_test_t *arg)
{
- asm volatile ("": "+r"(arg));
+ asm volatile ("" : "+r"(arg));
return (long)arg;
}
-int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
+noinline int bpf_fentry_test8(struct bpf_fentry_test_t *arg)
{
return (long)arg->a;
}
@@ -569,12 +569,12 @@ __bpf_kfunc u32 bpf_fentry_test9(u32 *a)
return *a;
}
-int noinline bpf_fentry_test10(const void *a)
+noinline int bpf_fentry_test10(const void *a)
{
return (long)a;
}
-void noinline bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
+noinline void bpf_fentry_test_sinfo(struct skb_shared_info *sinfo)
{
}
@@ -598,7 +598,7 @@ __bpf_kfunc int bpf_modify_return_test_tp(int nonce)
return nonce;
}
-int noinline bpf_fentry_shadow_test(int a)
+noinline int bpf_fentry_shadow_test(int a)
{
return a + 1;
}
@@ -665,7 +665,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
void *data;
- if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom)
+ if (user_size > PAGE_SIZE - headroom - tailroom)
return ERR_PTR(-EINVAL);
size = SKB_DATA_ALIGN(size);
@@ -1001,6 +1001,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
kattr->test.cpu || kattr->test.batch_size)
return -EINVAL;
+ if (size < ETH_HLEN)
+ return -EINVAL;
+
data = bpf_test_init(kattr, kattr->test.data_size_in,
size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
@@ -1207,9 +1210,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
{
bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES);
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ u32 retval = 0, meta_sz = 0, duration, max_linear_sz, size;
+ u32 linear_sz = kattr->test.data_size_in;
u32 batch_size = kattr->test.batch_size;
- u32 retval = 0, duration, max_data_sz;
- u32 size = kattr->test.data_size_in;
u32 headroom = XDP_PACKET_HEADROOM;
u32 repeat = kattr->test.repeat;
struct netdev_rx_queue *rxqueue;
@@ -1246,39 +1249,45 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (ctx) {
/* There can't be user provided data before the meta data */
- if (ctx->data_meta || ctx->data_end != size ||
+ if (ctx->data_meta || ctx->data_end > kattr->test.data_size_in ||
ctx->data > ctx->data_end ||
unlikely(xdp_metalen_invalid(ctx->data)) ||
(do_live && (kattr->test.data_out || kattr->test.ctx_out)))
goto free_ctx;
/* Meta data is allocated from the headroom */
headroom -= ctx->data;
- }
- max_data_sz = PAGE_SIZE - headroom - tailroom;
- if (size > max_data_sz) {
- /* disallow live data mode for jumbo frames */
- if (do_live)
- goto free_ctx;
- size = max_data_sz;
+ meta_sz = ctx->data;
+ linear_sz = ctx->data_end;
}
- data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom);
+ max_linear_sz = PAGE_SIZE - headroom - tailroom;
+ linear_sz = min_t(u32, linear_sz, max_linear_sz);
+
+ /* disallow live data mode for jumbo frames */
+ if (do_live && kattr->test.data_size_in > linear_sz)
+ goto free_ctx;
+
+ if (kattr->test.data_size_in - meta_sz < ETH_HLEN)
+ return -EINVAL;
+
+ data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
goto free_ctx;
}
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
- rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom;
+ rxqueue->xdp_rxq.frag_size = PAGE_SIZE;
xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq);
- xdp_prepare_buff(&xdp, data, headroom, size, true);
+ xdp_prepare_buff(&xdp, data, headroom, linear_sz, true);
sinfo = xdp_get_shared_info_from_buff(&xdp);
ret = xdp_convert_md_to_buff(ctx, &xdp);
if (ret)
goto free_data;
+ size = linear_sz;
if (unlikely(kattr->test.data_size_in > size)) {
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
diff --git a/net/core/filter.c b/net/core/filter.c
index da391e2b0788..2af0a5f1d748 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4153,34 +4153,45 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
return 0;
}
-static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
- enum xdp_mem_type mem_type, bool release)
+static struct xdp_buff *bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+ bool tail, bool release)
{
- struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
+ struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
+ xsk_buff_get_head(xdp);
if (release) {
- xsk_buff_del_tail(zc_frag);
- __xdp_return(0, mem_type, false, zc_frag);
+ xsk_buff_del_frag(zc_frag);
} else {
- zc_frag->data_end -= shrink;
+ if (tail)
+ zc_frag->data_end -= shrink;
+ else
+ zc_frag->data += shrink;
}
+
+ return zc_frag;
}
static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
- int shrink)
+ int shrink, bool tail)
{
enum xdp_mem_type mem_type = xdp->rxq->mem.type;
bool release = skb_frag_size(frag) == shrink;
+ netmem_ref netmem = skb_frag_netmem(frag);
+ struct xdp_buff *zc_frag = NULL;
if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
- bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
- goto out;
+ netmem = 0;
+ zc_frag = bpf_xdp_shrink_data_zc(xdp, shrink, tail, release);
}
- if (release)
- __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
+ if (release) {
+ __xdp_return(netmem, mem_type, false, zc_frag);
+ } else {
+ if (!tail)
+ skb_frag_off_add(frag, shrink);
+ skb_frag_size_sub(frag, shrink);
+ }
-out:
return release;
}
@@ -4198,18 +4209,15 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
len_free += shrink;
offset -= shrink;
- if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
+ if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
n_frags_free++;
- } else {
- skb_frag_size_sub(frag, shrink);
- break;
- }
}
sinfo->nr_frags -= n_frags_free;
sinfo->xdp_frags_size -= len_free;
if (unlikely(!sinfo->nr_frags)) {
xdp_buff_clear_frags_flag(xdp);
+ xdp_buff_clear_frag_pfmemalloc(xdp);
xdp->data_end -= offset;
}
@@ -7431,6 +7439,8 @@ u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
offsetof(struct xdp_sock, FIELD)); \
} while (0)
+ BTF_TYPE_EMIT(struct bpf_xdp_sock);
+
switch (si->off) {
case offsetof(struct bpf_xdp_sock, queue_id):
BPF_XDP_SOCK_GET(queue_id);
@@ -9284,13 +9294,17 @@ static bool sock_addr_is_valid_access(int off, int size,
return false;
info->reg_type = PTR_TO_SOCKET;
break;
- default:
- if (type == BPF_READ) {
- if (size != size_default)
- return false;
- } else {
+ case bpf_ctx_range(struct bpf_sock_addr, user_family):
+ case bpf_ctx_range(struct bpf_sock_addr, family):
+ case bpf_ctx_range(struct bpf_sock_addr, type):
+ case bpf_ctx_range(struct bpf_sock_addr, protocol):
+ if (type != BPF_READ)
return false;
- }
+ if (size != size_default)
+ return false;
+ break;
+ default:
+ return false;
}
return true;
@@ -11990,6 +12004,16 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return func;
}
+/**
+ * bpf_skb_meta_pointer() - Gets a mutable pointer within the skb metadata area.
+ * @skb: socket buffer carrying the metadata
+ * @offset: offset into the metadata area, must be <= skb_metadata_len()
+ */
+void *bpf_skb_meta_pointer(struct sk_buff *skb, u32 offset)
+{
+ return skb_metadata_end(skb) - skb_metadata_len(skb) + offset;
+}
+
__bpf_kfunc_start_defs();
__bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
struct bpf_dynptr *ptr__uninit)
@@ -12007,6 +12031,42 @@ __bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags,
return 0;
}
+/**
+ * bpf_dynptr_from_skb_meta() - Initialize a dynptr to the skb metadata area.
+ * @skb_: socket buffer carrying the metadata
+ * @flags: future use, must be zero
+ * @ptr__uninit: dynptr to initialize
+ *
+ * Set up a dynptr for access to the metadata area allocated earlier from the
+ * XDP context with bpf_xdp_adjust_meta(). Serves as an alternative to
+ * &__sk_buff->data_meta.
+ *
+ * If passed @skb_ is a clone which shares the data with the original, the
+ * dynptr will be read-only. This limitation may be lifted in the future.
+ *
+ * Return:
+ * * %0 - dynptr ready to use
+ * * %-EINVAL - invalid flags, dynptr set to null
+ */
+__bpf_kfunc int bpf_dynptr_from_skb_meta(struct __sk_buff *skb_, u64 flags,
+ struct bpf_dynptr *ptr__uninit)
+{
+ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit;
+ struct sk_buff *skb = (struct sk_buff *)skb_;
+
+ if (flags) {
+ bpf_dynptr_set_null(ptr);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB_META, 0, skb_metadata_len(skb));
+
+ if (skb_cloned(skb))
+ bpf_dynptr_set_rdonly(ptr);
+
+ return 0;
+}
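/*
 * Illustrative sketch, not part of the patch: a tc (SCHED_CLS) program
 * consuming the metadata area through the kfunc above. Assumes the usual
 * vmlinux.h + bpf_helpers.h setup and <linux/pkt_cls.h> for TC_ACT_OK; the
 * 4-byte value is just an example of metadata an earlier XDP program could
 * have reserved with bpf_xdp_adjust_meta().
 */
extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
				    struct bpf_dynptr *ptr__uninit) __ksym;

SEC("tc")
int use_skb_meta(struct __sk_buff *skb)
{
	struct bpf_dynptr meta;
	__u32 mark;

	if (bpf_dynptr_from_skb_meta(skb, 0, &meta))
		return TC_ACT_OK;

	/* bpf_dynptr_read() returns 0 on success */
	if (!bpf_dynptr_read(&mark, sizeof(mark), &meta, 0, 0))
		skb->mark = mark;

	return TC_ACT_OK;
}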
+
__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags,
struct bpf_dynptr *ptr__uninit)
{
@@ -12160,6 +12220,98 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
return 0;
}
+/**
+ * bpf_xdp_pull_data() - Pull in non-linear xdp data.
+ * @x: &xdp_md associated with the XDP buffer
+ * @len: length of data to be made directly accessible in the linear part
+ *
+ * Pull in data in case the XDP buffer associated with @x is non-linear and
+ * not all @len bytes are in the linear data area.
+ *
+ * Direct packet access allows reading and writing linear XDP data through
+ * packet pointers (i.e., &xdp_md->data + offsets). The amount of data which
+ * ends up in the linear part of the xdp_buff depends on the NIC and its
+ * configuration. When a frag-capable XDP program wants to directly access
+ * headers that may be in the non-linear area, call this kfunc to make sure
+ * the data is available in the linear area. Alternatively, use dynptr or
+ * bpf_xdp_{load,store}_bytes() to access data without pulling.
+ *
+ * This kfunc can also be used with bpf_xdp_adjust_head() to decapsulate
+ * headers in the non-linear data area.
+ *
+ * A call to this kfunc may reduce headroom. If there is not enough tailroom
+ * in the linear data area, metadata and data will be shifted down.
+ *
+ * A call to this kfunc may change the buffer geometry.
+ * Therefore, at load time, all checks on pointers previously done by the
+ * verifier are invalidated and must be performed again, if the kfunc is used
+ * in combination with direct packet access.
+ *
+ * Return:
+ * * %0 - success
+ * * %-EINVAL - invalid len
+ */
+__bpf_kfunc int bpf_xdp_pull_data(struct xdp_md *x, u32 len)
+{
+ struct xdp_buff *xdp = (struct xdp_buff *)x;
+ struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+ int i, delta, shift, headroom, tailroom, n_frags_free = 0;
+ void *data_hard_end = xdp_data_hard_end(xdp);
+ int data_len = xdp->data_end - xdp->data;
+ void *start;
+
+ if (len <= data_len)
+ return 0;
+
+ if (unlikely(len > xdp_get_buff_len(xdp)))
+ return -EINVAL;
+
+ start = xdp_data_meta_unsupported(xdp) ? xdp->data : xdp->data_meta;
+
+ headroom = start - xdp->data_hard_start - sizeof(struct xdp_frame);
+ tailroom = data_hard_end - xdp->data_end;
+
+ delta = len - data_len;
+ if (unlikely(delta > tailroom + headroom))
+ return -EINVAL;
+
+ shift = delta - tailroom;
+ if (shift > 0) {
+ memmove(start - shift, start, xdp->data_end - start);
+
+ xdp->data_meta -= shift;
+ xdp->data -= shift;
+ xdp->data_end -= shift;
+ }
+
+ for (i = 0; i < sinfo->nr_frags && delta; i++) {
+ skb_frag_t *frag = &sinfo->frags[i];
+ u32 shrink = min_t(u32, delta, skb_frag_size(frag));
+
+ memcpy(xdp->data_end, skb_frag_address(frag), shrink);
+
+ xdp->data_end += shrink;
+ sinfo->xdp_frags_size -= shrink;
+ delta -= shrink;
+ if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
+ n_frags_free++;
+ }
+
+ if (unlikely(n_frags_free)) {
+ memmove(sinfo->frags, sinfo->frags + n_frags_free,
+ (sinfo->nr_frags - n_frags_free) * sizeof(skb_frag_t));
+
+ sinfo->nr_frags -= n_frags_free;
+
+ if (!sinfo->nr_frags) {
+ xdp_buff_clear_frags_flag(xdp);
+ xdp_buff_clear_frag_pfmemalloc(xdp);
+ }
+ }
+
+ return 0;
+}
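/*
 * Illustrative sketch, not part of the patch: a frags-aware XDP program that
 * pulls the Ethernet header into the linear area before parsing it. Assumes
 * the usual vmlinux.h/bpf_helpers.h/bpf_endian.h setup, with ETH_P_IP taken
 * from <linux/if_ether.h>.
 */
extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym;

SEC("xdp.frags")
int parse_after_pull(struct xdp_md *xdp)
{
	void *data, *data_end;
	struct ethhdr *eth;

	if (bpf_xdp_pull_data(xdp, sizeof(*eth)))
		return XDP_PASS;

	/* re-derive packet pointers: the pull may have changed buffer geometry */
	data = (void *)(long)xdp->data;
	data_end = (void *)(long)xdp->data_end;
	eth = data;
	if ((void *)(eth + 1) > data_end)
		return XDP_PASS;

	return eth->h_proto == bpf_htons(ETH_P_IP) ? XDP_PASS : XDP_DROP;
}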
+
__bpf_kfunc_end_defs();
int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@@ -12181,8 +12333,13 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
+BTF_KFUNCS_START(bpf_kfunc_check_set_skb_meta)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb_meta, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(bpf_kfunc_check_set_skb_meta)
+
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
+BTF_ID_FLAGS(func, bpf_xdp_pull_data)
BTF_KFUNCS_END(bpf_kfunc_check_set_xdp)
BTF_KFUNCS_START(bpf_kfunc_check_set_sock_addr)
@@ -12202,6 +12359,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
.set = &bpf_kfunc_check_set_skb,
};
+static const struct btf_kfunc_id_set bpf_kfunc_set_skb_meta = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_skb_meta,
+};
+
static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
.owner = THIS_MODULE,
.set = &bpf_kfunc_check_set_xdp,
@@ -12237,6 +12399,8 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb_meta);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb_meta);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
&bpf_kfunc_set_sock_addr);
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index ca860fd97d8d..d0a36f442db7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **gen** *COMMAND*
-*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } }
+*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } | [ { **-S** | **--sign** } **-k** <private_key.pem> **-i** <certificate.x509> ] }
*COMMAND* := { **object** | **skeleton** | **help** }
@@ -186,6 +186,17 @@ OPTIONS
skeleton). A light skeleton contains a loader eBPF program. It does not use
the majority of the libbpf infrastructure, and does not need libelf.
+-S, --sign
+ For skeletons, generate a signed skeleton. This option must be used with
+ **-k** and **-i**. Using this flag implicitly enables **--use-loader**.
+
+-k <private_key.pem>
+ Path to the private key file in PEM format, required for signing.
+
+-i <certificate.x509>
+ Path to the X.509 certificate file in PEM or DER format, required for
+ signing.
+
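Illustration (not part of the patch): one way the new flags are expected to compose for a signed light skeleton; the key, certificate, and object file names are placeholders.

	$ bpftool -S -k signing_key.pem -i signing_cert.x509 gen skeleton prog.bpf.o > prog.lskel.h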
EXAMPLES
========
**$ cat example1.bpf.c**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index f69fd92df8d8..009633294b09 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -18,7 +18,7 @@ SYNOPSIS
*OPTIONS* := { |COMMON_OPTIONS| |
{ **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
-{ **-L** | **--use-loader** } }
+{ **-L** | **--use-loader** } | [ { **-S** | **--sign** } **-k** <private_key.pem> **-i** <certificate.x509> ] }
*COMMANDS* :=
{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** |
@@ -248,6 +248,18 @@ OPTIONS
creating the maps, and loading the programs (see **bpftool prog tracelog**
as a way to dump those messages).
+-S, --sign
+ Enable signing of the BPF program before loading. This option must be
+ used with **-k** and **-i**. Using this flag implicitly enables
+ **--use-loader**.
+
+-k <private_key.pem>
+ Path to the private key file in PEM format, required when signing.
+
+-i <certificate.x509>
+ Path to the X.509 certificate file in PEM or DER format, required when
+ signing.
+
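Illustration (not part of the patch): a hedged example of a signed program load with the new options; paths are placeholders.

	# bpftool -S -k signing_key.pem -i signing_cert.x509 prog load prog.bpf.o /sys/fs/bpf/prog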
EXAMPLES
========
**# bpftool prog show**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-token.rst b/tools/bpf/bpftool/Documentation/bpftool-token.rst
new file mode 100644
index 000000000000..d082c499cfe3
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-token.rst
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+================
+bpftool-token
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF tokens
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+.. include:: substitutions.rst
+
+SYNOPSIS
+========
+
+**bpftool** [*OPTIONS*] **token** *COMMAND*
+
+*OPTIONS* := { |COMMON_OPTIONS| }
+
+*COMMANDS* := { **show** | **list** | **help** }
+
+TOKEN COMMANDS
+===============
+
+| **bpftool** **token** { **show** | **list** }
+| **bpftool** **token help**
+|
+
+DESCRIPTION
+===========
+bpftool token { show | list }
+ List BPF token information for each *bpffs* mount point containing token
+ information on the system. The information includes the mount point path, allowed
+ **bpf**\ () system call commands, maps, programs, and attach types for the
+ token.
+
+bpftool token help
+ Print short help message.
+
+OPTIONS
+========
+.. include:: common_options.rst
+
+EXAMPLES
+========
+|
+| **# mkdir -p /sys/fs/bpf/token**
+| **# mount -t bpf bpffs /sys/fs/bpf/token** \
+| **-o delegate_cmds=prog_load:map_create** \
+| **-o delegate_progs=kprobe** \
+| **-o delegate_attachs=xdp**
+| **# bpftool token list**
+
+::
+
+ token_info /sys/fs/bpf/token
+ allowed_cmds:
+ map_create prog_load
+ allowed_maps:
+ allowed_progs:
+ kprobe
+ allowed_attachs:
+ xdp
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 9e9a5f006cd2..586d1b2595d1 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -130,8 +130,8 @@ include $(FEATURES_DUMP)
endif
endif
-LIBS = $(LIBBPF) -lelf -lz
-LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz
+LIBS = $(LIBBPF) -lelf -lz -lcrypto
+LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz -lcrypto
ifeq ($(feature-libelf-zstd),1)
LIBS += -lzstd
@@ -194,7 +194,7 @@ endif
BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o sign.o)
$(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP)
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index a759ba24471d..53bcfeb1a76e 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -262,7 +262,7 @@ _bpftool()
# Deal with options
if [[ ${words[cword]} == -* ]]; then
local c='--version --json --pretty --bpffs --mapcompat --debug \
- --use-loader --base-btf'
+ --use-loader --base-btf --sign -i -k'
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
return 0
fi
@@ -283,7 +283,7 @@ _bpftool()
_sysfs_get_netdevs
return 0
;;
- file|pinned|-B|--base-btf)
+ file|pinned|-B|--base-btf|-i|-k)
_filedir
return 0
;;
@@ -296,13 +296,21 @@ _bpftool()
# Remove all options so completions don't have to deal with them.
local i pprev
for (( i=1; i < ${#words[@]}; )); do
- if [[ ${words[i]::1} == - ]] &&
- [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then
- words=( "${words[@]:0:i}" "${words[@]:i+1}" )
- [[ $i -le $cword ]] && cword=$(( cword - 1 ))
- else
- i=$(( ++i ))
- fi
+ case ${words[i]} in
+ # Remove option and its argument
+ -B|--base-btf|-i|-k)
+ words=( "${words[@]:0:i}" "${words[@]:i+2}" )
+ [[ $i -le $(($cword + 1)) ]] && cword=$(( cword - 2 ))
+ ;;
+ # No argument, remove option only
+ -*)
+ words=( "${words[@]:0:i}" "${words[@]:i+1}" )
+ [[ $i -le $cword ]] && cword=$(( cword - 1 ))
+ ;;
+ *)
+ i=$(( ++i ))
+ ;;
+ esac
done
cur=${words[cword]}
prev=${words[cword - 1]}
@@ -1215,6 +1223,17 @@ _bpftool()
;;
esac
;;
+ token)
+ case $command in
+ show|list)
+ return 0
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'help show list' -- "$cur" ) )
+ ;;
+ esac
+ ;;
esac
} &&
complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 4e896d8a2416..ff12628593ae 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -38,7 +38,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
__u32 info_len = sizeof(info);
const char *prog_name = NULL;
struct btf *prog_btf = NULL;
- struct bpf_func_info finfo;
+ struct bpf_func_info finfo = {};
__u32 finfo_rec_size;
char prog_str[1024];
int err;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 944ebe21a216..ec356deb27c9 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -2,6 +2,10 @@
// Copyright (C) 2017 Facebook
// Author: Roman Gushchin <guro@fb.com>
+#undef GCC_VERSION
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
#define _XOPEN_SOURCE 500
#include <errno.h>
#include <fcntl.h>
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b07317d2842f..e8daf963ecef 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -21,6 +21,7 @@
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/vfs.h>
+#include <sys/utsname.h>
#include <linux/filter.h>
#include <linux/limits.h>
@@ -31,6 +32,7 @@
#include <bpf/hashmap.h>
#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
#include <bpf/btf.h>
+#include <zlib.h>
#include "main.h"
@@ -1208,3 +1210,94 @@ int pathname_concat(char *buf, int buf_sz, const char *path,
return 0;
}
+
+static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
+ char **value)
+{
+ char *sep;
+
+ while (gzgets(file, buf, n)) {
+ if (strncmp(buf, "CONFIG_", 7))
+ continue;
+
+ sep = strchr(buf, '=');
+ if (!sep)
+ continue;
+
+ /* Trim ending '\n' */
+ buf[strlen(buf) - 1] = '\0';
+
+ /* Split on '=' and ensure that a value is present. */
+ *sep = '\0';
+ if (!sep[1])
+ continue;
+
+ *value = sep + 1;
+ return true;
+ }
+
+ return false;
+}
+
+int read_kernel_config(const struct kernel_config_option *requested_options,
+ size_t num_options, char **out_values,
+ const char *define_prefix)
+{
+ struct utsname utsn;
+ char path[PATH_MAX];
+ gzFile file = NULL;
+ char buf[4096];
+ char *value;
+ size_t i;
+ int ret = 0;
+
+ if (!requested_options || !out_values || num_options == 0)
+ return -1;
+
+ if (!uname(&utsn)) {
+ snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
+
+ /* gzopen also accepts uncompressed files. */
+ file = gzopen(path, "r");
+ }
+
+ if (!file) {
+ /* Some distributions build with CONFIG_IKCONFIG=y and put the
+ * config file at /proc/config.gz.
+ */
+ file = gzopen("/proc/config.gz", "r");
+ }
+
+ if (!file) {
+ p_info("skipping kernel config, can't open file: %s",
+ strerror(errno));
+ return -1;
+ }
+
+ if (!gzgets(file, buf, sizeof(buf)) || !gzgets(file, buf, sizeof(buf))) {
+ p_info("skipping kernel config, can't read from file: %s",
+ strerror(errno));
+ ret = -1;
+ goto end_parse;
+ }
+
+ if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) {
+ p_info("skipping kernel config, can't find correct file");
+ ret = -1;
+ goto end_parse;
+ }
+
+ while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
+ for (i = 0; i < num_options; i++) {
+ if ((define_prefix && !requested_options[i].macro_dump) ||
+ out_values[i] || strcmp(buf, requested_options[i].name))
+ continue;
+
+ out_values[i] = strdup(value);
+ }
+ }
+
+end_parse:
+ gzclose(file);
+ return ret;
+}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 24fecdf8e430..0f6070a0c8e7 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -10,7 +10,6 @@
#ifdef USE_LIBCAP
#include <sys/capability.h>
#endif
-#include <sys/utsname.h>
#include <sys/vfs.h>
#include <linux/filter.h>
@@ -18,7 +17,6 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include <zlib.h>
#include "main.h"
@@ -327,40 +325,9 @@ static void probe_jit_limit(void)
}
}
-static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
- char **value)
-{
- char *sep;
-
- while (gzgets(file, buf, n)) {
- if (strncmp(buf, "CONFIG_", 7))
- continue;
-
- sep = strchr(buf, '=');
- if (!sep)
- continue;
-
- /* Trim ending '\n' */
- buf[strlen(buf) - 1] = '\0';
-
- /* Split on '=' and ensure that a value is present. */
- *sep = '\0';
- if (!sep[1])
- continue;
-
- *value = sep + 1;
- return true;
- }
-
- return false;
-}
-
static void probe_kernel_image_config(const char *define_prefix)
{
- static const struct {
- const char * const name;
- bool macro_dump;
- } options[] = {
+ struct kernel_config_option options[] = {
/* Enable BPF */
{ "CONFIG_BPF", },
/* Enable bpf() syscall */
@@ -435,52 +402,11 @@ static void probe_kernel_image_config(const char *define_prefix)
{ "CONFIG_HZ", true, }
};
char *values[ARRAY_SIZE(options)] = { };
- struct utsname utsn;
- char path[PATH_MAX];
- gzFile file = NULL;
- char buf[4096];
- char *value;
size_t i;
- if (!uname(&utsn)) {
- snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
-
- /* gzopen also accepts uncompressed files. */
- file = gzopen(path, "r");
- }
-
- if (!file) {
- /* Some distributions build with CONFIG_IKCONFIG=y and put the
- * config file at /proc/config.gz.
- */
- file = gzopen("/proc/config.gz", "r");
- }
- if (!file) {
- p_info("skipping kernel config, can't open file: %s",
- strerror(errno));
- goto end_parse;
- }
- /* Sanity checks */
- if (!gzgets(file, buf, sizeof(buf)) ||
- !gzgets(file, buf, sizeof(buf))) {
- p_info("skipping kernel config, can't read from file: %s",
- strerror(errno));
- goto end_parse;
- }
- if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) {
- p_info("skipping kernel config, can't find correct file");
- goto end_parse;
- }
-
- while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
- for (i = 0; i < ARRAY_SIZE(options); i++) {
- if ((define_prefix && !options[i].macro_dump) ||
- values[i] || strcmp(buf, options[i].name))
- continue;
-
- values[i] = strdup(value);
- }
- }
+ if (read_kernel_config(options, ARRAY_SIZE(options), values,
+ define_prefix))
+ return;
for (i = 0; i < ARRAY_SIZE(options); i++) {
if (define_prefix && !options[i].macro_dump)
@@ -488,10 +414,6 @@ static void probe_kernel_image_config(const char *define_prefix)
print_kernel_option(options[i].name, values[i], define_prefix);
free(values[i]);
}
-
-end_parse:
- if (file)
- gzclose(file);
}
static bool probe_bpf_syscall(const char *define_prefix)
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 67a60114368f..993c7d9484a4 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -688,10 +688,17 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard)
{
DECLARE_LIBBPF_OPTS(gen_loader_opts, opts);
+ struct bpf_load_and_run_opts sopts = {};
+ char sig_buf[MAX_SIG_SIZE];
+ __u8 prog_sha[SHA256_DIGEST_LENGTH];
struct bpf_map *map;
+
char ident[256];
int err = 0;
+ if (sign_progs)
+ opts.gen_hash = true;
+
err = bpf_object__gen_loader(obj, &opts);
if (err)
return err;
@@ -701,6 +708,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
p_err("failed to load object file");
goto out;
}
+
/* If there was no error during load then gen_loader_opts
* are populated with the loader program.
*/
@@ -780,8 +788,52 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
print_hex(opts.insns, opts.insns_sz);
codegen("\
\n\
- \"; \n\
- \n\
+ \";\n");
+
+ if (sign_progs) {
+ sopts.insns = opts.insns;
+ sopts.insns_sz = opts.insns_sz;
+ sopts.excl_prog_hash = prog_sha;
+ sopts.excl_prog_hash_sz = sizeof(prog_sha);
+ sopts.signature = sig_buf;
+ sopts.signature_sz = MAX_SIG_SIZE;
+
+ err = bpftool_prog_sign(&sopts);
+ if (err < 0) {
+ p_err("failed to sign program");
+ goto out;
+ }
+
+ codegen("\
+ \n\
+ static const char opts_sig[] __attribute__((__aligned__(8))) = \"\\\n\
+ ");
+ print_hex((const void *)sig_buf, sopts.signature_sz);
+ codegen("\
+ \n\
+ \";\n");
+
+ codegen("\
+ \n\
+ static const char opts_excl_hash[] __attribute__((__aligned__(8))) = \"\\\n\
+ ");
+ print_hex((const void *)prog_sha, sizeof(prog_sha));
+ codegen("\
+ \n\
+ \";\n");
+
+ codegen("\
+ \n\
+ opts.signature = (void *)opts_sig; \n\
+ opts.signature_sz = sizeof(opts_sig) - 1; \n\
+ opts.excl_prog_hash = (void *)opts_excl_hash; \n\
+ opts.excl_prog_hash_sz = sizeof(opts_excl_hash) - 1; \n\
+ opts.keyring_id = skel->keyring_id; \n\
+ ");
+ }
+
+ codegen("\
+ \n\
opts.ctx = (struct bpf_loader_ctx *)skel; \n\
opts.data_sz = sizeof(opts_data) - 1; \n\
opts.data = (void *)opts_data; \n\
@@ -1240,7 +1292,7 @@ static int do_skeleton(int argc, char **argv)
err = -errno;
libbpf_strerror(err, err_buf, sizeof(err_buf));
p_err("failed to open BPF object file: %s", err_buf);
- goto out;
+ goto out_obj;
}
bpf_object__for_each_map(map, obj) {
@@ -1355,6 +1407,13 @@ static int do_skeleton(int argc, char **argv)
printf("\t} links;\n");
}
+ if (sign_progs) {
+ codegen("\
+ \n\
+ __s32 keyring_id; \n\
+ ");
+ }
+
if (btf) {
err = codegen_datasecs(obj, obj_name);
if (err)
@@ -1552,6 +1611,7 @@ static int do_skeleton(int argc, char **argv)
err = 0;
out:
bpf_object__close(obj);
+out_obj:
if (obj_data)
munmap(obj_data, mmap_sz);
close(fd);
@@ -1930,7 +1990,7 @@ static int do_help(int argc, char **argv)
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_OPTIONS " |\n"
- " {-L|--use-loader} }\n"
+ " {-L|--use-loader} | [ {-S|--sign } {-k} <private_key.pem> {-i} <certificate.x509> ]}\n"
"",
bin_name, "gen");
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index a773e05d5ade..bdcd717b0348 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -282,11 +282,52 @@ get_addr_cookie_array(__u64 *addrs, __u64 *cookies, __u32 count)
return data;
}
+static bool is_x86_ibt_enabled(void)
+{
+#if defined(__x86_64__)
+ struct kernel_config_option options[] = {
+ { "CONFIG_X86_KERNEL_IBT", },
+ };
+ char *values[ARRAY_SIZE(options)] = { };
+ bool ret;
+
+ if (read_kernel_config(options, ARRAY_SIZE(options), values, NULL))
+ return false;
+
+ ret = !!values[0];
+ free(values[0]);
+ return ret;
+#else
+ return false;
+#endif
+}
+
+static bool
+symbol_matches_target(__u64 sym_addr, __u64 target_addr, bool is_ibt_enabled)
+{
+ if (sym_addr == target_addr)
+ return true;
+
+ /*
+ * On x86_64 architectures with CET (Control-flow Enforcement Technology),
+ * function entry points have a 4-byte 'endbr' instruction prefix.
+ * This causes kprobe hooks to target the address *after* 'endbr'
+ * (symbol address + 4), preserving the CET instruction.
+ * Here we check if the symbol address matches the hook target address
+ * minus 4, indicating a CET-enabled function entry point.
+ */
+ if (is_ibt_enabled && sym_addr == target_addr - 4)
+ return true;
+
+ return false;
+}
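The comment above explains the IBT-induced +4 offset; the following throwaway check, using made-up addresses, shows the intended behaviour (illustration only, not part of the patch).

#include <assert.h>

static void symbol_match_example(void)
{
	/* With IBT, a target recorded at sym+4 still maps back to the symbol. */
	assert(symbol_matches_target(0xffffffff81234560ULL,
				     0xffffffff81234564ULL, true));
	/* Without IBT, only an exact address match is accepted. */
	assert(!symbol_matches_target(0xffffffff81234560ULL,
				      0xffffffff81234564ULL, false));
}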
+
static void
show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
{
struct addr_cookie *data;
__u32 i, j = 0;
+ bool is_ibt_enabled;
jsonw_bool_field(json_wtr, "retprobe",
info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN);
@@ -306,11 +347,13 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
if (!dd.sym_count)
goto error;
+ is_ibt_enabled = is_x86_ibt_enabled();
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != data[j].addr)
+ if (!symbol_matches_target(dd.sym_mapping[i].address,
+ data[j].addr, is_ibt_enabled))
continue;
jsonw_start_object(json_wtr);
- jsonw_uint_field(json_wtr, "addr", dd.sym_mapping[i].address);
+ jsonw_uint_field(json_wtr, "addr", (unsigned long)data[j].addr);
jsonw_string_field(json_wtr, "func", dd.sym_mapping[i].name);
/* Print null if it is vmlinux */
if (dd.sym_mapping[i].module[0] == '\0') {
@@ -719,6 +762,7 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
{
struct addr_cookie *data;
__u32 i, j = 0;
+ bool is_ibt_enabled;
if (!info->kprobe_multi.count)
return;
@@ -742,12 +786,14 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
if (!dd.sym_count)
goto error;
+ is_ibt_enabled = is_x86_ibt_enabled();
printf("\n\t%-16s %-16s %s", "addr", "cookie", "func [module]");
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != data[j].addr)
+ if (!symbol_matches_target(dd.sym_mapping[i].address,
+ data[j].addr, is_ibt_enabled))
continue;
printf("\n\t%016lx %-16llx %s",
- dd.sym_mapping[i].address, data[j].cookie, dd.sym_mapping[i].name);
+ (unsigned long)data[j].addr, data[j].cookie, dd.sym_mapping[i].name);
if (dd.sym_mapping[i].module[0] != '\0')
printf(" [%s] ", dd.sym_mapping[i].module);
else
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 2b7f2bd3a7db..a829a6a49037 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -33,6 +33,9 @@ bool relaxed_maps;
bool use_loader;
struct btf *base_btf;
struct hashmap *refs_table;
+bool sign_progs;
+const char *private_key_path;
+const char *cert_path;
static void __noreturn clean_and_exit(int i)
{
@@ -61,7 +64,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
+ " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter | token }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-V|--version} }\n"
"",
@@ -87,6 +90,7 @@ static const struct cmd commands[] = {
{ "gen", do_gen },
{ "struct_ops", do_struct_ops },
{ "iter", do_iter },
+ { "token", do_token },
{ "version", do_version },
{ 0 }
};
@@ -447,6 +451,7 @@ int main(int argc, char **argv)
{ "nomount", no_argument, NULL, 'n' },
{ "debug", no_argument, NULL, 'd' },
{ "use-loader", no_argument, NULL, 'L' },
+ { "sign", no_argument, NULL, 'S' },
{ "base-btf", required_argument, NULL, 'B' },
{ 0 }
};
@@ -473,7 +478,7 @@ int main(int argc, char **argv)
bin_name = "bpftool";
opterr = 0;
- while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l",
+ while ((opt = getopt_long(argc, argv, "VhpjfLmndSi:k:B:l",
options, NULL)) >= 0) {
switch (opt) {
case 'V':
@@ -519,6 +524,16 @@ int main(int argc, char **argv)
case 'L':
use_loader = true;
break;
+ case 'S':
+ sign_progs = true;
+ use_loader = true;
+ break;
+ case 'k':
+ private_key_path = optarg;
+ break;
+ case 'i':
+ cert_path = optarg;
+ break;
default:
p_err("unrecognized option '%s'", argv[optind - 1]);
if (json_output)
@@ -533,6 +548,16 @@ int main(int argc, char **argv)
if (argc < 0)
usage();
+ if (sign_progs && (private_key_path == NULL || cert_path == NULL)) {
+ p_err("-i <identity_x509_cert> and -k <private_key> must be supplied with -S for signing");
+ return -EINVAL;
+ }
+
+ if (!sign_progs && (private_key_path != NULL || cert_path != NULL)) {
+ p_err("--sign (or -S) must be explicitly passed with -i <identity_x509_cert> and -k <private_key> to sign the programs");
+ return -EINVAL;
+ }
+
if (version_requested)
ret = do_version(argc, argv);
else
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 6db704fda5c0..1130299cede0 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -6,9 +6,14 @@
/* BFD and kernel.h both define GCC_VERSION, differently */
#undef GCC_VERSION
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
#include <stdbool.h>
#include <stdio.h>
+#include <errno.h>
#include <stdlib.h>
+#include <bpf/skel_internal.h>
#include <linux/bpf.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
@@ -52,6 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
})
#define ERR_MAX_LEN 1024
+#define MAX_SIG_SIZE 4096
#define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
@@ -85,6 +91,9 @@ extern bool relaxed_maps;
extern bool use_loader;
extern struct btf *base_btf;
extern struct hashmap *refs_table;
+extern bool sign_progs;
+extern const char *private_key_path;
+extern const char *cert_path;
void __printf(1, 2) p_err(const char *fmt, ...);
void __printf(1, 2) p_info(const char *fmt, ...);
@@ -166,6 +175,7 @@ int do_tracelog(int argc, char **arg) __weak;
int do_feature(int argc, char **argv) __weak;
int do_struct_ops(int argc, char **argv) __weak;
int do_iter(int argc, char **argv) __weak;
+int do_token(int argc, char **argv) __weak;
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
@@ -274,4 +284,15 @@ int pathname_concat(char *buf, int buf_sz, const char *path,
/* print netfilter bpf_link info */
void netfilter_dump_plain(const struct bpf_link_info *info);
void netfilter_dump_json(const struct bpf_link_info *info, json_writer_t *wtr);
+
+struct kernel_config_option {
+ const char *name;
+ bool macro_dump;
+};
+
+int read_kernel_config(const struct kernel_config_option *requested_options,
+ size_t num_options, char **out_values,
+ const char *define_prefix);
+int bpftool_prog_sign(struct bpf_load_and_run_opts *opts);
+__u32 register_session_key(const char *key_der_path);
#endif
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 9722d841abc0..6daf19809ca4 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -23,6 +23,7 @@
#include <linux/err.h>
#include <linux/perf_event.h>
#include <linux/sizes.h>
+#include <linux/keyctl.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
@@ -714,7 +715,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
if (mode == DUMP_JITED) {
if (info->jited_prog_len == 0 || !info->jited_prog_insns) {
- p_info("no instructions returned");
+ p_err("error retrieving jit dump: no instructions returned or kernel.kptr_restrict set?");
return -1;
}
buf = u64_to_ptr(info->jited_prog_insns);
@@ -1930,6 +1931,8 @@ static int try_loader(struct gen_loader_opts *gen)
{
struct bpf_load_and_run_opts opts = {};
struct bpf_loader_ctx *ctx;
+ char sig_buf[MAX_SIG_SIZE];
+ __u8 prog_sha[SHA256_DIGEST_LENGTH];
int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc),
sizeof(struct bpf_prog_desc));
int log_buf_sz = (1u << 24) - 1;
@@ -1953,6 +1956,26 @@ static int try_loader(struct gen_loader_opts *gen)
opts.insns = gen->insns;
opts.insns_sz = gen->insns_sz;
fds_before = count_open_fds();
+
+ if (sign_progs) {
+ opts.excl_prog_hash = prog_sha;
+ opts.excl_prog_hash_sz = sizeof(prog_sha);
+ opts.signature = sig_buf;
+ opts.signature_sz = MAX_SIG_SIZE;
+ opts.keyring_id = KEY_SPEC_SESSION_KEYRING;
+
+ err = bpftool_prog_sign(&opts);
+ if (err < 0) {
+ p_err("failed to sign program");
+ goto out;
+ }
+
+ err = register_session_key(cert_path);
+ if (err < 0) {
+ p_err("failed to add session key");
+ goto out;
+ }
+ }
err = bpf_load_and_run(&opts);
fd_delta = count_open_fds() - fds_before;
if (err < 0 || verifier_logs) {
@@ -1961,6 +1984,7 @@ static int try_loader(struct gen_loader_opts *gen)
fprintf(stderr, "loader prog leaked %d FDs\n",
fd_delta);
}
+out:
free(log_buf);
return err;
}
@@ -1988,6 +2012,9 @@ static int do_loader(int argc, char **argv)
goto err_close_obj;
}
+ if (sign_progs)
+ gen.gen_hash = true;
+
err = bpf_object__gen_loader(obj, &gen);
if (err)
goto err_close_obj;
@@ -2262,7 +2289,7 @@ static void profile_print_readings(void)
static char *profile_target_name(int tgt_fd)
{
- struct bpf_func_info func_info;
+ struct bpf_func_info func_info = {};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
const struct btf_type *t;
@@ -2562,7 +2589,7 @@ static int do_help(int argc, char **argv)
" METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
- " {-L|--use-loader} }\n"
+ " {-L|--use-loader} | [ {-S|--sign } {-k} <private_key.pem> {-i} <certificate.x509> ] \n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/sign.c b/tools/bpf/bpftool/sign.c
new file mode 100644
index 000000000000..b34f74d210e9
--- /dev/null
+++ b/tools/bpf/bpftool/sign.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2025 Google LLC.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <getopt.h>
+#include <err.h>
+#include <openssl/opensslv.h>
+#include <openssl/bio.h>
+#include <openssl/evp.h>
+#include <openssl/pem.h>
+#include <openssl/err.h>
+#include <openssl/cms.h>
+#include <linux/keyctl.h>
+#include <errno.h>
+
+#include <bpf/skel_internal.h>
+
+#include "main.h"
+
+#define OPEN_SSL_ERR_BUF_LEN 256
+
+static void display_openssl_errors(int l)
+{
+ char buf[OPEN_SSL_ERR_BUF_LEN];
+ const char *file;
+ const char *data;
+ unsigned long e;
+ int flags;
+ int line;
+
+ while ((e = ERR_get_error_all(&file, &line, NULL, &data, &flags))) {
+ ERR_error_string_n(e, buf, sizeof(buf));
+ if (data && (flags & ERR_TXT_STRING)) {
+ p_err("OpenSSL %s: %s:%d: %s", buf, file, line, data);
+ } else {
+ p_err("OpenSSL %s: %s:%d", buf, file, line);
+ }
+ }
+}
+
+#define DISPLAY_OSSL_ERR(cond) \
+ do { \
+ bool __cond = (cond); \
+ if (__cond && ERR_peek_error()) \
+ display_openssl_errors(__LINE__);\
+ } while (0)
+
+static EVP_PKEY *read_private_key(const char *pkey_path)
+{
+ EVP_PKEY *private_key = NULL;
+ BIO *b;
+
+ b = BIO_new_file(pkey_path, "rb");
+ private_key = PEM_read_bio_PrivateKey(b, NULL, NULL, NULL);
+ BIO_free(b);
+ DISPLAY_OSSL_ERR(!private_key);
+ return private_key;
+}
+
+static X509 *read_x509(const char *x509_name)
+{
+ unsigned char buf[2];
+ X509 *x509 = NULL;
+ BIO *b;
+ int n;
+
+ b = BIO_new_file(x509_name, "rb");
+ if (!b)
+ goto cleanup;
+
+ /* Look at the first two bytes of the file to determine the encoding */
+ n = BIO_read(b, buf, 2);
+ if (n != 2)
+ goto cleanup;
+
+ if (BIO_reset(b) != 0)
+ goto cleanup;
+
+ if (buf[0] == 0x30 && buf[1] >= 0x81 && buf[1] <= 0x84)
+ /* Assume raw DER encoded X.509 */
+ x509 = d2i_X509_bio(b, NULL);
+ else
+ /* Assume PEM encoded X.509 */
+ x509 = PEM_read_bio_X509(b, NULL, NULL, NULL);
+
+cleanup:
+ BIO_free(b);
+ DISPLAY_OSSL_ERR(!x509);
+ return x509;
+}
+
+__u32 register_session_key(const char *key_der_path)
+{
+ unsigned char *der_buf = NULL;
+ X509 *x509 = NULL;
+ int key_id = -1;
+ int der_len;
+
+ if (!key_der_path)
+ return key_id;
+ x509 = read_x509(key_der_path);
+ if (!x509)
+ goto cleanup;
+ der_len = i2d_X509(x509, &der_buf);
+ if (der_len < 0)
+ goto cleanup;
+ key_id = syscall(__NR_add_key, "asymmetric", key_der_path, der_buf,
+ (size_t)der_len, KEY_SPEC_SESSION_KEYRING);
+cleanup:
+ X509_free(x509);
+ OPENSSL_free(der_buf);
+ DISPLAY_OSSL_ERR(key_id == -1);
+ return key_id;
+}
+
+int bpftool_prog_sign(struct bpf_load_and_run_opts *opts)
+{
+ BIO *bd_in = NULL, *bd_out = NULL;
+ EVP_PKEY *private_key = NULL;
+ CMS_ContentInfo *cms = NULL;
+ long actual_sig_len = 0;
+ X509 *x509 = NULL;
+ int err = 0;
+
+ bd_in = BIO_new_mem_buf(opts->insns, opts->insns_sz);
+ if (!bd_in) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ private_key = read_private_key(private_key_path);
+ if (!private_key) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ x509 = read_x509(cert_path);
+ if (!x509) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ cms = CMS_sign(NULL, NULL, NULL, NULL,
+ CMS_NOCERTS | CMS_PARTIAL | CMS_BINARY | CMS_DETACHED |
+ CMS_STREAM);
+ if (!cms) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (!CMS_add1_signer(cms, x509, private_key, EVP_sha256(),
+ CMS_NOCERTS | CMS_BINARY | CMS_NOSMIMECAP |
+ CMS_USE_KEYID | CMS_NOATTR)) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (CMS_final(cms, bd_in, NULL, CMS_NOCERTS | CMS_BINARY) != 1) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ EVP_Digest(opts->insns, opts->insns_sz, opts->excl_prog_hash,
+ &opts->excl_prog_hash_sz, EVP_sha256(), NULL);
+
+ bd_out = BIO_new(BIO_s_mem());
+ if (!bd_out) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ if (!i2d_CMS_bio_stream(bd_out, cms, NULL, 0)) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ actual_sig_len = BIO_get_mem_data(bd_out, NULL);
+ if (actual_sig_len <= 0) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ if ((size_t)actual_sig_len > opts->signature_sz) {
+ err = -ENOSPC;
+ goto cleanup;
+ }
+
+ if (BIO_read(bd_out, opts->signature, actual_sig_len) != actual_sig_len) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ opts->signature_sz = actual_sig_len;
+cleanup:
+ BIO_free(bd_out);
+ CMS_ContentInfo_free(cms);
+ X509_free(x509);
+ EVP_PKEY_free(private_key);
+ BIO_free(bd_in);
+ DISPLAY_OSSL_ERR(err < 0);
+ return err;
+}
diff --git a/tools/bpf/bpftool/token.c b/tools/bpf/bpftool/token.c
new file mode 100644
index 000000000000..c08f34b9d51b
--- /dev/null
+++ b/tools/bpf/bpftool/token.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2025 Didi Technology Co., Tao Chen */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <mntent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+#define MOUNTS_FILE "/proc/mounts"
+
+static struct {
+ const char *header;
+ const char *key;
+} sets[] = {
+ {"allowed_cmds", "delegate_cmds"},
+ {"allowed_maps", "delegate_maps"},
+ {"allowed_progs", "delegate_progs"},
+ {"allowed_attachs", "delegate_attachs"},
+};
+
+static bool has_delegate_options(const char *mnt_ops)
+{
+ return strstr(mnt_ops, "delegate_cmds") ||
+ strstr(mnt_ops, "delegate_maps") ||
+ strstr(mnt_ops, "delegate_progs") ||
+ strstr(mnt_ops, "delegate_attachs");
+}
+
+static char *get_delegate_value(char *opts, const char *key)
+{
+ char *token, *rest, *ret = NULL;
+
+ if (!opts)
+ return NULL;
+
+ for (token = strtok_r(opts, ",", &rest); token;
+ token = strtok_r(NULL, ",", &rest)) {
+ if (strncmp(token, key, strlen(key)) == 0 &&
+ token[strlen(key)] == '=') {
+ ret = token + strlen(key) + 1;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void print_items_per_line(char *input, int items_per_line)
+{
+ char *str, *rest;
+ int cnt = 0;
+
+ if (!input)
+ return;
+
+ for (str = strtok_r(input, ":", &rest); str;
+ str = strtok_r(NULL, ":", &rest)) {
+ if (cnt % items_per_line == 0)
+ printf("\n\t ");
+
+ printf("%-20s", str);
+ cnt++;
+ }
+}
+
+#define ITEMS_PER_LINE 4
+static void show_token_info_plain(struct mntent *mntent)
+{
+ size_t i;
+
+ printf("token_info %s", mntent->mnt_dir);
+
+ for (i = 0; i < ARRAY_SIZE(sets); i++) {
+ char *opts, *value;
+
+ printf("\n\t%s:", sets[i].header);
+ opts = strdup(mntent->mnt_opts);
+ value = get_delegate_value(opts, sets[i].key);
+ print_items_per_line(value, ITEMS_PER_LINE);
+ free(opts);
+ }
+
+ printf("\n");
+}
+
+static void split_json_array_str(char *input)
+{
+ char *str, *rest;
+
+ if (!input) {
+ jsonw_start_array(json_wtr);
+ jsonw_end_array(json_wtr);
+ return;
+ }
+
+ jsonw_start_array(json_wtr);
+ for (str = strtok_r(input, ":", &rest); str;
+ str = strtok_r(NULL, ":", &rest)) {
+ jsonw_string(json_wtr, str);
+ }
+ jsonw_end_array(json_wtr);
+}
+
+static void show_token_info_json(struct mntent *mntent)
+{
+ size_t i;
+
+ jsonw_start_object(json_wtr);
+ jsonw_string_field(json_wtr, "token_info", mntent->mnt_dir);
+
+ for (i = 0; i < ARRAY_SIZE(sets); i++) {
+ char *opts, *value;
+
+ jsonw_name(json_wtr, sets[i].header);
+ opts = strdup(mntent->mnt_opts);
+ value = get_delegate_value(opts, sets[i].key);
+ split_json_array_str(value);
+ free(opts);
+ }
+
+ jsonw_end_object(json_wtr);
+}
+
+static int __show_token_info(struct mntent *mntent)
+{
+ if (json_output)
+ show_token_info_json(mntent);
+ else
+ show_token_info_plain(mntent);
+
+ return 0;
+}
+
+static int show_token_info(void)
+{
+ FILE *fp;
+ struct mntent *ent;
+
+ fp = setmntent(MOUNTS_FILE, "r");
+ if (!fp) {
+ p_err("Failed to open: %s", MOUNTS_FILE);
+ return -1;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+
+ while ((ent = getmntent(fp)) != NULL) {
+ if (strncmp(ent->mnt_type, "bpf", 3) == 0) {
+ if (has_delegate_options(ent->mnt_opts))
+ __show_token_info(ent);
+ }
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ endmntent(fp);
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ if (argc)
+ return BAD_ARG();
+
+ return show_token_info();
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %1$s %2$s { show | list }\n"
+ " %1$s %2$s help\n"
+ " " HELP_SPEC_OPTIONS " }\n"
+ "\n"
+ "",
+ bin_name, argv[-2]);
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_token(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c
index 31d806e3bdaa..573a8d99f009 100644
--- a/tools/bpf/bpftool/tracelog.c
+++ b/tools/bpf/bpftool/tracelog.c
@@ -57,10 +57,8 @@ find_tracefs_mnt_single(unsigned long magic, char *mnt, const char *mntpt)
static bool get_tracefs_pipe(char *mnt)
{
static const char * const known_mnts[] = {
- "/sys/kernel/debug/tracing",
"/sys/kernel/tracing",
- "/tracing",
- "/trace",
+ "/sys/kernel/debug/tracing",
};
const char *pipe_name = "/trace_pipe";
const char *fstype = "tracefs";
@@ -95,12 +93,7 @@ static bool get_tracefs_pipe(char *mnt)
return false;
p_info("could not find tracefs, attempting to mount it now");
- /* Most of the time, tracefs is automatically mounted by debugfs at
- * /sys/kernel/debug/tracing when we try to access it. If we could not
- * find it, it is likely that debugfs is not mounted. Let's give one
- * attempt at mounting just tracefs at /sys/kernel/tracing.
- */
- strcpy(mnt, known_mnts[1]);
+ strcpy(mnt, known_mnts[0]);
if (mount_tracefs(mnt))
return false;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 233de8677382..ae83d8649ef1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1522,6 +1522,12 @@ union bpf_attr {
* If provided, map_flags should have BPF_F_TOKEN_FD flag set.
*/
__s32 map_token_fd;
+
+ /* Hash of the program that has exclusive access to the map.
+ */
+ __aligned_u64 excl_prog_hash;
+ /* Size of the passed excl_prog_hash. */
+ __u32 excl_prog_hash_size;
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
@@ -1605,6 +1611,16 @@ union bpf_attr {
* continuous.
*/
__u32 fd_array_cnt;
+ /* Pointer to a buffer containing the signature of the BPF
+ * program.
+ */
+ __aligned_u64 signature;
+ /* Size of the signature buffer in bytes. */
+ __u32 signature_size;
+ /* ID of the kernel keyring to be used for signature
+ * verification.
+ */
+ __s32 keyring_id;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -6666,6 +6682,8 @@ struct bpf_map_info {
__u32 btf_value_type_id;
__u32 btf_vmlinux_id;
__u64 map_extra;
+ __aligned_u64 hash;
+ __u32 hash_size;
} __attribute__((aligned(8)));
struct bpf_btf_info {
@@ -7418,6 +7436,10 @@ struct bpf_timer {
__u64 __opaque[2];
} __attribute__((aligned(8)));
+struct bpf_task_work {
+ __u64 __opaque;
+} __attribute__((aligned(8)));
+
struct bpf_wq {
__u64 __opaque[2];
} __attribute__((aligned(8)));
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index ab40dbf9f020..339b19797237 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -172,7 +172,7 @@ int bpf_map_create(enum bpf_map_type map_type,
__u32 max_entries,
const struct bpf_map_create_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd);
+ const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
union bpf_attr attr;
int fd;
@@ -203,6 +203,8 @@ int bpf_map_create(enum bpf_map_type map_type,
attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
attr.map_token_fd = OPTS_GET(opts, token_fd, 0);
+ attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL));
+ attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0);
fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
return libbpf_err_errno(fd);
@@ -238,7 +240,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, size_t insn_cnt,
struct bpf_prog_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt);
+ const size_t attr_sz = offsetofend(union bpf_attr, keyring_id);
void *finfo = NULL, *linfo = NULL;
const char *func_info, *line_info;
__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 7252150e7ad3..e983a3e40d61 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -54,9 +54,12 @@ struct bpf_map_create_opts {
__s32 value_type_btf_obj_fd;
__u32 token_fd;
+
+ const void *excl_prog_hash;
+ __u32 excl_prog_hash_size;
size_t :0;
};
-#define bpf_map_create_opts__last_field token_fd
+#define bpf_map_create_opts__last_field excl_prog_hash_size
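Illustration (not part of the patch): a minimal sketch of the low-level API usage, binding a new map to a program hash through these opts fields; the hash value, map name, and map layout are placeholders.

#include <bpf/bpf.h>

int create_exclusive_array(const __u8 prog_hash[32])
{
	LIBBPF_OPTS(bpf_map_create_opts, opts,
		.excl_prog_hash = prog_hash,
		.excl_prog_hash_size = 32,
	);

	/* A single-slot array that only the hashed program may use. */
	return bpf_map_create(BPF_MAP_TYPE_ARRAY, "excl_arr",
			      sizeof(__u32), sizeof(__u64), 1, &opts);
}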
LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
const char *map_name,
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
index 6ff963a491d9..49af4260b8e6 100644
--- a/tools/lib/bpf/bpf_gen_internal.h
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -4,6 +4,7 @@
#define __BPF_GEN_INTERNAL_H
#include "bpf.h"
+#include "libbpf_internal.h"
struct ksym_relo_desc {
const char *name;
@@ -50,6 +51,7 @@ struct bpf_gen {
__u32 nr_ksyms;
int fd_array;
int nr_fd_array;
+ int hash_insn_offset[SHA256_DWORD_SIZE];
};
void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps);
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 113ae4abd345..6945dd99a846 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -110,6 +110,7 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in
static int add_data(struct bpf_gen *gen, const void *data, __u32 size);
static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off);
+static void emit_signature_match(struct bpf_gen *gen);
void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps)
{
@@ -152,6 +153,8 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps
/* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
emit(gen, BPF_EXIT_INSN());
+ if (OPTS_GET(gen->opts, gen_hash, false))
+ emit_signature_match(gen);
}
static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
@@ -368,6 +371,8 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
__emit_sys_close(gen);
}
+static void compute_sha_update_offsets(struct bpf_gen *gen);
+
int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
{
int i;
@@ -394,6 +399,9 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
blob_fd_array_off(gen, i));
emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
emit(gen, BPF_EXIT_INSN());
+ if (OPTS_GET(gen->opts, gen_hash, false))
+ compute_sha_update_offsets(gen);
+
pr_debug("gen: finish %s\n", errstr(gen->error));
if (!gen->error) {
struct gen_loader_opts *opts = gen->opts;
@@ -446,6 +454,22 @@ void bpf_gen__free(struct bpf_gen *gen)
_val; \
})
+static void compute_sha_update_offsets(struct bpf_gen *gen)
+{
+ __u64 sha[SHA256_DWORD_SIZE];
+ __u64 sha_dw;
+ int i;
+
+ libbpf_sha256(gen->data_start, gen->data_cur - gen->data_start, (__u8 *)sha);
+ for (i = 0; i < SHA256_DWORD_SIZE; i++) {
+ struct bpf_insn *insn =
+ (struct bpf_insn *)(gen->insn_start + gen->hash_insn_offset[i]);
+ sha_dw = tgt_endian(sha[i]);
+ insn[0].imm = (__u32)sha_dw;
+ insn[1].imm = sha_dw >> 32;
+ }
+}
+
void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
__u32 btf_raw_size)
{
@@ -557,6 +581,29 @@ void bpf_gen__map_create(struct bpf_gen *gen,
emit_sys_close_stack(gen, stack_off(inner_map_fd));
}
+static void emit_signature_match(struct bpf_gen *gen)
+{
+ __s64 off;
+ int i;
+
+ for (i = 0; i < SHA256_DWORD_SIZE; i++) {
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX,
+ 0, 0, 0, 0));
+ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, i * sizeof(__u64)));
+ gen->hash_insn_offset[i] = gen->insn_cur - gen->insn_start;
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_3, 0, 0, 0, 0, 0));
+
+ off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1;
+ if (is_simm16(off)) {
+ emit(gen, BPF_MOV64_IMM(BPF_REG_7, -EINVAL));
+ emit(gen, BPF_JMP_REG(BPF_JNE, BPF_REG_2, BPF_REG_3, off));
+ } else {
+ gen->error = -ERANGE;
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1));
+ }
+ }
+}
+
void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
enum bpf_attach_type type)
{
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8f5a81b672e1..f92083f51bdb 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -35,6 +35,7 @@
#include <linux/perf_event.h>
#include <linux/bpf_perf_event.h>
#include <linux/ring_buffer.h>
+#include <linux/unaligned.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
@@ -496,6 +497,7 @@ struct bpf_program {
__u32 line_info_rec_size;
__u32 line_info_cnt;
__u32 prog_flags;
+ __u8 hash[SHA256_DIGEST_LENGTH];
};
struct bpf_struct_ops {
@@ -575,6 +577,7 @@ struct bpf_map {
bool autocreate;
bool autoattach;
__u64 map_extra;
+ struct bpf_program *excl_prog;
};
enum extern_type {
@@ -1013,35 +1016,33 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
const struct btf_member *kern_data_member;
struct btf *btf = NULL;
__s32 kern_vtype_id, kern_type_id;
- char tname[256];
+ char tname[192], stname[256];
__u32 i;
snprintf(tname, sizeof(tname), "%.*s",
(int)bpf_core_essential_name_len(tname_raw), tname_raw);
- kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
- &btf, mod_btf);
- if (kern_type_id < 0) {
- pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
- tname);
- return kern_type_id;
- }
- kern_type = btf__type_by_id(btf, kern_type_id);
+ snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname);
- /* Find the corresponding "map_value" type that will be used
- * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
- * find "struct bpf_struct_ops_tcp_congestion_ops" from the
- * btf_vmlinux.
+ /* Look for the corresponding "map_value" type that will be used
+ * in map_update(BPF_MAP_TYPE_STRUCT_OPS) first, figure out the btf
+ * and the mod_btf.
+ * For example, find "struct bpf_struct_ops_tcp_congestion_ops".
*/
- kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
- tname, BTF_KIND_STRUCT);
+ kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf);
if (kern_vtype_id < 0) {
- pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
- STRUCT_OPS_VALUE_PREFIX, tname);
+ pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname);
return kern_vtype_id;
}
kern_vtype = btf__type_by_id(btf, kern_vtype_id);
+ kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+ if (kern_type_id < 0) {
+ pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname);
+ return kern_type_id;
+ }
+ kern_type = btf__type_by_id(btf, kern_type_id);
+
/* Find "struct tcp_congestion_ops" from
* struct bpf_struct_ops_tcp_congestion_ops {
* [ ... ]
@@ -1054,8 +1055,8 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
break;
}
if (i == btf_vlen(kern_vtype)) {
- pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
- tname, STRUCT_OPS_VALUE_PREFIX, tname);
+ pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n",
+ tname, stname);
return -EINVAL;
}
@@ -4485,6 +4486,44 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
}
}
+static int bpf_prog_compute_hash(struct bpf_program *prog)
+{
+ struct bpf_insn *purged;
+ int i, err = 0;
+
+ purged = calloc(prog->insns_cnt, BPF_INSN_SZ);
+ if (!purged)
+ return -ENOMEM;
+
+ /* If relocations have been done, the map_fd needs to be
+ * discarded for the digest calculation.
+ */
+ for (i = 0; i < prog->insns_cnt; i++) {
+ purged[i] = prog->insns[i];
+ if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
+ (purged[i].src_reg == BPF_PSEUDO_MAP_FD ||
+ purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
+ purged[i].imm = 0;
+ i++;
+ if (i >= prog->insns_cnt ||
+ prog->insns[i].code != 0 ||
+ prog->insns[i].dst_reg != 0 ||
+ prog->insns[i].src_reg != 0 ||
+ prog->insns[i].off != 0) {
+ err = -EINVAL;
+ goto out;
+ }
+ purged[i] = prog->insns[i];
+ purged[i].imm = 0;
+ }
+ }
+ libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn),
+ prog->hash);
+out:
+ free(purged);
+ return err;
+}
+
static int bpf_program__record_reloc(struct bpf_program *prog,
struct reloc_desc *reloc_desc,
__u32 insn_idx, const char *sym_name,
@@ -5093,6 +5132,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
return false;
}
+ /*
+ * bpf_get_map_info_by_fd() for DEVMAP will always return flags with
+ * BPF_F_RDONLY_PROG set, but it generally is not set at map creation time.
+ * Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from
+ * bpf_get_map_info_by_fd() when checking for compatibility with an
+ * existing DEVMAP.
+ */
+ if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH)
+ map_info.map_flags &= ~BPF_F_RDONLY_PROG;
+
return (map_info.type == map->def.type &&
map_info.key_size == map->def.key_size &&
map_info.value_size == map->def.value_size &&
@@ -5224,6 +5273,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
create_attr.token_fd = obj->token_fd;
if (obj->token_fd)
create_attr.map_flags |= BPF_F_TOKEN_FD;
+ if (map->excl_prog) {
+ err = bpf_prog_compute_hash(map->excl_prog);
+ if (err)
+ return err;
+
+ create_attr.excl_prog_hash = map->excl_prog->hash;
+ create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH;
+ }
if (bpf_map__is_struct_ops(map)) {
create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
@@ -10514,6 +10571,27 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
return 0;
}
+int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog)
+{
+ if (map_is_created(map)) {
+ pr_warn("exclusive programs must be set before map creation\n");
+ return libbpf_err(-EINVAL);
+ }
+
+ if (map->obj != prog->obj) {
+ pr_warn("excl_prog and map must be from the same bpf object\n");
+ return libbpf_err(-EINVAL);
+ }
+
+ map->excl_prog = prog;
+ return 0;
+}
+
+struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map)
+{
+ return map->excl_prog;
+}
+
static struct bpf_map *
__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
{
@@ -14207,3 +14285,100 @@ void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
free(s->progs);
free(s);
}
+
+static inline __u32 ror32(__u32 v, int bits)
+{
+ return (v >> bits) | (v << (32 - bits));
+}
+
+#define SHA256_BLOCK_LENGTH 64
+#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
+#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+#define Sigma_0(x) (ror32((x), 2) ^ ror32((x), 13) ^ ror32((x), 22))
+#define Sigma_1(x) (ror32((x), 6) ^ ror32((x), 11) ^ ror32((x), 25))
+#define sigma_0(x) (ror32((x), 7) ^ ror32((x), 18) ^ ((x) >> 3))
+#define sigma_1(x) (ror32((x), 17) ^ ror32((x), 19) ^ ((x) >> 10))
+
+static const __u32 sha256_K[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
+ 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
+ 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
+ 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
+ 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
+ 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+};
+
+#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) \
+ { \
+ __u32 tmp = h + Sigma_1(e) + Ch(e, f, g) + sha256_K[i] + w[i]; \
+ d += tmp; \
+ h = tmp + Sigma_0(a) + Maj(a, b, c); \
+ }
+
+static void sha256_blocks(__u32 state[8], const __u8 *data, size_t nblocks)
+{
+ while (nblocks--) {
+ __u32 a = state[0];
+ __u32 b = state[1];
+ __u32 c = state[2];
+ __u32 d = state[3];
+ __u32 e = state[4];
+ __u32 f = state[5];
+ __u32 g = state[6];
+ __u32 h = state[7];
+ __u32 w[64];
+ int i;
+
+ for (i = 0; i < 16; i++)
+ w[i] = get_unaligned_be32(&data[4 * i]);
+ for (; i < ARRAY_SIZE(w); i++)
+ w[i] = sigma_1(w[i - 2]) + w[i - 7] +
+ sigma_0(w[i - 15]) + w[i - 16];
+ for (i = 0; i < ARRAY_SIZE(w); i += 8) {
+ SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h);
+ SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g);
+ SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f);
+ SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e);
+ SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d);
+ SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c);
+ SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b);
+ SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a);
+ }
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ state[4] += e;
+ state[5] += f;
+ state[6] += g;
+ state[7] += h;
+ data += SHA256_BLOCK_LENGTH;
+ }
+}
+
+void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH])
+{
+ __u32 state[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+ 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
+ const __be64 bitcount = cpu_to_be64((__u64)len * 8);
+ __u8 final_data[2 * SHA256_BLOCK_LENGTH] = { 0 };
+ size_t final_len = len % SHA256_BLOCK_LENGTH;
+ int i;
+
+ sha256_blocks(state, data, len / SHA256_BLOCK_LENGTH);
+
+ memcpy(final_data, data + len - final_len, final_len);
+ final_data[final_len] = 0x80;
+ final_len = round_up(final_len + 9, SHA256_BLOCK_LENGTH);
+ memcpy(&final_data[final_len - 8], &bitcount, 8);
+
+ sha256_blocks(state, final_data, final_len / SHA256_BLOCK_LENGTH);
+
+ for (i = 0; i < ARRAY_SIZE(state); i++)
+ put_unaligned_be32(state[i], &out[4 * i]);
+}
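A quick sanity sketch, not part of the patch: the well-known SHA-256 digest of "abc" begins with ba 78 16 bf, which this helper should reproduce.

#include <assert.h>
#include <string.h>

static void libbpf_sha256_selftest(void)
{
	const __u8 expected_prefix[4] = { 0xba, 0x78, 0x16, 0xbf };
	__u8 digest[SHA256_DIGEST_LENGTH];

	libbpf_sha256("abc", 3, digest);
	assert(!memcmp(digest, expected_prefix, sizeof(expected_prefix)));
}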
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 455a957cb702..5118d0a90e24 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -24,8 +24,25 @@
extern "C" {
#endif
+/**
+ * @brief **libbpf_major_version()** provides the major version of libbpf.
+ * @return An integer, the major version number
+ */
LIBBPF_API __u32 libbpf_major_version(void);
+
+/**
+ * @brief **libbpf_minor_version()** provides the minor version of libbpf.
+ * @return An integer, the minor version number
+ */
LIBBPF_API __u32 libbpf_minor_version(void);
+
+/**
+ * @brief **libbpf_version_string()** provides the version of libbpf in a
+ * human-readable form, e.g., "v1.7".
+ * @return Pointer to a static string containing the version
+ *
+ * The format is *not* a part of a stable API and may change in the future.
+ */
LIBBPF_API const char *libbpf_version_string(void);
enum libbpf_errno {
@@ -49,6 +66,14 @@ enum libbpf_errno {
__LIBBPF_ERRNO__END,
};
+/**
+ * @brief **libbpf_strerror()** converts the provided error code into a
+ * human-readable string.
+ * @param err The error code to convert
+ * @param buf Pointer to a buffer where the error message will be stored
+ * @param size The number of bytes in the buffer
+ * @return 0, on success; negative error code, otherwise
+ */
LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
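Illustration (not part of the patch): a small wrapper showing the documented helper in use; the error code and message sink are arbitrary.

#include <stdio.h>
#include <bpf/libbpf.h>

static void report_libbpf_error(int err)
{
	char buf[128];

	libbpf_strerror(err, buf, sizeof(buf));
	fprintf(stderr, "libbpf error %d: %s\n", err, buf);
}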
/**
@@ -252,7 +277,7 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
* @return 0, on success; negative error code, otherwise, error code is
* stored in errno
*/
-int bpf_object__prepare(struct bpf_object *obj);
+LIBBPF_API int bpf_object__prepare(struct bpf_object *obj);
/**
* @brief **bpf_object__load()** loads BPF object into kernel.
@@ -1266,6 +1291,28 @@ LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
*/
LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map,
const void *cur_key, void *next_key, size_t key_sz);
+/**
+ * @brief **bpf_map__set_exclusive_program()** sets a map to be exclusive to the
+ * specified program. This must be called *before* the map is created.
+ *
+ * @param map BPF map to make exclusive.
+ * @param prog BPF program to be the exclusive user of the map. Must belong
+ * to the same bpf_object as the map.
+ * @return 0 on success; a negative error code otherwise.
+ *
+ * This function must be called after the BPF object is opened but before
+ * it is loaded. Once the object is loaded, only the specified program
+ * will be able to access the map's contents.
+ */
+LIBBPF_API int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog);
+
+/**
+ * @brief **bpf_map__exclusive_program()** returns the exclusive program
+ * that is registered with the map (if any).
+ * @param map BPF map to which the exclusive program is registered.
+ * @return the registered exclusive program.
+ */
+LIBBPF_API struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map);
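Illustration (not part of the patch): a minimal sketch of the intended call order for these two APIs, assuming an object file "prog.bpf.o" containing a map "stats_map" and a program "handler"; all names are placeholders.

#include <bpf/libbpf.h>

int make_map_exclusive(void)
{
	struct bpf_program *prog;
	struct bpf_object *obj;
	struct bpf_map *map;
	int err = -1;

	obj = bpf_object__open_file("prog.bpf.o", NULL);
	if (!obj)
		return -1;

	map = bpf_object__find_map_by_name(obj, "stats_map");
	prog = bpf_object__find_program_by_name(obj, "handler");
	if (!map || !prog)
		goto out;

	/* Must happen between open and load, i.e. before the map is created. */
	err = bpf_map__set_exclusive_program(map, prog);
	if (err)
		goto out;

	err = bpf_object__load(obj);
out:
	bpf_object__close(obj);
	return err;
}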
struct bpf_xdp_set_link_opts {
size_t sz;
@@ -1810,9 +1857,10 @@ struct gen_loader_opts {
const char *insns;
__u32 data_sz;
__u32 insns_sz;
+ bool gen_hash;
};
-#define gen_loader_opts__last_field insns_sz
+#define gen_loader_opts__last_field gen_hash
LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj,
struct gen_loader_opts *opts);
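
A minimal, hypothetical usage sketch of the bpf_map__set_exclusive_program() API documented above (object path, map name, and program name are illustrative); per the doc comment, the call has to land between open and load because the map is only created at load time:

#include <bpf/libbpf.h>

int main(void)
{
	struct bpf_object *obj;
	struct bpf_map *map;
	struct bpf_program *prog;
	int err;

	obj = bpf_object__open_file("prog.bpf.o", NULL);
	if (!obj)
		return 1;

	map = bpf_object__find_map_by_name(obj, "data_map");
	prog = bpf_object__find_program_by_name(obj, "handler");
	if (!map || !prog) {
		bpf_object__close(obj);
		return 1;
	}

	/* must be set after open but before load; load creates the map */
	err = bpf_map__set_exclusive_program(map, prog);
	if (!err)
		err = bpf_object__load(obj);

	bpf_object__close(obj);
	return err ? 1 : 0;
}
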
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index d7bd463e7017..8ed8749907d4 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -448,4 +448,7 @@ LIBBPF_1.6.0 {
} LIBBPF_1.5.0;
LIBBPF_1.7.0 {
+ global:
+ bpf_map__set_exclusive_program;
+ bpf_map__exclusive_program;
} LIBBPF_1.6.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 477a3b3389a0..c93797dcaf5b 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -736,4 +736,8 @@ int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
int probe_fd(int fd);
+#define SHA256_DIGEST_LENGTH 32
+#define SHA256_DWORD_SIZE (SHA256_DIGEST_LENGTH / sizeof(__u64))
+#define SHA256_DWORD_SIZE (SHA256_DIGEST_LENGTH / sizeof(__u64))
+
+void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]);
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index 4d5fa079b5d6..6a8f5c7a02eb 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -13,10 +13,15 @@
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/mman.h>
+#include <linux/keyctl.h>
#include <stdlib.h>
#include "bpf.h"
#endif
+#ifndef SHA256_DIGEST_LENGTH
+#define SHA256_DIGEST_LENGTH 32
+#endif
+
#ifndef __NR_bpf
# if defined(__mips__) && defined(_ABIO32)
# define __NR_bpf 4355
@@ -64,6 +69,11 @@ struct bpf_load_and_run_opts {
__u32 data_sz;
__u32 insns_sz;
const char *errstr;
+ void *signature;
+ __u32 signature_sz;
+ __s32 keyring_id;
+ void *excl_prog_hash;
+ __u32 excl_prog_hash_sz;
};
long kern_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);
@@ -220,14 +230,19 @@ static inline int skel_map_create(enum bpf_map_type map_type,
const char *map_name,
__u32 key_size,
__u32 value_size,
- __u32 max_entries)
+ __u32 max_entries,
+ const void *excl_prog_hash,
+ __u32 excl_prog_hash_sz)
{
- const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+ const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
union bpf_attr attr;
memset(&attr, 0, attr_sz);
attr.map_type = map_type;
+ attr.excl_prog_hash = (unsigned long) excl_prog_hash;
+ attr.excl_prog_hash_size = excl_prog_hash_sz;
+
strncpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.key_size = key_size;
attr.value_size = value_size;
@@ -300,6 +315,35 @@ static inline int skel_link_create(int prog_fd, int target_fd,
return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz);
}
+static inline int skel_obj_get_info_by_fd(int fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, info);
+ __u8 sha[SHA256_DIGEST_LENGTH];
+ struct bpf_map_info info;
+ __u32 info_len = sizeof(info);
+ union bpf_attr attr;
+
+ memset(&info, 0, sizeof(info));
+ info.hash = (long) &sha;
+ info.hash_size = SHA256_DIGEST_LENGTH;
+
+ memset(&attr, 0, attr_sz);
+ attr.info.bpf_fd = fd;
+ attr.info.info = (long) &info;
+ attr.info.info_len = info_len;
+ return skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
+}
+
+static inline int skel_map_freeze(int fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, map_fd);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.map_fd = fd;
+
+ return skel_sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz);
+}
#ifdef __KERNEL__
#define set_err
#else
@@ -308,12 +352,13 @@ static inline int skel_link_create(int prog_fd, int target_fd,
static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
{
- const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array);
+ const size_t prog_load_attr_sz = offsetofend(union bpf_attr, keyring_id);
const size_t test_run_attr_sz = offsetofend(union bpf_attr, test);
int map_fd = -1, prog_fd = -1, key = 0, err;
union bpf_attr attr;
- err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
+ err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1,
+ opts->excl_prog_hash, opts->excl_prog_hash_sz);
if (map_fd < 0) {
opts->errstr = "failed to create loader map";
set_err;
@@ -327,11 +372,34 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
goto out;
}
+#ifndef __KERNEL__
+ err = skel_map_freeze(map_fd);
+ if (err < 0) {
+ opts->errstr = "failed to freeze map";
+ set_err;
+ goto out;
+ }
+ err = skel_obj_get_info_by_fd(map_fd);
+ if (err < 0) {
+ opts->errstr = "failed to fetch obj info";
+ set_err;
+ goto out;
+ }
+#endif
+
memset(&attr, 0, prog_load_attr_sz);
attr.prog_type = BPF_PROG_TYPE_SYSCALL;
attr.insns = (long) opts->insns;
attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
attr.license = (long) "Dual BSD/GPL";
+#ifndef __KERNEL__
+ attr.signature = (long) opts->signature;
+ attr.signature_size = opts->signature_sz;
+#else
+ if (opts->signature || opts->signature_sz)
+ pr_warn("signatures are not supported from bpf_preload\n");
+#endif
+ attr.keyring_id = opts->keyring_id;
memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
attr.fd_array = (long) &map_fd;
attr.log_level = opts->ctx->log_level;
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
index 2a7865c8e3fe..43deb05a5197 100644
--- a/tools/lib/bpf/usdt.bpf.h
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -34,13 +34,32 @@ enum __bpf_usdt_arg_type {
BPF_USDT_ARG_CONST,
BPF_USDT_ARG_REG,
BPF_USDT_ARG_REG_DEREF,
+ BPF_USDT_ARG_SIB,
};
+/*
+ * This struct layout is designed specifically to be backwards/forward
+ * compatible between libbpf versions for ARG_CONST, ARG_REG, and
+ * ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
+ */
struct __bpf_usdt_arg_spec {
/* u64 scalar interpreted depending on arg_type, see below */
__u64 val_off;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* arg location case, see bpf_usdt_arg() for details */
- enum __bpf_usdt_arg_type arg_type;
+ enum __bpf_usdt_arg_type arg_type: 8;
+ /* index register offset within struct pt_regs */
+ __u16 idx_reg_off: 12;
+ /* scale factor for index register (1, 2, 4, or 8) */
+ __u16 scale_bitshift: 4;
+ /* reserved for future use, keeps reg_off offset stable */
+ __u8 __reserved: 8;
+#else
+ __u8 __reserved: 8;
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ enum __bpf_usdt_arg_type arg_type: 8;
+#endif
/* offset of referenced register within struct pt_regs */
short reg_off;
/* whether arg should be interpreted as signed value */
@@ -149,7 +168,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
{
struct __bpf_usdt_spec *spec;
struct __bpf_usdt_arg_spec *arg_spec;
- unsigned long val;
+ unsigned long val, idx;
int err, spec_id;
*res = 0;
@@ -204,6 +223,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
val >>= arg_spec->arg_bitshift;
#endif
break;
+ case BPF_USDT_ARG_SIB:
+ /* Arg is in memory addressed by SIB (Scale-Index-Base) mode
+ * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
+ * fetch the base register contents and the index register
+ * contents from pt_regs. Then we calculate the final address
+ * as base + (index * scale) + offset, and do a user-space
+ * probe read to fetch the argument value.
+ */
+ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+ if (err)
+ return err;
+ err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
+ if (err)
+ return err;
+ err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off));
+ if (err)
+ return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ val >>= arg_spec->arg_bitshift;
+#endif
+ break;
default:
return -EINVAL;
}
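
The new BPF_USDT_ARG_SIB case boils down to the usual x86 SIB effective-address formula, address = base + (index << scale_bitshift) + offset. A small user-space illustration of that arithmetic (register values below are made up):

#include <stdint.h>
#include <stdio.h>

static uint64_t sib_addr(uint64_t base, uint64_t index,
			 unsigned int scale_bitshift, int64_t off)
{
	return base + (index << scale_bitshift) + off;
}

int main(void)
{
	/* mimics "-1@-96(%rbp,%rax,8)" with rbp=0x7ffd0000, rax=3 */
	printf("0x%llx\n",
	       (unsigned long long)sib_addr(0x7ffd0000ULL, 3, 3, -96)); /* 0x7ffcffb8 */
	return 0;
}
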
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 3373b9d45ac4..fc2785eecc17 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -200,12 +200,23 @@ enum usdt_arg_type {
USDT_ARG_CONST,
USDT_ARG_REG,
USDT_ARG_REG_DEREF,
+ USDT_ARG_SIB,
};
/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
struct usdt_arg_spec {
__u64 val_off;
- enum usdt_arg_type arg_type;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ enum usdt_arg_type arg_type: 8;
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ __u8 __reserved: 8; /* keep reg_off offset stable */
+#else
+ __u8 __reserved: 8; /* keep reg_off offset stable */
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ enum usdt_arg_type arg_type: 8;
+#endif
short reg_off;
bool arg_signed;
char arg_bitshift;
@@ -570,9 +581,8 @@ static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long o
return NULL;
}
-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
- const char *data, size_t name_off, size_t desc_off,
- struct usdt_note *usdt_note);
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off,
+ size_t desc_off, struct usdt_note *usdt_note);
static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
@@ -626,7 +636,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
struct elf_seg *seg = NULL;
void *tmp;
- err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note);
+ err = parse_usdt_note(&nhdr, data->d_buf, name_off, desc_off, &note);
if (err)
goto err_out;
@@ -1132,8 +1142,7 @@ err_out:
/* Parse out USDT ELF note from '.note.stapsdt' section.
* Logic inspired by perf's code.
*/
-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
- const char *data, size_t name_off, size_t desc_off,
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
struct usdt_note *note)
{
const char *provider, *name, *args;
@@ -1283,11 +1292,51 @@ static int calc_pt_regs_off(const char *reg_name)
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
- char reg_name[16];
- int len, reg_off;
- long off;
+ char reg_name[16] = {0}, idx_reg_name[16] = {0};
+ int len, reg_off, idx_reg_off, scale = 1;
+ long off = 0;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n",
+ arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 ||
+ sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n",
+ arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 ||
+ sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n",
+ arg_sz, &off, reg_name, idx_reg_name, &len) == 4 ||
+ sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n",
+ arg_sz, reg_name, idx_reg_name, &len) == 3
+ ) {
+ /*
+ * Scale Index Base case:
+ * 1@-96(%rbp,%rax,8)
+ * 1@(%rbp,%rax,8)
+ * 1@-96(%rbp,%rax)
+ * 1@(%rbp,%rax)
+ */
+ arg->arg_type = USDT_ARG_SIB;
+ arg->val_off = off;
- if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+
+ idx_reg_off = calc_pt_regs_off(idx_reg_name);
+ if (idx_reg_off < 0)
+ return idx_reg_off;
+ arg->idx_reg_off = idx_reg_off;
+
+ /* validate scale factor and set fields directly */
+ switch (scale) {
+ case 1: arg->scale_bitshift = 0; break;
+ case 2: arg->scale_bitshift = 1; break;
+ case 4: arg->scale_bitshift = 2; break;
+ case 8: arg->scale_bitshift = 3; break;
+ default:
+ pr_warn("usdt: invalid SIB scale %d, expected 1, 2, 4, 8\n", scale);
+ return -EINVAL;
+ }
+ } else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
+ arg_sz, &off, reg_name, &len) == 3) {
/* Memory dereference case, e.g., -4@-20(%rbp) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
@@ -1306,6 +1355,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
} else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -4@%eax */
arg->arg_type = USDT_ARG_REG;
+ /* register read has no memory offset */
arg->val_off = 0;
reg_off = calc_pt_regs_off(reg_name);
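
The SIB support added to parse_usdt_arg() hinges on the scanf patterns above; a stand-alone check of the first pattern against a representative USDT arg spec (the input string is just an example):

#include <stdio.h>

int main(void)
{
	char base[16] = {0}, idx[16] = {0};
	int sz = 0, scale = 1, len = 0, n;
	long off = 0;

	n = sscanf("-4@-96(%rbp,%rax,8)",
		   " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n",
		   &sz, &off, base, idx, &scale, &len);
	/* expect: n=5, sz=-4, off=-96, base="rbp", idx="rax", scale=8 */
	printf("n=%d sz=%d off=%ld base=%s idx=%s scale=%d\n",
	       n, sz, off, base, idx, scale);
	return 0;
}
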
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 3d8378972d26..be1ee7ba7ce0 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -44,3 +44,4 @@ xdp_redirect_multi
xdp_synproxy
xdp_hw_metadata
xdp_features
+verification_cert.h
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 3ebd77206f98..a17baf8c6fd7 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -2,4 +2,3 @@
# Alphabetical order
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
-verifier_iterating_callbacks
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4863106034df..f00587d4ede6 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -120,7 +120,7 @@ TEST_PROGS_EXTENDED := \
test_bpftool.py
TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
- bpf_test_modorder_y.ko
+ bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS))
# Compile but not part of 'make run_tests'
@@ -137,7 +137,7 @@ TEST_GEN_PROGS_EXTENDED = \
xdping \
xskxceiver
-TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
+TEST_GEN_FILES += $(TEST_KMODS) liburandom_read.so urandom_read sign-file uprobe_multi
ifneq ($(V),1)
submake_extras := feature_display=0
@@ -398,7 +398,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
-# vmlinux.h is first dumped to a temprorary file and then compared to
+# vmlinux.h is first dumped to a temporary file and then compared to
# the previous version. This helps to avoid unnecessary re-builds of
# $(TRUNNER_BPF_OBJS)
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
@@ -496,15 +496,16 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
test_subskeleton.skel.h test_subskeleton_lib.skel.h \
test_usdt.skel.h
-LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
- trace_printk.c trace_vprintk.c map_ptr_kern.c \
+LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
+LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c
+
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
kfunc_call_test_subprog.c
-SKEL_BLACKLIST += $$(LSKELS)
+SKEL_BLACKLIST += $$(LSKELS) $$(LSKELS_SIGNED)
test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
linked_funcs.skel.h-deps := linked_funcs1.bpf.o linked_funcs2.bpf.o
@@ -535,6 +536,7 @@ HEADERS_FOR_BPF_OBJS := $(wildcard $(BPFDIR)/*.bpf.h) \
# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER
+LSKEL_SIGN := -S -k $(PRIVATE_KEY) -i $(VERIFICATION_CERT)
TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
TRUNNER_BINARY := $1$(if $2,-)$2
TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \
@@ -550,6 +552,7 @@ TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(TRUNNER_BPF_SRCS)))
TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA))
TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
+TRUNNER_BPF_LSKELS_SIGNED := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS_SIGNED))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
@@ -604,6 +607,15 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
$(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+$(TRUNNER_BPF_LSKELS_SIGNED): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY) (signed),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
+ $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton $(LSKEL_SIGN) $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
+ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+
$(LINKED_BPF_OBJS): %: $(TRUNNER_OUTPUT)/%
# .SECONDEXPANSION here allows to correctly expand %-deps variables as prerequisites
@@ -653,6 +665,7 @@ $(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_SKELS) \
$(TRUNNER_BPF_LSKELS) \
+ $(TRUNNER_BPF_LSKELS_SIGNED) \
$(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
@@ -667,6 +680,7 @@ $(foreach N,$(patsubst $(TRUNNER_OUTPUT)/%.o,%,$(TRUNNER_EXTRA_OBJS)), \
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
$(TRUNNER_EXTRA_HDRS) \
+ $(VERIFY_SIG_HDR) \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
@@ -697,6 +711,18 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
endef
+VERIFY_SIG_SETUP := $(CURDIR)/verify_sig_setup.sh
+VERIFY_SIG_HDR := verification_cert.h
+VERIFICATION_CERT := $(BUILD_DIR)/signing_key.der
+PRIVATE_KEY := $(BUILD_DIR)/signing_key.pem
+
+$(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP)
+ $(Q)mkdir -p $(BUILD_DIR)
+ $(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR)
+
+$(VERIFY_SIG_HDR): $(VERIFICATION_CERT)
+ $(Q)xxd -i -n test_progs_verification_cert $< > $@
+
# Define test_progs test runner.
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
@@ -716,6 +742,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
disasm.c \
disasm_helpers.c \
json_writer.c \
+ $(VERIFY_SIG_HDR) \
flow_dissector_load.h \
ip_check_defrag_frags.h
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
@@ -725,7 +752,7 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(OUTPUT)/uprobe_multi \
$(TEST_KMOD_TARGETS) \
ima_setup.sh \
- verify_sig_setup.sh \
+ $(VERIFY_SIG_SETUP) \
$(wildcard progs/btf_dump_test_case_*.c) \
$(wildcard progs/*.bpf.o)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
@@ -816,6 +843,7 @@ $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h
+$(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -837,6 +865,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_htab_mem.o \
$(OUTPUT)/bench_bpf_crypto.o \
$(OUTPUT)/bench_sockmap.o \
+ $(OUTPUT)/bench_lpm_trie_map.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index ddd73d06a1eb..bd29bb2e6cb5 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -284,6 +284,7 @@ extern struct argp bench_htab_mem_argp;
extern struct argp bench_trigger_batch_argp;
extern struct argp bench_crypto_argp;
extern struct argp bench_sockmap_argp;
+extern struct argp bench_lpm_trie_map_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -299,6 +300,7 @@ static const struct argp_child bench_parsers[] = {
{ &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
{ &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
{ &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 },
+ { &bench_lpm_trie_map_argp, 0, "LPM trie map benchmark", 0 },
{},
};
@@ -499,7 +501,7 @@ extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;
-/* pure counting benchmarks to establish theoretical lmits */
+/* pure counting benchmarks to establish theoretical limits */
extern const struct bench bench_trig_usermode_count;
extern const struct bench bench_trig_syscall_count;
extern const struct bench bench_trig_kernel_count;
@@ -510,6 +512,8 @@ extern const struct bench bench_trig_kretprobe;
extern const struct bench bench_trig_kprobe_multi;
extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_kprobe_multi_all;
+extern const struct bench bench_trig_kretprobe_multi_all;
extern const struct bench bench_trig_fexit;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_tp;
@@ -558,6 +562,13 @@ extern const struct bench bench_htab_mem;
extern const struct bench bench_crypto_encrypt;
extern const struct bench bench_crypto_decrypt;
extern const struct bench bench_sockmap;
+extern const struct bench bench_lpm_trie_noop;
+extern const struct bench bench_lpm_trie_baseline;
+extern const struct bench bench_lpm_trie_lookup;
+extern const struct bench bench_lpm_trie_insert;
+extern const struct bench bench_lpm_trie_update;
+extern const struct bench bench_lpm_trie_delete;
+extern const struct bench bench_lpm_trie_free;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -578,6 +589,8 @@ static const struct bench *benchs[] = {
&bench_trig_kprobe_multi,
&bench_trig_kretprobe_multi,
&bench_trig_fentry,
+ &bench_trig_kprobe_multi_all,
+ &bench_trig_kretprobe_multi_all,
&bench_trig_fexit,
&bench_trig_fmodret,
&bench_trig_tp,
@@ -625,6 +638,13 @@ static const struct bench *benchs[] = {
&bench_crypto_encrypt,
&bench_crypto_decrypt,
&bench_sockmap,
+ &bench_lpm_trie_noop,
+ &bench_lpm_trie_baseline,
+ &bench_lpm_trie_lookup,
+ &bench_lpm_trie_insert,
+ &bench_lpm_trie_update,
+ &bench_lpm_trie_delete,
+ &bench_lpm_trie_free,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index 005c401b3e22..bea323820ffb 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -46,6 +46,7 @@ struct bench_res {
unsigned long gp_ns;
unsigned long gp_ct;
unsigned int stime;
+ unsigned long duration_ns;
};
struct bench {
diff --git a/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
new file mode 100644
index 000000000000..246f6cb3387d
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+/*
+ * All of these benchmarks operate on tries with keys in the range
+ * [0, args.nr_entries), i.e. there are no gaps or partially filled
+ * branches of the trie for any key < args.nr_entries.
+ *
+ * This gives an idea of worst-case behaviour.
+ */
+
+#include <argp.h>
+#include <linux/time64.h>
+#include <linux/if_ether.h>
+#include "lpm_trie_bench.skel.h"
+#include "lpm_trie_map.skel.h"
+#include "bench.h"
+#include "testing_helpers.h"
+#include "progs/lpm_trie.h"
+
+static struct ctx {
+ struct lpm_trie_bench *bench;
+} ctx;
+
+static struct {
+ __u32 nr_entries;
+ __u32 prefixlen;
+ bool random;
+} args = {
+ .nr_entries = 0,
+ .prefixlen = 32,
+ .random = false,
+};
+
+enum {
+ ARG_NR_ENTRIES = 9000,
+ ARG_PREFIX_LEN,
+ ARG_RANDOM,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+ "Number of unique entries in the LPM trie" },
+ { "prefix_len", ARG_PREFIX_LEN, "PREFIX_LEN", 0,
+ "Number of prefix bits to use in the LPM trie" },
+ { "random", ARG_RANDOM, NULL, 0, "Access random keys during op" },
+ {},
+};
+
+static error_t lpm_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid nr_entries count.");
+ argp_usage(state);
+ }
+ args.nr_entries = ret;
+ break;
+ case ARG_PREFIX_LEN:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid prefix_len value.");
+ argp_usage(state);
+ }
+ args.prefixlen = ret;
+ break;
+ case ARG_RANDOM:
+ args.random = true;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+const struct argp bench_lpm_trie_map_argp = {
+ .options = opts,
+ .parser = lpm_parse_arg,
+};
+
+static void validate_common(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer\n");
+ exit(1);
+ }
+
+ if (args.nr_entries == 0) {
+ fprintf(stderr, "Missing --nr_entries parameter\n");
+ exit(1);
+ }
+
+ if ((1UL << args.prefixlen) < args.nr_entries) {
+ fprintf(stderr, "prefix_len value too small for nr_entries\n");
+ exit(1);
+ }
+}
+
+static void lpm_insert_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-insert requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-insert does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_delete_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-delete requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-delete does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_free_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-free requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-free does not support --random\n");
+ exit(1);
+ }
+}
+
+static struct trie_key *keys;
+static __u32 *vals;
+
+static void fill_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_update_batch(map_fd, keys, vals, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch update keys to map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void empty_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_delete_batch(map_fd, keys, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch delete keys for map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void attach_prog(void)
+{
+ int i;
+
+ ctx.bench = lpm_trie_bench__open_and_load();
+ if (!ctx.bench) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.bench->bss->nr_entries = args.nr_entries;
+ ctx.bench->bss->prefixlen = args.prefixlen;
+ ctx.bench->bss->random = args.random;
+
+ if (lpm_trie_bench__attach(ctx.bench)) {
+ fprintf(stderr, "failed to attach skeleton\n");
+ exit(1);
+ }
+
+ keys = calloc(args.nr_entries, sizeof(*keys));
+ vals = calloc(args.nr_entries, sizeof(*vals));
+
+ for (i = 0; i < args.nr_entries; i++) {
+ struct trie_key *k = &keys[i];
+ __u32 *v = &vals[i];
+
+ k->prefixlen = args.prefixlen;
+ k->data = i;
+ *v = 1;
+ }
+}
+
+static void attach_prog_and_fill_map(void)
+{
+ int fd;
+
+ attach_prog();
+
+ fd = bpf_map__fd(ctx.bench->maps.trie_map);
+ fill_map(fd);
+}
+
+static void lpm_noop_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_NOOP;
+}
+
+static void lpm_baseline_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_BASELINE;
+}
+
+static void lpm_lookup_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_LOOKUP;
+}
+
+static void lpm_insert_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_INSERT;
+}
+
+static void lpm_update_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_UPDATE;
+}
+
+static void lpm_delete_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_DELETE;
+}
+
+static void lpm_free_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_FREE;
+}
+
+static void lpm_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.bench->bss->hits, 0);
+ res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0);
+}
+
+static void bench_reinit_map(void)
+{
+ int fd = bpf_map__fd(ctx.bench->maps.trie_map);
+
+ switch (ctx.bench->bss->op) {
+ case LPM_OP_INSERT:
+ /* trie_map needs to be emptied */
+ empty_map(fd);
+ break;
+ case LPM_OP_DELETE:
+ /* trie_map needs to be refilled */
+ fill_map(fd);
+ break;
+ default:
+ fprintf(stderr, "Unexpected REINIT return code for op %d\n",
+ ctx.bench->bss->op);
+ exit(1);
+ }
+}
+
+/* For NOOP, BASELINE, LOOKUP, INSERT, UPDATE, and DELETE */
+static void *lpm_producer(void *unused __always_unused)
+{
+ int err;
+ char in[ETH_HLEN]; /* unused */
+
+ LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = in,
+ .data_size_in = sizeof(in), .repeat = 1, );
+
+ while (true) {
+ int fd = bpf_program__fd(ctx.bench->progs.run_bench);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err) {
+ fprintf(stderr, "failed to run BPF prog: %d\n", err);
+ exit(1);
+ }
+
+ /* Check for kernel error code */
+ if ((int)opts.retval < 0) {
+ fprintf(stderr, "BPF prog returned error: %d\n",
+ opts.retval);
+ exit(1);
+ }
+
+ switch (opts.retval) {
+ case LPM_BENCH_SUCCESS:
+ break;
+ case LPM_BENCH_REINIT_MAP:
+ bench_reinit_map();
+ break;
+ default:
+ fprintf(stderr, "Unexpected BPF prog return code %d for op %d\n",
+ opts.retval, ctx.bench->bss->op);
+ exit(1);
+ }
+ }
+
+ return NULL;
+}
+
+static void *lpm_free_producer(void *unused __always_unused)
+{
+ while (true) {
+ struct lpm_trie_map *skel;
+
+ skel = lpm_trie_map__open_and_load();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ fill_map(bpf_map__fd(skel->maps.trie_free_map));
+ lpm_trie_map__destroy(skel);
+ }
+
+ return NULL;
+}
+
+/*
+ * The standard bench op_report_*() functions assume measurements are
+ * taken over a 1-second interval but operations that modify the map
+ * (INSERT, DELETE, and FREE) cannot run indefinitely without
+ * "resetting" the map to the initial state. Depending on the size of
+ * the map, this likely needs to happen before the 1-second timer fires.
+ *
+ * Calculate the fraction of a second over which the op measurement was
+ * taken (to ignore any time spent doing the reset) and report the
+ * throughput results per second.
+ */
+static void frac_second_report_progress(int iter, struct bench_res *res,
+ long delta_ns, double rate_divisor,
+ char rate)
+{
+ double hits_per_sec, hits_per_prod;
+
+ hits_per_sec = res->hits / rate_divisor /
+ (res->duration_ns / (double)NSEC_PER_SEC);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+
+ printf("Iter %3d (%7.3lfus): ", iter,
+ (delta_ns - NSEC_PER_SEC) / 1000.0);
+ printf("hits %8.3lf%c/s (%7.3lf%c/prod)\n", hits_per_sec, rate,
+ hits_per_prod, rate);
+}
+
+static void frac_second_report_final(struct bench_res res[], int res_cnt,
+ double lat_divisor, double rate_divisor,
+ char rate, const char *unit)
+{
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ double latency = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ double val = res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_mean += val / (0.0 + res_cnt);
+ latency += res[i].duration_ns / res[i].hits / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ double val =
+ res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_stddev += (hits_mean - val) * (hits_mean - val) /
+ (res_cnt - 1.0);
+ }
+
+ hits_stddev = sqrt(hits_stddev);
+ }
+ printf("Summary: throughput %8.3lf \u00B1 %5.3lf %c ops/s (%7.3lf%c ops/prod), ",
+ hits_mean, hits_stddev, rate, hits_mean / env.producer_cnt,
+ rate);
+ printf("latency %8.3lf %s/op\n",
+ latency / lat_divisor / env.producer_cnt, unit);
+}
+
+static void insert_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void delete_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void free_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000.0;
+ char rate = 'K';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void insert_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void delete_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void free_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1000000.0;
+ double rate_divisor = 1000.0;
+ const char *unit = "ms";
+ char rate = 'K';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+/* noop bench measures harness-overhead */
+const struct bench bench_lpm_trie_noop = {
+ .name = "lpm-trie-noop",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_noop_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* baseline overhead for lookup and update */
+const struct bench bench_lpm_trie_baseline = {
+ .name = "lpm-trie-baseline",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_baseline_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of doing a lookup on existing entries in a full trie */
+const struct bench bench_lpm_trie_lookup = {
+ .name = "lpm-trie-lookup",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_lookup_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of inserting new entries into an empty trie */
+const struct bench bench_lpm_trie_insert = {
+ .name = "lpm-trie-insert",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_insert_validate,
+ .setup = lpm_insert_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = insert_ops_report_progress,
+ .report_final = insert_ops_report_final,
+};
+
+/* measure cost of updating existing entries in a full trie */
+const struct bench bench_lpm_trie_update = {
+ .name = "lpm-trie-update",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_update_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of deleting existing entries from a full trie */
+const struct bench bench_lpm_trie_delete = {
+ .name = "lpm-trie-delete",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_delete_validate,
+ .setup = lpm_delete_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = delete_ops_report_progress,
+ .report_final = delete_ops_report_final,
+};
+
+/* measure cost of freeing a full trie */
+const struct bench bench_lpm_trie_free = {
+ .name = "lpm-trie-free",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_free_validate,
+ .setup = lpm_free_setup,
+ .producer_thread = lpm_free_producer,
+ .measure = lpm_measure,
+ .report_progress = free_ops_report_progress,
+ .report_final = free_ops_report_final,
+};
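
As the comment above frac_second_report_progress() explains, map-modifying benchmarks report throughput from the fraction of the interval actually spent on the operation; a worked example of that normalization (the numbers are invented):

#include <stdio.h>

#define NSEC_PER_SEC 1000000000L

int main(void)
{
	long hits = 2000000;          /* ops completed before the map reset */
	long duration_ns = 500000000; /* only 0.5 s of the interval spent on the op */
	double rate_divisor = 1000000.0;
	double hits_per_sec;

	hits_per_sec = hits / rate_divisor / (duration_ns / (double)NSEC_PER_SEC);
	printf("%.3f M ops/s\n", hits_per_sec); /* 4.000 M ops/s */
	return 0;
}
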
diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
index 8ebf563a67a2..cfc072aa7fff 100644
--- a/tools/testing/selftests/bpf/benchs/bench_sockmap.c
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -10,6 +10,7 @@
#include <argp.h>
#include "bench.h"
#include "bench_sockmap_prog.skel.h"
+#include "bpf_util.h"
#define FILE_SIZE (128 * 1024)
#define DATA_REPEAT_SIZE 10
@@ -124,8 +125,8 @@ static void bench_sockmap_prog_destroy(void)
{
int i;
- for (i = 0; i < sizeof(ctx.fds); i++) {
- if (ctx.fds[0] > 0)
+ for (i = 0; i < ARRAY_SIZE(ctx.fds); i++) {
+ if (ctx.fds[i] > 0)
close(ctx.fds[i]);
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 82327657846e..1e2aff007c2a 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -226,6 +226,65 @@ static void trigger_fentry_setup(void)
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
+static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ char **syms = NULL;
+ size_t cnt = 0;
+
+ /* Some recursive functions will be skipped in
+	 * bpf_get_ksyms -> skip_entry, as they can introduce significant
+	 * overhead. However, it's difficult to skip all the recursive
+ * functions for a debug kernel.
+ *
+ * So, don't run the kprobe-multi-all and kretprobe-multi-all on
+ * a debug kernel.
+ */
+ if (bpf_get_ksyms(&syms, &cnt, true)) {
+ fprintf(stderr, "failed to get ksyms\n");
+ exit(1);
+ }
+
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+ opts.retprobe = kretprobe;
+	/* attach the empty prog to all kernel functions except bpf_get_numa_node_id. */
+ if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
+ fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n");
+ exit(1);
+ }
+}
+
+static void trigger_kprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, false);
+ attach_bpf(prog);
+}
+
+static void trigger_kretprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kretprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kretprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, true);
+ attach_bpf(prog);
+}
+
static void trigger_fexit_setup(void)
{
setup_ctx();
@@ -512,6 +571,8 @@ BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
+BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");
+BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
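
For reference, attaching one program to an explicit symbol list via kprobe.multi follows the same pattern as attach_ksyms_all() above; a minimal sketch with made-up symbol names and no error handling beyond the attach itself:

#include <bpf/libbpf.h>
#include <stdbool.h>

static struct bpf_link *attach_to_syms(struct bpf_program *prog, bool retprobe)
{
	static const char *syms[] = { "vfs_read", "vfs_write" };
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
		.syms = syms,
		.cnt = 2,
		.retprobe = retprobe,
	);

	return bpf_program__attach_kprobe_multi_opts(prog, NULL, &opts);
}
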
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index a690f5a68b6b..f7573708a0c3 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -6,8 +6,8 @@ def_tests=( \
usermode-count kernel-count syscall-count \
fentry fexit fmodret \
rawtp tp \
- kprobe kprobe-multi \
- kretprobe kretprobe-multi \
+ kprobe kprobe-multi kprobe-multi-all \
+ kretprobe kretprobe-multi kretprobe-multi-all \
)
tests=("$@")
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index da7e230f2781..d89eda3fd8a3 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -599,4 +599,58 @@ extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
struct bpf_dynptr *value_p) __weak __ksym;
+#define PREEMPT_BITS 8
+#define SOFTIRQ_BITS 8
+#define HARDIRQ_BITS 4
+#define NMI_BITS 4
+
+#define PREEMPT_SHIFT 0
+#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
+
+#define __IRQ_MASK(x) ((1UL << (x))-1)
+
+#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
+
+extern bool CONFIG_PREEMPT_RT __kconfig __weak;
+#ifdef bpf_target_x86
+extern const int __preempt_count __ksym;
+#endif
+
+struct task_struct___preempt_rt {
+ int softirq_disable_cnt;
+} __attribute__((preserve_access_index));
+
+static inline int get_preempt_count(void)
+{
+#if defined(bpf_target_x86)
+ return *(int *) bpf_this_cpu_ptr(&__preempt_count);
+#elif defined(bpf_target_arm64)
+ return bpf_get_current_task_btf()->thread_info.preempt.count;
+#endif
+ return 0;
+}
+
+/* Description
+ * Report whether it is in interrupt context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_interrupt(void)
+{
+ struct task_struct___preempt_rt *tsk;
+ int pcnt;
+
+ pcnt = get_preempt_count();
+ if (!CONFIG_PREEMPT_RT)
+ return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK);
+
+ tsk = (void *) bpf_get_current_task_btf();
+ return (pcnt & (NMI_MASK | HARDIRQ_MASK)) |
+ (tsk->softirq_disable_cnt & SOFTIRQ_MASK);
+}
+
#endif
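
The bit layout assumed by bpf_in_interrupt() above can be sanity-checked outside of BPF; a tiny compile-time sketch of the resulting masks (derived from the BITS/SHIFT values in this hunk, not taken from a kernel header):

#include <assert.h>

/* softirq count occupies bits 8..15, hardirq bits 16..19, NMI bits 20..23 */
static_assert((((1UL << 8) - 1) << 8)  == 0x0000ff00UL, "SOFTIRQ_MASK");
static_assert((((1UL << 4) - 1) << 16) == 0x000f0000UL, "HARDIRQ_MASK");
static_assert((((1UL << 4) - 1) << 20) == 0x00f00000UL, "NMI_MASK");

int main(void)
{
	return 0;
}
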
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 9386dfe8b884..794d44d19c88 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -19,6 +19,9 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym __weak;
+extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
+
/* Description
* Obtain a read-only pointer to the dynptr's data
* Returns
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 5f6963a320d7..4bc2d25f33e1 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -67,6 +67,9 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz)
#define sys_gettid() syscall(SYS_gettid)
#endif
+/* Poison gettid() so direct usage does not creep back in. */
+#pragma GCC poison gettid
+
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 15f626014872..20cede4db3ce 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -412,6 +412,26 @@ void remove_cgroup(const char *relative_path)
log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
}
+/*
+ * remove_cgroup_pid() - Remove a cgroup set up by the process identified by @pid
+ * @relative_path: The cgroup path, relative to the workdir, to remove
+ * @pid: PID to be used to find cgroup_path
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir. It also expects the cgroup doesn't have any children or live
+ * processes and it removes the cgroup.
+ *
+ * On failure, it will print an error to stderr.
+ */
+void remove_cgroup_pid(const char *relative_path, int pid)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path_pid(cgroup_path, relative_path, pid);
+ if (rmdir(cgroup_path))
+ log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
+}
+
/**
* create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
* @relative_path: The cgroup path, relative to the workdir, to join
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 182e1ac36c95..3857304be874 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -19,6 +19,7 @@ int cgroup_setup_and_join(const char *relative_path);
int get_root_cgroup(void);
int create_and_get_cgroup(const char *relative_path);
void remove_cgroup(const char *relative_path);
+void remove_cgroup_pid(const char *relative_path, int pid);
unsigned long long get_cgroup_id(const char *relative_path);
int get_cgroup1_hierarchy_id(const char *subsys_name);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 8916ab814a3e..70b28c1e653e 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -61,6 +61,7 @@ CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
CONFIG_NET_ACT_GACT=y
+CONFIG_NET_ACT_MIRRED=y
CONFIG_NET_ACT_SKBMOD=y
CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
index e1495a4bbc99..7efad36ceb26 100644
--- a/tools/testing/selftests/bpf/config.aarch64
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -31,10 +31,7 @@ CONFIG_COMPAT=y
CONFIG_CPUSETS=y
CONFIG_CRASH_DUMP=y
CONFIG_CRYPTO_USER_API_RNG=y
-CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
-CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_INFO_REDUCED=n
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
@@ -46,7 +43,6 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEVTMPFS=y
CONFIG_DRM=y
-CONFIG_DUMMY=y
CONFIG_EXPERT=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -70,13 +66,11 @@ CONFIG_HZ_100=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_IKHEADERS=y
CONFIG_INET6_ESP=y
-CONFIG_INET_ESP=y
CONFIG_INET=y
CONFIG_INPUT_EVDEV=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPVLAN=y
CONFIG_JUMP_LABEL=y
@@ -97,22 +91,18 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET_ACT_BPF=y
-CONFIG_NET_ACT_GACT=y
CONFIG_NETDEVICES=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
CONFIG_NET_KEY=y
-CONFIG_NET_SCH_FQ=y
CONFIG_NET_VRF=y
CONFIG_NET=y
-CONFIG_NF_TABLES=y
CONFIG_NLMON=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
CONFIG_OVERLAY_FS=y
CONFIG_PACKET_DIAG=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI_HOST_GENERIC=y
@@ -149,7 +139,6 @@ CONFIG_TASK_XACCT=y
CONFIG_TCG_TIS=y
CONFIG_TCG_TPM=y
CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_DCTCP=y
CONFIG_TLS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TMPFS=y
@@ -161,6 +150,5 @@ CONFIG_UPROBES=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el
index 9acf389dc4ce..b53afb5e0b71 100644
--- a/tools/testing/selftests/bpf/config.ppc64el
+++ b/tools/testing/selftests/bpf/config.ppc64el
@@ -54,7 +54,6 @@ CONFIG_NET=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NONPORTABLE=y
CONFIG_NR_CPUS=256
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI_HOST_GENERIC=y
diff --git a/tools/testing/selftests/bpf/config.riscv64 b/tools/testing/selftests/bpf/config.riscv64
index bb7043a80e1a..7bee24a79a71 100644
--- a/tools/testing/selftests/bpf/config.riscv64
+++ b/tools/testing/selftests/bpf/config.riscv64
@@ -48,7 +48,6 @@ CONFIG_NET_VRF=y
CONFIG_NONPORTABLE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
index 26c3bc2ce11d..db61878148e4 100644
--- a/tools/testing/selftests/bpf/config.s390x
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -22,10 +22,7 @@ CONFIG_CHECKPOINT_RESTORE=y
CONFIG_CPUSETS=y
CONFIG_CRASH_DUMP=y
CONFIG_CRYPTO_USER_API_RNG=y
-CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
-CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_NOTIFIERS=y
@@ -56,11 +53,9 @@ CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_IKHEADERS=y
CONFIG_INET6_ESP=y
CONFIG_INET=y
-CONFIG_INET_ESP=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPVLAN=y
CONFIG_JUMP_LABEL=y
@@ -83,18 +78,14 @@ CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET=y
CONFIG_NET_ACT_BPF=y
-CONFIG_NET_ACT_GACT=y
CONFIG_NET_KEY=y
-CONFIG_NET_SCH_FQ=y
CONFIG_NET_VRF=y
CONFIG_NETDEVICES=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
-CONFIG_NF_TABLES=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
@@ -119,7 +110,6 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_TASK_XACCT=y
CONFIG_TASKSTATS=y
CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_DCTCP=y
CONFIG_TLS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -131,6 +121,5 @@ CONFIG_UPROBES=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index 5e713ef7caa3..42ad817b00ae 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -44,7 +44,6 @@ CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_XXHASH=y
CONFIG_DCB=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEFAULT_FQ_CODEL=y
@@ -104,12 +103,10 @@ CONFIG_HZ_1000=y
CONFIG_INET=y
CONFIG_INPUT_EVDEV=y
CONFIG_INTEL_POWERCLAMP=y
-CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_IP_ROUTE_MULTIPATH=y
@@ -162,7 +159,6 @@ CONFIG_NUMA=y
CONFIG_NUMA_BALANCING=y
CONFIG_NVMEM=y
CONFIG_OSF_PARTITION=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
@@ -220,7 +216,6 @@ CONFIG_VALIDATE_FS_PARSER=y
CONFIG_VETH=y
CONFIG_VIRT_DRIVERS=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_X86_CPUID=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 72b5c174ab3b..cdf7b6641444 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -457,7 +457,7 @@ int append_tid(char *str, size_t sz)
if (end + 8 > sz)
return -1;
- sprintf(&str[end], "%07d", gettid());
+ sprintf(&str[end], "%07ld", sys_gettid());
str[end + 7] = '\0';
return 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 1d53a8561ee2..24c509ce4e5b 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -42,11 +42,11 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "2"},
- {1, "R3_w", "4"},
- {2, "R3_w", "8"},
- {3, "R3_w", "16"},
- {4, "R3_w", "32"},
+ {0, "R3", "2"},
+ {1, "R3", "4"},
+ {2, "R3", "8"},
+ {3, "R3", "16"},
+ {4, "R3", "32"},
},
},
{
@@ -70,17 +70,17 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "1"},
- {1, "R3_w", "2"},
- {2, "R3_w", "4"},
- {3, "R3_w", "8"},
- {4, "R3_w", "16"},
- {5, "R3_w", "1"},
- {6, "R4_w", "32"},
- {7, "R4_w", "16"},
- {8, "R4_w", "8"},
- {9, "R4_w", "4"},
- {10, "R4_w", "2"},
+ {0, "R3", "1"},
+ {1, "R3", "2"},
+ {2, "R3", "4"},
+ {3, "R3", "8"},
+ {4, "R3", "16"},
+ {5, "R3", "1"},
+ {6, "R4", "32"},
+ {7, "R4", "16"},
+ {8, "R4", "8"},
+ {9, "R4", "4"},
+ {10, "R4", "2"},
},
},
{
@@ -99,12 +99,12 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "4"},
- {1, "R3_w", "8"},
- {2, "R3_w", "10"},
- {3, "R4_w", "8"},
- {4, "R4_w", "12"},
- {5, "R4_w", "14"},
+ {0, "R3", "4"},
+ {1, "R3", "8"},
+ {2, "R3", "10"},
+ {3, "R4", "8"},
+ {4, "R4", "12"},
+ {5, "R4", "14"},
},
},
{
@@ -121,10 +121,10 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "7"},
- {1, "R3_w", "7"},
- {2, "R3_w", "14"},
- {3, "R3_w", "56"},
+ {0, "R3", "7"},
+ {1, "R3", "7"},
+ {2, "R3", "14"},
+ {3, "R3", "56"},
},
},
@@ -162,19 +162,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R0_w", "pkt(off=8,r=8)"},
- {6, "R3_w", "var_off=(0x0; 0xff)"},
- {7, "R3_w", "var_off=(0x0; 0x1fe)"},
- {8, "R3_w", "var_off=(0x0; 0x3fc)"},
- {9, "R3_w", "var_off=(0x0; 0x7f8)"},
- {10, "R3_w", "var_off=(0x0; 0xff0)"},
- {12, "R3_w", "pkt_end()"},
- {17, "R4_w", "var_off=(0x0; 0xff)"},
- {18, "R4_w", "var_off=(0x0; 0x1fe0)"},
- {19, "R4_w", "var_off=(0x0; 0xff0)"},
- {20, "R4_w", "var_off=(0x0; 0x7f8)"},
- {21, "R4_w", "var_off=(0x0; 0x3fc)"},
- {22, "R4_w", "var_off=(0x0; 0x1fe)"},
+ {6, "R0", "pkt(off=8,r=8)"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R3", "var_off=(0x0; 0x1fe)"},
+ {8, "R3", "var_off=(0x0; 0x3fc)"},
+ {9, "R3", "var_off=(0x0; 0x7f8)"},
+ {10, "R3", "var_off=(0x0; 0xff0)"},
+ {12, "R3", "pkt_end()"},
+ {17, "R4", "var_off=(0x0; 0xff)"},
+ {18, "R4", "var_off=(0x0; 0x1fe0)"},
+ {19, "R4", "var_off=(0x0; 0xff0)"},
+ {20, "R4", "var_off=(0x0; 0x7f8)"},
+ {21, "R4", "var_off=(0x0; 0x3fc)"},
+ {22, "R4", "var_off=(0x0; 0x1fe)"},
},
},
{
@@ -195,16 +195,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R3_w", "var_off=(0x0; 0xff)"},
- {7, "R4_w", "var_off=(0x0; 0xff)"},
- {8, "R4_w", "var_off=(0x0; 0xff)"},
- {9, "R4_w", "var_off=(0x0; 0xff)"},
- {10, "R4_w", "var_off=(0x0; 0x1fe)"},
- {11, "R4_w", "var_off=(0x0; 0xff)"},
- {12, "R4_w", "var_off=(0x0; 0x3fc)"},
- {13, "R4_w", "var_off=(0x0; 0xff)"},
- {14, "R4_w", "var_off=(0x0; 0x7f8)"},
- {15, "R4_w", "var_off=(0x0; 0xff0)"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R4", "var_off=(0x0; 0xff)"},
+ {8, "R4", "var_off=(0x0; 0xff)"},
+ {9, "R4", "var_off=(0x0; 0xff)"},
+ {10, "R4", "var_off=(0x0; 0x1fe)"},
+ {11, "R4", "var_off=(0x0; 0xff)"},
+ {12, "R4", "var_off=(0x0; 0x3fc)"},
+ {13, "R4", "var_off=(0x0; 0xff)"},
+ {14, "R4", "var_off=(0x0; 0x7f8)"},
+ {15, "R4", "var_off=(0x0; 0xff0)"},
},
},
{
@@ -235,14 +235,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {2, "R5_w", "pkt(r=0)"},
- {4, "R5_w", "pkt(off=14,r=0)"},
- {5, "R4_w", "pkt(off=14,r=0)"},
+ {2, "R5", "pkt(r=0)"},
+ {4, "R5", "pkt(off=14,r=0)"},
+ {5, "R4", "pkt(off=14,r=0)"},
{9, "R2", "pkt(r=18)"},
{10, "R5", "pkt(off=14,r=18)"},
- {10, "R4_w", "var_off=(0x0; 0xff)"},
- {13, "R4_w", "var_off=(0x0; 0xffff)"},
- {14, "R4_w", "var_off=(0x0; 0xffff)"},
+ {10, "R4", "var_off=(0x0; 0xff)"},
+ {13, "R4", "var_off=(0x0; 0xffff)"},
+ {14, "R4", "var_off=(0x0; 0xffff)"},
},
},
{
@@ -299,12 +299,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {7, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
*/
- {11, "R5_w", "pkt(id=1,off=14,"},
+ {11, "R5", "pkt(id=1,off=14,"},
/* At the time the word size load is performed from R5,
* it's total offset is NET_IP_ALIGN + reg->off (0) +
* reg->aux_off (14) which is 16. Then the variable
@@ -320,12 +320,12 @@ static struct bpf_align_test tests[] = {
* instruction to validate R5 state. We also check
* that R4 is what it should be in such case.
*/
- {18, "R4_w", "var_off=(0x0; 0x3fc)"},
- {18, "R5_w", "var_off=(0x0; 0x3fc)"},
+ {18, "R4", "var_off=(0x0; 0x3fc)"},
+ {18, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- {19, "R5_w", "pkt(id=2,off=14,"},
+ {19, "R5", "pkt(id=2,off=14,"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
@@ -337,21 +337,21 @@ static struct bpf_align_test tests[] = {
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5_w", "pkt(off=14,r=8)"},
+ {26, "R5", "pkt(off=14,r=8)"},
/* Variable offset is added to R5, resulting in a
* variable offset of (4n). See comment for insn #18
* for R4 = R5 trick.
*/
- {28, "R4_w", "var_off=(0x0; 0x3fc)"},
- {28, "R5_w", "var_off=(0x0; 0x3fc)"},
+ {28, "R4", "var_off=(0x0; 0x3fc)"},
+ {28, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant is added to R5 again, setting reg->off to 18. */
- {29, "R5_w", "pkt(id=3,off=18,"},
+ {29, "R5", "pkt(id=3,off=18,"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {31, "R4_w", "var_off=(0x0; 0x7fc)"},
- {31, "R5_w", "var_off=(0x0; 0x7fc)"},
+ {31, "R4", "var_off=(0x0; 0x7fc)"},
+ {31, "R5", "var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
@@ -397,12 +397,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {7, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {8, "R6_w", "var_off=(0x2; 0x7fc)"},
+ {8, "R6", "var_off=(0x2; 0x7fc)"},
/* Packet pointer has (4n+2) offset */
- {11, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {11, "R5", "var_off=(0x2; 0x7fc)"},
{12, "R4", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -414,11 +414,11 @@ static struct bpf_align_test tests[] = {
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
- {17, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {17, "R6", "var_off=(0x0; 0x3fc)"},
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
- {19, "R5_w", "var_off=(0x2; 0xffc)"},
+ {19, "R5", "var_off=(0x2; 0xffc)"},
{20, "R4", "var_off=(0x2; 0xffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -459,18 +459,18 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {3, "R5_w", "pkt_end()"},
+ {3, "R5", "pkt_end()"},
/* (ptr - ptr) << 2 == unknown, (4n) */
- {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"},
+ {5, "R5", "var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
- {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"},
+ {6, "R5", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>=0 */
{9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* packet pointer + nonnegative (4n+2) */
- {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
- {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {11, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {12, "R4", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
@@ -478,7 +478,7 @@ static struct bpf_align_test tests[] = {
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
- {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {15, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
}
},
{
@@ -513,12 +513,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {8, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {8, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {9, "R6_w", "var_off=(0x2; 0x7fc)"},
+ {9, "R6", "var_off=(0x2; 0x7fc)"},
/* New unknown value in R7 is (4n) */
- {10, "R7_w", "var_off=(0x0; 0x3fc)"},
+ {10, "R7", "var_off=(0x0; 0x3fc)"},
/* Subtracting it from R6 blows our unsigned bounds */
{11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>= 0 */
@@ -566,16 +566,16 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {9, "R6_w", "var_off=(0x0; 0x3c)"},
+ {6, "R2", "pkt(r=8)"},
+ {9, "R6", "var_off=(0x0; 0x3c)"},
/* Adding 14 makes R6 be (4n+2) */
- {10, "R6_w", "var_off=(0x2; 0x7c)"},
+ {10, "R6", "var_off=(0x2; 0x7c)"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"},
+ {13, "R5", "var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
- {14, "R7_w", "var_off=(0x0; 0x7fc)"},
+ {14, "R7", "var_off=(0x0; 0x7fc)"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {16, "R5", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
index 0223fce4db2b..693fd86fbde6 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
@@ -40,8 +40,13 @@ static void *spin_lock_thread(void *arg)
err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run err");
+
+ if (topts.retval == -EOPNOTSUPP)
+ goto end;
+
ASSERT_EQ((int)topts.retval, 0, "test_run retval");
+end:
pthread_exit(arg);
}
@@ -63,6 +68,7 @@ static void test_arena_spin_lock_size(int size)
skel = arena_spin_lock__open_and_load();
if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load"))
return;
+
if (skel->data->test_skip == 2) {
test__skip();
goto end;
@@ -86,6 +92,13 @@ static void test_arena_spin_lock_size(int size)
goto end_barrier;
}
+ if (skel->data->test_skip == 3) {
+		printf("%s:SKIP: CONFIG_NR_CPUS exceeds the maximum supported by arena spinlock\n",
+ __func__);
+ test__skip();
+ goto end_barrier;
+ }
+
ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value");
end_barrier:
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 13e101f370a1..92b5f378bfb8 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -165,11 +165,17 @@ static void test_xchg(struct atomics_lskel *skel)
void test_atomics(void)
{
struct atomics_lskel *skel;
+ int err;
- skel = atomics_lskel__open_and_load();
- if (!ASSERT_OK_PTR(skel, "atomics skeleton load"))
+ skel = atomics_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "atomics skeleton open"))
return;
+ skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = atomics_lskel__load(skel);
+ if (!ASSERT_OK(err, "atomics skeleton load"))
+ goto cleanup;
+
if (skel->data->skip_tests) {
printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)",
__func__);
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index cabc51c2ca6b..9e77e5da7097 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -3,6 +3,7 @@
#include "test_attach_kprobe_sleepable.skel.h"
#include "test_attach_probe_manual.skel.h"
#include "test_attach_probe.skel.h"
+#include "kprobe_write_ctx.skel.h"
/* this is how USDT semaphore is actually defined, except volatile modifier */
volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
@@ -201,6 +202,31 @@ cleanup:
test_attach_probe_manual__destroy(skel);
}
+#ifdef __x86_64__
+/* attach a kprobe program that writes to ctx and verify the attach is rejected */
+static void test_attach_kprobe_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_kprobe_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
static void test_attach_probe_auto(struct test_attach_probe *skel)
{
struct bpf_link *uprobe_err_link;
@@ -406,6 +432,8 @@ void test_attach_probe(void)
test_attach_uprobe_long_event_name();
if (test__start_subtest("kprobe-long_name"))
test_attach_kprobe_long_event_name();
+ if (test__start_subtest("kprobe-write-ctx"))
+ test_attach_kprobe_write_ctx();
cleanup:
test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 4a0670c056ba..75f4dff7d042 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -450,8 +450,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
attr.size = sizeof(attr);
attr.type = PERF_TYPE_SOFTWARE;
attr.config = PERF_COUNT_SW_CPU_CLOCK;
- attr.freq = 1;
- attr.sample_freq = 10000;
+ attr.sample_period = 100000;
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
if (!ASSERT_GE(pfd, 0, "perf_fd"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 82903585c870..10cba526d3e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -63,7 +63,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
/* tests with t->known_ptr_sz have no "long" or "unsigned long" type,
* so it's impossible to determine correct pointer size; but if they
- * do, it should be 8 regardless of host architecture, becaues BPF
+ * do, it should be 8 regardless of host architecture, because BPF
* target is always 64-bit
*/
if (!t->known_ptr_sz) {
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
index e0dd966e4a3e..5ad904e9d15d 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
@@ -44,7 +44,7 @@ static void test_read_cgroup_xattr(void)
if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load"))
goto out;
- skel->bss->target_pid = gettid();
+ skel->bss->target_pid = sys_gettid();
if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach"))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
index adda85f97058..4b42fbc96efc 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -4,6 +4,8 @@
#define _GNU_SOURCE
#include <cgroup_helpers.h>
#include <test_progs.h>
+#include <sched.h>
+#include <sys/wait.h>
#include "cgrp_kfunc_failure.skel.h"
#include "cgrp_kfunc_success.skel.h"
@@ -87,6 +89,72 @@ static const char * const success_tests[] = {
"test_cgrp_from_id",
};
+static void test_cgrp_from_id_ns(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct cgrp_kfunc_success *skel;
+ struct bpf_program *prog;
+ int pid, pipe_fd[2];
+
+ skel = open_load_cgrp_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err"))
+ goto cleanup;
+
+ prog = skel->progs.test_cgrp_from_id_ns;
+
+ if (!ASSERT_OK(pipe(pipe_fd), "pipe"))
+ goto cleanup;
+
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork result")) {
+ close(pipe_fd[0]);
+ close(pipe_fd[1]);
+ goto cleanup;
+ }
+
+ if (pid == 0) {
+ int ret = 0;
+
+ close(pipe_fd[0]);
+
+ if (!ASSERT_GE(cgroup_setup_and_join("cgrp_from_id_ns"), 0, "join cgroup"))
+ exit(1);
+
+ if (!ASSERT_OK(unshare(CLONE_NEWCGROUP), "unshare cgns"))
+ exit(1);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (!ASSERT_OK(ret, "test run ret"))
+ exit(1);
+
+ if (!ASSERT_OK(opts.retval, "test run retval"))
+ exit(1);
+
+ if (!ASSERT_EQ(write(pipe_fd[1], &ret, sizeof(ret)), sizeof(ret), "write pipe"))
+ exit(1);
+
+ exit(0);
+ } else {
+ int res;
+
+ close(pipe_fd[1]);
+
+ ASSERT_EQ(read(pipe_fd[0], &res, sizeof(res)), sizeof(res), "read res");
+ ASSERT_EQ(waitpid(pid, NULL, 0), pid, "wait on child");
+
+ remove_cgroup_pid("cgrp_from_id_ns", pid);
+
+ ASSERT_OK(res, "result from run");
+ }
+
+ close(pipe_fd[0]);
+cleanup:
+ cgrp_kfunc_success__destroy(skel);
+}
+
void test_cgrp_kfunc(void)
{
int i, err;
@@ -102,6 +170,9 @@ void test_cgrp_kfunc(void)
run_success_test(success_tests[i]);
}
+ if (test__start_subtest("test_cgrp_from_id_ns"))
+ test_cgrp_from_id_ns();
+
RUN_TESTS(cgrp_kfunc_failure);
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 9b2d9ceda210..b9f86cb91e81 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -32,6 +32,8 @@ static struct {
{"test_ringbuf", SETUP_SYSCALL_SLEEP},
{"test_skb_readonly", SETUP_SKB_PROG},
{"test_dynptr_skb_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG},
{"test_adjust", SETUP_SYSCALL_SLEEP},
{"test_adjust_err", SETUP_SYSCALL_SLEEP},
{"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP},
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
index 241b2c8c6e0f..c534b4d5f9da 100644
--- a/tools/testing/selftests/bpf/prog_tests/fd_array.c
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -293,7 +293,7 @@ static int get_btf_id_by_fd(int btf_fd, __u32 *id)
* 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1
* 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2
* 3) Close the btf_fd, now refcnt=1
- * Wait and check that BTF stil exists.
+ * Wait and check that BTF still exists.
*/
static void check_fd_array_cnt__referenced_btfs(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 130f5b82d2e6..5ef1804e44df 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -12,13 +12,24 @@ void test_fentry_fexit(void)
int err, prog_fd, i;
LIBBPF_OPTS(bpf_test_run_opts, topts);
- fentry_skel = fentry_test_lskel__open_and_load();
+ fentry_skel = fentry_test_lskel__open();
if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
goto close_prog;
- fexit_skel = fexit_test_lskel__open_and_load();
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
+ goto close_prog;
+
+ fexit_skel = fexit_test_lskel__open();
if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
goto close_prog;
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
+ goto close_prog;
+
err = fentry_test_lskel__attach(fentry_skel);
if (!ASSERT_OK(err, "fentry_attach"))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index aee1bc77a17f..ec882328eb59 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -43,8 +43,13 @@ static void fentry_test(void)
struct fentry_test_lskel *fentry_skel = NULL;
int err;
- fentry_skel = fentry_test_lskel__open_and_load();
- if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
+ fentry_skel = fentry_test_lskel__open();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_open"))
+ goto cleanup;
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
goto cleanup;
err = fentry_test_common(fentry_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 1c13007e37dd..94eed753560c 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -43,8 +43,13 @@ static void fexit_test(void)
struct fexit_test_lskel *fexit_skel = NULL;
int err;
- fexit_skel = fexit_test_lskel__open_and_load();
- if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
+ fexit_skel = fexit_test_lskel__open();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_open"))
+ goto cleanup;
+
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
goto cleanup;
err = fexit_test_common(fexit_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
index a133354ac9bc..97b00c7efe94 100644
--- a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
+++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
@@ -16,7 +16,7 @@ void test_kernel_flag(void)
if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel"))
return;
- lsm_skel->bss->monitored_tid = gettid();
+ lsm_skel->bss->monitored_tid = sys_gettid();
ret = test_kernel_flag__attach(lsm_skel);
if (!ASSERT_OK(ret, "test_kernel_flag__attach"))
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index e19ef509ebf8..6cfaa978bc9a 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -7,6 +7,7 @@
#include "kprobe_multi_session.skel.h"
#include "kprobe_multi_session_cookie.skel.h"
#include "kprobe_multi_verifier.skel.h"
+#include "kprobe_write_ctx.skel.h"
#include "bpf/libbpf_internal.h"
#include "bpf/hashmap.h"
@@ -422,220 +423,6 @@ static void test_unique_match(void)
kprobe_multi__destroy(skel);
}
-static size_t symbol_hash(long key, void *ctx __maybe_unused)
-{
- return str_hash((const char *) key);
-}
-
-static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
-{
- return strcmp((const char *) key1, (const char *) key2) == 0;
-}
-
-static bool is_invalid_entry(char *buf, bool kernel)
-{
- if (kernel && strchr(buf, '['))
- return true;
- if (!kernel && !strchr(buf, '['))
- return true;
- return false;
-}
-
-static bool skip_entry(char *name)
-{
- /*
- * We attach to almost all kernel functions and some of them
- * will cause 'suspicious RCU usage' when fprobe is attached
- * to them. Filter out the current culprits - arch_cpu_idle
- * default_idle and rcu_* functions.
- */
- if (!strcmp(name, "arch_cpu_idle"))
- return true;
- if (!strcmp(name, "default_idle"))
- return true;
- if (!strncmp(name, "rcu_", 4))
- return true;
- if (!strcmp(name, "bpf_dispatcher_xdp_func"))
- return true;
- if (!strncmp(name, "__ftrace_invalid_address__",
- sizeof("__ftrace_invalid_address__") - 1))
- return true;
- return false;
-}
-
-/* Do comparision by ignoring '.llvm.<hash>' suffixes. */
-static int compare_name(const char *name1, const char *name2)
-{
- const char *res1, *res2;
- int len1, len2;
-
- res1 = strstr(name1, ".llvm.");
- res2 = strstr(name2, ".llvm.");
- len1 = res1 ? res1 - name1 : strlen(name1);
- len2 = res2 ? res2 - name2 : strlen(name2);
-
- if (len1 == len2)
- return strncmp(name1, name2, len1);
- if (len1 < len2)
- return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
- return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
-}
-
-static int load_kallsyms_compare(const void *p1, const void *p2)
-{
- return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
-}
-
-static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
-{
- return compare_name(p1, p2->name);
-}
-
-static int get_syms(char ***symsp, size_t *cntp, bool kernel)
-{
- size_t cap = 0, cnt = 0;
- char *name = NULL, *ksym_name, **syms = NULL;
- struct hashmap *map;
- struct ksyms *ksyms;
- struct ksym *ks;
- char buf[256];
- FILE *f;
- int err = 0;
-
- ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
- if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local"))
- return -EINVAL;
-
- /*
- * The available_filter_functions contains many duplicates,
- * but other than that all symbols are usable in kprobe multi
- * interface.
- * Filtering out duplicates by using hashmap__add, which won't
- * add existing entry.
- */
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
-
- if (!f)
- return -EINVAL;
-
- map = hashmap__new(symbol_hash, symbol_equal, NULL);
- if (IS_ERR(map)) {
- err = libbpf_get_error(map);
- goto error;
- }
-
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
- continue;
- if (skip_entry(name))
- continue;
-
- ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
- if (!ks) {
- err = -EINVAL;
- goto error;
- }
-
- ksym_name = ks->name;
- err = hashmap__add(map, ksym_name, 0);
- if (err == -EEXIST) {
- err = 0;
- continue;
- }
- if (err)
- goto error;
-
- err = libbpf_ensure_mem((void **) &syms, &cap,
- sizeof(*syms), cnt + 1);
- if (err)
- goto error;
-
- syms[cnt++] = ksym_name;
- }
-
- *symsp = syms;
- *cntp = cnt;
-
-error:
- free(name);
- fclose(f);
- hashmap__free(map);
- if (err)
- free(syms);
- return err;
-}
-
-static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
-{
- unsigned long *addr, *addrs, *tmp_addrs;
- int err = 0, max_cnt, inc_cnt;
- char *name = NULL;
- size_t cnt = 0;
- char buf[256];
- FILE *f;
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
-
- if (!f)
- return -ENOENT;
-
- /* In my local setup, the number of entries is 50k+ so Let us initially
- * allocate space to hold 64k entries. If 64k is not enough, incrementally
- * increase 1k each time.
- */
- max_cnt = 65536;
- inc_cnt = 1024;
- addrs = malloc(max_cnt * sizeof(long));
- if (addrs == NULL) {
- err = -ENOMEM;
- goto error;
- }
-
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
- continue;
- if (skip_entry(name))
- continue;
-
- if (cnt == max_cnt) {
- max_cnt += inc_cnt;
- tmp_addrs = realloc(addrs, max_cnt);
- if (!tmp_addrs) {
- err = -ENOMEM;
- goto error;
- }
- addrs = tmp_addrs;
- }
-
- addrs[cnt++] = (unsigned long)addr;
- }
-
- *addrsp = addrs;
- *cntp = cnt;
-
-error:
- free(name);
- fclose(f);
- if (err)
- free(addrs);
- return err;
-}
-
static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts)
{
long attach_start_ns, attach_end_ns;
@@ -670,7 +457,7 @@ static void test_kprobe_multi_bench_attach(bool kernel)
char **syms = NULL;
size_t cnt = 0;
- if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
+ if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms"))
return;
skel = kprobe_multi_empty__open_and_load();
@@ -696,13 +483,13 @@ static void test_kprobe_multi_bench_attach_addr(bool kernel)
size_t cnt = 0;
int err;
- err = get_addrs(&addrs, &cnt, kernel);
+ err = bpf_get_addrs(&addrs, &cnt, kernel);
if (err == -ENOENT) {
test__skip();
return;
}
- if (!ASSERT_OK(err, "get_addrs"))
+ if (!ASSERT_OK(err, "bpf_get_addrs"))
return;
skel = kprobe_multi_empty__open_and_load();
@@ -753,6 +540,30 @@ cleanup:
kprobe_multi_override__destroy(skel);
}
+#ifdef __x86_64__
+static void test_attach_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_multi_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
void serial_test_kprobe_multi_bench_attach(void)
{
if (test__start_subtest("kernel"))
@@ -792,5 +603,7 @@ void test_kprobe_multi_test(void)
test_session_cookie_skel_api();
if (test__start_subtest("unique_match"))
test_unique_match();
+ if (test__start_subtest("attach_write_ctx"))
+ test_attach_write_ctx();
RUN_TESTS(kprobe_multi_verifier);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c
new file mode 100644
index 000000000000..6bdc6d6de0da
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Google LLC. */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "map_excl.skel.h"
+
+static void test_map_excl_allowed(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+ if (!ASSERT_OK(err, "bpf_map__set_exclusive_program"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, true);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, false);
+
+ err = map_excl__load(skel);
+ ASSERT_OK(err, "map_excl__load");
+out:
+ map_excl__destroy(skel);
+}
+
+static void test_map_excl_denied(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+	if (!ASSERT_OK(err, "bpf_map__set_exclusive_program"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, false);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, true);
+
+ err = map_excl__load(skel);
+ ASSERT_EQ(err, -EACCES, "exclusive map access not denied\n");
+out:
+ map_excl__destroy(skel);
+
+}
+
+void test_map_excl(void)
+{
+ if (test__start_subtest("map_excl_allowed"))
+ test_map_excl_allowed();
+ if (test__start_subtest("map_excl_denied"))
+ test_map_excl_denied();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 6d391d95f96e..70fa7ae93173 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -90,7 +90,7 @@ void test_module_attach(void)
test_module_attach__detach(skel);
- /* attach fentry/fexit and make sure it get's module reference */
+ /* attach fentry/fexit and make sure it gets module reference */
link = bpf_program__attach(skel->progs.handle_fentry);
if (!ASSERT_OK_PTR(link, "attach_fentry"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
new file mode 100644
index 000000000000..9ae49b587f3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <test_progs.h>
+
+
+#include "test_pinning_devmap.skel.h"
+
+void test_pinning_devmap_reuse(void)
+{
+ const char *pinpath1 = "/sys/fs/bpf/pinmap1";
+ const char *pinpath2 = "/sys/fs/bpf/pinmap2";
+ struct test_pinning_devmap *skel1 = NULL, *skel2 = NULL;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+
+ /* load the object a first time */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "skel_load1"))
+ goto out;
+
+ /* load the object a second time, re-using the pinned map */
+ skel2 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "skel_load2"))
+ goto out;
+
+ /* we can close the reference safely without
+ * the map's refcount falling to 0
+ */
+ test_pinning_devmap__destroy(skel1);
+ skel1 = NULL;
+
+ /* now, swap the pins */
+ err = renameat2(0, pinpath1, 0, pinpath2, RENAME_EXCHANGE);
+ if (!ASSERT_OK(err, "swap pins"))
+ goto out;
+
+ /* load the object again, this time the re-use should fail */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_ERR_PTR(skel1, "skel_load3"))
+ goto out;
+
+out:
+ unlink(pinpath1);
+ unlink(pinpath2);
+ test_pinning_devmap__destroy(skel1);
+ test_pinning_devmap__destroy(skel2);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
index 14f2796076e0..7607cfc2408c 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
@@ -54,3 +54,128 @@ void test_prog_tests_framework(void)
return;
clear_test_state(state);
}
+
+static void dummy_emit(const char *buf, bool force) {}
+
+void test_prog_tests_framework_expected_msgs(void)
+{
+ struct expected_msgs msgs;
+ int i, j, error_cnt;
+ const struct {
+ const char *name;
+ const char *log;
+ const char *expected;
+ struct expect_msg *pats;
+ } cases[] = {
+ {
+ .name = "simple-ok",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "simple-fail",
+ .log = "aaabbbddd",
+ .expected = "MATCHED SUBSTR: 'aaa'\n"
+ "EXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-mid",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ { .substr = "bar", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-tail",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-head",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "foo", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-head",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'aaa'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa", .negative = true },
+ { .substr = "bbb" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-tail",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "bbb" },
+ { .substr = "ccc", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-1",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'bbb'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-2",
+ .log = "aaabbb222ccc",
+ .expected = "UNEXPECTED SUBSTR: '222'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "222", .negative = true },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ }
+ };
+
+ for (i = 0; i < ARRAY_SIZE(cases); i++) {
+ if (test__start_subtest(cases[i].name)) {
+ error_cnt = env.subtest_state->error_cnt;
+ msgs.patterns = cases[i].pats;
+ msgs.cnt = 0;
+ for (j = 0; cases[i].pats[j].substr; j++)
+ msgs.cnt++;
+ validate_msgs(cases[i].log, &msgs, dummy_emit);
+ fflush(stderr);
+ env.subtest_state->error_cnt = error_cnt;
+ if (cases[i].expected)
+ ASSERT_HAS_SUBSTR(env.subtest_state->log_buf, cases[i].expected, "expected output");
+ else
+ ASSERT_STREQ(env.subtest_state->log_buf, "", "expected no output");
+ test__end_subtest();
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
index e261b0e872db..d93a0c7b1786 100644
--- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -623,7 +623,7 @@ static void range_cond(enum num_t t, struct range x, struct range y,
*newx = range(t, x.a, x.b);
*newy = range(t, y.a + 1, y.b);
} else if (x.a == x.b && x.b == y.b) {
- /* X is a constant matching rigth side of Y */
+ /* X is a constant matching right side of Y */
*newx = range(t, x.a, x.b);
*newy = range(t, y.a, y.b - 1);
} else if (y.a == y.b && x.a == y.a) {
@@ -631,7 +631,7 @@ static void range_cond(enum num_t t, struct range x, struct range y,
*newx = range(t, x.a + 1, x.b);
*newy = range(t, y.a, y.b);
} else if (y.a == y.b && x.b == y.b) {
- /* Y is a constant matching rigth side of X */
+ /* Y is a constant matching right side of X */
*newx = range(t, x.a, x.b - 1);
*newy = range(t, y.a, y.b);
} else {
diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
index 0703e987df89..8c6c2043a432 100644
--- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
@@ -99,3 +99,19 @@ end:
res_spin_lock__destroy(skel);
return;
}
+
+void serial_test_res_spin_lock_stress(void)
+{
+ if (libbpf_num_possible_cpus() < 3) {
+ test__skip();
+ return;
+ }
+
+ ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA");
+ sleep(5);
+ unload_module("bpf_test_rqspinlock", false);
+
+ ASSERT_OK(load_module_params("bpf_test_rqspinlock.ko", "test_ab=1", false), "load module ABBA");
+ sleep(5);
+ unload_module("bpf_test_rqspinlock", false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index e3ea5dc2f697..254fbfeab06a 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -13,22 +13,22 @@ static struct {
const char *err_msg;
} spin_lock_fail_tests[] = {
{ "lock_id_kptr_preserve",
- "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) "
- "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+ "5: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2) "
+ "R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=ptr_ expected=percpu_ptr_" },
{ "lock_id_global_zero",
- "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
+ "; R1=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
- " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)"
- " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n"
+ " R0=map_value(id=1,map=array_map,ks=4,vs=8)"
+ " R1=map_value(id=1,map=array_map,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_innermapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
" R0=map_value(id=2,ks=4,vs=8)"
- " R1_w=map_value(id=2,ks=4,vs=8)\n"
+ " R1=map_value(id=2,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" },
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index b7ba5cd47d96..271b5cc9fc01 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -39,7 +39,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 0832fd787457..b277dddd5af7 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -66,7 +66,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index df59e4ae2951..c23b97414813 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -1,46 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "stacktrace_map.skel.h"
void test_stacktrace_map(void)
{
+ struct stacktrace_map *skel;
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "oncpu";
- int err, prog_fd, stack_trace_len;
- const char *file = "./test_stacktrace_map.bpf.o";
- __u32 key, val, duration = 0;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_link *link;
+ int err, stack_trace_len;
+ __u32 key, val, stack_id, duration = 0;
+ __u64 stack[PERF_MAX_STACK_DEPTH];
- err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ skel = stacktrace_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
- prog = bpf_object__find_program_by_name(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
- goto close_prog;
-
- link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
- if (!ASSERT_OK_PTR(link, "attach_tp"))
- goto close_prog;
-
- /* find map fds */
- control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (CHECK_FAIL(control_map_fd < 0))
- goto disable_pmu;
-
- stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (CHECK_FAIL(stackid_hmap_fd < 0))
- goto disable_pmu;
-
- stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (CHECK_FAIL(stackmap_fd < 0))
- goto disable_pmu;
-
- stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (CHECK_FAIL(stack_amap_fd < 0))
- goto disable_pmu;
+ control_map_fd = bpf_map__fd(skel->maps.control_map);
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+ stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+ err = stacktrace_map__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
/* give some time for bpf program run */
sleep(1);
@@ -50,26 +31,32 @@ void test_stacktrace_map(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
-
-disable_pmu:
- bpf_link__destroy(link);
-close_prog:
- bpf_object__close(obj);
+ goto out;
+
+ stack_id = skel->bss->stack_id;
+ err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_OK(err, "lookup and delete target stack_id"))
+ goto out;
+
+ err = bpf_map_lookup_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_EQ(err, -ENOENT, "lookup deleted stack_id"))
+ goto out;
+out:
+ stacktrace_map__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index c6ef06f55cdb..e985d51d3d47 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -5,7 +5,7 @@ void test_stacktrace_map_raw_tp(void)
{
const char *prog_name = "oncpu";
int control_map_fd, stackid_hmap_fd, stackmap_fd;
- const char *file = "./test_stacktrace_map.bpf.o";
+ const char *file = "./stacktrace_map.bpf.o";
__u32 key, val, duration = 0;
int err, prog_fd;
struct bpf_program *prog;
@@ -46,7 +46,7 @@ void test_stacktrace_map_raw_tp(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
index 1932b1e0685c..dc2ccf6a14d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
@@ -40,7 +40,7 @@ void test_stacktrace_map_skip(void)
skel->bss->control = 1;
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap"))
diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c
index d9f0185dca61..c3cce5c292bd 100644
--- a/tools/testing/selftests/bpf/prog_tests/stream.c
+++ b/tools/testing/selftests/bpf/prog_tests/stream.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
#include <test_progs.h>
#include <sys/mman.h>
-#include <regex.h>
#include "stream.skel.h"
#include "stream_fail.skel.h"
@@ -18,87 +17,6 @@ void test_stream_success(void)
return;
}
-struct {
- int prog_off;
- const char *errstr;
-} stream_error_arr[] = {
- {
- offsetof(struct stream, progs.stream_cond_break),
- "ERROR: Timeout detected for may_goto instruction\n"
- "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
- "Call trace:\n"
- "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
- "|[ \t]+[^\n]+\n)*",
- },
- {
- offsetof(struct stream, progs.stream_deadlock),
- "ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock\n"
- "Attempted lock = (0x[0-9a-fA-F]+)\n"
- "Total held locks = 1\n"
- "Held lock\\[ 0\\] = \\1\n" // Lock address must match
- "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
- "Call trace:\n"
- "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
- "|[ \t]+[^\n]+\n)*",
- },
-};
-
-static int match_regex(const char *pattern, const char *string)
-{
- int err, rc;
- regex_t re;
-
- err = regcomp(&re, pattern, REG_EXTENDED | REG_NEWLINE);
- if (err)
- return -1;
- rc = regexec(&re, string, 0, NULL, 0);
- regfree(&re);
- return rc == 0 ? 1 : 0;
-}
-
-void test_stream_errors(void)
-{
- LIBBPF_OPTS(bpf_test_run_opts, opts);
- LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
- struct stream *skel;
- int ret, prog_fd;
- char buf[1024];
-
- skel = stream__open_and_load();
- if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
- return;
-
- for (int i = 0; i < ARRAY_SIZE(stream_error_arr); i++) {
- struct bpf_program **prog;
-
- prog = (struct bpf_program **)(((char *)skel) + stream_error_arr[i].prog_off);
- prog_fd = bpf_program__fd(*prog);
- ret = bpf_prog_test_run_opts(prog_fd, &opts);
- ASSERT_OK(ret, "ret");
- ASSERT_OK(opts.retval, "retval");
-
-#if !defined(__x86_64__)
- ASSERT_TRUE(1, "Timed may_goto unsupported, skip.");
- if (i == 0) {
- ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts);
- ASSERT_EQ(ret, 0, "stream read");
- continue;
- }
-#endif
-
- ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
- ASSERT_GT(ret, 0, "stream read");
- ASSERT_LE(ret, 1023, "len for buf");
- buf[ret] = '\0';
-
- ret = match_regex(stream_error_arr[i].errstr, buf);
- if (!ASSERT_TRUE(ret == 1, "regex match"))
- fprintf(stderr, "Output from stream:\n%s\n", buf);
- }
-
- stream__destroy(skel);
-}
-
void test_stream_syscall(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
@@ -139,3 +57,52 @@ void test_stream_syscall(void)
stream__destroy(skel);
}
+
+static void test_address(struct bpf_program *prog, unsigned long *fault_addr_p)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ int ret, prog_fd;
+ char fault_addr[64];
+ char buf[1024];
+
+ prog_fd = bpf_program__fd(prog);
+
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+
+ sprintf(fault_addr, "0x%lx", *fault_addr_p);
+
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ ASSERT_LE(ret, 1023, "len for buf");
+ buf[ret] = '\0';
+
+ if (!ASSERT_HAS_SUBSTR(buf, fault_addr, "fault_addr")) {
+ fprintf(stderr, "Output from stream:\n%s\n", buf);
+ fprintf(stderr, "Fault Addr: %s\n", fault_addr);
+ }
+}
+
+void test_stream_arena_fault_address(void)
+{
+ struct stream *skel;
+
+#if !defined(__x86_64__) && !defined(__aarch64__)
+ printf("%s:SKIP: arena fault reporting not supported\n", __func__);
+ test__skip();
+ return;
+#endif
+
+ skel = stream__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
+ return;
+
+ if (test__start_subtest("read_fault"))
+ test_address(skel->progs.stream_arena_read_fault, &skel->bss->fault_addr);
+ if (test__start_subtest("write_fault"))
+ test_address(skel->progs.stream_arena_write_fault, &skel->bss->fault_addr);
+
+ stream__destroy(skel);
+}
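
The fault-address subtests above check the program's BPF stderr stream after a triggered run. As a minimal sketch, the same stream can be drained with a small helper, assuming prog_fd refers to an already-loaded program and that repeated reads keep returning buffered bytes until the stream is empty; dump_prog_stderr is a hypothetical name, not part of this series:

#include <stdio.h>
#include <bpf/bpf.h>

/* Dump whatever a BPF program has written to its stderr stream.
 * prog_fd is assumed to be the fd of an already-loaded program.
 */
static void dump_prog_stderr(int prog_fd)
{
	LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
	char buf[1024];
	int ret;

	/* Each successful read returns the number of bytes copied into buf. */
	while ((ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR,
					   buf, sizeof(buf) - 1, &ropts)) > 0) {
		buf[ret] = '\0';
		fprintf(stderr, "%s", buf);
	}
}
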
diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
index 35af8044d059..4d66fad3c8bd 100644
--- a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
+++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
@@ -8,6 +8,7 @@
static const char * const test_cases[] = {
"strcmp",
+ "strcasecmp",
"strchr",
"strchrnul",
"strnchr",
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
new file mode 100644
index 000000000000..2de38776a2d4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_DATA_H
+#define __TASK_LOCAL_DATA_H
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdatomic.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+#include <pthread.h>
+#endif
+
+#include <bpf/bpf.h>
+
+/*
+ * OPTIONS
+ *
+ * Define the option before including the header
+ *
+ * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically
+ *
+ * Thread-specific memory for storing TLD is allocated lazily on the first call to
+ * tld_get_data(). The thread that calls it must also call tld_free() on thread exit
+ * to prevent memory leak. Pthread will be included if the option is defined. A pthread
+ * key will be registered with a destructor that calls tld_free().
+ *
+ *
+ * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically
+ * (default: 64 bytes)
+ *
+ * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using
+ * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot
+ * possibly be known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated
+ * for these TLDs. This additional memory is allocated for every thread that calls
+ * tld_get_data() even if tld_create_key() is never actually called, so be mindful of
+ * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory
+ * will be allocated for TLDs created with it.
+ *
+ *
+ * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62)
+ *
+ * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store,
+ * TLD_MAX_DATA_CNT.
+ *
+ *
+ * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc()
+ *
+ * When allocating the memory for storing TLDs, we need to make sure there is a memory
+ * region of the X bytes within a page. This is due to the limit posed by UPTR: memory
+ * pinned to the kernel cannot exceed a page nor can it cross the page boundary. The
+ * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses
+ * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage
+ * as not all memory allocator can use the exact amount of memory requested to fulfill
+ * aligned_alloc(). For example, some may round the size up to the alignment. Enable the
+ * option to always use aligned_alloc() if the implementation has low memory overhead.
+ */
+
+#define TLD_PAGE_SIZE getpagesize()
+#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1))
+
+#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
+#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)
+
+#define TLD_READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#ifndef TLD_DYN_DATA_SIZE
+#define TLD_DYN_DATA_SIZE 64
+#endif
+
+#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1)
+
+#ifndef TLD_NAME_LEN
+#define TLD_NAME_LEN 62
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ __s16 off;
+} tld_key_t;
+
+struct tld_metadata {
+ char name[TLD_NAME_LEN];
+ _Atomic __u16 size;
+};
+
+struct tld_meta_u {
+ _Atomic __u8 cnt;
+ __u16 size;
+ struct tld_metadata metadata[];
+};
+
+struct tld_data_u {
+ __u64 start; /* offset of tld_data_u->data in a page */
+ char data[];
+};
+
+struct tld_map_value {
+ void *data;
+ struct tld_meta_u *meta;
+};
+
+struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak));
+__thread struct tld_data_u *tld_data_p __attribute__((weak));
+__thread void *tld_data_alloc_p __attribute__((weak));
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+pthread_key_t tld_pthread_key __attribute__((weak));
+
+static void tld_free(void);
+
+static void __tld_thread_exit_handler(void *unused)
+{
+ tld_free();
+}
+#endif
+
+static int __tld_init_meta_p(void)
+{
+ struct tld_meta_u *meta, *uninit = NULL;
+ int err = 0;
+
+ meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE);
+ if (!meta) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ memset(meta, 0, TLD_PAGE_SIZE);
+ meta->size = TLD_DYN_DATA_SIZE;
+
+ if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) {
+ free(meta);
+ goto out;
+ }
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+ pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler);
+#endif
+out:
+ return err;
+}
+
+static int __tld_init_data_p(int map_fd)
+{
+ bool use_aligned_alloc = false;
+ struct tld_map_value map_val;
+ struct tld_data_u *data;
+ void *data_alloc = NULL;
+ int err, tid_fd = -1;
+
+ tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL);
+ if (tid_fd < 0) {
+ err = -errno;
+ goto out;
+ }
+
+#ifdef TLD_DATA_USE_ALIGNED_ALLOC
+ use_aligned_alloc = true;
+#endif
+
+ /*
+ * tld_meta_p->size = TLD_DYN_DATA_SIZE +
+ * total size of TLDs defined via TLD_DEFINE_KEY()
+ */
+ data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ?
+ aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) :
+ malloc(tld_meta_p->size * 2);
+ if (!data_alloc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Always pass a page-aligned address to UPTR since the size of tld_map_value::data
+ * is a page in BTF. If data_alloc spans across two pages, use the page that contains large
+ * enough memory.
+ */
+ if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) {
+ map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc);
+ data = data_alloc;
+ data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) +
+ offsetof(struct tld_data_u, data);
+ } else {
+ map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
+ data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
+ data->start = offsetof(struct tld_data_u, data);
+ }
+ map_val.meta = TLD_READ_ONCE(tld_meta_p);
+
+ err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0);
+ if (err) {
+ free(data_alloc);
+ goto out;
+ }
+
+ tld_data_p = data;
+ tld_data_alloc_p = data_alloc;
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+ pthread_setspecific(tld_pthread_key, (void *)1);
+#endif
+out:
+ if (tid_fd >= 0)
+ close(tid_fd);
+ return err;
+}
+
+static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data)
+{
+ int err, i, sz, off = 0;
+ __u8 cnt;
+
+ if (!TLD_READ_ONCE(tld_meta_p)) {
+ err = __tld_init_meta_p();
+ if (err)
+ return (tld_key_t){err};
+ }
+
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+retry:
+ cnt = atomic_load(&tld_meta_p->cnt);
+ if (i < cnt) {
+ /* A metadata is not ready until size is updated with a non-zero value */
+ while (!(sz = atomic_load(&tld_meta_p->metadata[i].size)))
+ sched_yield();
+
+ if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN))
+ return (tld_key_t){-EEXIST};
+
+ off += TLD_ROUND_UP(sz, 8);
+ continue;
+ }
+
+ /*
+		 * TLD_DEFINE_KEY() is given memory up to a page while at most
+ * TLD_DYN_DATA_SIZE is allocated for tld_create_key()
+ */
+ if (dyn_data) {
+ if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size)
+ return (tld_key_t){-E2BIG};
+ } else {
+ if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u))
+ return (tld_key_t){-E2BIG};
+ tld_meta_p->size += TLD_ROUND_UP(size, 8);
+ }
+
+ /*
+		 * Only one tld_create_key() can increase the current cnt by one and
+		 * take the latest available slot. Other threads will check again if a new
+		 * TLD can still be added, and then compete for the new slot after the
+		 * succeeding thread updates the size.
+ */
+ if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1))
+ goto retry;
+
+ strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN);
+ atomic_store(&tld_meta_p->metadata[i].size, size);
+ return (tld_key_t){(__s16)off};
+ }
+
+ return (tld_key_t){-ENOSPC};
+}
+
+/**
+ * TLD_DEFINE_KEY() - Define a TLD and a global variable key associated with the TLD.
+ *
+ * @name: The name of the TLD
+ * @size: The size of the TLD
+ * @key: The variable name of the key. Cannot exceed TLD_NAME_LEN
+ *
+ * The macro can only be used in file scope.
+ *
+ * A global variable key of opaque type, tld_key_t, will be declared and initialized before
+ * main() starts. Use tld_key_is_err() or tld_key_err_or_zero() later to check if the key
+ * creation succeeded. Pass the key to tld_get_data() to get a pointer to the TLD.
+ * bpf programs can also fetch the same key by name.
+ *
+ * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just
+ * enough memory will be allocated for each thread on the first call to tld_get_data().
+ */
+#define TLD_DEFINE_KEY(key, name, size) \
+tld_key_t key; \
+ \
+__attribute__((constructor)) \
+void __tld_define_key_##key(void) \
+{ \
+ key = __tld_create_key(name, size, false); \
+}
+
+/**
+ * tld_create_key() - Create a TLD and return a key associated with the TLD.
+ *
+ * @name: The name of the TLD
+ * @size: The size of the TLD
+ *
+ * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check
+ * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to
+ * the TLD. bpf programs can also fetch the same key by name.
+ *
+ * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is
+ * not known statically or a TLD needs to be created conditionally)
+ *
+ * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs
+ * created dynamically with tld_create_key(). Since only a user page is pinned to the
+ * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE -
+ * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page.
+ */
+__attribute__((unused))
+static tld_key_t tld_create_key(const char *name, size_t size)
+{
+ return __tld_create_key(name, size, true);
+}
+
+__attribute__((unused))
+static inline bool tld_key_is_err(tld_key_t key)
+{
+ return key.off < 0;
+}
+
+__attribute__((unused))
+static inline int tld_key_err_or_zero(tld_key_t key)
+{
+ return tld_key_is_err(key) ? key.off : 0;
+}
+
+/**
+ * tld_get_data() - Get a pointer to the TLD associated with the given key of the
+ * calling thread.
+ *
+ * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map
+ * of task local data.
+ * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key().
+ *
+ * Return a pointer to the TLD if the key is valid; NULL if there is not enough memory
+ * for the TLD in this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte
+ * aligned.
+ *
+ * Threads that call tld_get_data() must call tld_free() on exit to prevent
+ * a memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined.
+ */
+__attribute__((unused))
+static void *tld_get_data(int map_fd, tld_key_t key)
+{
+ if (!TLD_READ_ONCE(tld_meta_p))
+ return NULL;
+
+ /* tld_data_p is allocated on the first invocation of tld_get_data() */
+ if (!tld_data_p && __tld_init_data_p(map_fd))
+ return NULL;
+
+ return tld_data_p->data + key.off;
+}
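+
+/*
+ * A minimal per-thread sketch (tld_data_map is the map named above; skel and
+ * key are illustrative): look up the TLD, update it, then free the calling
+ * thread's buffer before it exits.
+ *
+ *	int map_fd = bpf_map__fd(skel->maps.tld_data_map);
+ *	int *val = tld_get_data(map_fd, key);
+ *
+ *	if (val)
+ *		*val = 42;
+ *	tld_free();
+ */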
+
+/**
+ * tld_free() - Free task local data memory of the calling thread
+ *
+ * For the calling thread, all previously acquired pointers to TLDs become invalid.
+ *
+ * Users must call tld_free() on thread exit to prevent a memory leak. Alternatively,
+ * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered
+ * to free the memory automatically.
+ */
+__attribute__((unused))
+static void tld_free(void)
+{
+ if (tld_data_alloc_p) {
+ free(tld_data_alloc_p);
+ tld_data_alloc_p = NULL;
+ tld_data_p = NULL;
+ }
+}
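+
+/*
+ * Alternatively, a sketch of the automatic cleanup described above: define
+ * TLD_FREE_DATA_ON_THREAD_EXIT before including this header so that a thread
+ * exit handler frees the memory, with no explicit tld_free() call needed.
+ *
+ *	#define TLD_FREE_DATA_ON_THREAD_EXIT
+ *	#include "task_local_data.h"
+ */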
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __TASK_LOCAL_DATA_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/task_work_stress.c b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c
new file mode 100644
index 000000000000..450d17d91a56
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+#include "task_work_stress.skel.h"
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <stdlib.h>
+#include <stdatomic.h>
+
+struct test_data {
+ int prog_fd;
+ atomic_int exit;
+};
+
+void *runner(void *test_data)
+{
+ struct test_data *td = test_data;
+ int err = 0;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ while (!err && !atomic_load(&td->exit))
+ err = bpf_prog_test_run_opts(td->prog_fd, &opts);
+
+ return NULL;
+}
+
+static int get_env_int(const char *str, int def)
+{
+ const char *s = getenv(str);
+ char *end;
+ int retval;
+
+ if (!s || !*s)
+ return def;
+ errno = 0;
+ retval = strtol(s, &end, 10);
+ if (errno || *end || retval < 0)
+ return def;
+ return retval;
+}
+
+static void task_work_run(bool enable_delete)
+{
+ struct task_work_stress *skel;
+ struct bpf_program *scheduler, *deleter;
+ int nthreads = 16;
+ int test_time_s = get_env_int("BPF_TASK_WORK_TEST_TIME", 1);
+ pthread_t tid[nthreads], tid_del;
+ bool started[nthreads], started_del = false;
+ struct test_data td_sched = { .exit = 0 }, td_del = { .exit = 1 };
+ int i, err;
+
+ skel = task_work_stress__open();
+ if (!ASSERT_OK_PTR(skel, "task_work__open"))
+ return;
+
+ scheduler = bpf_object__find_program_by_name(skel->obj, "schedule_task_work");
+ bpf_program__set_autoload(scheduler, true);
+
+ deleter = bpf_object__find_program_by_name(skel->obj, "delete_task_work");
+ bpf_program__set_autoload(deleter, true);
+
+ err = task_work_stress__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ for (i = 0; i < nthreads; ++i)
+ started[i] = false;
+
+ td_sched.prog_fd = bpf_program__fd(scheduler);
+ for (i = 0; i < nthreads; ++i) {
+ if (pthread_create(&tid[i], NULL, runner, &td_sched) != 0) {
+ fprintf(stderr, "could not start thread");
+ goto cancel;
+ }
+ started[i] = true;
+ }
+
+ if (enable_delete)
+ atomic_store(&td_del.exit, 0);
+
+ td_del.prog_fd = bpf_program__fd(deleter);
+ if (pthread_create(&tid_del, NULL, runner, &td_del) != 0) {
+ fprintf(stderr, "could not start thread");
+ goto cancel;
+ }
+ started_del = true;
+
+ /* Run stress test for some time */
+ sleep(test_time_s);
+
+cancel:
+ atomic_store(&td_sched.exit, 1);
+ atomic_store(&td_del.exit, 1);
+ for (i = 0; i < nthreads; ++i) {
+ if (started[i])
+ pthread_join(tid[i], NULL);
+ }
+
+ if (started_del)
+ pthread_join(tid_del, NULL);
+
+ ASSERT_GT(skel->bss->callback_scheduled, 0, "work scheduled");
+ /* Some scheduling attempts should have failed due to contention */
+ ASSERT_GT(skel->bss->schedule_error, 0, "schedule error");
+
+ if (enable_delete) {
+ /* If the delete thread is enabled, it has cancelled some callbacks */
+ ASSERT_GT(skel->bss->delete_success, 0, "delete success");
+ ASSERT_LT(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks");
+ } else {
+ /* Without the delete thread, the number of scheduled callbacks equals the number fired */
+ ASSERT_EQ(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks");
+ }
+
+cleanup:
+ task_work_stress__destroy(skel);
+}
+
+void test_task_work_stress(void)
+{
+ if (test__start_subtest("no_delete"))
+ task_work_run(false);
+ if (test__start_subtest("with_delete"))
+ task_work_run(true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c
new file mode 100644
index 000000000000..fd8762ba4b67
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_id_ops_mapping1.skel.h"
+#include "struct_ops_id_ops_mapping2.skel.h"
+
+static void test_st_ops_id_ops_mapping(void)
+{
+ struct struct_ops_id_ops_mapping1 *skel1 = NULL;
+ struct struct_ops_id_ops_mapping2 *skel2 = NULL;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int err, pid, prog1_fd, prog2_fd;
+
+ skel1 = struct_ops_id_ops_mapping1__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "struct_ops_id_ops_mapping1__open"))
+ goto out;
+
+ skel2 = struct_ops_id_ops_mapping2__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "struct_ops_id_ops_mapping2__open"))
+ goto out;
+
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel1->maps.st_ops_map),
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto out;
+
+ skel1->bss->st_ops_id = info.id;
+
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel2->maps.st_ops_map),
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto out;
+
+ skel2->bss->st_ops_id = info.id;
+
+ err = struct_ops_id_ops_mapping1__attach(skel1);
+ if (!ASSERT_OK(err, "struct_ops_id_ops_mapping1__attach"))
+ goto out;
+
+ err = struct_ops_id_ops_mapping2__attach(skel2);
+ if (!ASSERT_OK(err, "struct_ops_id_ops_mapping2__attach"))
+ goto out;
+
+ /* run tracing prog that calls .test_1 and checks return */
+ pid = getpid();
+ skel1->bss->test_pid = pid;
+ skel2->bss->test_pid = pid;
+ sys_gettid();
+ skel1->bss->test_pid = 0;
+ skel2->bss->test_pid = 0;
+
+ /* run syscall_prog that calls .test_1 and checks return */
+ prog1_fd = bpf_program__fd(skel1->progs.syscall_prog);
+ err = bpf_prog_test_run_opts(prog1_fd, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ prog2_fd = bpf_program__fd(skel2->progs.syscall_prog);
+ err = bpf_prog_test_run_opts(prog2_fd, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ ASSERT_EQ(skel1->bss->test_err, 0, "skel1->bss->test_err");
+ ASSERT_EQ(skel2->bss->test_err, 0, "skel2->bss->test_err");
+
+out:
+ struct_ops_id_ops_mapping1__destroy(skel1);
+ struct_ops_id_ops_mapping2__destroy(skel2);
+}
+
+void test_struct_ops_id_ops_mapping(void)
+{
+ if (test__start_subtest("st_ops_id_ops_mapping"))
+ test_st_ops_id_ops_mapping();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
new file mode 100644
index 000000000000..9fd6306b455c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
+#include <bpf/btf.h>
+#include <test_progs.h>
+
+#define TLD_FREE_DATA_ON_THREAD_EXIT
+#define TLD_DYN_DATA_SIZE 4096
+#include "task_local_data.h"
+
+struct test_tld_struct {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+ __u64 d;
+};
+
+#include "test_task_local_data.skel.h"
+
+TLD_DEFINE_KEY(value0_key, "value0", sizeof(int));
+
+/*
+ * Reset task local data between subtests by clearing metadata other
+ * than the statically defined value0. This is safe as subtests run
+ * sequentially. Users of the task local data library should not touch
+ * library internals.
+ */
+static void reset_tld(void)
+{
+ if (TLD_READ_ONCE(tld_meta_p)) {
+ /* Remove TLDs created by tld_create_key() */
+ tld_meta_p->cnt = 1;
+ tld_meta_p->size = TLD_DYN_DATA_SIZE;
+ memset(&tld_meta_p->metadata[1], 0,
+ (TLD_MAX_DATA_CNT - 1) * sizeof(struct tld_metadata));
+ }
+}
+
+/* Serialize access to bpf program's global variables */
+static pthread_mutex_t global_mutex;
+
+static tld_key_t *tld_keys;
+
+#define TEST_BASIC_THREAD_NUM 32
+
+void *test_task_local_data_basic_thread(void *arg)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct test_task_local_data *skel = (struct test_task_local_data *)arg;
+ int fd, err, tid, *value0, *value1;
+ struct test_tld_struct *value2;
+
+ fd = bpf_map__fd(skel->maps.tld_data_map);
+
+ value0 = tld_get_data(fd, value0_key);
+ if (!ASSERT_OK_PTR(value0, "tld_get_data"))
+ goto out;
+
+ value1 = tld_get_data(fd, tld_keys[1]);
+ if (!ASSERT_OK_PTR(value1, "tld_get_data"))
+ goto out;
+
+ value2 = tld_get_data(fd, tld_keys[2]);
+ if (!ASSERT_OK_PTR(value2, "tld_get_data"))
+ goto out;
+
+ tid = sys_gettid();
+
+ *value0 = tid + 0;
+ *value1 = tid + 1;
+ value2->a = tid + 2;
+ value2->b = tid + 3;
+ value2->c = tid + 4;
+ value2->d = tid + 5;
+
+ pthread_mutex_lock(&global_mutex);
+ /* Run task_main, which reads task local data and saves it to global variables */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+
+ ASSERT_EQ(skel->bss->test_value0, tid + 0, "tld_get_data value0");
+ ASSERT_EQ(skel->bss->test_value1, tid + 1, "tld_get_data value1");
+ ASSERT_EQ(skel->bss->test_value2.a, tid + 2, "tld_get_data value2.a");
+ ASSERT_EQ(skel->bss->test_value2.b, tid + 3, "tld_get_data value2.b");
+ ASSERT_EQ(skel->bss->test_value2.c, tid + 4, "tld_get_data value2.c");
+ ASSERT_EQ(skel->bss->test_value2.d, tid + 5, "tld_get_data value2.d");
+ pthread_mutex_unlock(&global_mutex);
+
+ /* Make sure valueX are indeed local to threads */
+ ASSERT_EQ(*value0, tid + 0, "value0");
+ ASSERT_EQ(*value1, tid + 1, "value1");
+ ASSERT_EQ(value2->a, tid + 2, "value2.a");
+ ASSERT_EQ(value2->b, tid + 3, "value2.b");
+ ASSERT_EQ(value2->c, tid + 4, "value2.c");
+ ASSERT_EQ(value2->d, tid + 5, "value2.d");
+
+ *value0 = tid + 5;
+ *value1 = tid + 4;
+ value2->a = tid + 3;
+ value2->b = tid + 2;
+ value2->c = tid + 1;
+ value2->d = tid + 0;
+
+ /* Run task_main again */
+ pthread_mutex_lock(&global_mutex);
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+
+ ASSERT_EQ(skel->bss->test_value0, tid + 5, "tld_get_data value0");
+ ASSERT_EQ(skel->bss->test_value1, tid + 4, "tld_get_data value1");
+ ASSERT_EQ(skel->bss->test_value2.a, tid + 3, "tld_get_data value2.a");
+ ASSERT_EQ(skel->bss->test_value2.b, tid + 2, "tld_get_data value2.b");
+ ASSERT_EQ(skel->bss->test_value2.c, tid + 1, "tld_get_data value2.c");
+ ASSERT_EQ(skel->bss->test_value2.d, tid + 0, "tld_get_data value2.d");
+ pthread_mutex_unlock(&global_mutex);
+
+out:
+ pthread_exit(NULL);
+}
+
+static void test_task_local_data_basic(void)
+{
+ struct test_task_local_data *skel;
+ pthread_t thread[TEST_BASIC_THREAD_NUM];
+ char dummy_key_name[TLD_NAME_LEN];
+ tld_key_t key;
+ int i, err;
+
+ reset_tld();
+
+ ASSERT_OK(pthread_mutex_init(&global_mutex, NULL), "pthread_mutex_init");
+
+ skel = test_task_local_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t));
+ if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys"))
+ goto out;
+
+ ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY");
+ tld_keys[1] = tld_create_key("value1", sizeof(int));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[1]), "tld_create_key");
+ tld_keys[2] = tld_create_key("value2", sizeof(struct test_tld_struct));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[2]), "tld_create_key");
+
+ /*
+ * Shouldn't be able to store data exceeding a page. Create a TLD just big
+ * enough to exceed a page. TLDs already created are int value0, int
+ * value1, and struct test_tld_struct value2.
+ */
+ key = tld_create_key("value_not_exist",
+ TLD_PAGE_SIZE - 2 * sizeof(int) - sizeof(struct test_tld_struct) + 1);
+ ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key");
+
+ key = tld_create_key("value2", sizeof(struct test_tld_struct));
+ ASSERT_EQ(tld_key_err_or_zero(key), -EEXIST, "tld_create_key");
+
+ /* Shouldn't be able to create the (TLD_MAX_DATA_CNT+1)-th TLD */
+ for (i = 3; i < TLD_MAX_DATA_CNT; i++) {
+ snprintf(dummy_key_name, TLD_NAME_LEN, "dummy_value%d", i);
+ tld_keys[i] = tld_create_key(dummy_key_name, sizeof(int));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[i]), "tld_create_key");
+ }
+ key = tld_create_key("value_not_exist", sizeof(struct test_tld_struct));
+ ASSERT_EQ(tld_key_err_or_zero(key), -ENOSPC, "tld_create_key");
+
+ /* Access TLDs from multiple threads and check if they are thread-specific */
+ for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) {
+ err = pthread_create(&thread[i], NULL, test_task_local_data_basic_thread, skel);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto out;
+ }
+
+out:
+ for (i = 0; i < TEST_BASIC_THREAD_NUM; i++)
+ pthread_join(thread[i], NULL);
+
+ if (tld_keys) {
+ free(tld_keys);
+ tld_keys = NULL;
+ }
+ tld_free();
+ test_task_local_data__destroy(skel);
+}
+
+#define TEST_RACE_THREAD_NUM (TLD_MAX_DATA_CNT - 3)
+
+void *test_task_local_data_race_thread(void *arg)
+{
+ int err = 0, id = (intptr_t)arg;
+ char key_name[32];
+ tld_key_t key;
+
+ key = tld_create_key("value_not_exist", TLD_PAGE_SIZE + 1);
+ if (tld_key_err_or_zero(key) != -E2BIG) {
+ err = 1;
+ goto out;
+ }
+
+ /* Only one thread will succeed in creating value1 */
+ key = tld_create_key("value1", sizeof(int));
+ if (!tld_key_is_err(key))
+ tld_keys[1] = key;
+
+ /* Only one thread will succeed in creating value2 */
+ key = tld_create_key("value2", sizeof(struct test_tld_struct));
+ if (!tld_key_is_err(key))
+ tld_keys[2] = key;
+
+ snprintf(key_name, 32, "thread_%d", id);
+ tld_keys[id] = tld_create_key(key_name, sizeof(int));
+ if (tld_key_is_err(tld_keys[id]))
+ err = 2;
+out:
+ return (void *)(intptr_t)err;
+}
+
+static void test_task_local_data_race(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ pthread_t thread[TEST_RACE_THREAD_NUM];
+ struct test_task_local_data *skel;
+ int fd, i, j, err, *data;
+ void *ret = NULL;
+
+ skel = test_task_local_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t));
+ if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys"))
+ goto out;
+
+ fd = bpf_map__fd(skel->maps.tld_data_map);
+
+ ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY");
+ tld_keys[0] = value0_key;
+
+ for (j = 0; j < 100; j++) {
+ reset_tld();
+
+ for (i = 0; i < TEST_RACE_THREAD_NUM; i++) {
+ /*
+ * Try to make tld_create_key() calls race with each other. Issue both
+ * valid and invalid tld_create_key() calls from different threads.
+ */
+ err = pthread_create(&thread[i], NULL, test_task_local_data_race_thread,
+ (void *)(intptr_t)(i + 3));
+ if (CHECK_FAIL(err))
+ break;
+ }
+
+ /* Wait for all tld_create_key() to return */
+ for (i = 0; i < TEST_RACE_THREAD_NUM; i++) {
+ pthread_join(thread[i], &ret);
+ if (CHECK_FAIL(ret))
+ break;
+ }
+
+ /* Write a unique number to each TLD */
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+ data = tld_get_data(fd, tld_keys[i]);
+ if (CHECK_FAIL(!data))
+ break;
+ *data = i;
+ }
+
+ /* Read TLDs and check the values to see if any address collides with another */
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+ data = tld_get_data(fd, tld_keys[i]);
+ if (CHECK_FAIL(*data != i))
+ break;
+ }
+
+ /* Run task_main to make sure no invalid TLDs are added */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+ }
+out:
+ if (tld_keys) {
+ free(tld_keys);
+ tld_keys = NULL;
+ }
+ tld_free();
+ test_task_local_data__destroy(skel);
+}
+
+void test_task_local_data(void)
+{
+ if (test__start_subtest("task_local_data_basic"))
+ test_task_local_data_basic();
+ if (test__start_subtest("task_local_data_race"))
+ test_task_local_data_race();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_work.c b/tools/testing/selftests/bpf/prog_tests/test_task_work.c
new file mode 100644
index 000000000000..774b31a5f6ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_work.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+#include "task_work.skel.h"
+#include "task_work_fail.skel.h"
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <time.h>
+
+static int perf_event_open(__u32 type, __u64 config, int pid)
+{
+ struct perf_event_attr attr = {
+ .type = type,
+ .config = config,
+ .size = sizeof(struct perf_event_attr),
+ .sample_period = 100000,
+ };
+
+ return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
+}
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+static int verify_map(struct bpf_map *map, const char *expected_data)
+{
+ int err;
+ struct elem value;
+ int processed_values = 0;
+ int k, sz;
+
+ sz = bpf_map__max_entries(map);
+ for (k = 0; k < sz; ++k) {
+ err = bpf_map__lookup_elem(map, &k, sizeof(int), &value, sizeof(struct elem), 0);
+ if (err)
+ continue;
+ if (!ASSERT_EQ(strcmp(expected_data, value.data), 0, "map data")) {
+ fprintf(stderr, "expected '%s', found '%s' in %s map", expected_data,
+ value.data, bpf_map__name(map));
+ return 2;
+ }
+ processed_values++;
+ }
+
+ return processed_values == 0;
+}
+
+static void task_work_run(const char *prog_name, const char *map_name)
+{
+ struct task_work *skel;
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ struct bpf_link *link = NULL;
+ int err, pe_fd = -1, pid, status, pipefd[2];
+ char user_string[] = "hello world";
+
+ if (!ASSERT_NEQ(pipe(pipefd), -1, "pipe"))
+ return;
+
+ pid = fork();
+ if (pid == 0) {
+ __u64 num = 1;
+ int i;
+ char buf;
+
+ close(pipefd[1]);
+ read(pipefd[0], &buf, sizeof(buf));
+ close(pipefd[0]);
+
+ for (i = 0; i < 10000; ++i)
+ num *= time(0) % 7;
+ (void)num;
+ exit(0);
+ }
+ if (!ASSERT_GT(pid, 0, "fork() failed")) {
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return;
+ }
+
+ skel = task_work__open();
+ if (!ASSERT_OK_PTR(skel, "task_work__open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ bpf_program__set_autoload(prog, false);
+ }
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "prog_name"))
+ goto cleanup;
+ bpf_program__set_autoload(prog, true);
+ skel->bss->user_ptr = (char *)user_string;
+
+ err = task_work__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pe_fd = perf_event_open(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, pid);
+ if (pe_fd == -1 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_NEQ(pe_fd, -1, "pe_fd")) {
+ fprintf(stderr, "perf_event_open errno: %d, pid: %d\n", errno, pid);
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_perf_event(prog, pe_fd);
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
+ goto cleanup;
+
+ /* perf event fd ownership is passed to bpf_link */
+ pe_fd = -1;
+ close(pipefd[0]);
+ write(pipefd[1], user_string, 1);
+ close(pipefd[1]);
+ /* Wait to collect some samples */
+ waitpid(pid, &status, 0);
+ pid = 0;
+ map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(map, "find map_name"))
+ goto cleanup;
+ if (!ASSERT_OK(verify_map(map, user_string), "verify map"))
+ goto cleanup;
+cleanup:
+ if (pe_fd >= 0)
+ close(pe_fd);
+ bpf_link__destroy(link);
+ task_work__destroy(skel);
+ if (pid > 0) {
+ close(pipefd[0]);
+ write(pipefd[1], user_string, 1);
+ close(pipefd[1]);
+ waitpid(pid, &status, 0);
+ }
+}
+
+void test_task_work(void)
+{
+ if (test__start_subtest("test_task_work_hash_map"))
+ task_work_run("oncpu_hash_map", "hmap");
+
+ if (test__start_subtest("test_task_work_array_map"))
+ task_work_run("oncpu_array_map", "arrmap");
+
+ if (test__start_subtest("test_task_work_lru_map"))
+ task_work_run("oncpu_lru_map", "lrumap");
+
+ RUN_TESTS(task_work_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
index 367f47e4a936..b38c16b4247f 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -75,26 +75,26 @@ static void test_set_global_vars_succeeds(void)
" -vl2 > %s", fix->veristat, fix->tmpfile);
read(fix->fd, fix->output, fix->sz);
- __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001");
- __CHECK_STR("_w=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210");
- __CHECK_STR("_w=0x80000000 ", "var_s32 = -0x80000000");
- __CHECK_STR("_w=0x76543210 ", "var_u32 = 0x76543210");
- __CHECK_STR("_w=0x8000 ", "var_s16 = -32768");
- __CHECK_STR("_w=0xecec ", "var_u16 = 60652");
- __CHECK_STR("_w=128 ", "var_s8 = -128");
- __CHECK_STR("_w=255 ", "var_u8 = 255");
- __CHECK_STR("_w=11 ", "var_ea = EA2");
- __CHECK_STR("_w=12 ", "var_eb = EB2");
- __CHECK_STR("_w=13 ", "var_ec = EC2");
- __CHECK_STR("_w=1 ", "var_b = 1");
- __CHECK_STR("_w=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170");
- __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa");
- __CHECK_STR("_w=171 ", "arr[3]= 171");
- __CHECK_STR("_w=172 ", "arr[EA2] =172");
- __CHECK_STR("_w=10 ", "enum_arr[EC2]=EA3");
- __CHECK_STR("_w=173 ", "matrix[31][7][11]=173");
- __CHECK_STR("_w=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174");
- __CHECK_STR("_w=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175");
+ __CHECK_STR("=0xf000000000000001 ", "var_s64 = 0xf000000000000001");
+ __CHECK_STR("=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210");
+ __CHECK_STR("=0x80000000 ", "var_s32 = -0x80000000");
+ __CHECK_STR("=0x76543210 ", "var_u32 = 0x76543210");
+ __CHECK_STR("=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("=0xecec ", "var_u16 = 60652");
+ __CHECK_STR("=128 ", "var_s8 = -128");
+ __CHECK_STR("=255 ", "var_u8 = 255");
+ __CHECK_STR("=11 ", "var_ea = EA2");
+ __CHECK_STR("=12 ", "var_eb = EB2");
+ __CHECK_STR("=13 ", "var_ec = EC2");
+ __CHECK_STR("=1 ", "var_b = 1");
+ __CHECK_STR("=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170");
+ __CHECK_STR("=0xaaaa ", "union1.var_u16 = 0xaaaa");
+ __CHECK_STR("=171 ", "arr[3]= 171");
+ __CHECK_STR("=172 ", "arr[EA2] =172");
+ __CHECK_STR("=10 ", "enum_arr[EC2]=EA3");
+ __CHECK_STR("=173 ", "matrix[31][7][11]=173");
+ __CHECK_STR("=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174");
+ __CHECK_STR("=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175");
out:
teardown_fixture(fix);
@@ -117,8 +117,8 @@ static void test_set_global_vars_from_file_succeeds(void)
SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s",
fix->veristat, input_file, fix->tmpfile);
read(fix->fd, fix->output, fix->sz);
- __CHECK_STR("_w=0x8000 ", "var_s16 = -32768");
- __CHECK_STR("_w=0xecec ", "var_u16 = 60652");
+ __CHECK_STR("=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("=0xecec ", "var_u16 = 60652");
out:
close(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index 56f660ca567b..34f9ccce2602 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -3,6 +3,7 @@
#include <test_progs.h>
#include "timer.skel.h"
#include "timer_failure.skel.h"
+#include "timer_interrupt.skel.h"
#define NUM_THR 8
@@ -99,3 +100,36 @@ void serial_test_timer(void)
RUN_TESTS(timer_failure);
}
+
+void test_timer_interrupt(void)
+{
+ struct timer_interrupt *skel = NULL;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ skel = timer_interrupt__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
+ if (!ASSERT_OK_PTR(skel, "timer_interrupt__open_and_load"))
+ return;
+
+ err = timer_interrupt__attach(skel);
+ if (!ASSERT_OK(err, "timer_interrupt__attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.test_timer_interrupt);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+
+ usleep(50);
+
+ ASSERT_EQ(skel->bss->in_interrupt, 0, "in_interrupt");
+ if (skel->bss->preempt_count)
+ ASSERT_NEQ(skel->bss->in_interrupt_cb, 0, "in_interrupt_cb");
+
+out:
+ timer_interrupt__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
index 19e68d4b3532..6f8c0bfb0415 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
@@ -112,10 +112,39 @@ destroy_skel:
tracing_struct_many_args__destroy(skel);
}
+static void test_union_args(void)
+{
+ struct tracing_struct *skel;
+ int err;
+
+ skel = tracing_struct__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load"))
+ return;
+
+ err = tracing_struct__attach(skel);
+ if (!ASSERT_OK(err, "tracing_struct__attach"))
+ goto out;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->ut1_a_a, 1, "ut1:a.arg.a");
+ ASSERT_EQ(skel->bss->ut1_b, 4, "ut1:b");
+ ASSERT_EQ(skel->bss->ut1_c, 5, "ut1:c");
+
+ ASSERT_EQ(skel->bss->ut2_a, 6, "ut2:a");
+ ASSERT_EQ(skel->bss->ut2_b_a, 2, "ut2:b.arg.a");
+ ASSERT_EQ(skel->bss->ut2_b_b, 3, "ut2:b.arg.b");
+
+out:
+ tracing_struct__destroy(skel);
+}
+
void test_tracing_struct(void)
{
if (test__start_subtest("struct_args"))
test_struct_args();
if (test__start_subtest("struct_many_args"))
test_struct_many_args();
+ if (test__start_subtest("union_args"))
+ test_union_args();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c
index cf3e0e7a64fa..86404476c1da 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2023 Hengqi Chen */
#include <test_progs.h>
+#include <asm/ptrace.h>
#include "test_uprobe.skel.h"
static FILE *urand_spawn(int *pid)
@@ -33,7 +34,7 @@ static int urand_trigger(FILE **urand_pipe)
return exit_code;
}
-void test_uprobe(void)
+static void test_uprobe_attach(void)
{
LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
struct test_uprobe *skel;
@@ -93,3 +94,156 @@ cleanup:
pclose(urand_pipe);
test_uprobe__destroy(skel);
}
+
+#ifdef __x86_64__
+__naked __maybe_unused unsigned long uprobe_regs_change_trigger(void)
+{
+ asm volatile (
+ "ret\n"
+ );
+}
+
+static __naked void uprobe_regs_change(struct pt_regs *before, struct pt_regs *after)
+{
+ asm volatile (
+ "movq %r11, 48(%rdi)\n"
+ "movq %r10, 56(%rdi)\n"
+ "movq %r9, 64(%rdi)\n"
+ "movq %r8, 72(%rdi)\n"
+ "movq %rax, 80(%rdi)\n"
+ "movq %rcx, 88(%rdi)\n"
+ "movq %rdx, 96(%rdi)\n"
+ "movq %rsi, 104(%rdi)\n"
+ "movq %rdi, 112(%rdi)\n"
+
+ /* save 2nd argument */
+ "pushq %rsi\n"
+ "call uprobe_regs_change_trigger\n"
+
+ /* save return value and load 2nd argument pointer to rax */
+ "pushq %rax\n"
+ "movq 8(%rsp), %rax\n"
+
+ "movq %r11, 48(%rax)\n"
+ "movq %r10, 56(%rax)\n"
+ "movq %r9, 64(%rax)\n"
+ "movq %r8, 72(%rax)\n"
+ "movq %rcx, 88(%rax)\n"
+ "movq %rdx, 96(%rax)\n"
+ "movq %rsi, 104(%rax)\n"
+ "movq %rdi, 112(%rax)\n"
+
+ /* restore return value and 2nd argument */
+ "pop %rax\n"
+ "pop %rsi\n"
+
+ "movq %rax, 80(%rsi)\n"
+ "ret\n"
+ );
+}
+
+static void regs_common(void)
+{
+ struct pt_regs before = {}, after = {}, expected = {
+ .rax = 0xc0ffe,
+ .rcx = 0xbad,
+ .rdx = 0xdead,
+ .r8 = 0x8,
+ .r9 = 0x9,
+ .r10 = 0x10,
+ .r11 = 0x11,
+ .rdi = 0x12,
+ .rsi = 0x13,
+ };
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_pid = getpid();
+ skel->bss->regs = expected;
+
+ uprobe_opts.func_name = "uprobe_regs_change_trigger";
+ skel->links.test_regs_change = bpf_program__attach_uprobe_opts(skel->progs.test_regs_change,
+ -1,
+ "/proc/self/exe",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test_regs_change, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ uprobe_regs_change(&before, &after);
+
+ ASSERT_EQ(after.rax, expected.rax, "ax");
+ ASSERT_EQ(after.rcx, expected.rcx, "cx");
+ ASSERT_EQ(after.rdx, expected.rdx, "dx");
+ ASSERT_EQ(after.r8, expected.r8, "r8");
+ ASSERT_EQ(after.r9, expected.r9, "r9");
+ ASSERT_EQ(after.r10, expected.r10, "r10");
+ ASSERT_EQ(after.r11, expected.r11, "r11");
+ ASSERT_EQ(after.rdi, expected.rdi, "rdi");
+ ASSERT_EQ(after.rsi, expected.rsi, "rsi");
+
+cleanup:
+ test_uprobe__destroy(skel);
+}
+
+static noinline unsigned long uprobe_regs_change_ip_1(void)
+{
+ return 0xc0ffee;
+}
+
+static noinline unsigned long uprobe_regs_change_ip_2(void)
+{
+ return 0xdeadbeef;
+}
+
+static void regs_ip(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+ unsigned long ret;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_pid = getpid();
+ skel->bss->ip = (unsigned long) uprobe_regs_change_ip_2;
+
+ uprobe_opts.func_name = "uprobe_regs_change_ip_1";
+ skel->links.test_regs_change_ip = bpf_program__attach_uprobe_opts(
+ skel->progs.test_regs_change_ip,
+ -1,
+ "/proc/self/exe",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test_regs_change_ip, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ ret = uprobe_regs_change_ip_1();
+ ASSERT_EQ(ret, 0xdeadbeef, "ret");
+
+cleanup:
+ test_uprobe__destroy(skel);
+}
+
+static void test_uprobe_regs_change(void)
+{
+ if (test__start_subtest("regs_change_common"))
+ regs_common();
+ if (test__start_subtest("regs_change_ip"))
+ regs_ip();
+}
+#else
+static void test_uprobe_regs_change(void) { }
+#endif
+
+void test_uprobe(void)
+{
+ if (test__start_subtest("attach"))
+ test_uprobe_attach();
+ test_uprobe_regs_change();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 833eb87483a1..4f7f45e69315 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -40,12 +40,72 @@ static void __always_inline trigger_func(int x) {
}
}
+#if defined(__x86_64__) || defined(__i386__)
+/*
+ * SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)"
+ * - 'size' is the size in bytes of the array element, and its sign indicates
+ * whether the type is signed (negative) or unsigned (positive).
+ * - 'base_reg' is the register holding the base address, normally rdx or edx
+ * - 'index_reg' is the register holding the index, normally rax or eax
+ * - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the
+ * size of the element type.
+ *
+ * For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be:
+ * - size: -2 (negative because 'short' is signed)
+ * - scale: 2 (since sizeof(short) == 2)
+ *
+ * The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386
+ */
+static volatile short array[] = {-1, -2, -3, -4};
+
+#if defined(__x86_64__)
+#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2)
+#else
+#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2)
+#endif
+
+unsigned short test_usdt_sib_semaphore SEC(".probes");
+
+static void trigger_sib_spec(void)
+{
+ /*
+ * Force SIB addressing with inline assembly.
+ *
+ * You must compile with -std=gnu99 or -std=c99 to use the
+ * STAP_PROBE_ASM macro.
+ *
+ * The STAP_PROBE_ASM macro generates a quoted string that gets
+ * inserted between the surrounding assembly instructions. In this
+ * case, USDT_SIB_ARG_SPEC is embedded directly into the instruction
+ * stream, creating a probe point between the asm statement boundaries.
+ * It works fine with gcc/clang.
+ *
+ * Register constraints:
+ * - "d"(array): Binds the 'array' variable to %rdx or %edx register
+ * - "a"(0): Binds the constant 0 to %rax or %eax register
+ * These ensure that when USDT_SIB_ARG_SPEC references %%rdx(%edx) and
+ * %%rax(%eax), they contain the expected values for SIB addressing.
+ *
+ * The "memory" clobber prevents the compiler from reordering memory
+ * accesses around the probe point, ensuring that the probe behavior
+ * is predictable and consistent.
+ */
+ asm volatile(
+ STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC)
+ :
+ : "d"(array), "a"(0)
+ : "memory"
+ );
+}
+#endif
+
static void subtest_basic_usdt(bool optimized)
{
LIBBPF_OPTS(bpf_usdt_opts, opts);
struct test_usdt *skel;
struct test_usdt__bss *bss;
int err, i, called;
+ const __u64 expected_cookie = 0xcafedeadbeeffeed;
#define TRIGGER(x) ({ \
trigger_func(x); \
@@ -66,20 +126,29 @@ static void subtest_basic_usdt(bool optimized)
goto cleanup;
/* usdt0 won't be auto-attached */
- opts.usdt_cookie = 0xcafedeadbeeffeed;
+ opts.usdt_cookie = expected_cookie;
skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
0 /*self*/, "/proc/self/exe",
"test", "usdt0", &opts);
if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
goto cleanup;
- called = TRIGGER(1);
+#if defined(__x86_64__) || defined(__i386__)
+ opts.usdt_cookie = expected_cookie;
+ skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib,
+ 0 /*self*/, "/proc/self/exe",
+ "test", "usdt_sib", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link"))
+ goto cleanup;
+#endif
+
+ called = TRIGGER(1);
ASSERT_EQ(bss->usdt0_called, called, "usdt0_called");
ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
ASSERT_EQ(bss->usdt12_called, called, "usdt12_called");
- ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
+ ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie");
ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");
@@ -163,6 +232,16 @@ static void subtest_basic_usdt(bool optimized)
ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+#if defined(__x86_64__) || defined(__i386__)
+ trigger_sib_spec();
+ ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called");
+ ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie");
+ ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt");
+ ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg");
+ ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret");
+ ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size");
+#endif
+
cleanup:
test_usdt__destroy(skel);
#undef TRIGGER
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 77ec95d4ffaa..28e81161e6fc 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -46,6 +46,7 @@
#include "verifier_ldsx.skel.h"
#include "verifier_leak_ptr.skel.h"
#include "verifier_linked_scalars.skel.h"
+#include "verifier_live_stack.skel.h"
#include "verifier_load_acquire.skel.h"
#include "verifier_loops1.skel.h"
#include "verifier_lwt.skel.h"
@@ -59,6 +60,7 @@
#include "verifier_meta_access.skel.h"
#include "verifier_movsx.skel.h"
#include "verifier_mtu.skel.h"
+#include "verifier_mul.skel.h"
#include "verifier_netfilter_ctx.skel.h"
#include "verifier_netfilter_retcode.skel.h"
#include "verifier_bpf_fastcall.skel.h"
@@ -183,6 +185,7 @@ void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
void test_verifier_linked_scalars(void) { RUN(verifier_linked_scalars); }
+void test_verifier_live_stack(void) { RUN(verifier_live_stack); }
void test_verifier_loops1(void) { RUN(verifier_loops1); }
void test_verifier_lwt(void) { RUN(verifier_lwt); }
void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); }
@@ -194,6 +197,7 @@ void test_verifier_may_goto_1(void) { RUN(verifier_may_goto_1); }
void test_verifier_may_goto_2(void) { RUN(verifier_may_goto_2); }
void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
void test_verifier_movsx(void) { RUN(verifier_movsx); }
+void test_verifier_mul(void) { RUN(verifier_mul); }
void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index b9d9f0a502ce..178292d1251a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -9,6 +9,7 @@
#define TX_NETNS "xdp_context_tx"
#define RX_NETNS "xdp_context_rx"
#define TAP_NAME "tap0"
+#define DUMMY_NAME "dum0"
#define TAP_NETNS "xdp_context_tuntap"
#define TEST_PAYLOAD_LEN 32
@@ -96,9 +97,7 @@ void test_xdp_context_test_run(void)
/* Meta data must be 255 bytes or smaller */
test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);
- /* Total size of data must match data_end - data_meta */
- test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
- sizeof(data) - 1, 0, 0, 0);
+ /* Total size of data must be data_end - data_meta or larger */
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
sizeof(data) + 1, 0, 0, 0);
@@ -156,15 +155,30 @@ err:
return -1;
}
-static void assert_test_result(struct test_xdp_meta *skel)
+static int write_test_packet(int tap_fd)
+{
+ __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+ int n;
+
+ /* The ethernet header doesn't need to be valid for this test */
+ memset(packet, 0, sizeof(struct ethhdr));
+ memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN);
+
+ n = write(tap_fd, packet, sizeof(packet));
+ if (!ASSERT_EQ(n, sizeof(packet), "write packet"))
+ return -1;
+
+ return 0;
+}
+
+static void assert_test_result(const struct bpf_map *result_map)
{
int err;
__u32 map_key = 0;
__u8 map_value[TEST_PAYLOAD_LEN];
- err = bpf_map__lookup_elem(skel->maps.test_result, &map_key,
- sizeof(map_key), &map_value,
- TEST_PAYLOAD_LEN, BPF_ANY);
+ err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key),
+ &map_value, TEST_PAYLOAD_LEN, BPF_ANY);
if (!ASSERT_OK(err, "lookup test_result"))
return;
@@ -172,6 +186,18 @@ static void assert_test_result(struct test_xdp_meta *skel)
"test_result map contains test payload");
}
+static bool clear_test_result(struct bpf_map *result_map)
+{
+ const __u8 v[sizeof(test_payload)] = {};
+ const __u32 k = 0;
+ int err;
+
+ err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY);
+ ASSERT_OK(err, "update test_result");
+
+ return err == 0;
+}
+
void test_xdp_context_veth(void)
{
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
@@ -248,7 +274,7 @@ void test_xdp_context_veth(void)
if (!ASSERT_OK(ret, "send_test_packet"))
goto close;
- assert_test_result(skel);
+ assert_test_result(skel->maps.test_result);
close:
close_netns(nstoken);
@@ -257,17 +283,21 @@ close:
netns_free(tx_ns);
}
-void test_xdp_context_tuntap(void)
+static void test_tuntap(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prio_1_prog,
+ struct bpf_program *tc_prio_2_prog,
+ struct bpf_map *result_map)
{
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
struct netns_obj *ns = NULL;
- struct test_xdp_meta *skel = NULL;
- __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
int tap_fd = -1;
int tap_ifindex;
int ret;
+ if (!clear_test_result(result_map))
+ return;
+
ns = netns_new(TAP_NETNS, true);
if (!ASSERT_OK_PTR(ns, "create and open ns"))
return;
@@ -278,10 +308,6 @@ void test_xdp_context_tuntap(void)
SYS(close, "ip link set dev " TAP_NAME " up");
- skel = test_xdp_meta__open_and_load();
- if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
- goto close;
-
tap_ifindex = if_nametoindex(TAP_NAME);
if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
goto close;
@@ -291,33 +317,175 @@ void test_xdp_context_tuntap(void)
if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
goto close;
- tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls);
+ tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog);
ret = bpf_tc_attach(&tc_hook, &tc_opts);
if (!ASSERT_OK(ret, "bpf_tc_attach"))
goto close;
- ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp),
+ if (tc_prio_2_prog) {
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2,
+ .prog_fd = bpf_program__fd(tc_prio_2_prog));
+
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+ }
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog),
0, NULL);
if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
goto close;
- /* The ethernet header is not relevant for this test and doesn't need to
- * be meaningful.
- */
- struct ethhdr eth = { 0 };
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
- memcpy(packet, &eth, sizeof(eth));
- memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
+ assert_test_result(result_map);
+
+close:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ netns_free(ns);
+}
+
+/* Write a packet to a tap dev and copy it to ingress of a dummy dev */
+static void test_tuntap_mirred(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prog,
+ bool *test_pass)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *ns = NULL;
+ int dummy_ifindex;
+ int tap_fd = -1;
+ int tap_ifindex;
+ int ret;
+
+ *test_pass = false;
+
+ ns = netns_new(TAP_NETNS, true);
+ if (!ASSERT_OK_PTR(ns, "netns_new"))
+ return;
- ret = write(tap_fd, packet, sizeof(packet));
- if (!ASSERT_EQ(ret, sizeof(packet), "write packet"))
+ /* Setup dummy interface */
+ SYS(close, "ip link add name " DUMMY_NAME " type dummy");
+ SYS(close, "ip link set dev " DUMMY_NAME " up");
+
+ dummy_ifindex = if_nametoindex(DUMMY_NAME);
+ if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex"))
goto close;
- assert_test_result(skel);
+ tc_hook.ifindex = dummy_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ /* Setup TAP interface */
+ tap_fd = open_tuntap(TAP_NAME, true);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto close;
+
+ SYS(close, "ip link set dev " TAP_NAME " up");
+
+ tap_ifindex = if_nametoindex(TAP_NAME);
+ if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ /* Copy all packets received from TAP to dummy ingress */
+ SYS(close, "tc qdisc add dev " TAP_NAME " clsact");
+ SYS(close, "tc filter add dev " TAP_NAME " ingress "
+ "protocol all matchall "
+ "action mirred ingress mirror dev " DUMMY_NAME);
+
+ /* Receive a packet on TAP */
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
+
+ ASSERT_TRUE(*test_pass, "test_pass");
close:
if (tap_fd >= 0)
close(tap_fd);
- test_xdp_meta__destroy(skel);
netns_free(ns);
}
+
+void test_xdp_context_tuntap(void)
+{
+ struct test_xdp_meta *skel = NULL;
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ return;
+
+ if (test__start_subtest("data_meta"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls,
+ NULL, /* tc prio 2 */
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_read"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_read,
+ NULL, /* tc prio 2 */
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_slice"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_slice,
+ NULL, /* tc prio 2 */
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_write"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_write,
+ skel->progs.ing_cls_dynptr_read,
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_slice_rdwr"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_slice_rdwr,
+ skel->progs.ing_cls_dynptr_slice,
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_offset"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_offset_wr,
+ skel->progs.ing_cls_dynptr_offset_rd,
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_offset_oob"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_offset_oob,
+ skel->progs.ing_cls,
+ skel->maps.test_result);
+ if (test__start_subtest("clone_data_meta_empty_on_data_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_empty_on_data_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_data_meta_empty_on_meta_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_empty_on_meta_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_empty_on_data_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_empty_on_data_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_empty_on_meta_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_rdonly_before_data_dynptr_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write,
+ &skel->bss->test_pass);
+
+ test_xdp_meta__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 461ab18705d5..a8ab05216c38 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -7,6 +7,7 @@
#include <test_progs.h>
#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_devmap_tailcall.skel.h"
#include "test_xdp_with_devmap_frags_helpers.skel.h"
#include "test_xdp_with_devmap_helpers.skel.h"
@@ -107,6 +108,29 @@ static void test_neg_xdp_devmap_helpers(void)
}
}
+static void test_xdp_devmap_tailcall(enum bpf_attach_type prog_dev,
+ enum bpf_attach_type prog_tail,
+ bool expect_reject)
+{
+ struct test_xdp_devmap_tailcall *skel;
+ int err;
+
+ skel = test_xdp_devmap_tailcall__open();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_devmap_tailcall__open"))
+ return;
+
+ bpf_program__set_expected_attach_type(skel->progs.xdp_devmap, prog_dev);
+ bpf_program__set_expected_attach_type(skel->progs.xdp_entry, prog_tail);
+
+ err = test_xdp_devmap_tailcall__load(skel);
+ if (expect_reject)
+ ASSERT_ERR(err, "test_xdp_devmap_tailcall__load");
+ else
+ ASSERT_OK(err, "test_xdp_devmap_tailcall__load");
+
+ test_xdp_devmap_tailcall__destroy(skel);
+}
+
static void test_xdp_with_devmap_frags_helpers(void)
{
struct test_xdp_with_devmap_frags_helpers *skel;
@@ -238,8 +262,13 @@ void serial_test_xdp_devmap_attach(void)
if (test__start_subtest("DEVMAP with frags programs in entries"))
test_xdp_with_devmap_frags_helpers();
- if (test__start_subtest("Verifier check of DEVMAP programs"))
+ if (test__start_subtest("Verifier check of DEVMAP programs")) {
test_neg_xdp_devmap_helpers();
+ test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, BPF_XDP_DEVMAP, false);
+ test_xdp_devmap_tailcall(0, 0, true);
+ test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, 0, true);
+ test_xdp_devmap_tailcall(0, BPF_XDP_DEVMAP, true);
+ }
if (test__start_subtest("DEVMAP with programs in entries on veth"))
test_xdp_with_devmap_helpers_veth();
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
new file mode 100644
index 000000000000..efa350d04ec5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_pull_data.skel.h"
+
+#define PULL_MAX (1 << 31)
+#define PULL_PLUS_ONE (1 << 30)
+
+#define XDP_PACKET_HEADROOM 256
+
+/* Find headroom and tailroom occupied by struct xdp_frame and struct
+ * skb_shared_info so that we can calculate the maximum pull lengths for
+ * test cases. The values might not be the real sizes of the structures due to
+ * cache alignment.
+ */
+static int find_xdp_sizes(struct test_xdp_pull_data *skel, int frame_sz)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct xdp_md ctx = {};
+ int prog_fd, err;
+ __u8 *buf;
+
+ buf = calloc(frame_sz, sizeof(__u8));
+ if (!ASSERT_OK_PTR(buf, "calloc buf"))
+ return -ENOMEM;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = frame_sz;
+ topts.data_size_out = frame_sz;
+ /* Pass a data_end larger than the linear space available to make sure
+ * bpf_prog_test_run_xdp() will fill the linear data area so that
+ * xdp_find_sizes can infer the size of struct skb_shared_info
+ */
+ ctx.data_end = frame_sz;
+ topts.ctx_in = &ctx;
+ topts.ctx_out = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ topts.ctx_size_out = sizeof(ctx);
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_find_sizes);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ free(buf);
+
+ return err;
+}
+
+/* xdp_pull_data_prog will directly read a marker 0xbb stored at buf[1024]
+ * so callers expecting XDP_PASS should always pass a pull_len of at least 1024
+ */
+static void run_test(struct test_xdp_pull_data *skel, int retval,
+ int frame_sz, int buff_len, int meta_len, int data_len,
+ int pull_len)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct xdp_md ctx = {};
+ int prog_fd, err;
+ __u8 *buf;
+
+ buf = calloc(buff_len, sizeof(__u8));
+ if (!ASSERT_OK_PTR(buf, "calloc buf"))
+ return;
+
+ buf[meta_len + 1023] = 0xaa;
+ buf[meta_len + 1024] = 0xbb;
+ buf[meta_len + 1025] = 0xcc;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = buff_len;
+ topts.data_size_out = buff_len;
+ ctx.data = meta_len;
+ ctx.data_end = meta_len + data_len;
+ topts.ctx_in = &ctx;
+ topts.ctx_out = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ topts.ctx_size_out = sizeof(ctx);
+
+ skel->bss->data_len = data_len;
+ if (pull_len & PULL_MAX) {
+ int headroom = XDP_PACKET_HEADROOM - meta_len - skel->bss->xdpf_sz;
+ int tailroom = frame_sz - XDP_PACKET_HEADROOM -
+ data_len - skel->bss->sinfo_sz;
+
+ pull_len = pull_len & PULL_PLUS_ONE ? 1 : 0;
+ pull_len += headroom + tailroom + data_len;
+ }
+ skel->bss->pull_len = pull_len;
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_pull_data_prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(topts.retval, retval, "xdp_pull_data_prog retval");
+
+ if (retval == XDP_DROP)
+ goto out;
+
+ ASSERT_EQ(ctx.data_end, meta_len + pull_len, "linear data size");
+ ASSERT_EQ(topts.data_size_out, buff_len, "linear + non-linear data size");
+ /* Make sure data around xdp->data_end was not messed up by
+ * bpf_xdp_pull_data()
+ */
+ ASSERT_EQ(buf[meta_len + 1023], 0xaa, "data[1023]");
+ ASSERT_EQ(buf[meta_len + 1024], 0xbb, "data[1024]");
+ ASSERT_EQ(buf[meta_len + 1025], 0xcc, "data[1025]");
+out:
+ free(buf);
+}
+
+static void test_xdp_pull_data_basic(void)
+{
+ u32 pg_sz, max_meta_len, max_data_len;
+ struct test_xdp_pull_data *skel;
+
+ skel = test_xdp_pull_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_pull_data__open_and_load"))
+ return;
+
+ pg_sz = sysconf(_SC_PAGE_SIZE);
+
+ if (find_xdp_sizes(skel, pg_sz))
+ goto out;
+
+ max_meta_len = XDP_PACKET_HEADROOM - skel->bss->xdpf_sz;
+ max_data_len = pg_sz - XDP_PACKET_HEADROOM - skel->bss->sinfo_sz;
+
+ /* linear xdp pkt, pull 0 byte */
+ run_test(skel, XDP_PASS, pg_sz, 2048, 0, 2048, 2048);
+
+ /* multi-buf pkt, pull results in linear xdp pkt */
+ run_test(skel, XDP_PASS, pg_sz, 2048, 0, 1024, 2048);
+
+ /* multi-buf pkt, pull 1 byte to linear data area */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1024, 1025);
+
+ /* multi-buf pkt, pull 0 byte to linear data area */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1025, 1025);
+
+ /* multi-buf pkt, empty linear data area, pull requires memmove */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 0, PULL_MAX);
+
+ /* multi-buf pkt, no headroom */
+ run_test(skel, XDP_PASS, pg_sz, 9000, max_meta_len, 1024, PULL_MAX);
+
+ /* multi-buf pkt, no tailroom, pull requires memmove */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, max_data_len, PULL_MAX);
+
+ /* Test cases with invalid pull length */
+
+ /* linear xdp pkt, pull more than total data len */
+ run_test(skel, XDP_DROP, pg_sz, 2048, 0, 2048, 2049);
+
+ /* multi-buf pkt with no space left in linear data area */
+ run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, max_data_len,
+ PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, empty linear data area */
+ run_test(skel, XDP_DROP, pg_sz, 9000, 0, 0, PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, no headroom */
+ run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, 1024,
+ PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, no tailroom */
+ run_test(skel, XDP_DROP, pg_sz, 9000, 0, max_data_len,
+ PULL_MAX | PULL_PLUS_ONE);
+
+out:
+ test_xdp_pull_data__destroy(skel);
+}
+
+void test_xdp_pull_data(void)
+{
+ if (test__start_subtest("xdp_pull_data"))
+ test_xdp_pull_data_basic();
+}
diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c
index a52feff98112..d1841aac94a2 100644
--- a/tools/testing/selftests/bpf/progs/arena_atomics.c
+++ b/tools/testing/selftests/bpf/progs/arena_atomics.c
@@ -28,7 +28,8 @@ bool skip_all_tests = true;
#if defined(ENABLE_ATOMICS_TESTS) && \
defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false;
#else
bool skip_lacq_srel_tests = true;
@@ -314,7 +315,8 @@ int load_acquire(const void *ctx)
{
#if defined(ENABLE_ATOMICS_TESTS) && \
defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
#define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \
{ asm volatile ( \
@@ -365,7 +367,8 @@ int store_release(const void *ctx)
{
#if defined(ENABLE_ATOMICS_TESTS) && \
defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
#define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \
{ asm volatile ( \
diff --git a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
index c4500c37f85e..086b57a426cf 100644
--- a/tools/testing/selftests/bpf/progs/arena_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
@@ -37,8 +37,11 @@ int prog(void *ctx)
#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
unsigned long flags;
- if ((ret = arena_spin_lock_irqsave(&lock, flags)))
+ if ((ret = arena_spin_lock_irqsave(&lock, flags))) {
+ if (ret == -EOPNOTSUPP)
+ test_skip = 3;
return ret;
+ }
if (counter != limit)
counter++;
bpf_repeat(cs_count);
diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
index 1654a530aa3d..4e51785e7606 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
@@ -101,7 +101,7 @@ static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
tp->snd_cwnd = pkts_in_flight + sndcnt;
}
-/* Decide wheather to run the increase function of congestion control. */
+/* Decide whether to run the increase function of congestion control. */
static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
if (tcp_sk(sk)->reordering > TCP_REORDERING)
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 7cd73e75f52a..32c511bcd60b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
-/* WARNING: This implemenation is not necessarily the same
+/* WARNING: This implementation is not necessarily the same
* as the tcp_dctcp.c. The purpose is mainly for testing
* the kernel BPF logic.
*/
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index c1cfd297aabf..a7a1a684eed1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -33,7 +33,20 @@
* e.g. "foo{{[0-9]+}}" matches strings like "foo007".
* Extended POSIX regular expression syntax is allowed
* inside the brackets.
+ * __not_msg Message not expected to be found in verifier log.
+ * If __not_msg is situated between __msg tags,
+ * the framework matches the __msg tags first, and then
+ * checks that __not_msg is not present in the portion of
+ * the log between the bracketing __msg tags.
+ * Same regex syntax as for __msg is supported.
* __msg_unpriv Same as __msg but for unprivileged mode.
+ * __not_msg_unpriv Same as __not_msg but for unprivileged mode.
+ *
+ * __stderr Message expected to be found in the bpf stderr stream. The
+ * same regex rules apply as for __msg.
+ * __stderr_unpriv Same as __stderr but for unprivileged mode.
+ * __stdout Same as __stderr but for the stdout stream.
+ * __stdout_unpriv Same as __stdout but for unprivileged mode.
*
* __xlated Expect a line in a disassembly log after verifier applies rewrites.
* Multiple __xlated attributes could be specified.
@@ -115,12 +128,14 @@
* __caps_unpriv Specify the capabilities that should be set when running the test.
*/
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
+#define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg)))
#define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg)))
#define __jited(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
#define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc)))
#define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __not_msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __xlated_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __jited_unpriv(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
#define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv")))
@@ -136,9 +151,14 @@
#define __arch_x86_64 __arch("X86_64")
#define __arch_arm64 __arch("ARM64")
#define __arch_riscv64 __arch("RISCV64")
+#define __arch_s390x __arch("s390x")
#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
#define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited")))
#define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited")))
+#define __stderr(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr=" XSTR(__COUNTER__) "=" msg)))
+#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg)))
+#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg)))
/* Define common capabilities tested using __caps_unpriv */
#define CAP_NET_ADMIN 12
@@ -156,6 +176,10 @@
#define __imm_ptr(name) [name]"r"(&name)
#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+
/* Magic constants used with __retval() */
#define POINTER_VALUE 0xbadcafe
#define TEST_DATA_LEN 64
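For illustration, the new __not_msg tag is meant to be bracketed by ordinary __msg tags so the framework can check a bounded region of the verifier log. A minimal selftest sketch (hypothetical program and placeholder message strings, not taken from this patch) could look like:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"

SEC("socket")
__success __log_level(2)
__msg("first expected verifier log line")
__not_msg("text that must not appear between the two __msg matches")
__msg("second expected verifier log line")
int demo_not_msg(void *ctx)
{
	return 0;
}

char _license[] SEC("license") = "GPL";

The __stderr/__stdout tags follow the same syntax and are exercised by the stream.c changes later in this patch.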
diff --git a/tools/testing/selftests/bpf/progs/bpf_test_utils.h b/tools/testing/selftests/bpf/progs/bpf_test_utils.h
new file mode 100644
index 000000000000..f4e67b492dd2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_test_utils.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_TEST_UTILS_H__
+#define __BPF_TEST_UTILS_H__
+
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Clobber as many native registers and stack slots as possible. */
+static __always_inline void clobber_regs_stack(void)
+{
+ char tmp_str[] = "123456789";
+ unsigned long tmp;
+
+ bpf_strtoul(tmp_str, sizeof(tmp_str), 0, &tmp);
+ __sink(tmp);
+}
+
+#endif
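The clobber_regs_stack() helper above is consumed by the tail-call hierarchy tests updated later in this patch. As a hedged sketch of the intended call pattern (illustrative program, not part of the patch):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_test_utils.h"

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

SEC("tc")
int entry(struct __sk_buff *skb)
{
	/* Dirty caller-saved registers and stack slots before tail calling,
	 * so tail-call handling is exercised under register and stack
	 * pressure (the apparent intent of clobber_regs_stack()).
	 */
	clobber_regs_stack();
	bpf_tail_call_static(skb, &jmp_table, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";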
diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
index 092db1d0435e..88e13e17ec9e 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
@@ -73,7 +73,7 @@ int BPF_PROG(use_css_iter_non_sleepable)
}
SEC("lsm.s/socket_connect")
-__failure __msg("expected an RCU CS")
+__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock)
{
u64 cgrp_id = bpf_get_current_cgroup_id();
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
index 5354455a01be..02d8f160ca0e 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
@@ -221,3 +221,15 @@ int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path)
return 0;
}
+
+SEC("syscall")
+int test_cgrp_from_id_ns(void *ctx)
+{
+ struct cgroup *cg;
+
+ cg = bpf_cgroup_from_id(1);
+ if (!cg)
+ return 42;
+ bpf_cgroup_release(cg);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index bd8f15229f5c..dda6a8dada82 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -269,6 +269,26 @@ int data_slice_out_of_bounds_skb(struct __sk_buff *skb)
return SK_PASS;
}
+/* A metadata slice can't be accessed out of bounds */
+SEC("?tc")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_skb_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *(md + 1) = 42;
+
+ return SK_PASS;
+}
+
SEC("?raw_tp")
__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_map_value(void *ctx)
@@ -1089,6 +1109,26 @@ int skb_invalid_slice_write(struct __sk_buff *skb)
return SK_PASS;
}
+/* bpf_dynptr_slice()s are read-only and cannot be written to */
+SEC("?tc")
+__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem")
+int skb_meta_invalid_slice_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?tc")
__failure __msg("invalid mem access 'scalar'")
@@ -1192,6 +1232,188 @@ int skb_invalid_data_slice4(struct __sk_buff *skb)
return SK_PASS;
}
+/* Read-only skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *d;
+
+ return SK_PASS;
+}
+
+/* Read-write skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *d = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb data slice */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
@@ -1255,6 +1477,19 @@ int skb_invalid_ctx(void *ctx)
return 0;
}
+/* Only supported prog types can create skb_meta-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_skb_meta is not allowed")
+int skb_meta_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr meta;
+
+ /* this should fail */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+
+ return 0;
+}
+
SEC("fentry/skb_tx_error")
__failure __msg("must be referenced or trusted")
int BPF_PROG(skb_invalid_ctx_fentry, void *skb)
@@ -1665,6 +1900,29 @@ int clone_skb_packet_data(struct __sk_buff *skb)
return 0;
}
+/* A skb clone's metadata slice becomes invalid anytime packet data changes */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int clone_skb_packet_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr clone, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ bpf_dynptr_clone(&meta, &clone);
+ md = bpf_dynptr_slice_rdwr(&clone, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return 0;
+}
+
/* A xdp clone's data slices should be invalid anytime packet data changes */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 8315273cb900..127dea342e5a 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -211,6 +211,61 @@ int test_dynptr_skb_data(struct __sk_buff *skb)
return 1;
}
+SEC("?tc")
+int test_dynptr_skb_meta_data(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ /* This should return NULL. Must use bpf_dynptr_slice API */
+ err = 2;
+ md = bpf_dynptr_data(&meta, 0, sizeof(*md));
+ if (md)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
+/* Check that skb metadata dynptr ops don't accept any flags. */
+SEC("?tc")
+int test_dynptr_skb_meta_flags(struct __sk_buff *skb)
+{
+ const __u64 INVALID_FLAGS = ~0ULL;
+ struct bpf_dynptr meta;
+ __u8 buf;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, INVALID_FLAGS, &meta);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 2;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ err = 3;
+ ret = bpf_dynptr_read(&buf, 0, &meta, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 4;
+ ret = bpf_dynptr_write(&meta, 0, &buf, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
SEC("tp/syscalls/sys_enter_nanosleep")
int test_adjust(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
index 5e0a1ca96d4e..a01c2736890f 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_assert.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -18,43 +18,43 @@
return *(u64 *)num; \
}
-__msg(": R0_w=0xffffffff80000000")
+__msg(": R0=0xffffffff80000000")
check_assert(s64, ==, eq_int_min, INT_MIN);
-__msg(": R0_w=0x7fffffff")
+__msg(": R0=0x7fffffff")
check_assert(s64, ==, eq_int_max, INT_MAX);
-__msg(": R0_w=0")
+__msg(": R0=0")
check_assert(s64, ==, eq_zero, 0);
-__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000")
+__msg(": R0=0x8000000000000000 R1=0x8000000000000000")
check_assert(s64, ==, eq_llong_min, LLONG_MIN);
-__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff")
+__msg(": R0=0x7fffffffffffffff R1=0x7fffffffffffffff")
check_assert(s64, ==, eq_llong_max, LLONG_MAX);
-__msg(": R0_w=scalar(id=1,smax=0x7ffffffe)")
+__msg(": R0=scalar(id=1,smax=0x7ffffffe)")
check_assert(s64, <, lt_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
check_assert(s64, <, lt_zero, 0);
-__msg(": R0_w=scalar(id=1,smax=0xffffffff7fffffff")
+__msg(": R0=scalar(id=1,smax=0xffffffff7fffffff")
check_assert(s64, <, lt_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smax=0x7fffffff)")
+__msg(": R0=scalar(id=1,smax=0x7fffffff)")
check_assert(s64, <=, le_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smax=0)")
+__msg(": R0=scalar(id=1,smax=0)")
check_assert(s64, <=, le_zero, 0);
-__msg(": R0_w=scalar(id=1,smax=0xffffffff80000000")
+__msg(": R0=scalar(id=1,smax=0xffffffff80000000")
check_assert(s64, <=, le_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >, gt_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >, gt_zero, 0);
-__msg(": R0_w=scalar(id=1,smin=0xffffffff80000001")
+__msg(": R0=scalar(id=1,smin=0xffffffff80000001")
check_assert(s64, >, gt_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >=, ge_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >=, ge_zero, 0);
-__msg(": R0_w=scalar(id=1,smin=0xffffffff80000000")
+__msg(": R0=scalar(id=1,smin=0xffffffff80000000")
check_assert(s64, >=, ge_neg, INT_MIN);
SEC("?tc")
diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
index 544e5ac90461..d09bbd8ae8a8 100644
--- a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
+++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
@@ -12,7 +12,7 @@
SEC("freplace/connect_v4_prog")
int new_connect_v4_prog(struct bpf_sock_addr *ctx)
{
- // return value thats in invalid range
+ // return value that's in invalid range
return 255;
}
diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c
index f41257eadbb2..d273b46dfc7c 100644
--- a/tools/testing/selftests/bpf/progs/iters_state_safety.c
+++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c
@@ -30,7 +30,7 @@ int force_clang_to_emit_btf_for_externs(void *ctx)
SEC("?raw_tp")
__success __log_level(2)
-__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)")
int create_and_destroy(void *ctx)
{
struct bpf_iter_num iter;
@@ -196,7 +196,7 @@ int leak_iter_from_subprog_fail(void *ctx)
SEC("?raw_tp")
__success __log_level(2)
-__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)")
int valid_stack_reuse(void *ctx)
{
struct bpf_iter_num iter;
@@ -345,7 +345,7 @@ int __naked read_from_iter_slot_fail(void)
"r3 = 1000;"
"call %[bpf_iter_num_new];"
- /* attemp to leak bpf_iter_num state */
+ /* attempt to leak bpf_iter_num state */
"r7 = *(u64 *)(r6 + 0);"
"r8 = *(u64 *)(r6 + 8);"
diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c
index 6b1588d70652..fe3663dedbe1 100644
--- a/tools/testing/selftests/bpf/progs/iters_task_failure.c
+++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c
@@ -15,7 +15,7 @@ void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-__failure __msg("expected an RCU CS when using bpf_iter_task_next")
+__failure __msg("kernel func bpf_iter_task_new requires RCU critical section protection")
int BPF_PROG(iter_tasks_without_lock)
{
struct task_struct *pos;
@@ -27,7 +27,7 @@ int BPF_PROG(iter_tasks_without_lock)
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-__failure __msg("expected an RCU CS when using bpf_iter_css_next")
+__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
int BPF_PROG(iter_css_without_lock)
{
u64 cg_id = bpf_get_current_cgroup_id();
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod.c b/tools/testing/selftests/bpf/progs/iters_testmod.c
index 9e4b45201e69..5379e9960ffd 100644
--- a/tools/testing/selftests/bpf/progs/iters_testmod.c
+++ b/tools/testing/selftests/bpf/progs/iters_testmod.c
@@ -123,3 +123,49 @@ out:
bpf_iter_num_destroy(&num_it);
return 0;
}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_kfunc_ret_rcu_test requires RCU critical section protection")
+int iter_ret_rcu_test_protected(const void *ctx)
+{
+ struct task_struct *p;
+
+ p = bpf_kfunc_ret_rcu_test();
+ return p->pid;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("R1 type=rcu_ptr_or_null_ expected=")
+int iter_ret_rcu_test_type(const void *ctx)
+{
+ struct task_struct *p;
+
+ bpf_rcu_read_lock();
+ p = bpf_kfunc_ret_rcu_test();
+ bpf_this_cpu_ptr(p);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_kfunc_ret_rcu_test_nostruct requires RCU critical section protection")
+int iter_ret_rcu_test_protected_nostruct(const void *ctx)
+{
+ void *p;
+
+ p = bpf_kfunc_ret_rcu_test_nostruct(4);
+ return *(int *)p;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("R1 type=rdonly_rcu_mem_or_null expected=")
+int iter_ret_rcu_test_type_nostruct(const void *ctx)
+{
+ void *p;
+
+ bpf_rcu_read_lock();
+ p = bpf_kfunc_ret_rcu_test_nostruct(4);
+ bpf_this_cpu_ptr(p);
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
index 6543d5b6e0a9..83791348bed5 100644
--- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
+++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
@@ -20,7 +20,7 @@ __s64 res_empty;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_empty(const void *ctx)
@@ -38,7 +38,7 @@ __s64 res_full;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_full(const void *ctx)
@@ -58,7 +58,7 @@ static volatile int zero = 0;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_truncated(const void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
new file mode 100644
index 000000000000..f77aef0474d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#if defined(__TARGET_ARCH_x86)
+SEC("kprobe")
+int kprobe_write_ctx(struct pt_regs *ctx)
+{
+ ctx->ax = 0;
+ return 0;
+}
+
+SEC("kprobe.multi")
+int kprobe_multi_write_ctx(struct pt_regs *ctx)
+{
+ ctx->ax = 0;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c
index 50e66772c046..b0fa26fb4760 100644
--- a/tools/testing/selftests/bpf/progs/loop1.c
+++ b/tools/testing/selftests/bpf/progs/loop1.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c
index 947bb7e988c2..0227409d4b0e 100644
--- a/tools/testing/selftests/bpf/progs/loop2.c
+++ b/tools/testing/selftests/bpf/progs/loop2.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
index 717dab14322b..5d1c9a775e6b 100644
--- a/tools/testing/selftests/bpf/progs/loop3.c
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
index e4ff97fbcce1..dd36aff4fba3 100644
--- a/tools/testing/selftests/bpf/progs/loop6.c
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/ptrace.h>
-#include <stddef.h>
-#include <linux/bpf.h>
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
@@ -26,12 +25,6 @@ char _license[] SEC("license") = "GPL";
#define SG_CHAIN 0x01UL
#define SG_END 0x02UL
-struct scatterlist {
- unsigned long page_link;
- unsigned int offset;
- unsigned int length;
-};
-
#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN)
#define sg_is_last(sg) ((sg)->page_link & SG_END)
#define sg_chain_ptr(sg) \
@@ -62,7 +55,7 @@ static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
return sgp;
}
-int config = 0;
+int run_once = 0;
int result = 0;
SEC("kprobe/virtqueue_add_sgs")
@@ -73,14 +66,14 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
__u64 length1 = 0, length2 = 0;
unsigned int i, n, len;
- if (config != 0)
+ if (run_once != 0)
return 0;
for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
__sink(out_sgs);
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
- bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ len = BPF_CORE_READ(sgp, length);
length1 += len;
n++;
}
@@ -90,13 +83,13 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
__sink(in_sgs);
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
- bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ len = BPF_CORE_READ(sgp, length);
length2 += len;
n++;
}
}
- config = 1;
+ run_once = 1;
result = length2 - length1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie.h b/tools/testing/selftests/bpf/progs/lpm_trie.h
new file mode 100644
index 000000000000..76aa5821807f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __PROGS_LPM_TRIE_H
+#define __PROGS_LPM_TRIE_H
+
+struct trie_key {
+ __u32 prefixlen;
+ __u32 data;
+};
+
+/* Benchmark operations */
+enum {
+ LPM_OP_NOOP = 0,
+ LPM_OP_BASELINE,
+ LPM_OP_LOOKUP,
+ LPM_OP_INSERT,
+ LPM_OP_UPDATE,
+ LPM_OP_DELETE,
+ LPM_OP_FREE
+};
+
+/*
+ * Return values from run_bench.
+ *
+ * Negative values are also allowed and represent kernel error codes.
+ */
+#define LPM_BENCH_SUCCESS 0
+#define LPM_BENCH_REINIT_MAP 1 /* Reset trie to initial state for current op */
+
+#endif
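The return-value convention documented above implies a user-space driver loop along these lines. This is a hypothetical sketch: the real consumer is bench_lpm_trie_map.c (not shown in this excerpt), and run_prog()/fill_map() are made-up placeholder names; the include path assumes the selftests include setup.

#include <stdio.h>
#include <linux/types.h>
#include "progs/lpm_trie.h"

/* Placeholders for whatever the real benchmark driver does. */
extern int run_prog(void);   /* run the run_bench BPF program once */
extern void fill_map(void);  /* restore the trie to its initial state */

void drive_bench(void)
{
	for (;;) {
		int ret = run_prog();

		if (ret == LPM_BENCH_SUCCESS)
			continue;
		if (ret == LPM_BENCH_REINIT_MAP) {
			/* Reset the trie to its initial state for the current op. */
			fill_map();
			continue;
		}
		/* Negative values are kernel error codes from the map operations. */
		fprintf(stderr, "benchmark op failed: %d\n", ret);
		break;
	}
}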
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_bench.c b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
new file mode 100644
index 000000000000..a0e6ebd5507a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_atomic.h"
+#include "progs/lpm_trie.h"
+
+#define BPF_OBJ_NAME_LEN 16U
+#define MAX_ENTRIES 100000000
+#define NR_LOOPS 10000
+
+char _license[] SEC("license") = "GPL";
+
+/* Filled by userspace. See fill_map() in bench_lpm_trie_map.c */
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct trie_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+} trie_map SEC(".maps");
+
+long hits;
+long duration_ns;
+
+/* Configured from userspace */
+__u32 nr_entries;
+__u32 prefixlen;
+bool random;
+__u8 op;
+
+static __u64 latency_free_start;
+
+SEC("fentry/bpf_map_free_deferred")
+int BPF_PROG(trie_free_entry, struct work_struct *work)
+{
+ struct bpf_map *map = container_of(work, struct bpf_map, work);
+ char name[BPF_OBJ_NAME_LEN];
+ u32 map_type;
+
+ map_type = BPF_CORE_READ(map, map_type);
+ if (map_type != BPF_MAP_TYPE_LPM_TRIE)
+ return 0;
+
+ /*
+ * Ideally we'd have access to the map ID but that's already
+ * freed before we enter trie_free().
+ */
+ BPF_CORE_READ_STR_INTO(&name, map, name);
+ if (bpf_strncmp(name, BPF_OBJ_NAME_LEN, "trie_free_map"))
+ return 0;
+
+ latency_free_start = bpf_ktime_get_ns();
+
+ return 0;
+}
+
+SEC("fexit/bpf_map_free_deferred")
+int BPF_PROG(trie_free_exit, struct work_struct *work)
+{
+ __u64 val;
+
+ if (!latency_free_start)
+ return 0;
+
+ val = bpf_ktime_get_ns() - latency_free_start;
+ latency_free_start = 0;
+
+ __sync_add_and_fetch(&duration_ns, val);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+static __u32 cur_key;
+
+static __always_inline void generate_key(struct trie_key *key)
+{
+ key->prefixlen = prefixlen;
+
+ if (random)
+ key->data = bpf_get_prandom_u32() % nr_entries;
+ else
+ key->data = cur_key++ % nr_entries;
+}
+
+static int noop(__u32 index, __u32 *unused)
+{
+ return 0;
+}
+
+static int baseline(__u32 index, __u32 *unused)
+{
+ struct trie_key key;
+ __u32 blackbox = 0;
+
+ generate_key(&key);
+ /* Avoid compiler optimizing out the modulo */
+ barrier_var(blackbox);
+ blackbox = READ_ONCE(key.data);
+
+ return 0;
+}
+
+static int lookup(__u32 index, int *retval)
+{
+ struct trie_key key;
+
+ generate_key(&key);
+ if (!bpf_map_lookup_elem(&trie_map, &key)) {
+ *retval = -ENOENT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int insert(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_NOEXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Is this the last entry? */
+ if (key.data == nr_entries - 1) {
+ /* For atomicity concerns, see the comment in delete() */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int update(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_EXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int delete(__u32 index, int *retval)
+{
+ struct trie_key key;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_delete_elem(&trie_map, &key);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Do we need to refill the map? */
+ if (key.data == nr_entries - 1) {
+ /*
+ * Atomicity isn't required because DELETE only supports
+ * one producer running concurrently. What we need is a
+ * way to track how many entries have been deleted from
+ * the trie between consecutive invocations of the BPF
+ * prog because a single bpf_loop() call might not
+ * delete all entries, e.g. when NR_LOOPS < nr_entries.
+ */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+SEC("xdp")
+int BPF_PROG(run_bench)
+{
+ int err = LPM_BENCH_SUCCESS;
+ u64 start, delta;
+ int loops;
+
+ start = bpf_ktime_get_ns();
+
+ switch (op) {
+ case LPM_OP_NOOP:
+ loops = bpf_loop(NR_LOOPS, noop, NULL, 0);
+ break;
+ case LPM_OP_BASELINE:
+ loops = bpf_loop(NR_LOOPS, baseline, NULL, 0);
+ break;
+ case LPM_OP_LOOKUP:
+ loops = bpf_loop(NR_LOOPS, lookup, &err, 0);
+ break;
+ case LPM_OP_INSERT:
+ loops = bpf_loop(NR_LOOPS, insert, &err, 0);
+ break;
+ case LPM_OP_UPDATE:
+ loops = bpf_loop(NR_LOOPS, update, &err, 0);
+ break;
+ case LPM_OP_DELETE:
+ loops = bpf_loop(NR_LOOPS, delete, &err, 0);
+ break;
+ default:
+ bpf_printk("invalid benchmark operation\n");
+ return -1;
+ }
+
+ delta = bpf_ktime_get_ns() - start;
+
+ __sync_add_and_fetch(&duration_ns, delta);
+ __sync_add_and_fetch(&hits, loops);
+
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_map.c b/tools/testing/selftests/bpf/progs/lpm_trie_map.c
new file mode 100644
index 000000000000..6e60d686b664
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie_map.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define MAX_ENTRIES 100000000
+
+struct trie_key {
+ __u32 prefixlen;
+ __u32 data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct trie_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+} trie_free_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/map_excl.c b/tools/testing/selftests/bpf/progs/map_excl.c
new file mode 100644
index 000000000000..d461684728e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_excl.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Google LLC. */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} excl_map SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int should_have_access(void *ctx)
+{
+ int key = 0, value = 0xdeadbeef;
+
+ bpf_map_update_elem(&excl_map, &key, &value, 0);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int should_not_have_access(void *ctx)
+{
+ int key = 0, value = 0xdeadbeef;
+
+ bpf_map_update_elem(&excl_map, &key, &value, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
index 4f94c971ae86..3b984b6ae7c0 100644
--- a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
+++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
@@ -8,8 +8,8 @@
SEC("tp_btf/sys_enter")
__success
__log_level(2)
-__msg("r8 = *(u64 *)(r7 +0) ; R7_w=ptr_nameidata(off={{[0-9]+}}) R8_w=rdonly_untrusted_mem(sz=0)")
-__msg("r9 = *(u8 *)(r8 +0) ; R8_w=rdonly_untrusted_mem(sz=0) R9_w=scalar")
+__msg("r8 = *(u64 *)(r7 +0) ; R7=ptr_nameidata(off={{[0-9]+}}) R8=rdonly_untrusted_mem(sz=0)")
+__msg("r9 = *(u8 *)(r8 +0) ; R8=rdonly_untrusted_mem(sz=0) R9=scalar")
int btf_id_to_ptr_mem(void *ctx)
{
struct task_struct *task;
diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c
index 098ef970fac1..b05565d1db0d 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_search.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_search.c
@@ -183,7 +183,7 @@ long test_##op##_spinlock_##dolock(void *ctx) \
}
/*
- * Use a spearate MSG macro instead of passing to TEST_XXX(..., MSG)
+ * Use a separate MSG macro instead of passing to TEST_XXX(..., MSG)
* to ensure the message itself is not in the bpf prog lineinfo
* which the verifier includes in its log.
* Otherwise, the test_loader will incorrectly match the prog lineinfo
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/stacktrace_map.c
index 47568007b668..0c77df05be7f 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/stacktrace_map.c
@@ -50,6 +50,7 @@ struct sched_switch_args {
int next_prio;
};
+__u32 stack_id;
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
@@ -64,6 +65,7 @@ int oncpu(struct sched_switch_args *ctx)
/* The size of stackmap and stackid_hmap should be the same */
key = bpf_get_stackid(ctx, &stackmap, 0);
if ((int)key >= 0) {
+ stack_id = key;
bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
stack_p = bpf_map_lookup_elem(&stack_amap, &key);
if (stack_p)
diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c
index 35790897dc87..4a5bd852f10c 100644
--- a/tools/testing/selftests/bpf/progs/stream.c
+++ b/tools/testing/selftests/bpf/progs/stream.c
@@ -5,6 +5,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
struct arr_elem {
struct bpf_res_spin_lock lock;
@@ -17,10 +18,29 @@ struct {
__type(value, struct arr_elem);
} arrmap SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1); /* number of pages */
+} arena SEC(".maps");
+
+struct elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
#define ENOSPC 28
#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
int size;
+u64 fault_addr;
+void *arena_ptr;
SEC("syscall")
__success __retval(0)
@@ -37,7 +57,15 @@ int stream_exhaust(void *ctx)
}
SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__arch_s390x
__success __retval(0)
+__stderr("ERROR: Timeout detected for may_goto instruction")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
int stream_cond_break(void *ctx)
{
while (can_loop)
@@ -47,6 +75,15 @@ int stream_cond_break(void *ctx)
SEC("syscall")
__success __retval(0)
+__stderr("ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock")
+__stderr("{{Attempted lock = (0x[0-9a-fA-F]+)\n"
+"Total held locks = 1\n"
+"Held lock\\[ 0\\] = \\1}}")
+__stderr("...")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
int stream_deadlock(void *ctx)
{
struct bpf_res_spin_lock *lock, *nlock;
@@ -76,4 +113,125 @@ int stream_syscall(void *ctx)
return 0;
}
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_write_fault(void *ctx)
+{
+ struct bpf_arena *ptr = (void *)&arena;
+ u64 user_vm_start;
+
+ /* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
+ * triggers bounds checking since the map definition is smaller than struct
+ * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
+ * bounds analysis
+ */
+ barrier_var(ptr);
+ user_vm_start = ptr->user_vm_start;
+ fault_addr = user_vm_start + 0x7fff;
+ bpf_addr_space_cast(user_vm_start, 0, 1);
+ asm volatile (
+ "r1 = %0;"
+ "r2 = 1;"
+ "*(u32 *)(r1 + 0x7fff) = r2;"
+ :
+ : "r" (user_vm_start)
+ : "r1", "r2"
+ );
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena READ access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_read_fault(void *ctx)
+{
+ struct bpf_arena *ptr = (void *)&arena;
+ u64 user_vm_start;
+
+ /* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
+ * triggers bounds checking since the map definition is smaller than struct
+ * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
+ * bounds analysis
+ */
+ barrier_var(ptr);
+ user_vm_start = ptr->user_vm_start;
+ fault_addr = user_vm_start + 0x7fff;
+ bpf_addr_space_cast(user_vm_start, 0, 1);
+ asm volatile (
+ "r1 = %0;"
+ "r1 = *(u32 *)(r1 + 0x7fff);"
+ :
+ : "r" (user_vm_start)
+ : "r1"
+ );
+ return 0;
+}
+
+static __noinline void subprog(void)
+{
+ int __arena *addr = (int __arena *)0xdeadbeef;
+
+ arena_ptr = &arena;
+ *addr = 1;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_subprog_fault(void *ctx)
+{
+ subprog();
+ return 0;
+}
+
+static __noinline int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ int __arena *addr = (int __arena *)0xdeadbeef;
+
+ arena_ptr = &arena;
+ *addr = 1;
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_callback_fault(void *ctx)
+{
+ struct bpf_timer *arr_timer;
+
+ arr_timer = bpf_map_lookup_elem(&array, &(int){0});
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, 1);
+ bpf_timer_set_callback(arr_timer, timer_cb);
+ bpf_timer_start(arr_timer, 0, 0);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
index 53af438bd998..99d72c68f76a 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
@@ -31,6 +31,8 @@ char *invalid_kern_ptr = (char *)-1;
/* Passing NULL to string kfuncs (treated as a userspace ptr) */
SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); }
@@ -49,6 +51,8 @@ SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return
/* Passing userspace ptr to string kfuncs */
SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); }
@@ -69,6 +73,8 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { re
/* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */
SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); }
SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); }
+SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); }
SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); }
SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); }
SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); }
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
index 89fb4669b0e9..e41cc5601994 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
@@ -7,6 +7,7 @@
char long_str[XATTR_SIZE_MAX + 1];
SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); }
+SEC("syscall") int test_strcasecmp_too_long(void *ctx) { return bpf_strcasecmp(long_str, long_str); }
SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); }
SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); }
SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); }
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
index a47690174e0e..2e3498e37b9c 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
@@ -12,6 +12,11 @@ char str[] = "hello world";
/* Functional tests */
__test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); }
__test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); }
+__test(0) int test_strcasecmp_eq1(void *ctx) { return bpf_strcasecmp(str, "hello world"); }
+__test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO WORLD"); }
+__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); }
+__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); }
+__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); }
__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); }
__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); }
__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); }
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c
new file mode 100644
index 000000000000..ad8bb546c9bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id)
+int st_ops_id;
+
+int test_pid;
+int test_err;
+
+#define MAP1_MAGIC 1234
+
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ return MAP1_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP1_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP1_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c
new file mode 100644
index 000000000000..cea1a2f4b62f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id)
+int st_ops_id;
+
+int test_pid;
+int test_err;
+
+#define MAP2_MAGIC 4567
+
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ return MAP2_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP2_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP2_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
index 36386b3c23a1..2b98b7710816 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
@@ -9,7 +9,7 @@ void bpf_task_release(struct task_struct *p) __ksym;
/* This test struct_ops BPF programs returning referenced kptr. The verifier should
* allow a referenced kptr or a NULL pointer to be returned. A referenced kptr to task
- * here is acquried automatically as the task argument is tagged with "__ref".
+ * here is acquired automatically as the task argument is tagged with "__ref".
*/
SEC("struct_ops/test_return_ref_kptr")
struct task_struct *BPF_PROG(kptr_return, int dummy,
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
index 76dcb6089d7f..9c0a65466356 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
@@ -9,7 +9,7 @@ __attribute__((nomerge)) extern void bpf_task_release(struct task_struct *p) __k
/* This is a test BPF program that uses struct_ops to access a referenced
* kptr argument. This is a test for the verifier to ensure that it
- * 1) recongnizes the task as a referenced object (i.e., ref_obj_id > 0), and
+ * 1) recognizes the task as a referenced object (i.e., ref_obj_id > 0), and
* 2) the same reference can be acquired from multiple paths as long as it
* has not been released.
*/
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
index 327ca395e860..d556b19413d7 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"
+#include "bpf_test_utils.h"
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
@@ -24,6 +25,8 @@ int entry(struct __sk_buff *skb)
{
int ret = 1;
+ clobber_regs_stack();
+
count++;
subprog_tail(skb);
subprog_tail(skb);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
index 72fd0d577506..ae94c9c70ab7 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_test_utils.h"
int classifier_0(struct __sk_buff *skb);
int classifier_1(struct __sk_buff *skb);
@@ -60,6 +61,8 @@ int tailcall_bpf2bpf_hierarchy_2(struct __sk_buff *skb)
{
int ret = 0;
+ clobber_regs_stack();
+
subprog_tail0(skb);
subprog_tail1(skb);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
index a7fb91cb05b7..56b6b0099840 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_test_utils.h"
int classifier_0(struct __sk_buff *skb);
@@ -53,6 +54,8 @@ int tailcall_bpf2bpf_hierarchy_3(struct __sk_buff *skb)
{
int ret = 0;
+ clobber_regs_stack();
+
bpf_tail_call_static(skb, &jmp_table0, 0);
__sink(ret);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
index c87f9ca982d3..5261395713cd 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
@@ -4,6 +4,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_test_utils.h"
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
@@ -24,6 +25,8 @@ int subprog_tail(void *ctx)
SEC("fentry/dummy")
int BPF_PROG(fentry, struct sk_buff *skb)
{
+ clobber_regs_stack();
+
count++;
subprog_tail(ctx);
subprog_tail(ctx);
diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
new file mode 100644
index 000000000000..432fff2af844
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_DATA_BPF_H
+#define __TASK_LOCAL_DATA_BPF_H
+
+/*
+ * Task local data is a library that facilitates sharing per-task data
+ * between user space and bpf programs.
+ *
+ *
+ * USAGE
+ *
+ * A TLD, an entry of data in task local data, first needs to be created by the
+ * user space. This is done by calling the user space API, TLD_DEFINE_KEY() or
+ * tld_create_key(), with the name of the TLD and the size.
+ *
+ * TLD_DEFINE_KEY(prio, "priority", sizeof(int));
+ *
+ * or
+ *
+ * void func_call(...) {
+ * tld_key_t prio, in_cs;
+ *
+ * prio = tld_create_key("priority", sizeof(int));
+ * in_cs = tld_create_key("in_critical_section", sizeof(bool));
+ * ...
+ *
+ * A key associated with the TLD, which has an opaque type tld_key_t, will be
+ * initialized or returned. It can be used to get a pointer to the TLD in the
+ * user space by calling tld_get_data().
+ *
+ * In a bpf program, tld_object_init() first needs to be called to initialize a
+ * tld_object on the stack. Then, TLDs can be accessed by calling tld_get_data().
+ * The API will try to fetch the key by the name and use it to locate the data.
+ * A pointer to the TLD will be returned. It also caches the key in a task local
+ * storage map, tld_key_map, whose value type, struct tld_keys, must be defined
+ * by the developer.
+ *
+ * struct tld_keys {
+ * tld_key_t prio;
+ * tld_key_t in_cs;
+ * };
+ *
+ * SEC("struct_ops")
+ * void prog(struct task_struct *task, ...)
+ * {
+ * struct tld_object tld_obj;
+ * int err, *p;
+ *
+ * err = tld_object_init(task, &tld_obj);
+ * if (err)
+ * return;
+ *
+ * p = tld_get_data(&tld_obj, prio, "priority", sizeof(int));
+ * if (p)
+ * // do something depending on *p
+ * }
+ */
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
+#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)
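+/* e.g. TLD_ROUND_UP(13, 8) == 16; every TLD is padded to a multiple of 8 bytes */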
+
+#define TLD_MAX_DATA_CNT (__PAGE_SIZE / sizeof(struct tld_metadata) - 1)
+
+#ifndef TLD_NAME_LEN
+#define TLD_NAME_LEN 62
+#endif
+
+#ifndef TLD_KEY_MAP_CREATE_RETRY
+#define TLD_KEY_MAP_CREATE_RETRY 10
+#endif
+
+typedef struct {
+ __s16 off;
+} tld_key_t;
+
+struct tld_metadata {
+ char name[TLD_NAME_LEN];
+ __u16 size;
+};
+
+struct tld_meta_u {
+ __u8 cnt;
+ __u16 size;
+ struct tld_metadata metadata[TLD_MAX_DATA_CNT];
+};
+
+struct tld_data_u {
+ __u64 start; /* offset of tld_data_u->data in a page */
+ char data[__PAGE_SIZE - sizeof(__u64)];
+};
+
+struct tld_map_value {
+ struct tld_data_u __uptr *data;
+ struct tld_meta_u __uptr *meta;
+};
+
+typedef struct tld_uptr_dummy {
+ struct tld_data_u data[0];
+ struct tld_meta_u meta[0];
+} *tld_uptr_dummy_t;
+
+struct tld_object {
+ struct tld_map_value *data_map;
+ struct tld_keys *key_map;
+ /*
+ * Force the compiler to generate the actual definition of tld_meta_u
+ * and tld_data_u in BTF. Without it, tld_meta_u and tld_data_u will
+ * be BTF_KIND_FWD.
+ */
+ tld_uptr_dummy_t dummy[0];
+};
+
+/*
+ * Map value of tld_key_map for caching keys. Must be defined by the developer.
+ * Members should be of type tld_key_t and passed as the 2nd argument of tld_get_data().
+ */
+struct tld_keys;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tld_map_value);
+} tld_data_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tld_keys);
+} tld_key_map SEC(".maps");
+
+/**
+ * tld_object_init() - Initialize a tld_object.
+ *
+ * @task: The task_struct of the target task
+ * @tld_obj: A pointer to a tld_object to be initialized
+ *
+ * Return 0 on success; -ENODATA if the user space did not initialize task local data
+ * for the current task through tld_get_data(); -ENOMEM if the creation of tld_key_map
+ * fails
+ */
+__attribute__((unused))
+static int tld_object_init(struct task_struct *task, struct tld_object *tld_obj)
+{
+ int i;
+
+ tld_obj->data_map = bpf_task_storage_get(&tld_data_map, task, 0, 0);
+ if (!tld_obj->data_map)
+ return -ENODATA;
+
+ bpf_for(i, 0, TLD_KEY_MAP_CREATE_RETRY) {
+ tld_obj->key_map = bpf_task_storage_get(&tld_key_map, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (likely(tld_obj->key_map))
+ break;
+ }
+ if (!tld_obj->key_map)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * Return the offset of the TLD if @name is found. Otherwise, return the current
+ * TLD count as a non-positive value so that the next tld_get_data() can either
+ * skip fetching the key when no new TLD has been added, or start comparing
+ * names from the first newly added TLD.
+ */
+__attribute__((unused))
+static int __tld_fetch_key(struct tld_object *tld_obj, const char *name, int i_start)
+{
+ struct tld_metadata *metadata;
+ int i, cnt, start, off = 0;
+
+ if (!tld_obj->data_map || !tld_obj->data_map->data || !tld_obj->data_map->meta)
+ return 0;
+
+ start = tld_obj->data_map->data->start;
+ cnt = tld_obj->data_map->meta->cnt;
+ metadata = tld_obj->data_map->meta->metadata;
+
+ bpf_for(i, 0, cnt) {
+ if (i >= TLD_MAX_DATA_CNT)
+ break;
+
+ if (i >= i_start && !bpf_strncmp(metadata[i].name, TLD_NAME_LEN, name))
+ return start + off;
+
+ off += TLD_ROUND_UP(metadata[i].size, 8);
+ }
+
+ return -cnt;
+}
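+
+/*
+ * Example layout (illustrative sizes): with TLDs "a" (4 bytes), "b" (13 bytes)
+ * and "c" (8 bytes), their offsets within tld_data_u->data are 0, 8 and 24,
+ * since every TLD is padded to a multiple of 8. Looking up "c" then returns
+ * start + 24, while looking up a name that does not exist returns -3
+ * (minus the current TLD count).
+ */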
+
+/**
+ * tld_get_data() - Retrieve a pointer to the TLD associated with the name.
+ *
+ * @tld_obj: A pointer to a valid tld_object initialized by tld_object_init()
+ * @key: The cached key of the TLD in tld_key_map
+ * @name: The name of the key associated with a TLD
+ * @size: The size of the TLD. Must be a known constant value
+ *
+ * Return a pointer to the TLD associated with @name, or NULL if it is not found
+ * or @size is too big. @key is used to cache the key when the TLD is found, to
+ * speed up subsequent calls. It should be defined by the developer as a member
+ * of struct tld_keys with type tld_key_t.
+ */
+#define tld_get_data(tld_obj, key, name, size) \
+ ({ \
+ void *data = NULL, *_data = (tld_obj)->data_map->data; \
+ long off = (tld_obj)->key_map->key.off; \
+ int cnt; \
+ \
+ if (likely(_data)) { \
+ if (likely(off > 0)) { \
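+ /* fast path: key.off already caches the TLD's offset, no name lookup needed */ \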
+ barrier_var(off); \
+ if (likely(off < __PAGE_SIZE - size)) \
+ data = _data + off; \
+ } else { \
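+ /* slow path: if new TLDs were added, look up the name again and cache the offset */ \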
+ cnt = -(off); \
+ if (likely((tld_obj)->data_map->meta) && \
+ cnt < (tld_obj)->data_map->meta->cnt) { \
+ off = __tld_fetch_key(tld_obj, name, cnt); \
+ (tld_obj)->key_map->key.off = off; \
+ \
+ if (likely(off < __PAGE_SIZE - size)) { \
+ barrier_var(off); \
+ if (off > 0) \
+ data = _data + off; \
+ } \
+ } \
+ } \
+ } \
+ data; \
+ })
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c
new file mode 100644
index 000000000000..23217f06a3ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+const void *user_ptr = NULL;
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} lrumap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ struct elem *work = value;
+
+ bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0);
+ return 0;
+}
+
+int key = 0;
+
+SEC("perf_event")
+int oncpu_hash_map(struct pt_regs *args)
+{
+ struct elem empty_work = { .data = { 0 } };
+ struct elem *work;
+ struct task_struct *task;
+ int err;
+
+ task = bpf_get_current_task_btf();
+ err = bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST);
+ if (err)
+ return 0;
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work)
+ return 0;
+
+ bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+int oncpu_array_map(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+int oncpu_lru_map(struct pt_regs *args)
+{
+ struct elem empty_work = { .data = { 0 } };
+ struct elem *work;
+ struct task_struct *task;
+ int err;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&lrumap, &key);
+ if (work)
+ return 0;
+ err = bpf_map_update_elem(&lrumap, &key, &empty_work, BPF_NOEXIST);
+ if (err)
+ return 0;
+ work = bpf_map_lookup_elem(&lrumap, &key);
+ if (!work || work->data[0])
+ return 0;
+ bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c
new file mode 100644
index 000000000000..77fe8f28facd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work_fail.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+const void *user_ptr = NULL;
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ struct elem *work = value;
+
+ bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0);
+ return 0;
+}
+
+int key = 0;
+
+SEC("perf_event")
+__failure __msg("doesn't match map pointer in R3")
+int mismatch_map(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("arg#1 doesn't point to a map value")
+int no_map_task_work(struct pt_regs *args)
+{
+ struct task_struct *task;
+ struct bpf_task_work tw;
+
+ task = bpf_get_current_task_btf();
+ bpf_task_work_schedule_resume(task, &tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("Possibly NULL pointer passed to trusted arg1")
+int task_work_null(struct pt_regs *args)
+{
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ bpf_task_work_schedule_resume(task, NULL, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("Possibly NULL pointer passed to trusted arg2")
+int map_null(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c
new file mode 100644
index 000000000000..90fca06fff56
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work_stress.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+#define ENTRIES 128
+
+char _license[] SEC("license") = "GPL";
+
+__u64 callback_scheduled = 0;
+__u64 callback_success = 0;
+__u64 schedule_error = 0;
+__u64 delete_success = 0;
+
+struct elem {
+ __u32 count;
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, ENTRIES);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ __sync_fetch_and_add(&callback_success, 1);
+ return 0;
+}
+
+SEC("syscall")
+int schedule_task_work(void *ctx)
+{
+ struct elem empty_work = {.count = 0};
+ struct elem *work;
+ int key = 0, err;
+
+ key = bpf_ktime_get_ns() % ENTRIES;
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work) {
+ bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST);
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work)
+ return 0;
+ }
+ err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap,
+ process_work, NULL);
+ if (err)
+ __sync_fetch_and_add(&schedule_error, 1);
+ else
+ __sync_fetch_and_add(&callback_scheduled, 1);
+ return 0;
+}
+
+SEC("syscall")
+int delete_task_work(void *ctx)
+{
+ int key = 0, err;
+
+ key = bpf_get_prandom_u32() % ENTRIES;
+ err = bpf_map_delete_elem(&hmap, &key);
+ if (!err)
+ __sync_fetch_and_add(&delete_success, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index f344c6835e84..26a53e54b8fa 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -22,6 +22,7 @@
#include "bpf_compiler.h"
#include "test_cls_redirect.h"
+#include "bpf_misc.h"
#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
@@ -31,9 +32,6 @@
#define INLINING __always_inline
#endif
-#define offsetofend(TYPE, MEMBER) \
- (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
-
#define IP_OFFSET_MASK (0x1FFF)
#define IP_MF (0x2000)
@@ -129,7 +127,7 @@ typedef uint8_t *net_ptr __attribute__((align_value(8)));
typedef struct buf {
struct __sk_buff *skb;
net_ptr head;
- /* NB: tail musn't have alignment other than 1, otherwise
+ /* NB: tail mustn't have alignment other than 1, otherwise
* LLVM will go and eliminate code, e.g. when checking packet lengths.
*/
uint8_t *const tail;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
index d0f7670351e5..dfd4a2710391 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -494,7 +494,7 @@ static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_header
*offset += sizeof(*next_hop);
- /* Skip the remainig next hops (may be zero). */
+ /* Skip the remaining next hops (may be zero). */
return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1);
}
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index abb7344b531f..5edf3cdc213d 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -1,9 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
-#include <stdbool.h>
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_devmap.c b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c
new file mode 100644
index 000000000000..c855f8f87eff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap2 SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_task_local_data.c b/tools/testing/selftests/bpf/progs/test_task_local_data.c
new file mode 100644
index 000000000000..fffafc013044
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_local_data.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+#include "task_local_data.bpf.h"
+
+struct tld_keys {
+ tld_key_t value0;
+ tld_key_t value1;
+ tld_key_t value2;
+ tld_key_t value_not_exist;
+};
+
+struct test_tld_struct {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+ __u64 d;
+};
+
+int test_value0;
+int test_value1;
+struct test_tld_struct test_value2;
+
+SEC("syscall")
+int task_main(void *ctx)
+{
+ struct tld_object tld_obj;
+ struct test_tld_struct *struct_p;
+ struct task_struct *task;
+ int err, *int_p;
+
+ task = bpf_get_current_task_btf();
+ err = tld_object_init(task, &tld_obj);
+ if (err)
+ return 1;
+
+ int_p = tld_get_data(&tld_obj, value0, "value0", sizeof(int));
+ if (int_p)
+ test_value0 = *int_p;
+ else
+ return 2;
+
+ int_p = tld_get_data(&tld_obj, value1, "value1", sizeof(int));
+ if (int_p)
+ test_value1 = *int_p;
+ else
+ return 3;
+
+ struct_p = tld_get_data(&tld_obj, value2, "value2", sizeof(struct test_tld_struct));
+ if (struct_p)
+ test_value2 = *struct_p;
+ else
+ return 4;
+
+ int_p = tld_get_data(&tld_obj, value_not_exist, "value_not_exist", sizeof(int));
+ if (int_p)
+ return 5;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
index 5f4e87ee949a..1ecdf4c54de4 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
@@ -14,10 +14,7 @@
#include <bpf/bpf_endian.h>
#define BPF_PROG_TEST_TCP_HDR_OPTIONS
#include "test_tcp_hdr_options.h"
-
-#ifndef sizeof_field
-#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
-#endif
+#include "bpf_misc.h"
__u8 test_kind = TCPOPT_EXP;
__u16 test_magic = 0xeB9F;
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index 540181c115a8..ef00d38b0a8d 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -23,7 +23,6 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(max_entries, 2);
__type(key, int);
__type(value, __u32);
} perf_event_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c
index 896c88a4960d..12f4065fca20 100644
--- a/tools/testing/selftests/bpf/progs/test_uprobe.c
+++ b/tools/testing/selftests/bpf/progs/test_uprobe.c
@@ -59,3 +59,41 @@ int BPF_UPROBE(test4)
test4_result = 1;
return 0;
}
+
+#if defined(__TARGET_ARCH_x86)
+struct pt_regs regs;
+
+SEC("uprobe")
+int BPF_UPROBE(test_regs_change)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ ctx->ax = regs.ax;
+ ctx->cx = regs.cx;
+ ctx->dx = regs.dx;
+ ctx->r8 = regs.r8;
+ ctx->r9 = regs.r9;
+ ctx->r10 = regs.r10;
+ ctx->r11 = regs.r11;
+ ctx->di = regs.di;
+ ctx->si = regs.si;
+ return 0;
+}
+
+unsigned long ip;
+
+SEC("uprobe")
+int BPF_UPROBE(test_regs_change_ip)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ ctx->ip = ip;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
index 096488f47fbc..a78c87537b07 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -107,4 +107,35 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
return 0;
}
+int usdt_sib_called;
+u64 usdt_sib_cookie;
+int usdt_sib_arg_cnt;
+int usdt_sib_arg_ret;
+short usdt_sib_arg;
+int usdt_sib_arg_size;
+
+/*
+ * usdt_sib is only tested on x86-related architectures, so it requires
+ * manual attach, since auto-attach would panic the tests on other architectures
+ */
+SEC("usdt")
+int usdt_sib(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt_sib_called, 1);
+
+ usdt_sib_cookie = bpf_usdt_cookie(ctx);
+ usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
+ usdt_sib_arg = (short)tmp;
+ usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c
new file mode 100644
index 000000000000..814e2a980e97
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+SEC("xdp")
+int xdp_devmap(struct xdp_md *ctx)
+{
+ return ctx->egress_ifindex;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __array(values, int (void *));
+} xdp_map SEC(".maps") = {
+ .values = {
+ [0] = (void *)&xdp_devmap,
+ },
+};
+
+SEC("xdp")
+int xdp_entry(struct xdp_md *ctx)
+{
+ bpf_tail_call(ctx, &xdp_map, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index fcf6ca14f2ea..d79cb74b571e 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -1,8 +1,11 @@
+#include <stdbool.h>
#include <linux/bpf.h>
+#include <linux/errno.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_kfuncs.h"
#define META_SIZE 32
@@ -23,6 +26,8 @@ struct {
__uint(value_size, META_SIZE);
} test_result SEC(".maps");
+bool test_pass;
+
SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
@@ -40,6 +45,231 @@ int ing_cls(struct __sk_buff *ctx)
return TC_ACT_SHOT;
}
+/* Read from metadata using bpf_dynptr_read helper */
+SEC("tc")
+int ing_cls_dynptr_read(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ const __u32 zero = 0;
+ __u8 *dst;
+
+ dst = bpf_map_lookup_elem(&test_result, &zero);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0);
+
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using bpf_dynptr_write helper */
+SEC("tc")
+int ing_cls_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, META_SIZE, 0);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read from metadata using read-only dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ const __u32 zero = 0;
+ __u8 *dst, *src;
+
+ dst = bpf_map_lookup_elem(&test_result, &zero);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(dst, src, META_SIZE);
+
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using writable dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src, *dst;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(dst, src, META_SIZE);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read skb metadata in chunks from various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ const __u32 chunk_len = META_SIZE / 4;
+ const __u32 zero = 0;
+ __u8 *dst, *src;
+
+ dst = bpf_map_lookup_elem(&test_result, &zero);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ /* 1. Regular read */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 2. Read from an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 3. Read at an offset */
+ bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0);
+ dst += chunk_len;
+
+ /* 4. Read from a slice starting at an offset */
+ src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!src)
+ return TC_ACT_SHOT;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ return TC_ACT_SHOT;
+}
+
+/* Write skb metadata in chunks at various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx)
+{
+ const __u32 chunk_len = META_SIZE / 4;
+ __u8 payload[META_SIZE];
+ struct bpf_dynptr meta;
+ __u8 *dst, *src;
+
+ bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload));
+ src = payload;
+
+ /* 1. Regular write */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 2. Write to an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 3. Write at an offset */
+ bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 4. Write to a slice starting at an offset */
+ dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!dst)
+ return TC_ACT_SHOT;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Pass an OOB offset to dynptr read, write, adjust, slice. */
+SEC("tc")
+int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ __u8 md, *p;
+ int err;
+
+ err = bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (err)
+ goto fail;
+
+ /* read offset OOB */
+ err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* write offset OOB */
+ err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* adjust end offset OOB */
+ err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* adjust start offset OOB */
+ err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* slice offset OOB */
+ p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ /* slice rdwr offset OOB */
+ p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ return TC_ACT_UNSPEC;
+fail:
+ return TC_ACT_SHOT;
+}
+
+/* Reserve and clear space for metadata but don't populate it */
+SEC("xdp")
+int ing_xdp_zalloc_meta(struct xdp_md *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+ __u8 *meta;
+ int ret;
+
+ /* Drop any non-test packets */
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ return XDP_DROP;
+ if (eth->h_proto != 0)
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
+ if (ret < 0)
+ return XDP_DROP;
+
+ meta = ctx_ptr(ctx, data_meta);
+ if (meta + META_SIZE > ctx_ptr(ctx, data))
+ return XDP_DROP;
+
+ __builtin_memset(meta, 0, META_SIZE);
+
+ return XDP_PASS;
+}
+
SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
@@ -73,4 +303,193 @@ int ing_xdp(struct xdp_md *ctx)
return XDP_PASS;
}
+/*
+ * Check that skb->data_meta..skb->data is empty if prog writes to packet
+ * _payload_ using packet pointers. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect no metadata */
+ if (ctx->data_meta != ctx->data)
+ goto out;
+
+ /* Packet write to trigger unclone in prologue */
+ eth->h_proto = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb->data_meta..skb->data is empty if prog writes to packet
+ * _metadata_ using packet pointers. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+ __u8 *md = ctx_ptr(ctx, data_meta);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ if (md + 1 > ctx_ptr(ctx, data)) {
+ /* Expect no metadata */
+ test_pass = true;
+ } else {
+ /* Metadata write to trigger unclone in prologue */
+ *md = 42;
+ }
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is writable but empty if prog writes to packet
+ * _payload_ using a dynptr slice. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect no metadata */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
+ goto out;
+
+ /* Packet write to trigger unclone in prologue */
+ eth->h_proto = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is writable but empty if prog writes to packet
+ * _metadata_ using a dynptr slice. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect no metadata */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
+ goto out;
+
+ /* Metadata write to trigger unclone in prologue */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (md)
+ *md = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is read-only before prog writes to packet payload
+ * using dynptr_write helper. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect read-only metadata before unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
+ goto out;
+
+ /* Helper write to payload will unclone the packet */
+ bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);
+
+ /* Expect no metadata after unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0)
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is read-only if prog writes to packet
+ * metadata using dynptr_write helper. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect read-only metadata */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
+ goto out;
+
+ /* Metadata write. Expect failure. */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL)
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c
new file mode 100644
index 000000000000..c41a21413eaa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+int xdpf_sz;
+int sinfo_sz;
+int data_len;
+int pull_len;
+
+#define XDP_PACKET_HEADROOM 256
+
+SEC("xdp.frags")
+int xdp_find_sizes(struct xdp_md *ctx)
+{
+ xdpf_sz = sizeof(struct xdp_frame);
+ sinfo_sz = __PAGE_SIZE - XDP_PACKET_HEADROOM -
+ (ctx->data_end - ctx->data);
+
+ return XDP_PASS;
+}
+
+SEC("xdp.frags")
+int xdp_pull_data_prog(struct xdp_md *ctx)
+{
+ __u8 *data_end = (void *)(long)ctx->data_end;
+ __u8 *data = (void *)(long)ctx->data;
+ __u8 *val_p;
+ int err;
+
+ if (data_len != data_end - data)
+ return XDP_DROP;
+
+ err = bpf_xdp_pull_data(ctx, pull_len);
+ if (err)
+ return XDP_DROP;
+
+ val_p = (void *)(long)ctx->data + 1024;
+ if (val_p + 1 > (void *)(long)ctx->data_end)
+ return XDP_DROP;
+
+ if (*val_p != 0xbb)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer_interrupt.c b/tools/testing/selftests/bpf/progs/timer_interrupt.c
new file mode 100644
index 000000000000..19180a455f40
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_interrupt.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define CLOCK_MONOTONIC 1
+
+int preempt_count;
+int in_interrupt;
+int in_interrupt_cb;
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+static int timer_in_interrupt(void *map, int *key, struct bpf_timer *timer)
+{
+ preempt_count = get_preempt_count();
+ in_interrupt_cb = bpf_in_interrupt();
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test_timer_interrupt)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&array, &key);
+ if (!timer)
+ return 0;
+
+ in_interrupt = bpf_in_interrupt();
+ bpf_timer_init(timer, &array, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(timer, timer_in_interrupt);
+ bpf_timer_start(timer, 0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c
index c435a3a8328a..d460732e2023 100644
--- a/tools/testing/selftests/bpf/progs/tracing_struct.c
+++ b/tools/testing/selftests/bpf/progs/tracing_struct.c
@@ -18,6 +18,18 @@ struct bpf_testmod_struct_arg_3 {
int b[];
};
+union bpf_testmod_union_arg_1 {
+ char a;
+ short b;
+ struct bpf_testmod_struct_arg_1 arg;
+};
+
+union bpf_testmod_union_arg_2 {
+ int a;
+ long b;
+ struct bpf_testmod_struct_arg_2 arg;
+};
+
long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs;
__u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3;
long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret;
@@ -26,6 +38,9 @@ long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret;
long t5_ret;
int t6;
+long ut1_a_a, ut1_b, ut1_c;
+long ut2_a, ut2_b_a, ut2_b_b;
+
SEC("fentry/bpf_testmod_test_struct_arg_1")
int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c)
{
@@ -130,4 +145,22 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a)
return 0;
}
+SEC("fexit/bpf_testmod_test_union_arg_1")
+int BPF_PROG2(test_union_arg_1, union bpf_testmod_union_arg_1, a, int, b, int, c)
+{
+ ut1_a_a = a.arg.a;
+ ut1_b = b;
+ ut1_c = c;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_union_arg_2")
+int BPF_PROG2(test_union_arg_2, int, a, union bpf_testmod_union_arg_2, b)
+{
+ ut2_a = a;
+ ut2_b_a = b.arg.a;
+ ut2_b_b = b.arg.b;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 044a6d78923e..3d5f30c29ae3 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -97,6 +97,12 @@ int bench_trigger_kprobe_multi(void *ctx)
return 0;
}
+SEC("?kprobe.multi/bpf_get_numa_node_id")
+int bench_kprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
SEC("?kretprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kretprobe_multi(void *ctx)
{
@@ -104,6 +110,12 @@ int bench_trigger_kretprobe_multi(void *ctx)
return 0;
}
+SEC("?kretprobe.multi/bpf_get_numa_node_id")
+int bench_kretprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
SEC("?fentry/bpf_get_numa_node_id")
int bench_trigger_fentry(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/uretprobe_stack.c b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
index 9fdcf396b8f4..a2951e2f1711 100644
--- a/tools/testing/selftests/bpf/progs/uretprobe_stack.c
+++ b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
@@ -26,8 +26,8 @@ int usdt_len;
SEC("uprobe//proc/self/exe:target_1")
int BPF_UPROBE(uprobe_1)
{
- /* target_1 is recursive wit depth of 2, so we capture two separate
- * stack traces, depending on which occurence it is
+ /* target_1 is recursive with depth of 2, so we capture two separate
+ * stack traces, depending on which occurrence it is
*/
static bool recur = false;
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
index 9dbdf123542d..f19e15400b3e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -240,6 +240,7 @@ int big_alloc2(void *ctx)
return 5;
bpf_arena_free_pages(&arena, (void __arena *)pg, 2);
page[i] = NULL;
+ barrier();
page[i + 1] = NULL;
cond_break;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 87a2c60d86e6..0a72e0228ea9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -926,7 +926,7 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for non const xor src dst")
__success __log_level(2)
-__msg("5: (af) r0 ^= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__msg("5: (af) r0 ^= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
__naked void non_const_xor_src_dst(void)
{
asm volatile (" \
@@ -947,7 +947,7 @@ __naked void non_const_xor_src_dst(void)
SEC("socket")
__description("bounds check for non const or src dst")
__success __log_level(2)
-__msg("5: (4f) r0 |= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__msg("5: (4f) r0 |= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
__naked void non_const_or_src_dst(void)
{
asm volatile (" \
@@ -968,7 +968,7 @@ __naked void non_const_or_src_dst(void)
SEC("socket")
__description("bounds check for non const mul regs")
__success __log_level(2)
-__msg("5: (2f) r0 *= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))")
+__msg("5: (2f) r0 *= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))")
__naked void non_const_mul_regs(void)
{
asm volatile (" \
@@ -1241,7 +1241,7 @@ l0_%=: r0 = 0; \
SEC("tc")
__description("multiply mixed sign bounds. test 1")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))")
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))")
__naked void mult_mixed0_sign(void)
{
asm volatile (
@@ -1264,7 +1264,7 @@ __naked void mult_mixed0_sign(void)
SEC("tc")
__description("multiply mixed sign bounds. test 2")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=smin32=-100,smax=smax32=200)")
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=smin32=-100,smax=smax32=200)")
__naked void mult_mixed1_sign(void)
{
asm volatile (
@@ -1287,7 +1287,7 @@ __naked void mult_mixed1_sign(void)
SEC("tc")
__description("multiply negative bounds")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))")
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))")
__naked void mult_sign_bounds(void)
{
asm volatile (
@@ -1311,7 +1311,7 @@ __naked void mult_sign_bounds(void)
SEC("tc")
__description("multiply bounds that don't cross signed boundary")
__success __log_level(2)
-__msg("r8 *= r6 {{.*}}; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8_w=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))")
+__msg("r8 *= r6 {{.*}}; R6=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))")
__naked void mult_no_sign_crossing(void)
{
asm volatile (
@@ -1331,7 +1331,7 @@ __naked void mult_no_sign_crossing(void)
SEC("tc")
__description("multiplication overflow, result in unbounded reg. test 1")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar()")
+__msg("r6 *= r7 {{.*}}; R6=scalar()")
__naked void mult_unsign_ovf(void)
{
asm volatile (
@@ -1353,7 +1353,7 @@ __naked void mult_unsign_ovf(void)
SEC("tc")
__description("multiplication overflow, result in unbounded reg. test 2")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar()")
+__msg("r6 *= r7 {{.*}}; R6=scalar()")
__naked void mult_sign_ovf(void)
{
asm volatile (
@@ -1376,7 +1376,7 @@ __naked void mult_sign_ovf(void)
SEC("socket")
__description("64-bit addition, all outcomes overflow")
__success __log_level(2)
-__msg("5: (0f) r3 += r3 {{.*}} R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)")
+__msg("5: (0f) r3 += r3 {{.*}} R3=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)")
__retval(0)
__naked void add64_full_overflow(void)
{
@@ -1396,7 +1396,7 @@ __naked void add64_full_overflow(void)
SEC("socket")
__description("64-bit addition, partial overflow, result in unbounded reg")
__success __log_level(2)
-__msg("4: (0f) r3 += r3 {{.*}} R3_w=scalar()")
+__msg("4: (0f) r3 += r3 {{.*}} R3=scalar()")
__retval(0)
__naked void add64_partial_overflow(void)
{
@@ -1416,7 +1416,7 @@ __naked void add64_partial_overflow(void)
SEC("socket")
__description("32-bit addition overflow, all outcomes overflow")
__success __log_level(2)
-__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))")
+__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))")
__retval(0)
__naked void add32_full_overflow(void)
{
@@ -1436,7 +1436,7 @@ __naked void add32_full_overflow(void)
SEC("socket")
__description("32-bit addition, partial overflow, result in unbounded u32 bounds")
__success __log_level(2)
-__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
__retval(0)
__naked void add32_partial_overflow(void)
{
@@ -1456,7 +1456,7 @@ __naked void add32_partial_overflow(void)
SEC("socket")
__description("64-bit subtraction, all outcomes underflow")
__success __log_level(2)
-__msg("6: (1f) r3 -= r1 {{.*}} R3_w=scalar(umin=1,umax=0x8000000000000000)")
+__msg("6: (1f) r3 -= r1 {{.*}} R3=scalar(umin=1,umax=0x8000000000000000)")
__retval(0)
__naked void sub64_full_overflow(void)
{
@@ -1477,7 +1477,7 @@ __naked void sub64_full_overflow(void)
SEC("socket")
__description("64-bit subtraction, partial overflow, result in unbounded reg")
__success __log_level(2)
-__msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()")
+__msg("3: (1f) r3 -= r2 {{.*}} R3=scalar()")
__retval(0)
__naked void sub64_partial_overflow(void)
{
@@ -1496,7 +1496,7 @@ __naked void sub64_partial_overflow(void)
SEC("socket")
__description("32-bit subtraction overflow, all outcomes underflow")
__success __log_level(2)
-__msg("5: (1c) w3 -= w1 {{.*}} R3_w=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))")
+__msg("5: (1c) w3 -= w1 {{.*}} R3=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))")
__retval(0)
__naked void sub32_full_overflow(void)
{
@@ -1517,7 +1517,7 @@ __naked void sub32_full_overflow(void)
SEC("socket")
__description("32-bit subtraction, partial overflow, result in unbounded u32 bounds")
__success __log_level(2)
-__msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__msg("3: (1c) w3 -= w2 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
__retval(0)
__naked void sub32_partial_overflow(void)
{
@@ -1617,7 +1617,7 @@ l0_%=: r0 = 0; \
SEC("socket")
__description("bounds deduction cross sign boundary, positive overlap")
__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
-__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))")
+__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))")
__retval(0)
__naked void bounds_deduct_positive_overlap(void)
{
@@ -1650,7 +1650,7 @@ l0_%=: r0 = 0; \
SEC("socket")
__description("bounds deduction cross sign boundary, two overlaps")
__failure __flag(BPF_F_TEST_REG_INVARIANTS)
-__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)")
+__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)")
__msg("frame pointer is read only")
__naked void bounds_deduct_two_overlaps(void)
{
@@ -1668,4 +1668,45 @@ l0_%=: r0 = 0; \
: __clobber_all);
}
+SEC("socket")
+__description("dead jne branch due to disagreeing tnums")
+__success __log_level(2)
+__naked void jne_disagreeing_tnums(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w0 = w0; \
+ r0 >>= 30; \
+ r0 <<= 30; \
+ r1 = r0; \
+ r1 += 1024; \
+ if r1 != r0 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead jeq branch due to disagreeing tnums")
+__success __log_level(2)
+__naked void jeq_disagreeing_tnums(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w0 = w0; \
+ r0 >>= 30; \
+ r0 <<= 30; \
+ r1 = r0; \
+ r1 += 1024; \
+ if r1 == r0 goto +1; \
+ exit; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
index c258b0722e04..fb4fa465d67c 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
@@ -660,19 +660,24 @@ __naked void may_goto_interaction_x86_64(void)
SEC("raw_tp")
__arch_arm64
-__log_level(4) __msg("stack depth 16")
-/* may_goto counter at -16 */
-__xlated("0: *(u64 *)(r10 -16) =")
-__xlated("1: r1 = 1")
-__xlated("2: call bpf_get_smp_processor_id")
+__log_level(4) __msg("stack depth 24")
+/* may_goto counter at -24 */
+__xlated("0: *(u64 *)(r10 -24) =")
+/* may_goto timestamp at -16 */
+__xlated("1: *(u64 *)(r10 -16) =")
+__xlated("2: r1 = 1")
+__xlated("3: call bpf_get_smp_processor_id")
/* may_goto expansion starts */
-__xlated("3: r11 = *(u64 *)(r10 -16)")
-__xlated("4: if r11 == 0x0 goto pc+3")
-__xlated("5: r11 -= 1")
-__xlated("6: *(u64 *)(r10 -16) = r11")
+__xlated("4: r11 = *(u64 *)(r10 -24)")
+__xlated("5: if r11 == 0x0 goto pc+6")
+__xlated("6: r11 -= 1")
+__xlated("7: if r11 != 0x0 goto pc+2")
+__xlated("8: r11 = -24")
+__xlated("9: call unknown")
+__xlated("10: *(u64 *)(r10 -24) = r11")
/* may_goto expansion ends */
-__xlated("7: *(u64 *)(r10 -8) = r1")
-__xlated("8: exit")
+__xlated("11: *(u64 *)(r10 -8) = r1")
+__xlated("12: exit")
__success
__naked void may_goto_interaction_arm64(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c
index 424463094760..5ebf7d9bcc55 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c
@@ -5,8 +5,6 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
-
SEC("tc")
__description("context stores via BPF_ATOMIC")
__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
@@ -264,4 +262,34 @@ narrow_load("sockops", bpf_sock_ops, skb_hwtstamp);
unaligned_access("flow_dissector", __sk_buff, data);
unaligned_access("netfilter", bpf_nf_ctx, skb);
+#define padding_access(type, ctx, prev_field, sz) \
+ SEC(type) \
+ __description("access on " #ctx " padding after " #prev_field) \
+ __naked void padding_ctx_access_##ctx(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u%[size] *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(size, sz * 8), \
+ __imm_const(off, offsetofend(struct ctx, prev_field)) \
+ : __clobber_all); \
+ }
+
+__failure __msg("invalid bpf_context access")
+padding_access("cgroup/bind4", bpf_sock_addr, msg_src_ip6[3], 4);
+
+__success
+padding_access("sk_lookup", bpf_sk_lookup, remote_port, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("tc", __sk_buff, tstamp_type, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("cgroup/post_bind4", bpf_sock, dst_port, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("sk_reuseport", sk_reuseport_md, hash, 4);
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
index 181da86ba5f0..6630a92b1b47 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -215,7 +215,7 @@ __weak int subprog_untrusted(const volatile struct task_struct *restrict task __
SEC("tp_btf/sys_enter")
__success
__log_level(2)
-__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()")
+__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()")
__msg("Func#1 ('subprog_untrusted') is global and assumed valid.")
__msg("Validating subprog_untrusted() func#1...")
__msg(": R1=untrusted_ptr_task_struct")
@@ -278,7 +278,7 @@ __weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted)
SEC("tp_btf/sys_enter")
__success
__log_level(2)
-__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()")
+__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()")
__msg("Func#1 ('subprog_void_untrusted') is global and assumed valid.")
__msg("Validating subprog_void_untrusted() func#1...")
__msg(": R1=rdonly_untrusted_mem(sz=0)")
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
index 52edee41caf6..c8494b682c31 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_arena_common.h"
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
@@ -10,6 +11,12 @@
defined(__TARGET_ARCH_loongarch)) && \
__clang_major__ >= 18
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1);
+} arena SEC(".maps");
+
SEC("socket")
__description("LDSX, S8")
__success __success_unpriv __retval(-2)
@@ -65,7 +72,7 @@ __naked void ldsx_s32(void)
SEC("socket")
__description("LDSX, S8 range checking, privileged")
__log_level(2) __success __retval(1)
-__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)")
+__msg("R1=scalar(smin=smin32=-128,smax=smax32=127)")
__naked void ldsx_s8_range_priv(void)
{
asm volatile (
@@ -256,6 +263,175 @@ __naked void ldsx_ctx_8(void)
: __clobber_all);
}
+SEC("syscall")
+__description("Arena LDSX Disasm")
+__success
+__arch_x86_64
+__jited("movslq 0x10(%rax,%r12), %r14")
+__jited("movswq 0x18(%rax,%r12), %r14")
+__jited("movsbq 0x20(%rax,%r12), %r14")
+__jited("movslq 0x10(%rdi,%r12), %r15")
+__jited("movswq 0x18(%rdi,%r12), %r15")
+__jited("movsbq 0x20(%rdi,%r12), %r15")
+__arch_arm64
+__jited("add x11, x7, x28")
+__jited("ldrsw x21, [x11, #0x10]")
+__jited("add x11, x7, x28")
+__jited("ldrsh x21, [x11, #0x18]")
+__jited("add x11, x7, x28")
+__jited("ldrsb x21, [x11, #0x20]")
+__jited("add x11, x0, x28")
+__jited("ldrsw x22, [x11, #0x10]")
+__jited("add x11, x0, x28")
+__jited("ldrsh x22, [x11, #0x18]")
+__jited("add x11, x0, x28")
+__jited("ldrsb x22, [x11, #0x20]")
+__naked void arena_ldsx_disasm(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = r0;"
+ "r8 = *(s32 *)(r0 + 16);"
+ "r8 = *(s16 *)(r0 + 24);"
+ "r8 = *(s8 *)(r0 + 32);"
+ "r9 = *(s32 *)(r1 + 16);"
+ "r9 = *(s16 *)(r1 + 24);"
+ "r9 = *(s8 *)(r1 + 32);"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX Exception")
+__success __retval(0)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_exception(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r0 = 0xdeadbeef;"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fe;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "r0 = *(s8 *)(r0 + 0);"
+ "exit;"
+ :
+ : __imm_addr(arena)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S8")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s8(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s8 *)(r0 + 0);"
+#else
+ "r0 = *(s8 *)(r0 + 7);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S16")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s16(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fffe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s16 *)(r0 + 0);"
+#else
+ "r0 = *(s16 *)(r0 + 6);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S32")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s32(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0xfffffffe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s32 *)(r0 + 0);"
+#else
+ "r0 = *(s32 *)(r0 + 4);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+/* to retain debug info for BTF generation */
+void kfunc_root(void)
+{
+ bpf_arena_alloc_pages(0, 0, 0, 0, 0);
+}
+
#else
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
new file mode 100644
index 000000000000..c0e808509268
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, long long);
+} map SEC(".maps");
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 2 +written -8")
+__msg("(0) frame 0 insn 1 +live -24")
+__msg("(0) frame 0 insn 1 +written -8")
+__msg("(0) frame 0 insn 0 +live -8,-24")
+__msg("(0) frame 0 insn 0 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void simple_read_simple_write(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = *(u64 *)(r10 - 24);"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 1 +live -8")
+__not_msg("(0) frame 0 insn 1 +written")
+__msg("(0) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 1 +live -16")
+__msg("(0) frame 0 insn 1 +written -32")
+__msg("(0) live stack update done in 2 iterations")
+__naked void read_write_join(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 > 42 goto 1f;"
+ "r0 = *(u64 *)(r10 - 8);"
+ "*(u64 *)(r10 - 32) = r0;"
+ "*(u64 *)(r10 - 40) = r0;"
+ "exit;"
+"1:"
+ "r0 = *(u64 *)(r10 - 16);"
+ "*(u64 *)(r10 - 32) = r0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("2: (25) if r0 > 0x2a goto pc+1")
+__msg("7: (95) exit")
+__msg("(0) frame 0 insn 2 +written -16")
+__msg("(0) live stack update done in 2 iterations")
+__msg("7: (95) exit")
+__not_msg("(0) frame 0 insn 2")
+__msg("(0) live stack update done in 1 iterations")
+__naked void must_write_not_same_slot(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r1 = -8;"
+ "if r0 > 42 goto 1f;"
+ "r1 = -16;"
+"1:"
+ "r2 = r10;"
+ "r2 += r1;"
+ "*(u64 *)(r2 + 0) = r0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 0 +written -8,-16")
+__msg("(0) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 0 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void must_write_not_same_type(void)
+{
+ asm volatile (
+ "*(u64*)(r10 - 8) = 0;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = %[map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 != 0 goto 1f;"
+ "r0 = r10;"
+ "r0 += -16;"
+"1:"
+ "*(u64 *)(r0 + 0) = 42;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(2,4) frame 0 insn 4 +written -8")
+__msg("(2,4) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 2 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void caller_stack_write(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call write_first_param;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void write_first_param(void)
+{
+ asm volatile (
+ "*(u64 *)(r1 + 0) = 7;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+/* caller_stack_read() function */
+__msg("2: .12345.... (85) call pc+4")
+__msg("5: .12345.... (85) call pc+1")
+__msg("6: 0......... (95) exit")
+/* read_first_param() function */
+__msg("7: .1........ (79) r0 = *(u64 *)(r1 +0)")
+__msg("8: 0......... (95) exit")
+/* update for callsite at (2) */
+__msg("(2,7) frame 0 insn 7 +live -8")
+__msg("(2,7) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 2 +live -8")
+__msg("(0) live stack update done in 2 iterations")
+/* update for callsite at (5) */
+__msg("(5,7) frame 0 insn 7 +live -16")
+__msg("(5,7) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 5 +live -16")
+__msg("(0) live stack update done in 2 iterations")
+__naked void caller_stack_read(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call read_first_param;"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call read_first_param;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void read_first_param(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__log_level(2)
+/* read_first_param2() function */
+__msg(" 9: .1........ (79) r0 = *(u64 *)(r1 +0)")
+__msg("10: .......... (b7) r0 = 0")
+__msg("11: 0......... (05) goto pc+0")
+__msg("12: 0......... (95) exit")
+/*
+ * The purpose of the test is to check that the checkpoint in
+ * read_first_param2() stops path traversal. This will only happen if
+ * the verifier understands that fp[0]-8 at insn (12) is not alive.
+ */
+__msg("12: safe")
+__msg("processed 20 insns")
+__naked void caller_stack_pruning(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 42 goto 1f;"
+ "r0 = %[map] ll;"
+"1:"
+ "*(u64 *)(r10 - 8) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ /*
+ * fp[0]-8 is either a pointer to a map or a scalar,
+ * preventing state pruning at the checkpoint created for the call.
+ */
+ "call read_first_param2;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+static __used __naked void read_first_param2(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "r0 = 0;"
+ /*
+ * Checkpoint at goto +0 should fire,
+ * as caller stack fp[0]-8 is not alive at this point.
+ */
+ "goto +0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+__msg("R1 type=scalar expected=map_ptr")
+__naked void caller_stack_pruning_callback(void)
+{
+ asm volatile (
+ "r0 = %[map] ll;"
+ "*(u64 *)(r10 - 8) = r0;"
+ "r1 = 2;"
+ "r2 = loop_cb ll;"
+ "r3 = r10;"
+ "r3 += -8;"
+ "r4 = 0;"
+ /*
+ * fp[0]-8 is either a pointer to a map or a scalar,
+ * preventing state pruning at the checkpoint created for the call.
+ */
+ "call %[bpf_loop];"
+ "r0 = 42;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_loop),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+static __used __naked void loop_cb(void)
+{
+ asm volatile (
+ /*
+ * Checkpoint at function entry should not fire, as caller
+ * stack fp[0]-8 is alive at this point.
+ */
+ "r6 = r2;"
+ "r1 = *(u64 *)(r6 + 0);"
+ "*(u64*)(r10 - 8) = 7;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call %[bpf_map_lookup_elem];"
+ /*
+ * This should stop the verifier on the second loop iteration,
+ * but only if the verifier correctly maintains that fp[0]-8
+ * is still alive.
+ */
+ "*(u64 *)(r6 + 0) = 0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Because of a bug in verifier.c:compute_postorder(),
+ * the program below overflowed the traversal queue in that function.
+ */
+SEC("socket")
+__naked void syzbot_postorder_bug1(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 != 0 goto -1;"
+ "exit;"
+ ::: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
index e07b43b78fd2..fbdde80e7b90 100644
--- a/tools/testing/selftests/bpf/progs/verifier_loops1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -283,4 +283,25 @@ exit_%=: \
: __clobber_all);
}
+/*
+ * This test case triggered a bug in verifier.c:maybe_exit_scc().
+ * The speculative execution path reaches a stack access instruction,
+ * stops, and triggers maybe_exit_scc() without an accompanying
+ * maybe_enter_scc() call.
+ */
+SEC("socket")
+__arch_x86_64
+__caps_unpriv(CAP_BPF)
+__naked void maybe_exit_scc_bug1(void)
+{
+ asm volatile (
+ "r0 = 100;"
+"1:"
+ /* The speculative execution path reaches this store and stops here. */
+ "*(u64 *)(r10 - 512) = r0;"
+ /* The condition is always false, but the verifier speculatively executes the true branch. */
+ "if r0 <= 0x0 goto 1b;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
index 11a079145966..e2767d27d8aa 100644
--- a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
@@ -70,10 +70,13 @@ __naked void bpf_map_ptr_write_rejected(void)
: __clobber_all);
}
+/* The first element of struct bpf_map is a 32-byte SHA256 hash; accessing
+ * into this array is valid. The ops field is now at offset 32, so the 4-byte
+ * read at offset 33 below lands inside it.
+ */
SEC("socket")
__description("bpf_map_ptr: read non-existent field rejected")
__failure
-__msg("cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4")
+__msg("cannot access ptr member ops with moff 32 in struct bpf_map with off 33 size 4")
__failure_unpriv
__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
__flag(BPF_F_ANY_ALIGNMENT)
@@ -82,7 +85,7 @@ __naked void read_non_existent_field_rejected(void)
asm volatile (" \
r6 = 0; \
r1 = %[map_array_48b] ll; \
- r6 = *(u32*)(r1 + 1); \
+ r6 = *(u32*)(r1 + 33); \
r0 = 1; \
exit; \
" :
diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
index 3966d827f288..6d1edaef9213 100644
--- a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
@@ -9,6 +9,8 @@
SEC("raw_tp")
__description("may_goto 0")
__arch_x86_64
+__arch_s390x
+__arch_arm64
__xlated("0: r0 = 1")
__xlated("1: exit")
__success
@@ -27,6 +29,8 @@ __naked void may_goto_simple(void)
SEC("raw_tp")
__description("batch 2 of may_goto 0")
__arch_x86_64
+__arch_s390x
+__arch_arm64
__xlated("0: r0 = 1")
__xlated("1: exit")
__success
@@ -47,6 +51,8 @@ __naked void may_goto_batch_0(void)
SEC("raw_tp")
__description("may_goto batch with offsets 2/1/0")
__arch_x86_64
+__arch_s390x
+__arch_arm64
__xlated("0: r0 = 1")
__xlated("1: exit")
__success
@@ -69,8 +75,10 @@ __naked void may_goto_batch_1(void)
}
SEC("raw_tp")
-__description("may_goto batch with offsets 2/0 - x86_64")
+__description("may_goto batch with offsets 2/0")
__arch_x86_64
+__arch_s390x
+__arch_arm64
__xlated("0: *(u64 *)(r10 -16) = 65535")
__xlated("1: *(u64 *)(r10 -8) = 0")
__xlated("2: r11 = *(u64 *)(r10 -16)")
@@ -84,33 +92,7 @@ __xlated("9: r0 = 1")
__xlated("10: r0 = 2")
__xlated("11: exit")
__success
-__naked void may_goto_batch_2_x86_64(void)
-{
- asm volatile (
- ".8byte %[may_goto1];"
- ".8byte %[may_goto3];"
- "r0 = 1;"
- "r0 = 2;"
- "exit;"
- :
- : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)),
- __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
- : __clobber_all);
-}
-
-SEC("raw_tp")
-__description("may_goto batch with offsets 2/0 - arm64")
-__arch_arm64
-__xlated("0: *(u64 *)(r10 -8) = 8388608")
-__xlated("1: r11 = *(u64 *)(r10 -8)")
-__xlated("2: if r11 == 0x0 goto pc+3")
-__xlated("3: r11 -= 1")
-__xlated("4: *(u64 *)(r10 -8) = r11")
-__xlated("5: r0 = 1")
-__xlated("6: r0 = 2")
-__xlated("7: exit")
-__success
-__naked void may_goto_batch_2_arm64(void)
+__naked void may_goto_batch_2(void)
{
asm volatile (
".8byte %[may_goto1];"
diff --git a/tools/testing/selftests/bpf/progs/verifier_mul.c b/tools/testing/selftests/bpf/progs/verifier_mul.c
new file mode 100644
index 000000000000..7145fe3351d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_mul.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Nandakumar Edamana */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+/* Intended to test the abstract multiplication technique(s) used by
+ * the verifier. Using assembly to avoid compiler optimizations.
+ */
+SEC("fentry/bpf_fentry_test1")
+void BPF_PROG(mul_precise, int x)
+{
+ /* First, force the verifier to be uncertain about the value:
+ * unsigned int a = (bpf_get_prandom_u32() & 0x2) | 0x1;
+ *
+ * Assuming the verifier is using tnum, a must be tnum{.v=0x1, .m=0x2}.
+ * Then a * 0x3 would be m0m1 (m for uncertain). Added imprecision
+ * would cause the following to fail, because the required return value
+ * is 0:
+ * return (a * 0x3) & 0x4;
+ */
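+ /* Concretely, a is either 1 or 3, so a * 0x3 is 3 (0b0011) or 9 (0b1001);
+ * bit 2 (0x4) is clear in both values, hence a precise product tnum is m0m1
+ * and (a * 0x3) & 0x4 must be 0.
+ */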
+ asm volatile ("\
+ call %[bpf_get_prandom_u32];\
+ r0 &= 0x2;\
+ r0 |= 0x1;\
+ r0 *= 0x3;\
+ r0 &= 0x4;\
+ if r0 != 0 goto l0_%=;\
+ r0 = 0;\
+ goto l1_%=;\
+l0_%=:\
+ r0 = 1;\
+l1_%=:\
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 73fee2aec698..1fe090cd6744 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -144,21 +144,21 @@ SEC("?raw_tp")
__success __log_level(2)
/*
* Without the bug fix there will be no history between "last_idx 3 first_idx 3"
- * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor
+ * and "parent state regs=" lines. "R0=6" parts are here to help anchor
* expected log messages to the one specific mark_chain_precision operation.
*
* This is quite fragile: if verifier checkpointing heuristic changes, this
* might need adjusting.
*/
-__msg("2: (07) r0 += 1 ; R0_w=6")
+__msg("2: (07) r0 += 1 ; R0=6")
__msg("3: (35) if r0 >= 0xa goto pc+1")
__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
-__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4")
-__msg("3: R0_w=6")
+__msg("mark_precise: frame0: parent state regs= stack=: R0=P4")
+__msg("3: R0=6")
__naked int state_loop_first_last_equal(void)
{
asm volatile (
@@ -233,8 +233,8 @@ __naked void bpf_cond_op_not_r10(void)
SEC("lsm.s/socket_connect")
__success __log_level(2)
-__msg("0: (b7) r0 = 1 ; R0_w=1")
-__msg("1: (84) w0 = -w0 ; R0_w=0xffffffff")
+__msg("0: (b7) r0 = 1 ; R0=1")
+__msg("1: (84) w0 = -w0 ; R0=0xffffffff")
__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0")
__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
@@ -268,8 +268,8 @@ __naked int bpf_neg_3(void)
SEC("lsm.s/socket_connect")
__success __log_level(2)
-__msg("0: (b7) r0 = 1 ; R0_w=1")
-__msg("1: (87) r0 = -r0 ; R0_w=-1")
+__msg("0: (b7) r0 = 1 ; R0=1")
+__msg("1: (87) r0 = -r0 ; R0=-1")
__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0")
__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
index 7c5e5e6d10eb..c0ce690ddb68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
+++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -349,11 +349,11 @@ __naked void precision_two_ids(void)
SEC("socket")
__success __log_level(2)
__flag(BPF_F_TEST_STATE_FREQ)
-/* check thar r0 and r6 have different IDs after 'if',
+/* check that r0 and r6 have different IDs after 'if',
* collect_linked_regs() can't tie more than 6 registers for a single insn.
*/
__msg("8: (25) if r0 > 0x7 goto pc+0 ; R0=scalar(id=1")
-__msg("9: (bf) r6 = r6 ; R6_w=scalar(id=2")
+__msg("9: (bf) r6 = r6 ; R6=scalar(id=2")
/* check that r{0-5} are marked precise after 'if' */
__msg("frame0: regs=r0 stack= before 8: (25) if r0 > 0x7 goto pc+0")
__msg("frame0: parent state regs=r0,r1,r2,r3,r4,r5 stack=:")
@@ -779,12 +779,12 @@ __success
__retval(0)
/* Check that verifier believes r1/r0 are zero at exit */
__log_level(2)
-__msg("4: (77) r1 >>= 32 ; R1_w=0")
-__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0")
+__msg("4: (77) r1 >>= 32 ; R1=0")
+__msg("5: (bf) r0 = r1 ; R0=0 R1=0")
__msg("6: (95) exit")
__msg("from 3 to 4")
-__msg("4: (77) r1 >>= 32 ; R1_w=0")
-__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0")
+__msg("4: (77) r1 >>= 32 ; R1=0")
+__msg("5: (bf) r0 = r1 ; R0=0 R1=0")
__msg("6: (95) exit")
/* Verify that statements to randomize upper half of r1 had not been
* generated.
diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c
index 0d5e56dffabb..2b4610b53382 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sock.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sock.c
@@ -1,14 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Converted from tools/testing/selftests/bpf/verifier/sock.c */
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
-#define offsetofend(TYPE, MEMBER) \
- (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
-
struct {
__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
__uint(max_entries, 1);
@@ -1073,6 +1069,48 @@ int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk)
}
__noinline
+long xdp_pull_data2(struct xdp_md *x, __u32 len)
+{
+ return bpf_xdp_pull_data(x, len);
+}
+
+__noinline
+long xdp_pull_data1(struct xdp_md *x, __u32 len)
+{
+ return xdp_pull_data2(x, len);
+}
+
+/* A global function calls bpf_xdp_pull_data(), which invalidates packet
+ * pointers established before the global function call.
+ */
+SEC("xdp")
+__failure __msg("invalid mem access")
+int invalidate_xdp_pkt_pointers_from_global_func(struct xdp_md *x)
+{
+ int *p = (void *)(long)x->data;
+
+ if ((void *)(p + 1) > (void *)(long)x->data_end)
+ return XDP_DROP;
+ xdp_pull_data1(x, 0);
+ *p = 42; /* this is unsafe */
+ return XDP_PASS;
+}
+
+/* XDP packet changing kfunc calls invalidate packet pointers */
+SEC("xdp")
+__failure __msg("invalid mem access")
+int invalidate_xdp_pkt_pointers(struct xdp_md *x)
+{
+ int *p = (void *)(long)x->data;
+
+ if ((void *)(p + 1) > (void *)(long)x->data_end)
+ return XDP_DROP;
+ bpf_xdp_pull_data(x, 0);
+ *p = 42; /* this is unsafe */
+ return XDP_PASS;
+}
+
+__noinline
int tail_call(struct __sk_buff *sk)
{
bpf_tail_call_static(sk, &jmp_table, 0);
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 1e5a511e8494..7a13dbd794b2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -506,17 +506,17 @@ SEC("raw_tp")
__log_level(2)
__success
/* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */
-__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0")
+__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8=0")
/* but fp-16 is spilled IMPRECISE zero const reg */
-__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0")
+__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0=0 R10=fp0 fp-16=0")
/* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register
* precise immediately; if necessary, it will be marked precise later
*/
-__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0")
+__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0")
/* similarly, when R2 is assigned from spilled register, it is initially
* imprecise, but will be marked precise later once it is used in precise context
*/
-__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0")
+__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2=0 R10=fp0 fp-16=0")
__msg("11: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)")
@@ -598,7 +598,7 @@ __log_level(2)
__success
/* fp-4 is STACK_ZERO */
__msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????")
-__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8=0000????")
+__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0000????")
__msg("5: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)")
@@ -640,25 +640,25 @@ SEC("raw_tp")
__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
__success
/* make sure fp-8 is IMPRECISE fake register spill */
-__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1")
+__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8=1")
/* and fp-16 is spilled IMPRECISE const reg */
-__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1")
+__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=1")
/* validate load from fp-8, which was initialized using BPF_ST_MEM */
-__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1")
+__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2=1 R10=fp0 fp-8=1")
__msg("9: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1")
-__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
/* validate load from fp-16, which was initialized using BPF_STX_MEM */
-__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1")
+__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2=1 R10=fp0 fp-16=1")
__msg("13: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)")
@@ -668,12 +668,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
/* now both fp-8 and fp-16 are precise, very good */
-__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=P1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
__naked void stack_load_preserves_const_precision(void)
{
asm volatile (
@@ -719,22 +719,22 @@ __success
/* make sure fp-8 is 32-bit FAKE subregister spill */
__msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1")
/* but fp-16 is spilled IMPRECISE zero const reg */
-__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1")
+__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=????1")
/* validate load from fp-8, which was initialized using BPF_ST_MEM */
-__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1")
+__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2=1 R10=fp0 fp-8=????1")
__msg("9: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1")
-__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
/* validate load from fp-16, which was initialized using BPF_STX_MEM */
-__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1")
+__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2=1 R10=fp0 fp-16=????1")
__msg("13: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)")
@@ -743,12 +743,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2
__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
-__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????P1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
__naked void stack_load_preserves_const_precision_subreg(void)
{
asm volatile (
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index 9d415f7ce599..ac3e418c2a96 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -105,7 +105,7 @@ __msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3")
__msg("mark_precise: frame0: regs=r0 stack= before 10: (95) exit")
__msg("mark_precise: frame1: regs=r0 stack= before 9: (bf) r0 = (s8)r10")
-__msg("7: R0_w=scalar")
+__msg("7: R0=scalar")
__naked int fp_precise_subprog_result(void)
{
asm volatile (
@@ -141,7 +141,7 @@ __msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = (s8)r1")
* anyways, at which point we'll break precision chain
*/
__msg("mark_precise: frame1: regs=r1 stack= before 9: (bf) r1 = r10")
-__msg("7: R0_w=scalar")
+__msg("7: R0=scalar")
__naked int sneaky_fp_precise_subprog_result(void)
{
asm volatile (
@@ -681,7 +681,7 @@ __msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8")
__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4")
__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)")
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=2 R6=1 R8=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8=P1")
__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2")
diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c
index 1d36d01b746e..f345466bca68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_var_off.c
+++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c
@@ -114,8 +114,8 @@ __naked void stack_write_priv_vs_unpriv(void)
}
/* Similar to the previous test, but this time also perform a read from the
- * address written to with a variable offset. The read is allowed, showing that,
- * after a variable-offset write, a priviledged program can read the slots that
+ * address written to with a variable offset. The read is allowed, showing that,
+ * after a variable-offset write, a privileged program can read the slots that
* were in the range of that write (even if the verifier doesn't actually know if
* the slot being read was really written to or not.
*
@@ -157,7 +157,7 @@ __naked void stack_write_followed_by_read(void)
SEC("socket")
__description("variable-offset stack write clobbers spilled regs")
__failure
-/* In the priviledged case, dereferencing a spilled-and-then-filled
+/* In the privileged case, dereferencing a spilled-and-then-filled
* register is rejected because the previous variable offset stack
* write might have overwritten the spilled pointer (i.e. we lose track
* of the spilled register when we analyze the write).
diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile b/tools/testing/selftests/bpf/test_kmods/Makefile
index d4e50c4509c9..63c4d3f6a12f 100644
--- a/tools/testing/selftests/bpf/test_kmods/Makefile
+++ b/tools/testing/selftests/bpf/test_kmods/Makefile
@@ -8,7 +8,7 @@ Q = @
endif
MODULES = bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
- bpf_test_modorder_y.ko
+ bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
$(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o)))
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
new file mode 100644
index 000000000000..769206fc70e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/prandom.h>
+#include <asm/rqspinlock.h>
+#include <linux/perf_event.h>
+#include <linux/kthread.h>
+#include <linux/atomic.h>
+#include <linux/slab.h>
+
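+/* Pinned hardware cycles event; its overflow handler nmi_cb(), registered in
+ * bpf_test_rqspinlock_init(), runs from the PMU NMI and contends on the test
+ * locks every sample_period cycles.
+ */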
+static struct perf_event_attr hw_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 1,
+ .sample_period = 100000,
+};
+
+static rqspinlock_t lock_a;
+static rqspinlock_t lock_b;
+
+static struct perf_event **rqsl_evts;
+static int rqsl_nevts;
+
+static bool test_ab = false;
+module_param(test_ab, bool, 0644);
+MODULE_PARM_DESC(test_ab, "Test ABBA situations instead of AA situations");
+
+static struct task_struct **rqsl_threads;
+static int rqsl_nthreads;
+static atomic_t rqsl_ready_cpus = ATOMIC_INIT(0);
+
+static int pause = 0;
+
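+/* In ABBA mode (test_ab), odd CPUs take lock_b in the worker thread and
+ * lock_a in the NMI callback, while even CPUs do the opposite; in AA mode
+ * every CPU contends on lock_a in both contexts.
+ */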
+static bool nmi_locks_a(int cpu)
+{
+ return (cpu & 1) && test_ab;
+}
+
+static int rqspinlock_worker_fn(void *arg)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ int ret;
+
+ if (cpu) {
+ atomic_inc(&rqsl_ready_cpus);
+
+ while (!kthread_should_stop()) {
+ if (READ_ONCE(pause)) {
+ msleep(1000);
+ continue;
+ }
+ if (nmi_locks_a(cpu))
+ ret = raw_res_spin_lock_irqsave(&lock_b, flags);
+ else
+ ret = raw_res_spin_lock_irqsave(&lock_a, flags);
+ mdelay(20);
+ if (nmi_locks_a(cpu) && !ret)
+ raw_res_spin_unlock_irqrestore(&lock_b, flags);
+ else if (!ret)
+ raw_res_spin_unlock_irqrestore(&lock_a, flags);
+ cpu_relax();
+ }
+ return 0;
+ }
+
+ while (!kthread_should_stop()) {
+ int expected = rqsl_nthreads > 0 ? rqsl_nthreads - 1 : 0;
+ int ready = atomic_read(&rqsl_ready_cpus);
+
+ if (ready == expected && !READ_ONCE(pause)) {
+ for (int i = 0; i < rqsl_nevts; i++)
+ perf_event_enable(rqsl_evts[i]);
+ pr_err("Waiting 5 secs to pause the test\n");
+ msleep(1000 * 5);
+ WRITE_ONCE(pause, 1);
+ pr_err("Paused the test\n");
+ } else {
+ msleep(1000);
+ cpu_relax();
+ }
+ }
+ return 0;
+}
+
+static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ int ret;
+
+ if (!cpu || READ_ONCE(pause))
+ return;
+
+ if (nmi_locks_a(cpu))
+ ret = raw_res_spin_lock_irqsave(&lock_a, flags);
+ else
+ ret = raw_res_spin_lock_irqsave(test_ab ? &lock_b : &lock_a, flags);
+
+ mdelay(10);
+
+ if (nmi_locks_a(cpu) && !ret)
+ raw_res_spin_unlock_irqrestore(&lock_a, flags);
+ else if (!ret)
+ raw_res_spin_unlock_irqrestore(test_ab ? &lock_b : &lock_a, flags);
+}
+
+static void free_rqsl_threads(void)
+{
+ int i;
+
+ if (rqsl_threads) {
+ for_each_online_cpu(i) {
+ if (rqsl_threads[i])
+ kthread_stop(rqsl_threads[i]);
+ }
+ kfree(rqsl_threads);
+ }
+}
+
+static void free_rqsl_evts(void)
+{
+ int i;
+
+ if (rqsl_evts) {
+ for (i = 0; i < rqsl_nevts; i++) {
+ if (rqsl_evts[i])
+ perf_event_release_kernel(rqsl_evts[i]);
+ }
+ kfree(rqsl_evts);
+ }
+}
+
+static int bpf_test_rqspinlock_init(void)
+{
+ int i, ret;
+ int ncpus = num_online_cpus();
+
+ pr_err("Mode = %s\n", test_ab ? "ABBA" : "AA");
+
+ if (ncpus < 3)
+ return -ENOTSUPP;
+
+ raw_res_spin_lock_init(&lock_a);
+ raw_res_spin_lock_init(&lock_b);
+
+ rqsl_evts = kcalloc(ncpus - 1, sizeof(*rqsl_evts), GFP_KERNEL);
+ if (!rqsl_evts)
+ return -ENOMEM;
+ rqsl_nevts = ncpus - 1;
+
+ for (i = 1; i < ncpus; i++) {
+ struct perf_event *e;
+
+ e = perf_event_create_kernel_counter(&hw_attr, i, NULL, nmi_cb, NULL);
+ if (IS_ERR(e)) {
+ ret = PTR_ERR(e);
+ goto err_perf_events;
+ }
+ rqsl_evts[i - 1] = e;
+ }
+
+ rqsl_threads = kcalloc(ncpus, sizeof(*rqsl_threads), GFP_KERNEL);
+ if (!rqsl_threads) {
+ ret = -ENOMEM;
+ goto err_perf_events;
+ }
+ rqsl_nthreads = ncpus;
+
+ for_each_online_cpu(i) {
+ struct task_struct *t;
+
+ t = kthread_create(rqspinlock_worker_fn, NULL, "rqsl_w/%d", i);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ goto err_threads_create;
+ }
+ kthread_bind(t, i);
+ rqsl_threads[i] = t;
+ wake_up_process(t);
+ }
+ return 0;
+
+err_threads_create:
+ free_rqsl_threads();
+err_perf_events:
+ free_rqsl_evts();
+ return ret;
+}
+
+module_init(bpf_test_rqspinlock_init);
+
+static void bpf_test_rqspinlock_exit(void)
+{
+ free_rqsl_threads();
+ free_rqsl_evts();
+}
+
+module_exit(bpf_test_rqspinlock_exit);
+
+MODULE_AUTHOR("Kumar Kartikeya Dwivedi");
+MODULE_DESCRIPTION("BPF rqspinlock stress test module");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index 511911053bdc..8074bc5f6f20 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -62,6 +62,18 @@ struct bpf_testmod_struct_arg_5 {
long d;
};
+union bpf_testmod_union_arg_1 {
+ char a;
+ short b;
+ struct bpf_testmod_struct_arg_1 arg;
+};
+
+union bpf_testmod_union_arg_2 {
+ int a;
+ long b;
+ struct bpf_testmod_struct_arg_2 arg;
+};
+
__bpf_hook_start();
noinline int
@@ -129,6 +141,20 @@ bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f,
}
noinline int
+bpf_testmod_test_union_arg_1(union bpf_testmod_union_arg_1 a, int b, int c)
+{
+ bpf_testmod_test_struct_arg_result = a.arg.a + b + c;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_union_arg_2(int a, union bpf_testmod_union_arg_2 b)
+{
+ bpf_testmod_test_struct_arg_result = a + b.arg.a + b.arg.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) {
bpf_testmod_test_struct_arg_result = a->a;
return bpf_testmod_test_struct_arg_result;
@@ -218,6 +244,16 @@ __bpf_kfunc void bpf_kfunc_rcu_task_test(struct task_struct *ptr)
{
}
+__bpf_kfunc struct task_struct *bpf_kfunc_ret_rcu_test(void)
+{
+ return NULL;
+}
+
+__bpf_kfunc int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size)
+{
+ return NULL;
+}
+
__bpf_kfunc struct bpf_testmod_ctx *
bpf_testmod_ctx_create(int *err)
{
@@ -398,6 +434,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
struct bpf_testmod_struct_arg_3 *struct_arg3;
struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22};
struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26};
+ union bpf_testmod_union_arg_1 union_arg1 = { .arg = {1} };
+ union bpf_testmod_union_arg_2 union_arg2 = { .arg = {2, 3} };
int i = 1;
while (bpf_testmod_return_ptr(i))
@@ -415,6 +453,9 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
(void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20,
21, 22, struct_arg5, 27);
+ (void)bpf_testmod_test_union_arg_1(union_arg1, 4, 5);
+ (void)bpf_testmod_test_union_arg_2(6, union_arg2);
+
(void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2);
(void)trace_bpf_testmod_test_raw_tp_null_tp(NULL);
@@ -630,6 +671,8 @@ BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU)
+BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test, KF_RET_NULL | KF_RCU_PROTECTED)
+BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test_nostruct, KF_RET_NULL | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1)
@@ -1064,6 +1107,8 @@ __bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args)
return args->a;
}
+__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id);
+
BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
@@ -1104,6 +1149,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABL
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
static int bpf_testmod_ops_init(struct btf *btf)
@@ -1535,6 +1581,114 @@ static struct bpf_struct_ops testmod_st_ops = {
.owner = THIS_MODULE,
};
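+/* Registry of attached bpf_testmod_multi_st_ops instances, keyed by the id
+ * returned by bpf_struct_ops_id(), so that bpf_kfunc_multi_st_ops_test_1()
+ * can dispatch to a specific registered instance.
+ */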
+struct hlist_head multi_st_ops_list;
+static DEFINE_SPINLOCK(multi_st_ops_lock);
+
+static int multi_st_ops_init(struct btf *btf)
+{
+ spin_lock_init(&multi_st_ops_lock);
+ INIT_HLIST_HEAD(&multi_st_ops_list);
+
+ return 0;
+}
+
+static int multi_st_ops_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ return 0;
+}
+
+static struct bpf_testmod_multi_st_ops *multi_st_ops_find_nolock(u32 id)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+
+ hlist_for_each_entry(st_ops, &multi_st_ops_list, node) {
+ if (st_ops->id == id)
+ return st_ops;
+ }
+
+ return NULL;
+}
+
+int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+ unsigned long flags;
+ int ret = -1;
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ st_ops = multi_st_ops_find_nolock(id);
+ if (st_ops)
+ ret = st_ops->test_1(args);
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+
+ return ret;
+}
+
+static int multi_st_ops_reg(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_multi_st_ops *st_ops =
+ (struct bpf_testmod_multi_st_ops *)kdata;
+ unsigned long flags;
+ int err = 0;
+ u32 id;
+
+ if (!st_ops->test_1)
+ return -EINVAL;
+
+ id = bpf_struct_ops_id(kdata);
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ if (multi_st_ops_find_nolock(id)) {
+ pr_err("multi_st_ops(id:%d) has already been registered\n", id);
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ st_ops->id = id;
+ hlist_add_head(&st_ops->node, &multi_st_ops_list);
+unlock:
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+
+ return err;
+}
+
+static void multi_st_ops_unreg(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+ unsigned long flags;
+ u32 id;
+
+ id = bpf_struct_ops_id(kdata);
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ st_ops = multi_st_ops_find_nolock(id);
+ if (st_ops)
+ hlist_del(&st_ops->node);
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+}
+
+static int bpf_testmod_multi_st_ops__test_1(struct st_ops_args *args)
+{
+ return 0;
+}
+
+static struct bpf_testmod_multi_st_ops multi_st_ops_cfi_stubs = {
+ .test_1 = bpf_testmod_multi_st_ops__test_1,
+};
+
+struct bpf_struct_ops testmod_multi_st_ops = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = multi_st_ops_init,
+ .init_member = multi_st_ops_init_member,
+ .reg = multi_st_ops_reg,
+ .unreg = multi_st_ops_unreg,
+ .cfi_stubs = &multi_st_ops_cfi_stubs,
+ .name = "bpf_testmod_multi_st_ops",
+ .owner = THIS_MODULE,
+};
+
extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
@@ -1557,6 +1711,7 @@ static int bpf_testmod_init(void)
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3);
ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops);
+ ret = ret ?: register_bpf_struct_ops(&testmod_multi_st_ops, bpf_testmod_multi_st_ops);
ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors,
ARRAY_SIZE(bpf_testmod_dtors),
THIS_MODULE);
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
index c9fab51f16e2..f6e492f9d042 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
@@ -116,4 +116,10 @@ struct bpf_testmod_st_ops {
struct module *owner;
};
+struct bpf_testmod_multi_st_ops {
+ int (*test_1)(struct st_ops_args *args);
+ struct hlist_node node;
+ int id;
+};
+
#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
index b58817938deb..4df6fa6a92cb 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
@@ -158,5 +158,9 @@ void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) __ksym;
void bpf_kfunc_trusted_task_test(struct task_struct *ptr) __ksym;
void bpf_kfunc_trusted_num_test(int *ptr) __ksym;
void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym;
+struct task_struct *bpf_kfunc_ret_rcu_test(void) __ksym;
+int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size) __ksym;
+
+int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __ksym;
#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
index 4694422aa76c..88e4aeab21b7 100644
--- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -74,7 +74,7 @@ int main(int argc, char **argv)
/* Let's try detach it before it was ever attached */
ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
- if (ret != -1 || errno != ENOENT) {
+ if (ret != -ENOENT) {
printf("bpf_prog_detach2 not attached should fail: %m\n");
return 1;
}
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 78423cf89e01..74ecc281bb8c 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include <linux/capability.h>
#include <stdlib.h>
-#include <regex.h>
#include <test_progs.h>
#include <bpf/btf.h>
@@ -20,10 +19,12 @@
#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
+#define TEST_TAG_EXPECT_NOT_MSG_PFX "comment:test_expect_not_msg="
#define TEST_TAG_EXPECT_XLATED_PFX "comment:test_expect_xlated="
#define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv"
#define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv"
#define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv="
+#define TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV "comment:test_expect_not_msg_unpriv="
#define TEST_TAG_EXPECT_XLATED_PFX_UNPRIV "comment:test_expect_xlated_unpriv="
#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
@@ -38,6 +39,10 @@
#define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv="
#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv="
#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode="
+#define TEST_TAG_EXPECT_STDERR_PFX "comment:test_expect_stderr="
+#define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv="
+#define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout="
+#define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv="
/* Warning: duplicated in bpf_misc.h */
#define POINTER_VALUE 0xbadcafe
@@ -61,24 +66,14 @@ enum load_mode {
NO_JITED = 1 << 1,
};
-struct expect_msg {
- const char *substr; /* substring match */
- regex_t regex;
- bool is_regex;
- bool on_next_line;
-};
-
-struct expected_msgs {
- struct expect_msg *patterns;
- size_t cnt;
-};
-
struct test_subspec {
char *name;
bool expect_failure;
struct expected_msgs expect_msgs;
struct expected_msgs expect_xlated;
struct expected_msgs jited;
+ struct expected_msgs stderr;
+ struct expected_msgs stdout;
int retval;
bool execute;
__u64 caps;
@@ -139,6 +134,10 @@ static void free_test_spec(struct test_spec *spec)
free_msgs(&spec->unpriv.expect_xlated);
free_msgs(&spec->priv.jited);
free_msgs(&spec->unpriv.jited);
+ free_msgs(&spec->unpriv.stderr);
+ free_msgs(&spec->priv.stderr);
+ free_msgs(&spec->unpriv.stdout);
+ free_msgs(&spec->priv.stdout);
free(spec->priv.name);
free(spec->unpriv.name);
@@ -206,7 +205,8 @@ static int compile_regex(const char *pattern, regex_t *regex)
return 0;
}
-static int __push_msg(const char *pattern, bool on_next_line, struct expected_msgs *msgs)
+static int __push_msg(const char *pattern, bool on_next_line, bool negative,
+ struct expected_msgs *msgs)
{
struct expect_msg *msg;
void *tmp;
@@ -222,6 +222,7 @@ static int __push_msg(const char *pattern, bool on_next_line, struct expected_ms
msg = &msgs->patterns[msgs->cnt];
msg->on_next_line = on_next_line;
msg->substr = pattern;
+ msg->negative = negative;
msg->is_regex = false;
if (strstr(pattern, "{{")) {
err = compile_regex(pattern, &msg->regex);
@@ -240,16 +241,16 @@ static int clone_msgs(struct expected_msgs *from, struct expected_msgs *to)
for (i = 0; i < from->cnt; i++) {
msg = &from->patterns[i];
- err = __push_msg(msg->substr, msg->on_next_line, to);
+ err = __push_msg(msg->substr, msg->on_next_line, msg->negative, to);
if (err)
return err;
}
return 0;
}
-static int push_msg(const char *substr, struct expected_msgs *msgs)
+static int push_msg(const char *substr, bool negative, struct expected_msgs *msgs)
{
- return __push_msg(substr, false, msgs);
+ return __push_msg(substr, false, negative, msgs);
}
static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct expected_msgs *msgs)
@@ -260,7 +261,7 @@ static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct exp
*on_next_line = false;
return 0;
}
- err = __push_msg(regex_str, *on_next_line, msgs);
+ err = __push_msg(regex_str, *on_next_line, false, msgs);
if (err)
return err;
*on_next_line = true;
@@ -374,6 +375,7 @@ enum arch {
ARCH_X86_64 = 0x2,
ARCH_ARM64 = 0x4,
ARCH_RISCV64 = 0x8,
+ ARCH_S390X = 0x10,
};
static int get_current_arch(void)
@@ -384,6 +386,8 @@ static int get_current_arch(void)
return ARCH_ARM64;
#elif defined(__riscv) && __riscv_xlen == 64
return ARCH_RISCV64;
+#elif defined(__s390x__)
+ return ARCH_S390X;
#endif
return ARCH_UNKNOWN;
}
@@ -404,6 +408,10 @@ static int parse_test_spec(struct test_loader *tester,
bool xlated_on_next_line = true;
bool unpriv_jit_on_next_line;
bool jit_on_next_line;
+ bool stderr_on_next_line = true;
+ bool unpriv_stderr_on_next_line = true;
+ bool stdout_on_next_line = true;
+ bool unpriv_stdout_on_next_line = true;
bool collect_jit = false;
int func_id, i, err = 0;
u32 arch_mask = 0;
@@ -465,12 +473,22 @@ static int parse_test_spec(struct test_loader *tester,
spec->auxiliary = true;
spec->mode_mask |= UNPRIV;
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX))) {
- err = push_msg(msg, &spec->priv.expect_msgs);
+ err = push_msg(msg, false, &spec->priv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX))) {
+ err = push_msg(msg, true, &spec->priv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= PRIV;
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV))) {
- err = push_msg(msg, &spec->unpriv.expect_msgs);
+ err = push_msg(msg, false, &spec->unpriv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV))) {
+ err = push_msg(msg, true, &spec->unpriv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
@@ -565,8 +583,10 @@ static int parse_test_spec(struct test_loader *tester,
arch = ARCH_ARM64;
} else if (strcmp(val, "RISCV64") == 0) {
arch = ARCH_RISCV64;
+ } else if (strcmp(val, "s390x") == 0) {
+ arch = ARCH_S390X;
} else {
- PRINT_FAIL("bad arch spec: '%s'", val);
+ PRINT_FAIL("bad arch spec: '%s'\n", val);
err = -EINVAL;
goto cleanup;
}
@@ -593,6 +613,26 @@ static int parse_test_spec(struct test_loader *tester,
err = -EINVAL;
goto cleanup;
}
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX))) {
+ err = push_disasm_msg(msg, &stderr_on_next_line,
+ &spec->priv.stderr);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX_UNPRIV))) {
+ err = push_disasm_msg(msg, &unpriv_stderr_on_next_line,
+ &spec->unpriv.stderr);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX))) {
+ err = push_disasm_msg(msg, &stdout_on_next_line,
+ &spec->priv.stdout);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV))) {
+ err = push_disasm_msg(msg, &unpriv_stdout_on_next_line,
+ &spec->unpriv.stdout);
+ if (err)
+ goto cleanup;
}
}
@@ -646,6 +686,10 @@ static int parse_test_spec(struct test_loader *tester,
clone_msgs(&spec->priv.expect_xlated, &spec->unpriv.expect_xlated);
if (spec->unpriv.jited.cnt == 0)
clone_msgs(&spec->priv.jited, &spec->unpriv.jited);
+ if (spec->unpriv.stderr.cnt == 0)
+ clone_msgs(&spec->priv.stderr, &spec->unpriv.stderr);
+ if (spec->unpriv.stdout.cnt == 0)
+ clone_msgs(&spec->priv.stdout, &spec->unpriv.stdout);
}
spec->valid = true;
@@ -707,44 +751,155 @@ static void emit_jited(const char *jited, bool force)
fprintf(stdout, "JITED:\n=============\n%s=============\n", jited);
}
-static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
- void (*emit_fn)(const char *buf, bool force))
+static void emit_stderr(const char *stderr, bool force)
{
- const char *log = log_buf, *prev_match;
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "STDERR:\n=============\n%s=============\n", stderr);
+}
+
+static void emit_stdout(const char *bpf_stdout, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "STDOUT:\n=============\n%s=============\n", bpf_stdout);
+}
+
+static const char *match_msg(struct expect_msg *msg, const char **log)
+{
+ const char *match = NULL;
regmatch_t reg_match[1];
- int prev_match_line;
- int match_line;
- int i, j, err;
+ int err;
+
+ if (!msg->is_regex) {
+ match = strstr(*log, msg->substr);
+ if (match)
+ *log = match + strlen(msg->substr);
+ } else {
+ err = regexec(&msg->regex, *log, 1, reg_match, 0);
+ if (err == 0) {
+ match = *log + reg_match[0].rm_so;
+ *log += reg_match[0].rm_eo;
+ }
+ }
+ return match;
+}
+
+static int count_lines(const char *start, const char *end)
+{
+ const char *tmp;
+ int n = 0;
+
+ for (tmp = start; tmp < end; ++tmp)
+ if (*tmp == '\n')
+ n++;
+ return n;
+}
+
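+/* Span of a matched pattern within the log and the line number it starts on. */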
+struct match {
+ const char *start;
+ const char *end;
+ int line;
+};
+
+/*
+ * Positive messages are matched sequentially; each message is searched for
+ * starting from the end of the previously matched one.
+ */
+static void match_positive_msgs(const char *log, struct expected_msgs *msgs, struct match *matches)
+{
+ const char *prev_match;
+ int i, line;
- prev_match_line = -1;
- match_line = 0;
prev_match = log;
+ line = 0;
for (i = 0; i < msgs->cnt; i++) {
struct expect_msg *msg = &msgs->patterns[i];
- const char *match = NULL, *pat_status;
- bool wrong_line = false;
-
- if (!msg->is_regex) {
- match = strstr(log, msg->substr);
- if (match)
- log = match + strlen(msg->substr);
- } else {
- err = regexec(&msg->regex, log, 1, reg_match, 0);
- if (err == 0) {
- match = log + reg_match[0].rm_so;
- log += reg_match[0].rm_eo;
+ const char *match = NULL;
+
+ if (msg->negative)
+ continue;
+
+ match = match_msg(msg, &log);
+ if (match) {
+ line += count_lines(prev_match, match);
+ matches[i].start = match;
+ matches[i].end = log;
+ matches[i].line = line;
+ prev_match = match;
+ }
+ }
+}
+
+/*
+ * Each negative message N located between positive messages P1 and P2
+ * is matched in the span P1.end .. P2.start. Consequently, negative messages
+ * are unordered within the span.
+ */
+static void match_negative_msgs(const char *log, struct expected_msgs *msgs, struct match *matches)
+{
+ const char *start = log, *end, *next, *match;
+ const char *log_end = log + strlen(log);
+ int i, j, next_positive;
+
+ for (i = 0; i < msgs->cnt; i++) {
+ struct expect_msg *msg = &msgs->patterns[i];
+
+ /* positive message bumps span start */
+ if (!msg->negative) {
+ start = matches[i].end ?: start;
+ continue;
+ }
+
+ /* count stride of negative patterns and adjust span end */
+ end = log_end;
+ for (next_positive = i + 1; next_positive < msgs->cnt; next_positive++) {
+ if (!msgs->patterns[next_positive].negative) {
+ end = matches[next_positive].start;
+ break;
}
}
- if (match) {
- for (; prev_match < match; ++prev_match)
- if (*prev_match == '\n')
- ++match_line;
- wrong_line = msg->on_next_line && prev_match_line >= 0 &&
- prev_match_line + 1 != match_line;
+ /* try matching negative messages within identified span */
+ for (j = i; j < next_positive; j++) {
+ next = start;
+ match = match_msg(&msgs->patterns[j], &next);
+ if (match && next <= end) {
+ matches[j].start = match;
+ matches[j].end = next;
+ }
}
- if (!match || wrong_line) {
+ /* -1 to account for i++ */
+ i = next_positive - 1;
+ }
+}
+
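+/* Match positive patterns first to establish spans within the log, then check
+ * that negative patterns do not occur inside their spans; report the first
+ * missing, misplaced or unexpectedly present pattern.
+ */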
+void validate_msgs(const char *log_buf, struct expected_msgs *msgs,
+ void (*emit_fn)(const char *buf, bool force))
+{
+ struct match matches[msgs->cnt];
+ struct match *prev_match = NULL;
+ int i, j;
+
+ memset(matches, 0, sizeof(*matches) * msgs->cnt);
+ match_positive_msgs(log_buf, msgs, matches);
+ match_negative_msgs(log_buf, msgs, matches);
+
+ for (i = 0; i < msgs->cnt; i++) {
+ struct expect_msg *msg = &msgs->patterns[i];
+ struct match *match = &matches[i];
+ const char *pat_status;
+ bool unexpected;
+ bool wrong_line;
+ bool no_match;
+
+ no_match = !msg->negative && !match->start;
+ wrong_line = !msg->negative &&
+ msg->on_next_line &&
+ prev_match && prev_match->line + 1 != match->line;
+ unexpected = msg->negative && match->start;
+ if (no_match || wrong_line || unexpected) {
PRINT_FAIL("expect_msg\n");
if (env.verbosity == VERBOSE_NONE)
emit_fn(log_buf, true /*force*/);
@@ -754,8 +909,10 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
pat_status = "MATCHED ";
else if (wrong_line)
pat_status = "WRONG LINE";
- else
+ else if (no_match)
pat_status = "EXPECTED ";
+ else
+ pat_status = "UNEXPECTED";
msg = &msgs->patterns[j];
fprintf(stderr, "%s %s: '%s'\n",
pat_status,
@@ -765,12 +922,13 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
if (wrong_line) {
fprintf(stderr,
"expecting match at line %d, actual match is at line %d\n",
- prev_match_line + 1, match_line);
+ prev_match->line + 1, match->line);
}
break;
}
- prev_match_line = match_line;
+ if (!msg->negative)
+ prev_match = match;
}
}
@@ -929,6 +1087,19 @@ out:
return err;
}
+/* Read the bpf stream corresponding to the stream_id */
+static int get_stream(int stream_id, int prog_fd, char *text, size_t text_sz)
+{
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ int ret;
+
+ ret = bpf_prog_stream_read(prog_fd, stream_id, text, text_sz, &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ text[ret] = '\0';
+
+ return ret;
+}
+
/* this function is forced noinline and has short generic name to look better
* in test_progs output (in case of a failure)
*/
@@ -1083,7 +1254,7 @@ void run_subtest(struct test_loader *tester,
link = bpf_map__attach_struct_ops(map);
if (!link) {
PRINT_FAIL("bpf_map__attach_struct_ops failed for map %s: err=%d\n",
- bpf_map__name(map), err);
+ bpf_map__name(map), -errno);
goto tobj_cleanup;
}
links[links_cnt++] = link;
@@ -1103,6 +1274,31 @@ void run_subtest(struct test_loader *tester,
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;
}
+
+ if (subspec->stderr.cnt) {
+ err = get_stream(2, bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err <= 0) {
+ PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
+ err, errno);
+ goto tobj_cleanup;
+ }
+ emit_stderr(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->stderr, emit_stderr);
+ }
+
+ if (subspec->stdout.cnt) {
+ err = get_stream(1, bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err <= 0) {
+ PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
+ err, errno);
+ goto tobj_cleanup;
+ }
+ emit_stdout(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->stdout, emit_stdout);
+ }
+
/* redo bpf_map__attach_struct_ops for each test */
while (links_cnt > 0)
bpf_link__destroy(links[--links_cnt]);
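
The matching scheme documented above (positive matches pin the span boundaries, then negative patterns are searched only inside the enclosed span) can be illustrated with a minimal standalone sketch. This is a deliberately simplified assumption-laden example: substring matching only, no regex, no line tracking, and the helper name find_in_span() is invented for the example rather than taken from test_loader.c.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct pat { const char *substr; bool negative; };
struct span { const char *start, *end; };

/* first occurrence of p->substr fully inside [start, end), or NULL */
static const char *find_in_span(const struct pat *p, const char *start, const char *end)
{
	const char *hit = strstr(start, p->substr);

	return (hit && hit + strlen(p->substr) <= end) ? hit : NULL;
}

int main(void)
{
	const char *log = "alpha\nbeta\ngamma\n";
	struct pat pats[] = {
		{ "alpha", false },	/* positive: bounds the span start */
		{ "oops",  true  },	/* negative: must not occur before the next positive */
		{ "gamma", false },	/* positive: bounds the span end */
	};
	struct span m[3] = {};
	const char *start = log, *end = log + strlen(log);
	int i;

	/* positive patterns are matched first, in order, each bumping the start */
	for (i = 0; i < 3; i++) {
		if (pats[i].negative)
			continue;
		m[i].start = find_in_span(&pats[i], start, end);
		m[i].end = m[i].start ? m[i].start + strlen(pats[i].substr) : NULL;
		if (m[i].end)
			start = m[i].end;
	}
	/* the negative pattern is only searched in the span P1.end .. P2.start */
	m[1].start = find_in_span(&pats[1], m[0].end, m[2].start);
	printf("negative pattern %s\n", m[1].start ? "unexpectedly matched" : "absent, check passes");
	return 0;
}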
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 309d9d4a8ace..02a85dda30e6 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -14,12 +14,14 @@
#include <netinet/in.h>
#include <sys/select.h>
#include <sys/socket.h>
+#include <linux/keyctl.h>
#include <sys/un.h>
#include <bpf/btf.h>
#include <time.h>
#include "json_writer.h"
#include "network_helpers.h"
+#include "verification_cert.h"
/* backtrace() and backtrace_symbols_fd() are glibc specific,
* use header file when glibc is available and provide stub
@@ -1928,6 +1930,13 @@ static void free_test_states(void)
}
}
+static __u32 register_session_key(const char *key_data, size_t key_data_size)
+{
+ return syscall(__NR_add_key, "asymmetric", "libbpf_session_key",
+ (const void *)key_data, key_data_size,
+ KEY_SPEC_SESSION_KEYRING);
+}
+
int main(int argc, char **argv)
{
static const struct argp argp = {
@@ -1961,6 +1970,10 @@ int main(int argc, char **argv)
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
+ err = register_session_key((const char *)test_progs_verification_cert,
+ test_progs_verification_cert_len);
+ if (err < 0)
+ return err;
traffic_monitor_set_print(traffic_monitor_print_fn);
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index df2222a1806f..eebfc18cdcd2 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -7,6 +7,7 @@
#include <errno.h>
#include <string.h>
#include <assert.h>
+#include <regex.h>
#include <stdlib.h>
#include <stdarg.h>
#include <time.h>
@@ -546,4 +547,20 @@ extern void test_loader_fini(struct test_loader *tester);
test_loader_fini(&tester); \
})
+struct expect_msg {
+ const char *substr; /* substring match */
+ regex_t regex;
+ bool is_regex;
+ bool on_next_line;
+ bool negative;
+};
+
+struct expected_msgs {
+ struct expect_msg *patterns;
+ size_t cnt;
+};
+
+void validate_msgs(const char *log_buf, struct expected_msgs *msgs,
+ void (*emit_fn)(const char *buf, bool force));
+
#endif /* __TEST_PROGS_H */
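
With struct expect_msg and validate_msgs() now exported by this header, a caller outside test_loader.c can describe expectations directly. A hedged usage sketch, assuming only the declarations above; the pattern strings and the no-op emit callback are illustrative, not taken from any selftest:

#include <stdbool.h>
#include <stddef.h>
#include "test_progs.h"	/* expect_msg, expected_msgs, validate_msgs */

static void emit_noop(const char *buf, bool force)
{
	(void)buf;
	(void)force;
}

static void check_prog_output(const char *stream_text)
{
	struct expect_msg pats[] = {
		{ .substr = "packet processed" },			/* must appear */
		{ .substr = "verifier error", .negative = true },	/* must not appear after it */
	};
	struct expected_msgs msgs = { .patterns = pats, .cnt = 2 };

	/* marks the current subtest as failed if the expectations are not met */
	validate_msgs(stream_text, &msgs, emit_noop);
}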
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index fd2da2234cc9..76568db7a664 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1372,7 +1372,7 @@ run:
} else
fprintf(stderr, "unknown test\n");
out:
- /* Detatch and zero all the maps */
+ /* Detach and zero all the maps */
bpf_prog_detach2(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS);
for (i = 0; i < ARRAY_SIZE(links); i++) {
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 595194453ff8..35b4893ccdf8 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -15,20 +15,18 @@
#include <bpf/libbpf.h>
#include <sys/ioctl.h>
#include <linux/rtnetlink.h>
-#include <signal.h>
#include <linux/perf_event.h>
-#include <linux/err.h>
-#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "test_tcpnotify.h"
-#include "trace_helpers.h"
#include "testing_helpers.h"
#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L)
pthread_t tid;
+static bool exit_thread;
+
int rx_callbacks;
static void dummyfn(void *ctx, int cpu, void *data, __u32 size)
@@ -45,7 +43,7 @@ void tcp_notifier_poller(struct perf_buffer *pb)
{
int err;
- while (1) {
+ while (!exit_thread) {
err = perf_buffer__poll(pb, 100);
if (err < 0 && err != -EINTR) {
printf("failed perf_buffer__poll: %d\n", err);
@@ -78,15 +76,10 @@ int main(int argc, char **argv)
int error = EXIT_FAILURE;
struct bpf_object *obj;
char test_script[80];
- cpu_set_t cpuset;
__u32 key = 0;
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
- CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
- pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
-
cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
@@ -151,6 +144,13 @@ int main(int argc, char **argv)
sleep(10);
+ exit_thread = true;
+ int ret = pthread_join(tid, NULL);
+ if (ret) {
+ printf("FAILED: pthread_join\n");
+ goto err;
+ }
+
if (verify_result(&g)) {
printf("FAILED: Wrong stats Expected %d calls, got %d\n",
g.ncalls, rx_callbacks);
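
The change above converts the endless poller into a cooperative shutdown: set a flag, then pthread_join. A generic sketch of that pattern with a stand-in worker instead of perf_buffer__poll(); note the selftest relies on a plain bool flag, which is sufficient here but not a general-purpose synchronization primitive.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static bool exit_thread;

static void *poller(void *arg)
{
	/* stand-in for perf_buffer__poll(pb, 100) */
	while (!exit_thread)
		usleep(100 * 1000);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	if (pthread_create(&tid, NULL, poller, NULL))
		return 1;

	sleep(1);			/* test traffic would run here */

	exit_thread = true;		/* ask the poller to stop ... */
	if (pthread_join(tid, NULL)) {	/* ... and wait for it to exit */
		printf("FAILED: pthread_join\n");
		return 1;
	}
	return 0;
}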
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 65aafe0003db..62db060298a4 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -241,4 +241,6 @@ done
if [ $failures -eq 0 ]; then
echo "All tests successful!"
+else
+ exit 1
fi
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 5e9f16683be5..16eb37e5bad6 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -399,7 +399,7 @@ int unload_module(const char *name, bool verbose)
return 0;
}
-int load_module(const char *path, bool verbose)
+static int __load_module(const char *path, const char *param_values, bool verbose)
{
int fd;
@@ -411,7 +411,7 @@ int load_module(const char *path, bool verbose)
fprintf(stdout, "Can't find %s kernel module: %d\n", path, -errno);
return -ENOENT;
}
- if (finit_module(fd, "", 0)) {
+ if (finit_module(fd, param_values, 0)) {
fprintf(stdout, "Failed to load %s into the kernel: %d\n", path, -errno);
close(fd);
return -EINVAL;
@@ -423,6 +423,16 @@ int load_module(const char *path, bool verbose)
return 0;
}
+int load_module_params(const char *path, const char *param_values, bool verbose)
+{
+ return __load_module(path, param_values, verbose);
+}
+
+int load_module(const char *path, bool verbose)
+{
+ return __load_module(path, "", verbose);
+}
+
int unload_bpf_testmod(bool verbose)
{
return unload_module("bpf_testmod", verbose);
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 46d7f7089f63..eb20d3772218 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -39,6 +39,7 @@ int kern_sync_rcu(void);
int finit_module(int fd, const char *param_values, int flags);
int delete_module(const char *name, int flags);
int load_module(const char *path, bool verbose);
+int load_module_params(const char *path, const char *param_values, bool verbose);
int unload_module(const char *name, bool verbose);
static inline __u64 get_time_ns(void)
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 81943c6254e6..171987627f3a 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -17,7 +17,9 @@
#include <linux/limits.h>
#include <libelf.h>
#include <gelf.h>
+#include "bpf/hashmap.h"
#include "bpf/libbpf_internal.h"
+#include "bpf_util.h"
#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
@@ -519,3 +521,235 @@ void read_trace_pipe(void)
{
read_trace_pipe_iter(trace_pipe_cb, NULL, 0);
}
+
+static size_t symbol_hash(long key, void *ctx __maybe_unused)
+{
+ return str_hash((const char *) key);
+}
+
+static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return strcmp((const char *) key1, (const char *) key2) == 0;
+}
+
+static bool is_invalid_entry(char *buf, bool kernel)
+{
+ if (kernel && strchr(buf, '['))
+ return true;
+ if (!kernel && !strchr(buf, '['))
+ return true;
+ return false;
+}
+
+static const char * const trace_blacklist[] = {
+ "migrate_disable",
+ "migrate_enable",
+ "rcu_read_unlock_strict",
+ "preempt_count_add",
+ "preempt_count_sub",
+ "__rcu_read_lock",
+ "__rcu_read_unlock",
+ "bpf_get_numa_node_id",
+};
+
+static bool skip_entry(char *name)
+{
+ int i;
+
+ /*
+ * We attach to almost all kernel functions and some of them
+ * will cause 'suspicious RCU usage' when fprobe is attached
+ * to them. Filter out the current culprits: arch_cpu_idle,
+ * default_idle and rcu_* functions.
+ */
+ if (!strcmp(name, "arch_cpu_idle"))
+ return true;
+ if (!strcmp(name, "default_idle"))
+ return true;
+ if (!strncmp(name, "rcu_", 4))
+ return true;
+ if (!strcmp(name, "bpf_dispatcher_xdp_func"))
+ return true;
+ if (!strncmp(name, "__ftrace_invalid_address__",
+ sizeof("__ftrace_invalid_address__") - 1))
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(trace_blacklist); i++) {
+ if (!strcmp(name, trace_blacklist[i]))
+ return true;
+ }
+
+ return false;
+}
+
+/* Do comparison by ignoring '.llvm.<hash>' suffixes. */
+static int compare_name(const char *name1, const char *name2)
+{
+ const char *res1, *res2;
+ int len1, len2;
+
+ res1 = strstr(name1, ".llvm.");
+ res2 = strstr(name2, ".llvm.");
+ len1 = res1 ? res1 - name1 : strlen(name1);
+ len2 = res2 ? res2 - name2 : strlen(name2);
+
+ if (len1 == len2)
+ return strncmp(name1, name2, len1);
+ if (len1 < len2)
+ return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
+ return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
+}
+
+static int load_kallsyms_compare(const void *p1, const void *p2)
+{
+ return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
+}
+
+static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
+{
+ return compare_name(p1, p2->name);
+}
+
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel)
+{
+ size_t cap = 0, cnt = 0;
+ char *name = NULL, *ksym_name, **syms = NULL;
+ struct hashmap *map;
+ struct ksyms *ksyms;
+ struct ksym *ks;
+ char buf[256];
+ FILE *f;
+ int err = 0;
+
+ ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
+ if (!ksyms)
+ return -EINVAL;
+
+ /*
+ * The available_filter_functions contains many duplicates,
+ * but other than that all symbols are usable to trace.
+ * Filter out duplicates by using hashmap__add, which won't
+ * add an existing entry.
+ */
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+
+ if (!f)
+ return -EINVAL;
+
+ map = hashmap__new(symbol_hash, symbol_equal, NULL);
+ if (IS_ERR(map)) {
+ err = libbpf_get_error(map);
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
+ if (!ks) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ ksym_name = ks->name;
+ err = hashmap__add(map, ksym_name, 0);
+ if (err == -EEXIST) {
+ err = 0;
+ continue;
+ }
+ if (err)
+ goto error;
+
+ err = libbpf_ensure_mem((void **) &syms, &cap,
+ sizeof(*syms), cnt + 1);
+ if (err)
+ goto error;
+
+ syms[cnt++] = ksym_name;
+ }
+
+ *symsp = syms;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ hashmap__free(map);
+ if (err)
+ free(syms);
+ return err;
+}
+
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
+{
+ unsigned long *addr, *addrs, *tmp_addrs;
+ int err = 0, max_cnt, inc_cnt;
+ char *name = NULL;
+ size_t cnt = 0;
+ char buf[256];
+ FILE *f;
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
+
+ if (!f)
+ return -ENOENT;
+
+ /* In my local setup the number of entries is 50k+, so initially
+ * allocate space to hold 64k entries. If 64k is not enough, grow
+ * the array by 1k entries at a time.
+ */
+ max_cnt = 65536;
+ inc_cnt = 1024;
+ addrs = malloc(max_cnt * sizeof(long));
+ if (addrs == NULL) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ if (cnt == max_cnt) {
+ max_cnt += inc_cnt;
+ tmp_addrs = realloc(addrs, max_cnt * sizeof(long));
+ if (!tmp_addrs) {
+ err = -ENOMEM;
+ goto error;
+ }
+ addrs = tmp_addrs;
+ }
+
+ addrs[cnt++] = (unsigned long)addr;
+ }
+
+ *addrsp = addrs;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ if (err)
+ free(addrs);
+ return err;
+}
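
A sketch of how the new bpf_get_ksyms() helper might be consumed, for example to feed a kprobe.multi attachment; the ownership comment reflects how the helper above builds its array, but the surrounding program handling is assumed for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <bpf/libbpf.h>
#include "trace_helpers.h"	/* bpf_get_ksyms() */

static struct bpf_link *attach_to_traceable_ksyms(struct bpf_program *prog)
{
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
	struct bpf_link *link;
	char **syms = NULL;
	size_t cnt = 0;

	if (bpf_get_ksyms(&syms, &cnt, true /*kernel*/)) {
		fprintf(stderr, "failed to collect traceable kernel symbols\n");
		return NULL;
	}

	opts.syms = (const char **)syms;
	opts.cnt = cnt;
	link = bpf_program__attach_kprobe_multi_opts(prog, NULL, &opts);

	/* the strings belong to the kallsyms cache; only the array is ours */
	free(syms);
	return link;
}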
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 2ce873c9f9aa..9437bdd4afa5 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -41,4 +41,7 @@ ssize_t get_rel_offset(uintptr_t addr);
int read_build_id(const char *path, char *build_id, size_t size);
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel);
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel);
+
#endif
diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
index b616575c3b00..ce13002c7a19 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -93,7 +93,7 @@
.expected_attach_type = BPF_SK_LOOKUP,
.result = VERBOSE_ACCEPT,
.runs = -1,
- .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8_w=-44\
+ .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8=-44\
2: (c5) if r0 s< 0x0 goto pc+2\
- R0_w=-44",
+ R0=-44",
},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index f3492efc8834..c8d640802cce 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -1375,7 +1375,7 @@
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
/* write into map value */
@@ -1439,7 +1439,7 @@
/* second time with fp-16 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
@@ -1493,7 +1493,7 @@
/* second time with fp-16 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
@@ -2380,7 +2380,7 @@
*/
BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
- /* r9 = *r9 ; verifier get's to this point via two paths:
+ /* r9 = *r9 ; verifier gets to this point via two paths:
* ; (I) one including r9 = r8, verified first;
* ; (II) one excluding r9 = r8, verified next.
* ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id.
diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh
index f2cac42298ba..09179fb551f0 100755
--- a/tools/testing/selftests/bpf/verify_sig_setup.sh
+++ b/tools/testing/selftests/bpf/verify_sig_setup.sh
@@ -32,7 +32,7 @@ usage()
exit 1
}
-setup()
+genkey()
{
local tmp_dir="$1"
@@ -45,9 +45,14 @@ setup()
openssl x509 -in ${tmp_dir}/signing_key.pem -out \
${tmp_dir}/signing_key.der -outform der
+}
- key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s)
+setup()
+{
+ local tmp_dir="$1"
+ genkey "${tmp_dir}"
+ key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s)
keyring_id=$(keyctl newring ebpf_testing_keyring @s)
keyctl link $key_id $keyring_id
}
@@ -105,6 +110,8 @@ main()
if [[ "${action}" == "setup" ]]; then
setup "${tmp_dir}"
+ elif [[ "${action}" == "genkey" ]]; then
+ genkey "${tmp_dir}"
elif [[ "${action}" == "cleanup" ]]; then
cleanup "${tmp_dir}"
elif [[ "${action}" == "fsverity-create-sign" ]]; then
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index d532dd82a3a8..e962f133250c 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -181,6 +181,12 @@ struct var_preset {
bool applied;
};
+enum dump_mode {
+ DUMP_NONE = 0,
+ DUMP_XLATED = 1,
+ DUMP_JITED = 2,
+};
+
static struct env {
char **filenames;
int filename_cnt;
@@ -227,6 +233,7 @@ static struct env {
char orig_cgroup[PATH_MAX];
char stat_cgroup[PATH_MAX];
int memory_peak_fd;
+ __u32 dump_mode;
} env;
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -271,6 +278,7 @@ const char argp_program_doc[] =
enum {
OPT_LOG_FIXED = 1000,
OPT_LOG_SIZE = 1001,
+ OPT_DUMP = 1002,
};
static const struct argp_option opts[] = {
@@ -295,6 +303,7 @@ static const struct argp_option opts[] = {
"Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
{ "set-global-vars", 'G', "GLOBAL", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" },
+ { "dump", OPT_DUMP, "DUMP_MODE", OPTION_ARG_OPTIONAL, "Print BPF program dump (xlated, jited)" },
{},
};
@@ -427,6 +436,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
return err;
}
break;
+ case OPT_DUMP:
+ if (!arg || strcasecmp(arg, "xlated") == 0) {
+ env.dump_mode |= DUMP_XLATED;
+ } else if (strcasecmp(arg, "jited") == 0) {
+ env.dump_mode |= DUMP_JITED;
+ } else {
+ fprintf(stderr, "Unrecognized dump mode '%s'\n", arg);
+ return -EINVAL;
+ }
+ break;
default:
return ARGP_ERR_UNKNOWN;
}
@@ -1554,6 +1573,36 @@ static int parse_rvalue(const char *val, struct rvalue *rvalue)
return 0;
}
+static void dump(__u32 prog_id, enum dump_mode mode, const char *file_name, const char *prog_name)
+{
+ char command[64], buf[4096];
+ FILE *fp;
+ int status;
+
+ status = system("command -v bpftool > /dev/null 2>&1");
+ if (status != 0) {
+ fprintf(stderr, "bpftool is not available, can't print program dump\n");
+ return;
+ }
+ snprintf(command, sizeof(command), "bpftool prog dump %s id %u",
+ mode == DUMP_JITED ? "jited" : "xlated", prog_id);
+ fp = popen(command, "r");
+ if (!fp) {
+ fprintf(stderr, "bpftool failed with error: %d\n", errno);
+ return;
+ }
+
+ printf("DUMP (%s) %s/%s:\n", mode == DUMP_JITED ? "JITED" : "XLATED", file_name, prog_name);
+ while (fgets(buf, sizeof(buf), fp))
+ fputs(buf, stdout);
+ fprintf(stdout, "\n");
+
+ if (ferror(fp))
+ fprintf(stderr, "Failed to dump BPF prog with error: %d\n", errno);
+
+ pclose(fp);
+}
+
static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
{
const char *base_filename = basename(strdupa(filename));
@@ -1630,8 +1679,13 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
memset(&info, 0, info_len);
fd = bpf_program__fd(prog);
- if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0)
+ if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) {
stats->stats[JITED_SIZE] = info.jited_prog_len;
+ if (env.dump_mode & DUMP_JITED)
+ dump(info.id, DUMP_JITED, base_filename, prog_name);
+ if (env.dump_mode & DUMP_XLATED)
+ dump(info.id, DUMP_XLATED, base_filename, prog_name);
+ }
parse_verif_log(buf, buf_sz, stats);
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index 1503a1d2faa0..9ed8c796645d 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -155,7 +155,7 @@ int main(int argc, char **argv)
}
if (!server) {
- /* Only supports IPv4; see hints initiailization above. */
+ /* Only supports IPv4; see hints initialization above. */
if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) {
fprintf(stderr, "Could not resolve %s\n", argv[optind]);
return 1;
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 93c2cc413cfc..48729da142c2 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -93,8 +93,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
/* Refresh the local tail pointer.
* cached_cons is r->size bigger than the real consumer pointer so
* that this addition can be avoided in the more frequently
- * executed code that computs free_entries in the beginning of
- * this function. Without this optimization it whould have been
+ * executed code that computes free_entries in the beginning of
+ * this function. Without this optimization it would have been
* free_entries = r->cached_prod - r->cached_cons + r->size.
*/
r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE);
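
A tiny numeric illustration of the comment fixed above: keeping cached_cons offset by the ring size turns the free-entry computation into a single subtraction. The concrete values are assumptions chosen for the example only:

#include <stdio.h>

int main(void)
{
	unsigned int size = 4, consumer = 9, cached_prod = 10;
	unsigned int cached_cons = consumer + size;	/* 13: kept size ahead of the real pointer */

	/* free_entries = cached_cons - cached_prod, no "+ size" on the fast path */
	printf("free entries: %u\n", cached_cons - cached_prod);	/* prints 3 */
	return 0;
}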
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index a29de0713f19..352adc8df2d1 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -2276,25 +2276,13 @@ static int testapp_xdp_metadata_copy(struct test_spec *test)
{
struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
- struct bpf_map *data_map;
- int count = 0;
- int key = 0;
test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
skel_tx->progs.xsk_xdp_populate_metadata,
skel_rx->maps.xsk, skel_tx->maps.xsk);
test->ifobj_rx->use_metadata = true;
- data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
- if (!data_map || !bpf_map__is_internal(data_map)) {
- ksft_print_msg("Error: could not find bss section of XDP program\n");
- return TEST_FAILURE;
- }
-
- if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) {
- ksft_print_msg("Error: could not update count element\n");
- return TEST_FAILURE;
- }
+ skel_rx->bss->count = 0;
return testapp_validate_traffic(test);
}
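
The hunk above drops the manual .bss map lookup in favour of direct skeleton access. A minimal sketch of that pattern, assuming the generated xsk_xdp_progs skeleton with a global int count, as the selftest uses:

#include "xsk_xdp_progs.skel.h"	/* generated BPF skeleton */

static int reset_xdp_count(void)
{
	struct xsk_xdp_progs *skel;

	skel = xsk_xdp_progs__open_and_load();
	if (!skel)
		return -1;

	/* globals in .bss are plain members of the skeleton's bss struct */
	skel->bss->count = 0;

	xsk_xdp_progs__destroy(skel);
	return 0;
}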
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
index 521ba38f2ddd..df4eea5c192b 100644
--- a/tools/testing/selftests/net/lib/xdp_native.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -14,6 +14,8 @@
#define MAX_PAYLOAD_LEN 5000
#define MAX_HDR_LEN 64
+extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym __weak;
+
enum {
XDP_MODE = 0,
XDP_PORT = 1,
@@ -68,30 +70,57 @@ static void record_stats(struct xdp_md *ctx, __u32 stat_type)
static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
struct udphdr *udph = NULL;
- struct ethhdr *eth = data;
+ void *data, *data_end;
+ struct ethhdr *eth;
+ int err;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = eth = (void *)(long)ctx->data;
if (data + sizeof(*eth) > data_end)
return NULL;
if (eth->h_proto == bpf_htons(ETH_P_IP)) {
- struct iphdr *iph = data + sizeof(*eth);
+ struct iphdr *iph;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
+ sizeof(*udph));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ iph = data + sizeof(*eth);
if (iph + 1 > (struct iphdr *)data_end ||
iph->protocol != IPPROTO_UDP)
return NULL;
- udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
- } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
- struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ udph = data + sizeof(*iph) + sizeof(*eth);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
+ sizeof(*udph));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ ipv6h = data + sizeof(*eth);
if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
ipv6h->nexthdr != IPPROTO_UDP)
return NULL;
- udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ udph = data + sizeof(*ipv6h) + sizeof(*eth);
} else {
return NULL;
}
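
The hunk above repeats one pattern around bpf_xdp_pull_data(): ask for the needed headers to be made linear, then reload data/data_end from the context before every bounds check, since pointers derived earlier are invalidated. A condensed sketch of just that pattern, assuming the usual BPF program headers and the same weak kfunc declaration:

/* a minimal sketch; assumes <vmlinux.h> or the uapi headers providing
 * struct xdp_md and struct ethhdr, plus the __ksym declaration above
 */
static int pull_eth_header(struct xdp_md *ctx, struct ethhdr **ethp)
{
	void *data, *data_end;

	/* make at least the Ethernet header part of the linear area */
	if (bpf_xdp_pull_data(ctx, sizeof(struct ethhdr)))
		return -1;

	/* reload the packet pointers after the pull */
	data = (void *)(long)ctx->data;
	data_end = (void *)(long)ctx->data_end;
	if (data + sizeof(struct ethhdr) > data_end)
		return -1;

	*ethp = data;
	return 0;
}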
@@ -145,17 +174,34 @@ static void swap_machdr(void *data)
static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
struct udphdr *udph = NULL;
- struct ethhdr *eth = data;
+ void *data, *data_end;
+ struct ethhdr *eth;
+ int err;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = eth = (void *)(long)ctx->data;
if (data + sizeof(*eth) > data_end)
return XDP_PASS;
if (eth->h_proto == bpf_htons(ETH_P_IP)) {
- struct iphdr *iph = data + sizeof(*eth);
- __be32 tmp_ip = iph->saddr;
+ struct iphdr *iph;
+ __be32 tmp_ip;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
+ sizeof(*udph));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ iph = data + sizeof(*eth);
if (iph + 1 > (struct iphdr *)data_end ||
iph->protocol != IPPROTO_UDP)
@@ -169,8 +215,10 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
return XDP_PASS;
record_stats(ctx, STATS_RX);
+ eth = data;
swap_machdr((void *)eth);
+ tmp_ip = iph->saddr;
iph->saddr = iph->daddr;
iph->daddr = tmp_ip;
@@ -178,9 +226,19 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
return XDP_TX;
- } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
- struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
struct in6_addr tmp_ipv6;
+ struct ipv6hdr *ipv6h;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
+ sizeof(*udph));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ ipv6h = data + sizeof(*eth);
if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
ipv6h->nexthdr != IPPROTO_UDP)
@@ -194,6 +252,7 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
return XDP_PASS;
record_stats(ctx, STATS_RX);
+ eth = data;
swap_machdr((void *)eth);
__builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6));