-rw-r--r--Documentation/bpf/bpf_iterators.rst2
-rw-r--r--Documentation/bpf/btf.rst25
-rw-r--r--Documentation/bpf/standardization/instruction-set.rst20
-rw-r--r--arch/arm64/include/asm/insn.h12
-rw-r--r--arch/arm64/lib/insn.c29
-rw-r--r--arch/arm64/net/bpf_jit.h20
-rw-r--r--arch/arm64/net/bpf_jit_comp.c92
-rw-r--r--arch/s390/net/bpf_jit_comp.c14
-rw-r--r--arch/x86/net/Makefile2
-rw-r--r--arch/x86/net/bpf_jit_comp.c100
-rw-r--r--arch/x86/net/bpf_timed_may_goto.S55
-rw-r--r--fs/bpf_fs_kfuncs.c225
-rw-r--r--include/linux/bpf-cgroup.h1
-rw-r--r--include/linux/bpf.h31
-rw-r--r--include/linux/bpf_lsm.h18
-rw-r--r--include/linux/bpf_verifier.h32
-rw-r--r--include/linux/btf.h3
-rw-r--r--include/linux/filter.h20
-rw-r--r--include/linux/lsm_hook_defs.h6
-rw-r--r--include/linux/mm.h5
-rw-r--r--include/linux/security.h12
-rw-r--r--include/uapi/linux/bpf.h10
-rw-r--r--include/uapi/linux/btf.h3
-rw-r--r--include/uapi/linux/xattr.h4
-rw-r--r--kernel/bpf/arena.c4
-rw-r--r--kernel/bpf/bpf_cgrp_storage.c11
-rw-r--r--kernel/bpf/bpf_iter.c13
-rw-r--r--kernel/bpf/bpf_lsm.c2
-rw-r--r--kernel/bpf/bpf_struct_ops.c135
-rw-r--r--kernel/bpf/btf.c127
-rw-r--r--kernel/bpf/cgroup.c33
-rw-r--r--kernel/bpf/core.c117
-rw-r--r--kernel/bpf/cpumask.c53
-rw-r--r--kernel/bpf/disasm.c16
-rw-r--r--kernel/bpf/hashtab.c9
-rw-r--r--kernel/bpf/helpers.c123
-rw-r--r--kernel/bpf/preload/bpf_preload_kern.c1
-rw-r--r--kernel/bpf/syscall.c48
-rw-r--r--kernel/bpf/verifier.c1358
-rw-r--r--kernel/trace/bpf_trace.c14
-rw-r--r--mm/memory.c118
-rw-r--r--mm/nommu.c79
-rw-r--r--net/core/filter.c6
-rw-r--r--samples/bpf/Makefile2
-rw-r--r--scripts/Makefile.btf2
-rw-r--r--security/security.c15
-rw-r--r--security/selinux/hooks.c6
-rw-r--r--tools/bpf/bpftool/Makefile7
-rw-r--r--tools/bpf/bpftool/btf.c14
-rw-r--r--tools/bpf/bpftool/btf_dumper.c2
-rw-r--r--tools/bpf/bpftool/cgroup.c2
-rw-r--r--tools/bpf/bpftool/common.c7
-rw-r--r--tools/bpf/bpftool/gen.c12
-rw-r--r--tools/bpf/bpftool/jit_disasm.c3
-rw-r--r--tools/bpf/bpftool/link.c14
-rw-r--r--tools/bpf/bpftool/main.c8
-rw-r--r--tools/bpf/bpftool/map.c14
-rw-r--r--tools/bpf/bpftool/map_perf_ring.c6
-rw-r--r--tools/bpf/bpftool/net.c4
-rw-r--r--tools/bpf/bpftool/netlink_dumper.c6
-rw-r--r--tools/bpf/bpftool/prog.c13
-rw-r--r--tools/bpf/bpftool/tracelog.c2
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c6
-rw-r--r--tools/bpf/runqslower/Makefile3
-rw-r--r--tools/include/uapi/linux/bpf.h10
-rw-r--r--tools/include/uapi/linux/btf.h3
-rw-r--r--tools/lib/bpf/bpf.c3
-rw-r--r--tools/lib/bpf/bpf.h3
-rw-r--r--tools/lib/bpf/btf.c105
-rw-r--r--tools/lib/bpf/btf.h3
-rw-r--r--tools/lib/bpf/btf_dump.c5
-rw-r--r--tools/lib/bpf/libbpf.c237
-rw-r--r--tools/lib/bpf/libbpf.h13
-rw-r--r--tools/lib/bpf/libbpf.map3
-rw-r--r--tools/lib/bpf/libbpf_internal.h1
-rw-r--r--tools/lib/bpf/linker.c2
-rw-r--r--tools/lib/bpf/relo_core.c24
-rw-r--r--tools/lib/bpf/str_error.c2
-rw-r--r--tools/lib/bpf/str_error.h7
-rw-r--r--tools/lib/bpf/usdt.bpf.h32
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch649
-rw-r--r--tools/testing/selftests/bpf/Makefile28
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_spin_lock.h533
-rw-r--r--tools/testing/selftests/bpf/bpf_atomic.h140
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h15
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h5
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.c8
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c111
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_atomics.c66
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c108
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c147
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/compute_live_registers.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_array.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c162
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kernel_flag.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c540
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c176
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netns_cookie.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prepare.c99
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pro_epilogue.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/read_vsyscall.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/setget_sockopt.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/summarization.c144
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_links.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_opts.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c633
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c139
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c638
-rw-r--r--tools/testing/selftests/bpf/prog_tests/token.c97
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_vlan.c175
-rw-r--r--tools/testing/selftests/bpf/progs/arena_atomics.c121
-rw-r--r--tools/testing/selftests/bpf/progs/arena_spin_lock.c51
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tasks.c110
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h22
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h4
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_preorder.c41
-rw-r--r--tools/testing/selftests/bpf/progs/changes_pkt_data.c39
-rw-r--r--tools/testing/selftests/bpf/progs/compute_live_registers.c424
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_dropper.c4
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h10
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h1
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_failure.c38
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_success.c120
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c123
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_noreturns.c15
-rw-r--r--tools/testing/selftests/bpf/progs/irq.c71
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c139
-rw-r--r--tools/testing/selftests/bpf/progs/netns_cookie_prog.c9
-rw-r--r--tools/testing/selftests/bpf/progs/preempt_lock.c68
-rw-r--r--tools/testing/selftests/bpf/progs/prepare.c28
-rw-r--r--tools/testing/selftests/bpf/progs/priv_freplace_prog.c13
-rw-r--r--tools/testing/selftests/bpf/progs/priv_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c88
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c61
-rw-r--r--tools/testing/selftests/bpf/progs/read_vsyscall.c11
-rw-r--r--tools/testing/selftests/bpf/progs/set_global_vars.c47
-rw-r--r--tools/testing/selftests/bpf/progs/strncmp_bench.c5
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c30
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c26
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c34
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c25
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c30
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted.c31
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c39
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c22
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c36
-rw-r--r--tools/testing/selftests/bpf/progs/summarization.c78
-rw-r--r--tools/testing/selftests/bpf/progs/summarization_freplace.c (renamed from tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c)17
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_xattr.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_kernel_flag.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_key.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_ptr_untrusted.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_set_remove_xattr.c133
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock_fail.c69
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_under_cgroup.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_vlan.c20
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c58
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotol.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_load_acquire.c218
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_may_goto_1.c34
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c49
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_stack_ptr.c52
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_store_release.c286
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_map.c88
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c41
-rw-r--r--tools/testing/selftests/bpf/test_btf.h6
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c108
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.h6
-rw-r--r--tools/testing/selftests/bpf/test_loader.c32
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh476
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh156
-rw-r--r--tools/testing/selftests/bpf/test_maps.c9
-rw-r--r--tools/testing/selftests/bpf/test_progs.c72
-rw-r--r--tools/testing/selftests/bpf/test_progs.h8
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh645
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect_multi.sh214
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan.sh233
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh9
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh9
-rw-r--r--tools/testing/selftests/bpf/veristat.c367
-rwxr-xr-xtools/testing/selftests/bpf/with_addr.sh54
-rwxr-xr-xtools/testing/selftests/bpf/with_tunnels.sh36
-rw-r--r--tools/testing/selftests/bpf/xdp_redirect_multi.c226
213 files changed, 10634 insertions, 3488 deletions
diff --git a/Documentation/bpf/bpf_iterators.rst b/Documentation/bpf/bpf_iterators.rst
index 07433915aa41..7f514cb6b052 100644
--- a/Documentation/bpf/bpf_iterators.rst
+++ b/Documentation/bpf/bpf_iterators.rst
@@ -86,7 +86,7 @@ following steps:
The following are a few examples of selftest BPF iterator programs:
* `bpf_iter_tcp4.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c>`_
-* `bpf_iter_task_vma.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c>`_
+* `bpf_iter_task_vmas.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c>`_
* `bpf_iter_task_file.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c>`_
Let us look at ``bpf_iter_task_file.c``, which runs in kernel space:
diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 2478cef758f8..3b60583f5db2 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -102,7 +102,8 @@ Each type contains the following common data::
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union, fwd, enum and enum64.
+ * struct, union, enum, fwd, enum64,
+ * decl_tag and type_tag
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT, UNION and ENUM64.
@@ -478,7 +479,7 @@ No additional type data follow ``btf_type``.
``struct btf_type`` encoding requirement:
* ``name_off``: offset to a non-empty string
- * ``info.kind_flag``: 0
+ * ``info.kind_flag``: 0 or 1
* ``info.kind``: BTF_KIND_DECL_TAG
* ``info.vlen``: 0
* ``type``: ``struct``, ``union``, ``func``, ``var`` or ``typedef``
@@ -489,7 +490,6 @@ No additional type data follow ``btf_type``.
__u32 component_idx;
};
-The ``name_off`` encodes btf_decl_tag attribute string.
The ``type`` should be ``struct``, ``union``, ``func``, ``var`` or ``typedef``.
For ``var`` or ``typedef`` type, ``btf_decl_tag.component_idx`` must be ``-1``.
For the other three types, if the btf_decl_tag attribute is
@@ -499,12 +499,21 @@ the attribute is applied to a ``struct``/``union`` member or
a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a
valid index (starting from 0) pointing to a member or an argument.
+If ``info.kind_flag`` is 0, then this is a normal decl tag, and the
+``name_off`` encodes btf_decl_tag attribute string.
+
+If ``info.kind_flag`` is 1, then the decl tag represents an arbitrary
+__attribute__. In this case, ``name_off`` encodes a string
+representing the attribute-list of the attribute specifier. For
+example, for an ``__attribute__((aligned(4)))`` the string's contents
+are ``aligned(4)``.
+
2.2.18 BTF_KIND_TYPE_TAG
~~~~~~~~~~~~~~~~~~~~~~~~
``struct btf_type`` encoding requirement:
* ``name_off``: offset to a non-empty string
- * ``info.kind_flag``: 0
+ * ``info.kind_flag``: 0 or 1
* ``info.kind``: BTF_KIND_TYPE_TAG
* ``info.vlen``: 0
* ``type``: the type with ``btf_type_tag`` attribute
@@ -522,6 +531,14 @@ type_tag, then zero or more const/volatile/restrict/typedef
and finally the base type. The base type is one of
int, ptr, array, struct, union, enum, func_proto and float types.
+Similarly to decl tags, if the ``info.kind_flag`` is 0, then this is a
+normal type tag, and the ``name_off`` encodes btf_type_tag attribute
+string.
+
+If ``info.kind_flag`` is 1, then the type tag represents an arbitrary
+__attribute__, and the ``name_off`` encodes a string representing the
+attribute-list of the attribute specifier.
+
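To make the two encodings concrete, a small illustrative C fragment follows
(whether a given attribute is recorded in BTF at all, and with which
``kind_flag``, depends on the compiler and its flags)::

    struct pkt {
            /* BTF_KIND_DECL_TAG, kind_flag = 0: name_off is the tag
             * string "user_data".
             */
            int len __attribute__((btf_decl_tag("user_data")));

            /* BTF_KIND_DECL_TAG, kind_flag = 1: name_off is the
             * attribute-list string "aligned(4)".
             */
            char payload[8] __attribute__((aligned(4)));
    };

    /* BTF_KIND_TYPE_TAG, kind_flag = 0: name_off is "user". */
    int __attribute__((btf_type_tag("user"))) *uptr;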
2.2.19 BTF_KIND_ENUM64
~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst
index ab820d565052..fbe975585236 100644
--- a/Documentation/bpf/standardization/instruction-set.rst
+++ b/Documentation/bpf/standardization/instruction-set.rst
@@ -324,34 +324,42 @@ register.
.. table:: Arithmetic instructions
- ===== ===== ======= ==========================================================
+ ===== ===== ======= ===================================================================================
name code offset description
- ===== ===== ======= ==========================================================
+ ===== ===== ======= ===================================================================================
ADD 0x0 0 dst += src
SUB 0x1 0 dst -= src
MUL 0x2 0 dst \*= src
DIV 0x3 0 dst = (src != 0) ? (dst / src) : 0
- SDIV 0x3 1 dst = (src != 0) ? (dst s/ src) : 0
+ SDIV 0x3 1 dst = (src == 0) ? 0 : ((src == -1 && dst == LLONG_MIN) ? LLONG_MIN : (dst s/ src))
OR 0x4 0 dst \|= src
AND 0x5 0 dst &= src
LSH 0x6 0 dst <<= (src & mask)
RSH 0x7 0 dst >>= (src & mask)
NEG 0x8 0 dst = -dst
MOD 0x9 0 dst = (src != 0) ? (dst % src) : dst
- SMOD 0x9 1 dst = (src != 0) ? (dst s% src) : dst
+ SMOD 0x9 1 dst = (src == 0) ? dst : ((src == -1 && dst == LLONG_MIN) ? 0: (dst s% src))
XOR 0xa 0 dst ^= src
MOV 0xb 0 dst = src
MOVSX 0xb 8/16/32 dst = (s8,s16,s32)src
ARSH 0xc 0 :term:`sign extending<Sign Extend>` dst >>= (src & mask)
END 0xd 0 byte swap operations (see `Byte swap instructions`_ below)
- ===== ===== ======= ==========================================================
+ ===== ===== ======= ===================================================================================
Underflow and overflow are allowed during arithmetic operations, meaning
the 64-bit or 32-bit value will wrap. If BPF program execution would
result in division by zero, the destination register is instead set to zero.
+Otherwise, for ``ALU64``, if execution would result in ``LLONG_MIN``
+divided by -1, the destination register is instead set to ``LLONG_MIN``. For
+``ALU``, if execution would result in ``INT_MIN`` divided by -1, the
+destination register is instead set to ``INT_MIN``.
+
If execution would result in modulo by zero, for ``ALU64`` the value of
the destination register is unchanged whereas for ``ALU`` the upper
-32 bits of the destination register are zeroed.
+32 bits of the destination register are zeroed. Otherwise, for ``ALU64``,
+if execution would result in ``LLONG_MIN`` modulo -1, the destination
+register is instead set to 0. For ``ALU``, if execution would result in
+``INT_MIN`` modulo -1, the destination register is instead set to 0.
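The division and modulo rules above can be summarized by the following C
sketch of the ``ALU64`` case (the helper names are illustrative; the explicit
guards also keep the host C expressions free of undefined behavior)::

    #include <limits.h>

    static long long alu64_sdiv(long long dst, long long src)
    {
            if (src == 0)
                    return 0;                 /* division by zero */
            if (src == -1 && dst == LLONG_MIN)
                    return LLONG_MIN;         /* overflow case */
            return dst / src;
    }

    static long long alu64_smod(long long dst, long long src)
    {
            if (src == 0)
                    return dst;               /* modulo by zero */
            if (src == -1 && dst == LLONG_MIN)
                    return 0;                 /* overflow case */
            return dst % src;
    }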
``{ADD, X, ALU}``, where 'code' = ``ADD``, 'source' = ``X``, and 'class' = ``ALU``, means::
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index e390c432f546..39577f1d079a 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -188,8 +188,10 @@ enum aarch64_insn_ldst_type {
AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
+ AARCH64_INSN_LDST_LOAD_ACQ,
AARCH64_INSN_LDST_LOAD_EX,
AARCH64_INSN_LDST_LOAD_ACQ_EX,
+ AARCH64_INSN_LDST_STORE_REL,
AARCH64_INSN_LDST_STORE_EX,
AARCH64_INSN_LDST_STORE_REL_EX,
AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET,
@@ -351,8 +353,10 @@ __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
-__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
-__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
+__AARCH64_INSN_FUNCS(load_acq, 0x3FDFFC00, 0x08DFFC00)
+__AARCH64_INSN_FUNCS(store_rel, 0x3FDFFC00, 0x089FFC00)
+__AARCH64_INSN_FUNCS(load_ex, 0x3FC00000, 0x08400000)
+__AARCH64_INSN_FUNCS(store_ex, 0x3FC00000, 0x08000000)
__AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400)
__AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000)
__AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000)
@@ -602,6 +606,10 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
int offset,
enum aarch64_insn_variant variant,
enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register base,
enum aarch64_insn_register state,
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index b008a9b46a7f..9bef696e2230 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -540,6 +540,35 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
offset >> shift);
}
+u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type)
+{
+ u32 insn;
+
+ switch (type) {
+ case AARCH64_INSN_LDST_LOAD_ACQ:
+ insn = aarch64_insn_get_load_acq_value();
+ break;
+ case AARCH64_INSN_LDST_STORE_REL:
+ insn = aarch64_insn_get_store_rel_value();
+ break;
+ default:
+ pr_err("%s: unknown load-acquire/store-release encoding %d\n",
+ __func__, type);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_ldst_size(size, insn);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+ reg);
+
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+ base);
+}
+
u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register base,
enum aarch64_insn_register state,
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index b22ab2f97a30..a3b0e693a125 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -119,6 +119,26 @@
aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \
AARCH64_INSN_LDST_STORE_REL_EX)
+/* Load-acquire & store-release */
+#define A64_LDAR(Rt, Rn, size) \
+ aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
+ AARCH64_INSN_LDST_LOAD_ACQ)
+#define A64_STLR(Rt, Rn, size) \
+ aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
+ AARCH64_INSN_LDST_STORE_REL)
+
+/* Rt = [Rn] (load acquire) */
+#define A64_LDARB(Wt, Xn) A64_LDAR(Wt, Xn, 8)
+#define A64_LDARH(Wt, Xn) A64_LDAR(Wt, Xn, 16)
+#define A64_LDAR32(Wt, Xn) A64_LDAR(Wt, Xn, 32)
+#define A64_LDAR64(Xt, Xn) A64_LDAR(Xt, Xn, 64)
+
+/* [Rn] = Rt (store release) */
+#define A64_STLRB(Wt, Xn) A64_STLR(Wt, Xn, 8)
+#define A64_STLRH(Wt, Xn) A64_STLR(Wt, Xn, 16)
+#define A64_STLR32(Wt, Xn) A64_STLR(Wt, Xn, 32)
+#define A64_STLR64(Xt, Xn) A64_STLR(Xt, Xn, 64)
+
/*
* LSE atomics
*
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 8446848edddb..70d7c89d3ac9 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -272,7 +272,7 @@ static inline void emit_a64_add_i(const bool is64, const int dst, const int src,
{
if (is_addsub_imm(imm)) {
emit(A64_ADD_I(is64, dst, src, imm), ctx);
- } else if (is_addsub_imm(-imm)) {
+ } else if (is_addsub_imm(-(u32)imm)) {
emit(A64_SUB_I(is64, dst, src, -imm), ctx);
} else {
emit_a64_mov_i(is64, tmp, imm, ctx);
@@ -647,6 +647,81 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
return 0;
}
+static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+ const s32 imm = insn->imm;
+ const s16 off = insn->off;
+ const u8 code = insn->code;
+ const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
+ const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+ const u8 dst = bpf2a64[insn->dst_reg];
+ const u8 src = bpf2a64[insn->src_reg];
+ const u8 tmp = bpf2a64[TMP_REG_1];
+ u8 reg;
+
+ switch (imm) {
+ case BPF_LOAD_ACQ:
+ reg = src;
+ break;
+ case BPF_STORE_REL:
+ reg = dst;
+ break;
+ default:
+ pr_err_once("unknown atomic load/store op code %02x\n", imm);
+ return -EINVAL;
+ }
+
+ if (off) {
+ emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
+ reg = tmp;
+ }
+ if (arena) {
+ emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
+ reg = tmp;
+ }
+
+ switch (imm) {
+ case BPF_LOAD_ACQ:
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ emit(A64_LDARB(dst, reg), ctx);
+ break;
+ case BPF_H:
+ emit(A64_LDARH(dst, reg), ctx);
+ break;
+ case BPF_W:
+ emit(A64_LDAR32(dst, reg), ctx);
+ break;
+ case BPF_DW:
+ emit(A64_LDAR64(dst, reg), ctx);
+ break;
+ }
+ break;
+ case BPF_STORE_REL:
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ emit(A64_STLRB(src, reg), ctx);
+ break;
+ case BPF_H:
+ emit(A64_STLRH(src, reg), ctx);
+ break;
+ case BPF_W:
+ emit(A64_STLR32(src, reg), ctx);
+ break;
+ case BPF_DW:
+ emit(A64_STLR64(src, reg), ctx);
+ break;
+ }
+ break;
+ default:
+ pr_err_once("unexpected atomic load/store op code %02x\n",
+ imm);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_ARM64_LSE_ATOMICS
static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
@@ -1159,7 +1234,7 @@ emit_bswap_uxt:
case BPF_ALU64 | BPF_SUB | BPF_K:
if (is_addsub_imm(imm)) {
emit(A64_SUB_I(is64, dst, dst, imm), ctx);
- } else if (is_addsub_imm(-imm)) {
+ } else if (is_addsub_imm(-(u32)imm)) {
emit(A64_ADD_I(is64, dst, dst, -imm), ctx);
} else {
emit_a64_mov_i(is64, tmp, imm, ctx);
@@ -1330,7 +1405,7 @@ emit_cond_jmp:
case BPF_JMP32 | BPF_JSLE | BPF_K:
if (is_addsub_imm(imm)) {
emit(A64_CMP_I(is64, dst, imm), ctx);
- } else if (is_addsub_imm(-imm)) {
+ } else if (is_addsub_imm(-(u32)imm)) {
emit(A64_CMN_I(is64, dst, -imm), ctx);
} else {
emit_a64_mov_i(is64, tmp, imm, ctx);
@@ -1641,11 +1716,17 @@ emit_cond_jmp:
return ret;
break;
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
- if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+ if (bpf_atomic_is_load_store(insn))
+ ret = emit_atomic_ld_st(insn, ctx);
+ else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
ret = emit_lse_atomic(insn, ctx);
else
ret = emit_ll_sc_atomic(insn, ctx);
@@ -2669,7 +2750,8 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
switch (insn->code) {
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
- if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+ if (!bpf_atomic_is_load_store(insn) &&
+ !cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
return false;
}
return true;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 9d440a0b729e..0776dfde2dba 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2919,10 +2919,16 @@ bool bpf_jit_supports_arena(void)
bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
- /*
- * Currently the verifier uses this function only to check which
- * atomic stores to arena are supported, and they all are.
- */
+ if (!in_arena)
+ return true;
+ switch (insn->code) {
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_ATOMIC | BPF_DW:
+ if (bpf_atomic_is_load_store(insn))
+ return false;
+ }
return true;
}
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index 383c87300b0d..dddbefc0f439 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -6,5 +6,5 @@
ifeq ($(CONFIG_X86_32),y)
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
else
- obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+ obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_timed_may_goto.o
endif
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 72776dcb75aa..9e5fe2ba858f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1250,8 +1250,8 @@ static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm);
}
-static int emit_atomic(u8 **pprog, u8 atomic_op,
- u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
+static int emit_atomic_rmw(u8 **pprog, u32 atomic_op,
+ u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
{
u8 *prog = *pprog;
@@ -1291,8 +1291,9 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
return 0;
}
-static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
- u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+static int emit_atomic_rmw_index(u8 **pprog, u32 atomic_op, u32 size,
+ u32 dst_reg, u32 src_reg, u32 index_reg,
+ int off)
{
u8 *prog = *pprog;
@@ -1305,7 +1306,7 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
EMIT1(add_3mod(0x48, dst_reg, src_reg, index_reg));
break;
default:
- pr_err("bpf_jit: 1 and 2 byte atomics are not supported\n");
+ pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
return -EFAULT;
}
@@ -1339,6 +1340,49 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
return 0;
}
+static int emit_atomic_ld_st(u8 **pprog, u32 atomic_op, u32 dst_reg,
+ u32 src_reg, s16 off, u8 bpf_size)
+{
+ switch (atomic_op) {
+ case BPF_LOAD_ACQ:
+ /* dst_reg = smp_load_acquire(src_reg + off16) */
+ emit_ldx(pprog, bpf_size, dst_reg, src_reg, off);
+ break;
+ case BPF_STORE_REL:
+ /* smp_store_release(dst_reg + off16, src_reg) */
+ emit_stx(pprog, bpf_size, dst_reg, src_reg, off);
+ break;
+ default:
+ pr_err("bpf_jit: unknown atomic load/store opcode %02x\n",
+ atomic_op);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size,
+ u32 dst_reg, u32 src_reg, u32 index_reg,
+ int off)
+{
+ switch (atomic_op) {
+ case BPF_LOAD_ACQ:
+ /* dst_reg = smp_load_acquire(src_reg + idx_reg + off16) */
+ emit_ldx_index(pprog, size, dst_reg, src_reg, index_reg, off);
+ break;
+ case BPF_STORE_REL:
+ /* smp_store_release(dst_reg + idx_reg + off16, src_reg) */
+ emit_stx_index(pprog, size, dst_reg, src_reg, index_reg, off);
+ break;
+ default:
+ pr_err("bpf_jit: unknown atomic load/store opcode %02x\n",
+ atomic_op);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
#define DONT_CLEAR 1
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
@@ -2121,6 +2165,13 @@ populate_extable:
}
break;
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
+ if (!bpf_atomic_is_load_store(insn)) {
+ pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
+ return -EFAULT;
+ }
+ fallthrough;
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
if (insn->imm == (BPF_AND | BPF_FETCH) ||
@@ -2156,10 +2207,10 @@ populate_extable:
EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)],
add_2reg(0xC0, AUX_REG, real_src_reg));
/* Attempt to swap in new value */
- err = emit_atomic(&prog, BPF_CMPXCHG,
- real_dst_reg, AUX_REG,
- insn->off,
- BPF_SIZE(insn->code));
+ err = emit_atomic_rmw(&prog, BPF_CMPXCHG,
+ real_dst_reg, AUX_REG,
+ insn->off,
+ BPF_SIZE(insn->code));
if (WARN_ON(err))
return err;
/*
@@ -2174,17 +2225,35 @@ populate_extable:
break;
}
- err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
- insn->off, BPF_SIZE(insn->code));
+ if (bpf_atomic_is_load_store(insn))
+ err = emit_atomic_ld_st(&prog, insn->imm, dst_reg, src_reg,
+ insn->off, BPF_SIZE(insn->code));
+ else
+ err = emit_atomic_rmw(&prog, insn->imm, dst_reg, src_reg,
+ insn->off, BPF_SIZE(insn->code));
if (err)
return err;
break;
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
+ if (!bpf_atomic_is_load_store(insn)) {
+ pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
+ return -EFAULT;
+ }
+ fallthrough;
case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
start_of_ldx = prog;
- err = emit_atomic_index(&prog, insn->imm, BPF_SIZE(insn->code),
- dst_reg, src_reg, X86_REG_R12, insn->off);
+
+ if (bpf_atomic_is_load_store(insn))
+ err = emit_atomic_ld_st_index(&prog, insn->imm,
+ BPF_SIZE(insn->code), dst_reg,
+ src_reg, X86_REG_R12, insn->off);
+ else
+ err = emit_atomic_rmw_index(&prog, insn->imm, BPF_SIZE(insn->code),
+ dst_reg, src_reg, X86_REG_R12,
+ insn->off);
if (err)
return err;
goto populate_extable;
@@ -3801,3 +3870,8 @@ u64 bpf_arch_uaddress_limit(void)
{
return 0;
}
+
+bool bpf_jit_supports_timed_may_goto(void)
+{
+ return true;
+}
diff --git a/arch/x86/net/bpf_timed_may_goto.S b/arch/x86/net/bpf_timed_may_goto.S
new file mode 100644
index 000000000000..54c690cae190
--- /dev/null
+++ b/arch/x86/net/bpf_timed_may_goto.S
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <asm/nospec-branch.h>
+
+ .code64
+ .section .text, "ax"
+
+SYM_FUNC_START(arch_bpf_timed_may_goto)
+ ANNOTATE_NOENDBR
+
+ /*
+ * r10 passes us stack depth, load the pointer to count and timestamp
+ * into r10 by adding it to BPF frame pointer.
+ */
+ leaq (%rbp, %r10, 1), %r10
+
+ /* Setup frame. */
+ pushq %rbp
+ movq %rsp, %rbp
+
+ /* Save r0-r5. */
+ pushq %rax
+ pushq %rdi
+ pushq %rsi
+ pushq %rdx
+ pushq %rcx
+ pushq %r8
+
+ /*
+ * r10 has the pointer to count and timestamp, pass it as first
+ * argument.
+ */
+ movq %r10, %rdi
+
+ /* Emit call depth accounting for call below. */
+ CALL_DEPTH_ACCOUNT
+ call bpf_check_timed_may_goto
+
+ /* BPF_REG_AX=r10 will be stored into count, so move return value to it. */
+ movq %rax, %r10
+
+ /* Restore r5-r0. */
+ popq %r8
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ popq %rax
+
+ leave
+ RET
+SYM_FUNC_END(arch_bpf_timed_may_goto)
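The assembly above only saves the caller-saved BPF argument registers and
forwards the count/timestamp pointer; the decision logic lives in the C
callee. A rough, illustrative sketch of that contract (not the kernel's
actual implementation; the time source and the 250us budget are assumptions
made for this example)::

    /* Returns the value the stub above moves into BPF_REG_AX, i.e. the
     * refreshed may_goto budget; returning 0 makes the loop terminate.
     */
    static u64 sketch_check_timed_may_goto(struct bpf_timed_may_goto *p)
    {
            u64 now = ktime_get_mono_fast_ns();

            if (!p->timestamp) {
                    p->timestamp = now;
                    return BPF_MAX_TIMED_LOOPS;   /* start the clock, refill */
            }
            /* Assumed policy: give up after roughly 250us of wall time. */
            if (now - p->timestamp > 250 * NSEC_PER_USEC)
                    return 0;
            return BPF_MAX_TIMED_LOOPS;
    }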
diff --git a/fs/bpf_fs_kfuncs.c b/fs/bpf_fs_kfuncs.c
index 3fe9f59ef867..08412532db1b 100644
--- a/fs/bpf_fs_kfuncs.c
+++ b/fs/bpf_fs_kfuncs.c
@@ -2,10 +2,12 @@
/* Copyright (c) 2024 Google LLC. */
#include <linux/bpf.h>
+#include <linux/bpf_lsm.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/dcache.h>
#include <linux/fs.h>
+#include <linux/fsnotify.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/xattr.h>
@@ -93,6 +95,24 @@ __bpf_kfunc int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz)
return len;
}
+static bool match_security_bpf_prefix(const char *name__str)
+{
+ return !strncmp(name__str, XATTR_NAME_BPF_LSM, XATTR_NAME_BPF_LSM_LEN);
+}
+
+static int bpf_xattr_read_permission(const char *name, struct inode *inode)
+{
+ if (WARN_ON(!inode))
+ return -EINVAL;
+
+ /* Allow reading xattr with user. and security.bpf. prefix */
+ if (strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+ !match_security_bpf_prefix(name))
+ return -EPERM;
+
+ return inode_permission(&nop_mnt_idmap, inode, MAY_READ);
+}
+
/**
* bpf_get_dentry_xattr - get xattr of a dentry
* @dentry: dentry to get xattr from
@@ -101,9 +121,10 @@ __bpf_kfunc int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz)
*
* Get xattr *name__str* of *dentry* and store the output in *value_ptr*.
*
- * For security reasons, only *name__str* with prefix "user." is allowed.
+ * For security reasons, only *name__str* with prefixes "user." or
+ * "security.bpf." are allowed.
*
- * Return: 0 on success, a negative value on error.
+ * Return: length of the xattr value on success, a negative value on error.
*/
__bpf_kfunc int bpf_get_dentry_xattr(struct dentry *dentry, const char *name__str,
struct bpf_dynptr *value_p)
@@ -114,18 +135,12 @@ __bpf_kfunc int bpf_get_dentry_xattr(struct dentry *dentry, const char *name__st
void *value;
int ret;
- if (WARN_ON(!inode))
- return -EINVAL;
-
- if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
- return -EPERM;
-
value_len = __bpf_dynptr_size(value_ptr);
value = __bpf_dynptr_data_rw(value_ptr, value_len);
if (!value)
return -EINVAL;
- ret = inode_permission(&nop_mnt_idmap, inode, MAY_READ);
+ ret = bpf_xattr_read_permission(name__str, inode);
if (ret)
return ret;
return __vfs_getxattr(dentry, inode, name__str, value, value_len);
@@ -139,9 +154,10 @@ __bpf_kfunc int bpf_get_dentry_xattr(struct dentry *dentry, const char *name__st
*
* Get xattr *name__str* of *file* and store the output in *value_ptr*.
*
- * For security reasons, only *name__str* with prefix "user." is allowed.
+ * For security reasons, only *name__str* with prefixes "user." or
+ * "security.bpf." are allowed.
*
- * Return: 0 on success, a negative value on error.
+ * Return: length of the xattr value on success, a negative value on error.
*/
__bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
struct bpf_dynptr *value_p)
@@ -154,6 +170,160 @@ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
__bpf_kfunc_end_defs();
+static int bpf_xattr_write_permission(const char *name, struct inode *inode)
+{
+ if (WARN_ON(!inode))
+ return -EINVAL;
+
+ /* Only allow setting and removing security.bpf. xattrs */
+ if (!match_security_bpf_prefix(name))
+ return -EPERM;
+
+ return inode_permission(&nop_mnt_idmap, inode, MAY_WRITE);
+}
+
+/**
+ * bpf_set_dentry_xattr_locked - set a xattr of a dentry
+ * @dentry: dentry to set the xattr on
+ * @name__str: name of the xattr
+ * @value_p: xattr value
+ * @flags: flags to pass into filesystem operations
+ *
+ * Set xattr *name__str* of *dentry* to the value in *value_ptr*.
+ *
+ * For security reasons, only *name__str* with prefix "security.bpf."
+ * is allowed.
+ *
+ * The caller already locked dentry->d_inode.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str,
+ const struct bpf_dynptr *value_p, int flags)
+{
+
+ struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p;
+ struct inode *inode = d_inode(dentry);
+ const void *value;
+ u32 value_len;
+ int ret;
+
+ value_len = __bpf_dynptr_size(value_ptr);
+ value = __bpf_dynptr_data(value_ptr, value_len);
+ if (!value)
+ return -EINVAL;
+
+ ret = bpf_xattr_write_permission(name__str, inode);
+ if (ret)
+ return ret;
+
+ ret = __vfs_setxattr(&nop_mnt_idmap, dentry, inode, name__str,
+ value, value_len, flags);
+ if (!ret) {
+ fsnotify_xattr(dentry);
+
+ /* This xattr is set by BPF LSM, so we do not call
+ * security_inode_post_setxattr. Otherwise, we would
+ * risk deadlocks by calling back to the same kfunc.
+ *
+ * This is the same as security_inode_setsecurity().
+ */
+ }
+ return ret;
+}
+
+/**
+ * bpf_remove_dentry_xattr_locked - remove a xattr of a dentry
+ * @dentry: dentry to remove the xattr from
+ * @name__str: name of the xattr
+ *
+ * Remove xattr *name__str* of *dentry*.
+ *
+ * For security reasons, only *name__str* with prefix "security.bpf."
+ * is allowed.
+ *
+ * The caller already locked dentry->d_inode.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str)
+{
+ struct inode *inode = d_inode(dentry);
+ int ret;
+
+ ret = bpf_xattr_write_permission(name__str, inode);
+ if (ret)
+ return ret;
+
+ ret = __vfs_removexattr(&nop_mnt_idmap, dentry, name__str);
+ if (!ret) {
+ fsnotify_xattr(dentry);
+
+ /* This xattr is removed by BPF LSM, so we do not call
+ * security_inode_post_removexattr. Otherwise, we would
+ * risk deadlocks by calling back to the same kfunc.
+ */
+ }
+ return ret;
+}
+
+__bpf_kfunc_start_defs();
+
+/**
+ * bpf_set_dentry_xattr - set a xattr of a dentry
+ * @dentry: dentry to set the xattr on
+ * @name__str: name of the xattr
+ * @value_p: xattr value
+ * @flags: flags to pass into filesystem operations
+ *
+ * Set xattr *name__str* of *dentry* to the value in *value_ptr*.
+ *
+ * For security reasons, only *name__str* with prefix "security.bpf."
+ * is allowed.
+ *
+ * The caller has not locked dentry->d_inode.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+__bpf_kfunc int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str,
+ const struct bpf_dynptr *value_p, int flags)
+{
+ struct inode *inode = d_inode(dentry);
+ int ret;
+
+ inode_lock(inode);
+ ret = bpf_set_dentry_xattr_locked(dentry, name__str, value_p, flags);
+ inode_unlock(inode);
+ return ret;
+}
+
+/**
+ * bpf_remove_dentry_xattr - remove a xattr of a dentry
+ * @dentry: dentry to remove the xattr from
+ * @name__str: name of the xattr
+ *
+ * Remove xattr *name__str* of *dentry*.
+ *
+ * For security reasons, only *name__str* with prefix "security.bpf."
+ * is allowed.
+ *
+ * The caller has not locked dentry->d_inode.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+__bpf_kfunc int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str)
+{
+ struct inode *inode = d_inode(dentry);
+ int ret;
+
+ inode_lock(inode);
+ ret = bpf_remove_dentry_xattr_locked(dentry, name__str);
+ inode_unlock(inode);
+ return ret;
+}
+
+__bpf_kfunc_end_defs();
+
BTF_KFUNCS_START(bpf_fs_kfunc_set_ids)
BTF_ID_FLAGS(func, bpf_get_task_exe_file,
KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)
@@ -161,6 +331,8 @@ BTF_ID_FLAGS(func, bpf_put_file, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_path_d_path, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_get_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_set_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_remove_dentry_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_fs_kfunc_set_ids)
static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
@@ -171,6 +343,37 @@ static int bpf_fs_kfuncs_filter(const struct bpf_prog *prog, u32 kfunc_id)
return -EACCES;
}
+/* The bpf_[set|remove]_dentry_xattr.* kfuncs have KF_TRUSTED_ARGS and
+ * KF_SLEEPABLE, so they are only available to sleepable hooks with
+ * dentry arguments.
+ *
+ * Setting and removing xattr requires exclusive lock on dentry->d_inode.
+ * Some hooks already locked d_inode, while some hooks have not locked
+ * d_inode. Therefore, we need different kfuncs for different hooks.
+ * Specifically, hooks in the following list (d_inode_locked_hooks)
+ * should call bpf_[set|remove]_dentry_xattr_locked; while other hooks
+ * should call bpf_[set|remove]_dentry_xattr.
+ */
+BTF_SET_START(d_inode_locked_hooks)
+BTF_ID(func, bpf_lsm_inode_post_removexattr)
+BTF_ID(func, bpf_lsm_inode_post_setattr)
+BTF_ID(func, bpf_lsm_inode_post_setxattr)
+BTF_ID(func, bpf_lsm_inode_removexattr)
+BTF_ID(func, bpf_lsm_inode_rmdir)
+BTF_ID(func, bpf_lsm_inode_setattr)
+BTF_ID(func, bpf_lsm_inode_setxattr)
+BTF_ID(func, bpf_lsm_inode_unlink)
+#ifdef CONFIG_SECURITY_PATH
+BTF_ID(func, bpf_lsm_path_unlink)
+BTF_ID(func, bpf_lsm_path_rmdir)
+#endif /* CONFIG_SECURITY_PATH */
+BTF_SET_END(d_inode_locked_hooks)
+
+bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog)
+{
+ return btf_id_set_contains(&d_inode_locked_hooks, prog->aux->attach_btf_id);
+}
+
static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
.owner = THIS_MODULE,
.set = &bpf_fs_kfunc_set_ids,
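As a usage illustration, a sleepable BPF LSM program can combine these kfuncs
with a "security.bpf."-prefixed name. The sketch below is untested; the hook
and the xattr name ``security.bpf.protect`` are chosen purely for
demonstration::

    // SPDX-License-Identifier: GPL-2.0
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    /* Kfunc prototype matching the kernel-side definition above. */
    extern int bpf_get_dentry_xattr(struct dentry *dentry, const char *name__str,
                                    struct bpf_dynptr *value_p) __ksym __weak;

    char _license[] SEC("license") = "GPL";

    SEC("lsm.s/inode_unlink")
    int BPF_PROG(deny_protected_unlink, struct inode *dir, struct dentry *victim)
    {
            struct bpf_dynptr value_ptr;
            char buf[8];

            bpf_dynptr_from_mem(buf, sizeof(buf), 0, &value_ptr);
            /* A non-negative return is the xattr's length, i.e. it exists. */
            if (bpf_get_dentry_xattr(victim, "security.bpf.protect", &value_ptr) >= 0)
                    return -1;      /* -EPERM: refuse to unlink marked files */
            return 0;
    }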
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 7fc69083e745..9de7adb68294 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -111,6 +111,7 @@ struct bpf_prog_list {
struct bpf_prog *prog;
struct bpf_cgroup_link *link;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+ u32 flags;
};
int cgroup_bpf_inherit(struct cgroup *cgrp);
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f3f50e29d639..111bea4e507f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -968,6 +968,7 @@ struct bpf_insn_access_aux {
struct {
struct btf *btf;
u32 btf_id;
+ u32 ref_obj_id;
};
};
struct bpf_verifier_log *log; /* for verbose logs */
@@ -990,6 +991,21 @@ static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}
+/* Given a BPF_ATOMIC instruction @atomic_insn, return true if it is an
+ * atomic load or store, and false if it is a read-modify-write instruction.
+ */
+static inline bool
+bpf_atomic_is_load_store(const struct bpf_insn *atomic_insn)
+{
+ switch (atomic_insn->imm) {
+ case BPF_LOAD_ACQ:
+ case BPF_STORE_REL:
+ return true;
+ default:
+ return false;
+ }
+}
+
struct bpf_prog_ops {
int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
@@ -1481,6 +1497,8 @@ struct bpf_ctx_arg_aux {
enum bpf_reg_type reg_type;
struct btf *btf;
u32 btf_id;
+ u32 ref_obj_id;
+ bool refcounted;
};
struct btf_mod_pair {
@@ -1503,11 +1521,12 @@ struct bpf_prog_aux {
u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+ u32 attach_st_ops_member_off;
u32 ctx_arg_info_size;
u32 max_rdonly_access;
u32 max_rdwr_access;
struct btf *attach_btf;
- const struct bpf_ctx_arg_aux *ctx_arg_info;
+ struct bpf_ctx_arg_aux *ctx_arg_info;
void __percpu *priv_stack_ptr;
struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
struct bpf_prog *dst_prog;
@@ -1528,6 +1547,7 @@ struct bpf_prog_aux {
bool jits_use_priv_stack;
bool priv_stack_requested;
bool changes_pkt_data;
+ bool might_sleep;
u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
struct bpf_arena *arena;
@@ -1547,6 +1567,7 @@ struct bpf_prog_aux {
#endif
struct bpf_ksym ksym;
const struct bpf_prog_ops *ops;
+ const struct bpf_struct_ops *st_ops;
struct bpf_map **used_maps;
struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */
struct btf_mod_pair *used_btfs;
@@ -1945,6 +1966,9 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op
#endif
+int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
+ const struct bpf_ctx_arg_aux *info, u32 cnt);
+
#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
int cgroup_atype);
@@ -1980,6 +2004,7 @@ struct bpf_array {
*/
enum {
BPF_MAX_LOOPS = 8 * 1024 * 1024,
+ BPF_MAX_TIMED_LOOPS = 0xffff,
};
#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \
@@ -2036,6 +2061,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void);
+const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void);
+
typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
unsigned long off, unsigned long len);
typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
@@ -2546,7 +2573,7 @@ struct bpf_iter__bpf_map_elem {
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
-bool bpf_iter_prog_supported(struct bpf_prog *prog);
+int bpf_iter_prog_supported(struct bpf_prog *prog);
const struct bpf_func_proto *
bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index aefcd6564251..643809cc78c3 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -48,6 +48,11 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func)
int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
struct bpf_retval_range *range);
+int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str,
+ const struct bpf_dynptr *value_p, int flags);
+int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str);
+bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog);
+
#else /* !CONFIG_BPF_LSM */
static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
@@ -86,6 +91,19 @@ static inline int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
{
return -EOPNOTSUPP;
}
+static inline int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str,
+ const struct bpf_dynptr *value_p, int flags)
+{
+ return -EOPNOTSUPP;
+}
+static inline int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str)
+{
+ return -EOPNOTSUPP;
+}
+static inline bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog)
+{
+ return false;
+}
#endif /* CONFIG_BPF_LSM */
#endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 32c23f2a3086..d6cfc4ee6820 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -427,11 +427,6 @@ struct bpf_verifier_state {
bool active_rcu_lock;
bool speculative;
- /* If this state was ever pointed-to by other state's loop_entry field
- * this flag would be set to true. Used to avoid freeing such states
- * while they are still in use.
- */
- bool used_as_loop_entry;
bool in_sleepable;
/* first and last insn idx of this verifier state */
@@ -458,6 +453,11 @@ struct bpf_verifier_state {
u32 dfs_depth;
u32 callback_unroll_depth;
u32 may_goto_depth;
+ /* If this state was ever pointed-to by other state's loop_entry field
+ * this flag would be set to true. Used to avoid freeing such states
+ * while they are still in use.
+ */
+ u32 used_as_loop_entry;
};
#define bpf_get_spilled_reg(slot, frame, mask) \
@@ -498,8 +498,10 @@ struct bpf_verifier_state {
/* linked list of verifier states used to prune search */
struct bpf_verifier_state_list {
struct bpf_verifier_state state;
- struct bpf_verifier_state_list *next;
- int miss_cnt, hit_cnt;
+ struct list_head node;
+ u32 miss_cnt;
+ u32 hit_cnt:31;
+ u32 in_free_list:1;
};
struct bpf_loop_inline_state {
@@ -589,6 +591,8 @@ struct bpf_insn_aux_data {
* accepts callback function as a parameter.
*/
bool calls_callback;
+ /* registers alive before this instruction. */
+ u16 live_regs_before;
};
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
@@ -665,6 +669,7 @@ struct bpf_subprog_info {
/* true if bpf_fastcall stack region is used by functions that can't be inlined */
bool keep_fastcall_stack: 1;
bool changes_pkt_data: 1;
+ bool might_sleep: 1;
enum priv_stack_mode priv_stack_mode;
u8 arg_cnt;
@@ -710,8 +715,11 @@ struct bpf_verifier_env {
bool test_state_freq; /* test verifier with different pruning frequency */
bool test_reg_invariants; /* fail verification on register invariants violations */
struct bpf_verifier_state *cur_state; /* current verifier state */
- struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
- struct bpf_verifier_state_list *free_list;
+ /* Search pruning optimization, array of list_heads for
+ * lists of struct bpf_verifier_state_list.
+ */
+ struct list_head *explored_states;
+ struct list_head free_list; /* list of struct bpf_verifier_state_list */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */
u32 used_map_cnt; /* number of used maps */
@@ -742,7 +750,11 @@ struct bpf_verifier_env {
struct {
int *insn_state;
int *insn_stack;
+ /* vector of instruction indexes sorted in post-order */
+ int *insn_postorder;
int cur_stack;
+ /* current position in the insn_postorder vector */
+ int cur_postorder;
} cfg;
struct backtrack_state bt;
struct bpf_insn_hist_entry *insn_hist;
@@ -767,6 +779,8 @@ struct bpf_verifier_env {
u32 peak_states;
/* longest register parentage chain walked for liveness marking */
u32 longest_mark_read_walk;
+ u32 free_list_size;
+ u32 explored_states_size;
bpfptr_t fd_array;
/* bit mask to keep track of whether a register has been accessed
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 2a08a2b55592..ebc0c0c9b944 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -76,6 +76,9 @@
#define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */
#define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */
#define KF_FASTCALL (1 << 12) /* kfunc supports bpf_fastcall protocol */
+#define KF_ARENA_RET (1 << 13) /* kfunc returns an arena pointer */
+#define KF_ARENA_ARG1 (1 << 14) /* kfunc takes an arena pointer as its first argument */
+#define KF_ARENA_ARG2 (1 << 15) /* kfunc takes an arena pointer as its second argument */
/*
* Tag marking a kernel function as a kfunc. This is meant to minimize the
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d36d5d5180b1..f5cf4d35d83e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -364,6 +364,8 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn)
* BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg);
* BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
* BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
+ * BPF_LOAD_ACQ dst_reg = smp_load_acquire(src_reg + off16)
+ * BPF_STORE_REL smp_store_release(dst_reg + off16, src_reg)
*/
#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \
@@ -469,6 +471,16 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn)
.off = 0, \
.imm = BPF_CALL_IMM(FUNC) })
+/* Kfunc call */
+
+#define BPF_CALL_KFUNC(OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = BPF_PSEUDO_KFUNC_CALL, \
+ .off = OFF, \
+ .imm = IMM })
+
/* Raw code statement block */
#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
@@ -659,6 +671,11 @@ struct bpf_prog_stats {
struct u64_stats_sync syncp;
} __aligned(2 * sizeof(u64));
+struct bpf_timed_may_goto {
+ u64 count;
+ u64 timestamp;
+};
+
struct sk_filter {
refcount_t refcnt;
struct rcu_head rcu;
@@ -1120,8 +1137,11 @@ bool bpf_jit_supports_ptr_xchg(void);
bool bpf_jit_supports_arena(void);
bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena);
bool bpf_jit_supports_private_stack(void);
+bool bpf_jit_supports_timed_may_goto(void);
u64 bpf_arch_uaddress_limit(void);
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
+u64 arch_bpf_timed_may_goto(void);
+u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *);
bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id);
static inline bool bpf_dump_raw_ok(const struct cred *cred)
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 2bf909fa3394..bf3bbac4e02a 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -426,14 +426,14 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule)
#endif /* CONFIG_AUDIT */
#ifdef CONFIG_BPF_SYSCALL
-LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size)
+LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode)
LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog)
LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr,
- struct bpf_token *token)
+ struct bpf_token *token, bool kernel)
LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map)
LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr,
- struct bpf_token *token)
+ struct bpf_token *token, bool kernel)
LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog)
LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr,
const struct path *path)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 386c9a78cf9e..4a50ba8002e9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2477,6 +2477,11 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
+#ifdef CONFIG_BPF_SYSCALL
+extern int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags);
+#endif
+
long get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
diff --git a/include/linux/security.h b/include/linux/security.h
index 1545d515a66b..cc9b54d95d22 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2249,14 +2249,14 @@ struct bpf_map;
struct bpf_prog;
struct bpf_token;
#ifdef CONFIG_SECURITY
-extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size);
+extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size, bool kernel);
extern int security_bpf_map(struct bpf_map *map, fmode_t fmode);
extern int security_bpf_prog(struct bpf_prog *prog);
extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
- struct bpf_token *token);
+ struct bpf_token *token, bool kernel);
extern void security_bpf_map_free(struct bpf_map *map);
extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
- struct bpf_token *token);
+ struct bpf_token *token, bool kernel);
extern void security_bpf_prog_free(struct bpf_prog *prog);
extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
const struct path *path);
@@ -2265,7 +2265,7 @@ extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cm
extern int security_bpf_token_capable(const struct bpf_token *token, int cap);
#else
static inline int security_bpf(int cmd, union bpf_attr *attr,
- unsigned int size)
+ unsigned int size, bool kernel)
{
return 0;
}
@@ -2281,7 +2281,7 @@ static inline int security_bpf_prog(struct bpf_prog *prog)
}
static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
- struct bpf_token *token)
+ struct bpf_token *token, bool kernel)
{
return 0;
}
@@ -2290,7 +2290,7 @@ static inline void security_bpf_map_free(struct bpf_map *map)
{ }
static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
- struct bpf_token *token)
+ struct bpf_token *token, bool kernel)
{
return 0;
}
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index defa5bb881f4..28705ae67784 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -51,6 +51,9 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+#define BPF_LOAD_ACQ 0x100 /* load-acquire */
+#define BPF_STORE_REL 0x110 /* store-release */
+
enum bpf_cond_pseudo_jmp {
BPF_MAY_GOTO = 0,
};
@@ -1207,6 +1210,7 @@ enum bpf_perf_event_type {
#define BPF_F_BEFORE (1U << 3)
#define BPF_F_AFTER (1U << 4)
#define BPF_F_ID (1U << 5)
+#define BPF_F_PREORDER (1U << 6)
#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
@@ -1648,6 +1652,7 @@ union bpf_attr {
};
__u32 next_id;
__u32 open_flags;
+ __s32 fd_by_id_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
@@ -6019,7 +6024,10 @@ union bpf_attr {
FN(user_ringbuf_drain, 209, ##ctx) \
FN(cgrp_storage_get, 210, ##ctx) \
FN(cgrp_storage_delete, 211, ##ctx) \
- /* */
+ /* This helper list is effectively frozen. If you are trying to \
+ * add a new helper, you should add a kfunc instead, which has \
+ * weaker stability guarantees. See Documentation/bpf/kfuncs.rst \
+ */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
* know or care about integer value that is now passed as second argument
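As an aside (illustrative sketch only, not part of this patch): the new load-acquire/store-release opcodes are carried in the immediate field of a BPF_STX | BPF_ATOMIC instruction. A 32-bit load-acquire, r1 = load_acquire((u32 *)(r2 + 0)), could be encoded roughly as follows; the register roles follow the disassembler change later in this series.

#include <linux/bpf.h>

struct bpf_insn ldacq = {
	.code    = BPF_STX | BPF_ATOMIC | BPF_W,	/* atomic class, 32-bit size */
	.dst_reg = BPF_REG_1,				/* destination of the load */
	.src_reg = BPF_REG_2,				/* base address register */
	.off     = 0,
	.imm     = BPF_LOAD_ACQ,			/* new opcode lives in the immediate */
};

/* A store-release would instead use .imm = BPF_STORE_REL, with dst_reg/off
 * naming the destination address and src_reg the value being stored. */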
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index ec1798b6d3ff..266d4ffa6c07 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -36,7 +36,8 @@ struct btf_type {
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union, enum, fwd and enum64
+ * struct, union, enum, fwd, enum64,
+ * decl_tag and type_tag
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
index 9854f9cff3c6..c7c85bb504ba 100644
--- a/include/uapi/linux/xattr.h
+++ b/include/uapi/linux/xattr.h
@@ -83,6 +83,10 @@ struct xattr_args {
#define XATTR_CAPS_SUFFIX "capability"
#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
+#define XATTR_BPF_LSM_SUFFIX "bpf."
+#define XATTR_NAME_BPF_LSM (XATTR_SECURITY_PREFIX XATTR_BPF_LSM_SUFFIX)
+#define XATTR_NAME_BPF_LSM_LEN (sizeof(XATTR_NAME_BPF_LSM) - 1)
+
#define XATTR_POSIX_ACL_ACCESS "posix_acl_access"
#define XATTR_NAME_POSIX_ACL_ACCESS XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_ACCESS
#define XATTR_POSIX_ACL_DEFAULT "posix_acl_default"
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 095a9554e1de..647b709d7d77 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -577,8 +577,8 @@ __bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(arena_kfuncs)
-BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_RET | KF_ARENA_ARG2)
+BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
BTF_KFUNCS_END(arena_kfuncs)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index 54ff2a85d4c0..148da8f7ff36 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -161,6 +161,7 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
+ bool nobusy;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
@@ -169,21 +170,21 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
if (!cgroup)
return (unsigned long)NULL;
- if (!bpf_cgrp_storage_trylock())
- return (unsigned long)NULL;
+ nobusy = bpf_cgrp_storage_trylock();
- sdata = cgroup_storage_lookup(cgroup, map, true);
+ sdata = cgroup_storage_lookup(cgroup, map, nobusy);
if (sdata)
goto unlock;
/* only allocate new storage, when the cgroup is refcounted */
if (!percpu_ref_is_dying(&cgroup->self.refcnt) &&
- (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
+ (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy)
sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
value, BPF_NOEXIST, false, gfp_flags);
unlock:
- bpf_cgrp_storage_unlock();
+ if (nobusy)
+ bpf_cgrp_storage_unlock();
return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
}
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 106735145948..380e9a7cac75 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -335,7 +335,7 @@ static void cache_btf_id(struct bpf_iter_target_info *tinfo,
tinfo->btf_id = prog->aux->attach_btf_id;
}
-bool bpf_iter_prog_supported(struct bpf_prog *prog)
+int bpf_iter_prog_supported(struct bpf_prog *prog)
{
const char *attach_fname = prog->aux->attach_func_name;
struct bpf_iter_target_info *tinfo = NULL, *iter;
@@ -344,7 +344,7 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
int prefix_len = strlen(prefix);
if (strncmp(attach_fname, prefix, prefix_len))
- return false;
+ return -EINVAL;
mutex_lock(&targets_mutex);
list_for_each_entry(iter, &targets, list) {
@@ -360,12 +360,11 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
}
mutex_unlock(&targets_mutex);
- if (tinfo) {
- prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
- prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
- }
+ if (!tinfo)
+ return -EINVAL;
- return tinfo != NULL;
+ return bpf_prog_ctx_arg_info_init(prog, tinfo->reg_info->ctx_arg_info,
+ tinfo->reg_info->ctx_arg_info_size);
}
const struct bpf_func_proto *
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 967492b65185..0a59df1c550a 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -316,7 +316,9 @@ BTF_ID(func, bpf_lsm_inode_getxattr)
BTF_ID(func, bpf_lsm_inode_mknod)
BTF_ID(func, bpf_lsm_inode_need_killpriv)
BTF_ID(func, bpf_lsm_inode_post_setxattr)
+BTF_ID(func, bpf_lsm_inode_post_removexattr)
BTF_ID(func, bpf_lsm_inode_readlink)
+BTF_ID(func, bpf_lsm_inode_removexattr)
BTF_ID(func, bpf_lsm_inode_rename)
BTF_ID(func, bpf_lsm_inode_rmdir)
BTF_ID(func, bpf_lsm_inode_setattr)
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 040fb1cd840b..db13ee70d94d 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -146,39 +146,7 @@ void bpf_struct_ops_image_free(void *image)
}
#define MAYBE_NULL_SUFFIX "__nullable"
-#define MAX_STUB_NAME 128
-
-/* Return the type info of a stub function, if it exists.
- *
- * The name of a stub function is made up of the name of the struct_ops and
- * the name of the function pointer member, separated by "__". For example,
- * if the struct_ops type is named "foo_ops" and the function pointer
- * member is named "bar", the stub function name would be "foo_ops__bar".
- */
-static const struct btf_type *
-find_stub_func_proto(const struct btf *btf, const char *st_op_name,
- const char *member_name)
-{
- char stub_func_name[MAX_STUB_NAME];
- const struct btf_type *func_type;
- s32 btf_id;
- int cp;
-
- cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s",
- st_op_name, member_name);
- if (cp >= MAX_STUB_NAME) {
- pr_warn("Stub function name too long\n");
- return NULL;
- }
- btf_id = btf_find_by_name_kind(btf, stub_func_name, BTF_KIND_FUNC);
- if (btf_id < 0)
- return NULL;
- func_type = btf_type_by_id(btf, btf_id);
- if (!func_type)
- return NULL;
-
- return btf_type_by_id(btf, func_type->type); /* FUNC_PROTO */
-}
+#define REFCOUNTED_SUFFIX "__ref"
/* Prepare argument info for every nullable argument of a member of a
* struct_ops type.
@@ -203,27 +171,44 @@ find_stub_func_proto(const struct btf *btf, const char *st_op_name,
static int prepare_arg_info(struct btf *btf,
const char *st_ops_name,
const char *member_name,
- const struct btf_type *func_proto,
+ const struct btf_type *func_proto, void *stub_func_addr,
struct bpf_struct_ops_arg_info *arg_info)
{
const struct btf_type *stub_func_proto, *pointed_type;
+ bool is_nullable = false, is_refcounted = false;
const struct btf_param *stub_args, *args;
struct bpf_ctx_arg_aux *info, *info_buf;
u32 nargs, arg_no, info_cnt = 0;
+ char ksym[KSYM_SYMBOL_LEN];
+ const char *stub_fname;
+ const char *suffix;
+ s32 stub_func_id;
u32 arg_btf_id;
int offset;
- stub_func_proto = find_stub_func_proto(btf, st_ops_name, member_name);
- if (!stub_func_proto)
- return 0;
+ stub_fname = kallsyms_lookup((unsigned long)stub_func_addr, NULL, NULL, NULL, ksym);
+ if (!stub_fname) {
+ pr_warn("Cannot find the stub function name for the %s in struct %s\n",
+ member_name, st_ops_name);
+ return -ENOENT;
+ }
+
+ stub_func_id = btf_find_by_name_kind(btf, stub_fname, BTF_KIND_FUNC);
+ if (stub_func_id < 0) {
+ pr_warn("Cannot find the stub function %s in btf\n", stub_fname);
+ return -ENOENT;
+ }
+
+ stub_func_proto = btf_type_by_id(btf, stub_func_id);
+ stub_func_proto = btf_type_by_id(btf, stub_func_proto->type);
/* Check if the number of arguments of the stub function is the same
* as the number of arguments of the function pointer.
*/
nargs = btf_type_vlen(func_proto);
if (nargs != btf_type_vlen(stub_func_proto)) {
- pr_warn("the number of arguments of the stub function %s__%s does not match the number of arguments of the member %s of struct %s\n",
- st_ops_name, member_name, member_name, st_ops_name);
+ pr_warn("the number of arguments of the stub function %s does not match the number of arguments of the member %s of struct %s\n",
+ stub_fname, member_name, st_ops_name);
return -EINVAL;
}
@@ -241,10 +226,18 @@ static int prepare_arg_info(struct btf *btf,
info = info_buf;
for (arg_no = 0; arg_no < nargs; arg_no++) {
/* Skip arguments that are not suffixed with
- * "__nullable".
+ * "__nullable" or "__ref".
*/
- if (!btf_param_match_suffix(btf, &stub_args[arg_no],
- MAYBE_NULL_SUFFIX))
+ is_nullable = btf_param_match_suffix(btf, &stub_args[arg_no],
+ MAYBE_NULL_SUFFIX);
+ is_refcounted = btf_param_match_suffix(btf, &stub_args[arg_no],
+ REFCOUNTED_SUFFIX);
+
+ if (is_nullable)
+ suffix = MAYBE_NULL_SUFFIX;
+ else if (is_refcounted)
+ suffix = REFCOUNTED_SUFFIX;
+ else
continue;
/* Should be a pointer to struct */
@@ -253,30 +246,34 @@ static int prepare_arg_info(struct btf *btf,
&arg_btf_id);
if (!pointed_type ||
!btf_type_is_struct(pointed_type)) {
- pr_warn("stub function %s__%s has %s tagging to an unsupported type\n",
- st_ops_name, member_name, MAYBE_NULL_SUFFIX);
+ pr_warn("stub function %s has %s tagging to an unsupported type\n",
+ stub_fname, suffix);
goto err_out;
}
offset = btf_ctx_arg_offset(btf, func_proto, arg_no);
if (offset < 0) {
- pr_warn("stub function %s__%s has an invalid trampoline ctx offset for arg#%u\n",
- st_ops_name, member_name, arg_no);
+ pr_warn("stub function %s has an invalid trampoline ctx offset for arg#%u\n",
+ stub_fname, arg_no);
goto err_out;
}
if (args[arg_no].type != stub_args[arg_no].type) {
- pr_warn("arg#%u type in stub function %s__%s does not match with its original func_proto\n",
- arg_no, st_ops_name, member_name);
+ pr_warn("arg#%u type in stub function %s does not match with its original func_proto\n",
+ arg_no, stub_fname);
goto err_out;
}
/* Fill the information of the new argument */
- info->reg_type =
- PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL;
info->btf_id = arg_btf_id;
info->btf = btf;
info->offset = offset;
+ if (is_nullable) {
+ info->reg_type = PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL;
+ } else if (is_refcounted) {
+ info->reg_type = PTR_TRUSTED | PTR_TO_BTF_ID;
+ info->refcounted = true;
+ }
info++;
info_cnt++;
@@ -324,6 +321,13 @@ static bool is_module_member(const struct btf *btf, u32 id)
return !strcmp(btf_name_by_offset(btf, t->name_off), "module");
}
+int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff)
+{
+ void *func_ptr = *(void **)(st_ops->cfi_stubs + moff);
+
+ return func_ptr ? 0 : -ENOTSUPP;
+}
+
int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
struct btf *btf,
struct bpf_verifier_log *log)
@@ -386,8 +390,11 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
st_ops_desc->value_type = btf_type_by_id(btf, value_id);
for_each_member(i, t, member) {
- const struct btf_type *func_proto;
+ const struct btf_type *func_proto, *ret_type;
+ void **stub_func_addr;
+ u32 moff;
+ moff = __btf_member_bit_offset(t, member) / 8;
mname = btf_name_by_offset(btf, member->name_off);
if (!*mname) {
pr_warn("anon member in struct %s is not supported\n",
@@ -413,9 +420,23 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
func_proto = btf_type_resolve_func_ptr(btf,
member->type,
NULL);
- if (!func_proto)
+
+ /* The member is not a function pointer or
+ * the function pointer is not supported.
+ */
+ if (!func_proto || bpf_struct_ops_supported(st_ops, moff))
continue;
+ if (func_proto->type) {
+ ret_type = btf_type_resolve_ptr(btf, func_proto->type, NULL);
+ if (ret_type && !__btf_type_is_struct(ret_type)) {
+ pr_warn("func ptr %s in struct %s returns non-struct pointer, which is not supported\n",
+ mname, st_ops->name);
+ err = -EOPNOTSUPP;
+ goto errout;
+ }
+ }
+
if (btf_distill_func_proto(log, btf,
func_proto, mname,
&st_ops->func_models[i])) {
@@ -425,8 +446,9 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
goto errout;
}
+ stub_func_addr = *(void **)(st_ops->cfi_stubs + moff);
err = prepare_arg_info(btf, st_ops->name, mname,
- func_proto,
+ func_proto, stub_func_addr,
arg_info + i);
if (err)
goto errout;
@@ -1152,13 +1174,6 @@ void bpf_struct_ops_put(const void *kdata)
bpf_map_put(&st_map->map);
}
-int bpf_struct_ops_supported(const struct bpf_struct_ops *st_ops, u32 moff)
-{
- void *func_ptr = *(void **)(st_ops->cfi_stubs + moff);
-
- return func_ptr ? 0 : -ENOTSUPP;
-}
-
static bool bpf_struct_ops_valid_to_reg(struct bpf_map *map)
{
struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
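To illustrate the "__ref" suffix handled above, here is a hypothetical struct_ops and CFI stub (not taken from the patch): parameter-name suffixes on the stub tell the verifier how to treat the corresponding context argument.

/* Hypothetical example only. */
struct my_ops {
	int (*handle)(struct task_struct *task, struct file *f);
};

/* CFI stub: "task__ref" makes the first ctx argument a trusted, refcounted
 * PTR_TO_BTF_ID; "f__nullable" marks the second as possibly NULL, so the
 * BPF program must NULL-check it before use. */
static int my_ops__handle(struct task_struct *task__ref, struct file *f__nullable)
{
	return 0;
}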
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index eacb701bc2be..91ce49b7ae28 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -606,6 +606,7 @@ s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p)
spin_unlock_bh(&btf_idr_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(bpf_find_btf_id);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
u32 id, u32 *res_id)
@@ -2575,7 +2576,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- if (btf_type_kflag(t)) {
+ if (btf_type_kflag(t) && !btf_type_is_type_tag(t)) {
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
return -EINVAL;
}
@@ -3332,6 +3333,8 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
u32 off, int sz, struct btf_field_info *info, u32 field_mask)
{
enum btf_field_type type;
+ const char *tag_value;
+ bool is_type_tag;
u32 res_id;
/* Permit modifiers on the pointer itself */
@@ -3341,19 +3344,20 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
if (!btf_type_is_ptr(t))
return BTF_FIELD_IGNORE;
t = btf_type_by_id(btf, t->type);
-
- if (!btf_type_is_type_tag(t))
+ is_type_tag = btf_type_is_type_tag(t) && !btf_type_kflag(t);
+ if (!is_type_tag)
return BTF_FIELD_IGNORE;
/* Reject extra tags */
if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
return -EINVAL;
- if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off)))
+ tag_value = __btf_name_by_offset(btf, t->name_off);
+ if (!strcmp("kptr_untrusted", tag_value))
type = BPF_KPTR_UNREF;
- else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
+ else if (!strcmp("kptr", tag_value))
type = BPF_KPTR_REF;
- else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off)))
+ else if (!strcmp("percpu_kptr", tag_value))
type = BPF_KPTR_PERCPU;
- else if (!strcmp("uptr", __btf_name_by_offset(btf, t->name_off)))
+ else if (!strcmp("uptr", tag_value))
type = BPF_UPTR;
else
return -EINVAL;
@@ -4944,11 +4948,6 @@ static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
- if (btf_type_kflag(t)) {
- btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
- return -EINVAL;
- }
-
component_idx = btf_type_decl_tag(t)->component_idx;
if (component_idx < -1) {
btf_verifier_log_type(env, t, "Invalid component_idx");
@@ -6507,6 +6506,8 @@ static const struct bpf_raw_tp_null_args raw_tp_null_args[] = {
/* rxrpc */
{ "rxrpc_recvdata", 0x1 },
{ "rxrpc_resend", 0x10 },
+ { "rxrpc_tq", 0x10 },
+ { "rxrpc_client", 0x1 },
/* skb */
{"kfree_skb", 0x1000},
/* sunrpc */
@@ -6529,6 +6530,103 @@ static const struct bpf_raw_tp_null_args raw_tp_null_args[] = {
{ "mr_integ_alloc", 0x2000 },
/* bpf_testmod */
{ "bpf_testmod_test_read", 0x0 },
+ /* amdgpu */
+ { "amdgpu_vm_bo_map", 0x1 },
+ { "amdgpu_vm_bo_unmap", 0x1 },
+ /* netfs */
+ { "netfs_folioq", 0x1 },
+ /* xfs from xfs_defer_pending_class */
+ { "xfs_defer_create_intent", 0x1 },
+ { "xfs_defer_cancel_list", 0x1 },
+ { "xfs_defer_pending_finish", 0x1 },
+ { "xfs_defer_pending_abort", 0x1 },
+ { "xfs_defer_relog_intent", 0x1 },
+ { "xfs_defer_isolate_paused", 0x1 },
+ { "xfs_defer_item_pause", 0x1 },
+ { "xfs_defer_item_unpause", 0x1 },
+ /* xfs from xfs_defer_pending_item_class */
+ { "xfs_defer_add_item", 0x1 },
+ { "xfs_defer_cancel_item", 0x1 },
+ { "xfs_defer_finish_item", 0x1 },
+ /* xfs from xfs_icwalk_class */
+ { "xfs_ioc_free_eofblocks", 0x10 },
+ { "xfs_blockgc_free_space", 0x10 },
+ /* xfs from xfs_btree_cur_class */
+ { "xfs_btree_updkeys", 0x100 },
+ { "xfs_btree_overlapped_query_range", 0x100 },
+ /* xfs from xfs_imap_class */
+ { "xfs_map_blocks_found", 0x10000 },
+ { "xfs_map_blocks_alloc", 0x10000 },
+ { "xfs_iomap_alloc", 0x1000 },
+ { "xfs_iomap_found", 0x1000 },
+ /* xfs from xfs_fs_class */
+ { "xfs_inodegc_flush", 0x1 },
+ { "xfs_inodegc_push", 0x1 },
+ { "xfs_inodegc_start", 0x1 },
+ { "xfs_inodegc_stop", 0x1 },
+ { "xfs_inodegc_queue", 0x1 },
+ { "xfs_inodegc_throttle", 0x1 },
+ { "xfs_fs_sync_fs", 0x1 },
+ { "xfs_blockgc_start", 0x1 },
+ { "xfs_blockgc_stop", 0x1 },
+ { "xfs_blockgc_worker", 0x1 },
+ { "xfs_blockgc_flush_all", 0x1 },
+ /* xfs_scrub */
+ { "xchk_nlinks_live_update", 0x10 },
+ /* xfs_scrub from xchk_metapath_class */
+ { "xchk_metapath_lookup", 0x100 },
+ /* nfsd */
+ { "nfsd_dirent", 0x1 },
+ { "nfsd_file_acquire", 0x1001 },
+ { "nfsd_file_insert_err", 0x1 },
+ { "nfsd_file_cons_err", 0x1 },
+ /* nfs4 */
+ { "nfs4_setup_sequence", 0x1 },
+ { "pnfs_update_layout", 0x10000 },
+ { "nfs4_inode_callback_event", 0x200 },
+ { "nfs4_inode_stateid_callback_event", 0x200 },
+ /* nfs from pnfs_layout_event */
+ { "pnfs_mds_fallback_pg_init_read", 0x10000 },
+ { "pnfs_mds_fallback_pg_init_write", 0x10000 },
+ { "pnfs_mds_fallback_pg_get_mirror_count", 0x10000 },
+ { "pnfs_mds_fallback_read_done", 0x10000 },
+ { "pnfs_mds_fallback_write_done", 0x10000 },
+ { "pnfs_mds_fallback_read_pagelist", 0x10000 },
+ { "pnfs_mds_fallback_write_pagelist", 0x10000 },
+ /* coda */
+ { "coda_dec_pic_run", 0x10 },
+ { "coda_dec_pic_done", 0x10 },
+ /* cfg80211 */
+ { "cfg80211_scan_done", 0x11 },
+ { "rdev_set_coalesce", 0x10 },
+ { "cfg80211_report_wowlan_wakeup", 0x100 },
+ { "cfg80211_inform_bss_frame", 0x100 },
+ { "cfg80211_michael_mic_failure", 0x10000 },
+ /* cfg80211 from wiphy_work_event */
+ { "wiphy_work_queue", 0x10 },
+ { "wiphy_work_run", 0x10 },
+ { "wiphy_work_cancel", 0x10 },
+ { "wiphy_work_flush", 0x10 },
+ /* hugetlbfs */
+ { "hugetlbfs_alloc_inode", 0x10 },
+ /* spufs */
+ { "spufs_context", 0x10 },
+ /* kvm_hv */
+ { "kvm_page_fault_enter", 0x100 },
+ /* dpu */
+ { "dpu_crtc_setup_mixer", 0x100 },
+ /* binder */
+ { "binder_transaction", 0x100 },
+ /* bcachefs */
+ { "btree_path_free", 0x100 },
+ /* hfi1_tx */
+ { "hfi1_sdma_progress", 0x1000 },
+ /* iptfs */
+ { "iptfs_ingress_postq_event", 0x1000 },
+ /* neigh */
+ { "neigh_update", 0x10 },
+ /* snd_firewire_lib */
+ { "amdtp_packet", 0x100 },
};
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
@@ -6679,6 +6777,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->reg_type = ctx_arg_info->reg_type;
info->btf = ctx_arg_info->btf ? : btf_vmlinux;
info->btf_id = ctx_arg_info->btf_id;
+ info->ref_obj_id = ctx_arg_info->ref_obj_id;
return true;
}
}
@@ -6745,7 +6844,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->btf_id = t->type;
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_type_tag(t)) {
+ if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
tag_value = __btf_name_by_offset(btf, t->name_off);
if (strcmp(tag_value, "user") == 0)
info->reg_type |= MEM_USER;
@@ -7004,7 +7103,7 @@ error:
/* check type tag */
t = btf_type_by_id(btf, mtype->type);
- if (btf_type_is_type_tag(t)) {
+ if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
tag_value = __btf_name_by_offset(btf, t->name_off);
/* check __user tag */
if (strcmp(tag_value, "user") == 0)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 46e5db65dbc8..84f58f3d028a 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -369,7 +369,7 @@ static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
/* count number of elements in the list.
* it's slow but the list cannot be long
*/
-static u32 prog_list_length(struct hlist_head *head)
+static u32 prog_list_length(struct hlist_head *head, int *preorder_cnt)
{
struct bpf_prog_list *pl;
u32 cnt = 0;
@@ -377,6 +377,8 @@ static u32 prog_list_length(struct hlist_head *head)
hlist_for_each_entry(pl, head, node) {
if (!prog_list_prog(pl))
continue;
+ if (preorder_cnt && (pl->flags & BPF_F_PREORDER))
+ (*preorder_cnt)++;
cnt++;
}
return cnt;
@@ -400,7 +402,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
if (flags & BPF_F_ALLOW_MULTI)
return true;
- cnt = prog_list_length(&p->bpf.progs[atype]);
+ cnt = prog_list_length(&p->bpf.progs[atype], NULL);
WARN_ON_ONCE(cnt > 1);
if (cnt == 1)
return !!(flags & BPF_F_ALLOW_OVERRIDE);
@@ -423,12 +425,12 @@ static int compute_effective_progs(struct cgroup *cgrp,
struct bpf_prog_array *progs;
struct bpf_prog_list *pl;
struct cgroup *p = cgrp;
- int cnt = 0;
+ int i, j, cnt = 0, preorder_cnt = 0, fstart, bstart, init_bstart;
/* count number of effective programs by walking parents */
do {
if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
- cnt += prog_list_length(&p->bpf.progs[atype]);
+ cnt += prog_list_length(&p->bpf.progs[atype], &preorder_cnt);
p = cgroup_parent(p);
} while (p);
@@ -439,20 +441,34 @@ static int compute_effective_progs(struct cgroup *cgrp,
/* populate the array with effective progs */
cnt = 0;
p = cgrp;
+ fstart = preorder_cnt;
+ bstart = preorder_cnt - 1;
do {
if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
continue;
+ init_bstart = bstart;
hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
if (!prog_list_prog(pl))
continue;
- item = &progs->items[cnt];
+ if (pl->flags & BPF_F_PREORDER) {
+ item = &progs->items[bstart];
+ bstart--;
+ } else {
+ item = &progs->items[fstart];
+ fstart++;
+ }
item->prog = prog_list_prog(pl);
bpf_cgroup_storages_assign(item->cgroup_storage,
pl->storage);
cnt++;
}
+
+ /* reverse pre-ordering progs at this cgroup level */
+ for (i = bstart + 1, j = init_bstart; i < j; i++, j--)
+ swap(progs->items[i], progs->items[j]);
+
} while ((p = cgroup_parent(p)));
*array = progs;
@@ -663,7 +679,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
*/
return -EPERM;
- if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
+ if (prog_list_length(progs, NULL) >= BPF_CGROUP_MAX_PROGS)
return -E2BIG;
pl = find_attach_entry(progs, prog, link, replace_prog,
@@ -698,6 +714,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
pl->prog = prog;
pl->link = link;
+ pl->flags = flags;
bpf_cgroup_storages_assign(pl->storage, storage);
cgrp->bpf.flags[atype] = saved_flags;
@@ -1073,7 +1090,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
lockdep_is_held(&cgroup_mutex));
total_cnt += bpf_prog_array_length(effective);
} else {
- total_cnt += prog_list_length(&cgrp->bpf.progs[atype]);
+ total_cnt += prog_list_length(&cgrp->bpf.progs[atype], NULL);
}
}
@@ -1105,7 +1122,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
u32 id;
progs = &cgrp->bpf.progs[atype];
- cnt = min_t(int, prog_list_length(progs), total_cnt);
+ cnt = min_t(int, prog_list_length(progs, NULL), total_cnt);
i = 0;
hlist_for_each_entry(pl, progs, node) {
prog = prog_list_prog(pl);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index da729cbbaeb9..ba6b6118cf50 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1663,14 +1663,17 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
INSN_3(JMP, JSET, K), \
INSN_2(JMP, JA), \
INSN_2(JMP32, JA), \
+ /* Atomic operations. */ \
+ INSN_3(STX, ATOMIC, B), \
+ INSN_3(STX, ATOMIC, H), \
+ INSN_3(STX, ATOMIC, W), \
+ INSN_3(STX, ATOMIC, DW), \
/* Store instructions. */ \
/* Register based. */ \
INSN_3(STX, MEM, B), \
INSN_3(STX, MEM, H), \
INSN_3(STX, MEM, W), \
INSN_3(STX, MEM, DW), \
- INSN_3(STX, ATOMIC, W), \
- INSN_3(STX, ATOMIC, DW), \
/* Immediate based. */ \
INSN_3(ST, MEM, B), \
INSN_3(ST, MEM, H), \
@@ -2152,24 +2155,33 @@ out:
if (BPF_SIZE(insn->code) == BPF_W) \
atomic_##KOP((u32) SRC, (atomic_t *)(unsigned long) \
(DST + insn->off)); \
- else \
+ else if (BPF_SIZE(insn->code) == BPF_DW) \
atomic64_##KOP((u64) SRC, (atomic64_t *)(unsigned long) \
(DST + insn->off)); \
+ else \
+ goto default_label; \
break; \
case BOP | BPF_FETCH: \
if (BPF_SIZE(insn->code) == BPF_W) \
SRC = (u32) atomic_fetch_##KOP( \
(u32) SRC, \
(atomic_t *)(unsigned long) (DST + insn->off)); \
- else \
+ else if (BPF_SIZE(insn->code) == BPF_DW) \
SRC = (u64) atomic64_fetch_##KOP( \
(u64) SRC, \
(atomic64_t *)(unsigned long) (DST + insn->off)); \
+ else \
+ goto default_label; \
break;
STX_ATOMIC_DW:
STX_ATOMIC_W:
+ STX_ATOMIC_H:
+ STX_ATOMIC_B:
switch (IMM) {
+ /* Atomic read-modify-write instructions support only W and DW
+ * size modifiers.
+ */
ATOMIC_ALU_OP(BPF_ADD, add)
ATOMIC_ALU_OP(BPF_AND, and)
ATOMIC_ALU_OP(BPF_OR, or)
@@ -2181,20 +2193,63 @@ out:
SRC = (u32) atomic_xchg(
(atomic_t *)(unsigned long) (DST + insn->off),
(u32) SRC);
- else
+ else if (BPF_SIZE(insn->code) == BPF_DW)
SRC = (u64) atomic64_xchg(
(atomic64_t *)(unsigned long) (DST + insn->off),
(u64) SRC);
+ else
+ goto default_label;
break;
case BPF_CMPXCHG:
if (BPF_SIZE(insn->code) == BPF_W)
BPF_R0 = (u32) atomic_cmpxchg(
(atomic_t *)(unsigned long) (DST + insn->off),
(u32) BPF_R0, (u32) SRC);
- else
+ else if (BPF_SIZE(insn->code) == BPF_DW)
BPF_R0 = (u64) atomic64_cmpxchg(
(atomic64_t *)(unsigned long) (DST + insn->off),
(u64) BPF_R0, (u64) SRC);
+ else
+ goto default_label;
+ break;
+ /* Atomic load and store instructions support all size
+ * modifiers.
+ */
+ case BPF_LOAD_ACQ:
+ switch (BPF_SIZE(insn->code)) {
+#define LOAD_ACQUIRE(SIZEOP, SIZE) \
+ case BPF_##SIZEOP: \
+ DST = (SIZE)smp_load_acquire( \
+ (SIZE *)(unsigned long)(SRC + insn->off)); \
+ break;
+ LOAD_ACQUIRE(B, u8)
+ LOAD_ACQUIRE(H, u16)
+ LOAD_ACQUIRE(W, u32)
+#ifdef CONFIG_64BIT
+ LOAD_ACQUIRE(DW, u64)
+#endif
+#undef LOAD_ACQUIRE
+ default:
+ goto default_label;
+ }
+ break;
+ case BPF_STORE_REL:
+ switch (BPF_SIZE(insn->code)) {
+#define STORE_RELEASE(SIZEOP, SIZE) \
+ case BPF_##SIZEOP: \
+ smp_store_release( \
+ (SIZE *)(unsigned long)(DST + insn->off), (SIZE)SRC); \
+ break;
+ STORE_RELEASE(B, u8)
+ STORE_RELEASE(H, u16)
+ STORE_RELEASE(W, u32)
+#ifdef CONFIG_64BIT
+ STORE_RELEASE(DW, u64)
+#endif
+#undef STORE_RELEASE
+ default:
+ goto default_label;
+ }
break;
default:
@@ -2290,17 +2345,18 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
insn->code = BPF_JMP | BPF_CALL_ARGS;
}
#endif
-#else
+#endif
+
static unsigned int __bpf_prog_ret0_warn(const void *ctx,
const struct bpf_insn *insn)
{
/* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
- * is not working properly, so warn about it!
+ * is not working properly, or interpreter is being used when
+ * prog->jit_requested is not 0, so warn about it!
*/
WARN_ON_ONCE(1);
return 0;
}
-#endif
bool bpf_prog_map_compatible(struct bpf_map *map,
const struct bpf_prog *fp)
@@ -2380,8 +2436,18 @@ static void bpf_prog_select_func(struct bpf_prog *fp)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+ u32 idx = (round_up(stack_depth, 32) / 32) - 1;
- fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+ /* may_goto may cause stack size > 512, leading to idx out-of-bounds.
+ * But for non-JITed programs, we don't need bpf_func, so no bounds
+ * check needed.
+ */
+ if (!fp->jit_requested &&
+ !WARN_ON_ONCE(idx >= ARRAY_SIZE(interpreters))) {
+ fp->bpf_func = interpreters[idx];
+ } else {
+ fp->bpf_func = __bpf_prog_ret0_warn;
+ }
#else
fp->bpf_func = __bpf_prog_ret0_warn;
#endif
@@ -2906,6 +2972,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void)
return NULL;
}
+const struct bpf_func_proto * __weak bpf_get_perf_event_read_value_proto(void)
+{
+ return NULL;
+}
+
u64 __weak
bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
@@ -3058,6 +3129,32 @@ void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp,
{
}
+bool __weak bpf_jit_supports_timed_may_goto(void)
+{
+ return false;
+}
+
+u64 __weak arch_bpf_timed_may_goto(void)
+{
+ return 0;
+}
+
+u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *p)
+{
+ u64 time = ktime_get_mono_fast_ns();
+
+ /* Populate the timestamp for this stack frame, and refresh count. */
+ if (!p->timestamp) {
+ p->timestamp = time;
+ return BPF_MAX_TIMED_LOOPS;
+ }
+ /* Check if we've exhausted our time slice, and zero count. */
+ if (time - p->timestamp >= (NSEC_PER_SEC / 4))
+ return 0;
+ /* Refresh the count for the stack frame. */
+ return BPF_MAX_TIMED_LOOPS;
+}
+
/* for configs without MMU or 32-bit */
__weak const struct bpf_map_ops arena_map_ops;
__weak u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena)
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index cfa1c18e3a48..9876c5fe6c2a 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -45,6 +45,10 @@ __bpf_kfunc_start_defs();
*
* bpf_cpumask_create() allocates memory using the BPF memory allocator, and
* will not block. It may return NULL if no memory is available.
+ *
+ * Return:
+ * * A pointer to a new struct bpf_cpumask instance on success.
+ * * NULL if the BPF memory allocator is out of memory.
*/
__bpf_kfunc struct bpf_cpumask *bpf_cpumask_create(void)
{
@@ -71,6 +75,10 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_create(void)
* Acquires a reference to a BPF cpumask. The cpumask returned by this function
* must either be embedded in a map as a kptr, or freed with
* bpf_cpumask_release().
+ *
+ * Return:
+ * * The struct bpf_cpumask pointer passed to the function.
+ *
*/
__bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask)
{
@@ -106,6 +114,9 @@ CFI_NOSEAL(bpf_cpumask_release_dtor);
*
* Find the index of the first nonzero bit of the cpumask. A struct bpf_cpumask
* pointer may be safely passed to this function.
+ *
+ * Return:
+ * * The index of the first nonzero bit in the struct cpumask.
*/
__bpf_kfunc u32 bpf_cpumask_first(const struct cpumask *cpumask)
{
@@ -119,6 +130,9 @@ __bpf_kfunc u32 bpf_cpumask_first(const struct cpumask *cpumask)
*
* Find the index of the first unset bit of the cpumask. A struct bpf_cpumask
* pointer may be safely passed to this function.
+ *
+ * Return:
+ * * The index of the first zero bit in the struct cpumask.
*/
__bpf_kfunc u32 bpf_cpumask_first_zero(const struct cpumask *cpumask)
{
@@ -133,6 +147,9 @@ __bpf_kfunc u32 bpf_cpumask_first_zero(const struct cpumask *cpumask)
*
* Find the index of the first nonzero bit of the AND of two cpumasks.
* struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
+ *
+ * Return:
+ * * The index of the first bit that is nonzero in both cpumask instances.
*/
__bpf_kfunc u32 bpf_cpumask_first_and(const struct cpumask *src1,
const struct cpumask *src2)
@@ -414,12 +431,47 @@ __bpf_kfunc u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
* @cpumask: The cpumask being queried.
*
* Count the number of set bits in the given cpumask.
+ *
+ * Return:
+ * * The number of bits set in the mask.
*/
__bpf_kfunc u32 bpf_cpumask_weight(const struct cpumask *cpumask)
{
return cpumask_weight(cpumask);
}
+/**
+ * bpf_cpumask_populate() - Populate the CPU mask from the contents of
+ * a BPF memory region.
+ *
+ * @cpumask: The cpumask being populated.
+ * @src: The BPF memory holding the bit pattern.
+ * @src__sz: Length of the BPF memory region in bytes.
+ *
+ * Return:
+ * * 0 if the struct cpumask * instance was populated successfully.
+ * * -EACCES if the memory region is too small to populate the cpumask.
+ * * -EINVAL if the memory region is not aligned to the size of a long
+ * and the architecture does not support efficient unaligned accesses.
+ */
+__bpf_kfunc int bpf_cpumask_populate(struct cpumask *cpumask, void *src, size_t src__sz)
+{
+ unsigned long source = (unsigned long)src;
+
+ /* The memory region must be large enough to populate the entire CPU mask. */
+ if (src__sz < bitmap_size(nr_cpu_ids))
+ return -EACCES;
+
+ /* If avoiding unaligned accesses, the input region must be aligned to the nearest long. */
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+ !IS_ALIGNED(source, sizeof(long)))
+ return -EINVAL;
+
+ bitmap_copy(cpumask_bits(cpumask), src, nr_cpu_ids);
+
+ return 0;
+}
+
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(cpumask_kfunc_btf_ids)
@@ -448,6 +500,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_weight, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_populate, KF_RCU)
BTF_KFUNCS_END(cpumask_kfunc_btf_ids)
static const struct btf_kfunc_id_set cpumask_kfunc_set = {
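A rough BPF-side usage sketch for the new kfunc (illustrative only; the kfunc declarations are assumed to come from vmlinux.h or the selftests' common headers, and the buffer size is a hypothetical choice):

	/* Source must cover the kernel's cpumask, i.e. at least
	 * bitmap_size(nr_cpu_ids) bytes, and be long-aligned on
	 * architectures without efficient unaligned access. */
	unsigned long pattern[16] = { 0x3 };	/* CPUs 0 and 1 */
	struct bpf_cpumask *mask;

	mask = bpf_cpumask_create();
	if (!mask)
		return 0;
	if (bpf_cpumask_populate((struct cpumask *)mask, pattern, sizeof(pattern)) == 0) {
		/* mask now holds CPUs 0 and 1 */
	}
	bpf_cpumask_release(mask);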
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 309c4aa1b026..20883c6b1546 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -202,7 +202,7 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
insn->dst_reg, class == BPF_ALU ? 'w' : 'r',
insn->dst_reg);
} else if (is_addr_space_cast(insn)) {
- verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %d, %d)\n",
+ verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %u, %u)\n",
insn->code, insn->dst_reg,
insn->src_reg, ((u32)insn->imm) >> 16, (u16)insn->imm);
} else if (is_mov_percpu_addr(insn)) {
@@ -267,6 +267,18 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
BPF_SIZE(insn->code) == BPF_DW ? "64" : "",
bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
insn->dst_reg, insn->off, insn->src_reg);
+ } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+ insn->imm == BPF_LOAD_ACQ) {
+ verbose(cbs->private_data, "(%02x) r%d = load_acquire((%s *)(r%d %+d))\n",
+ insn->code, insn->dst_reg,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->src_reg, insn->off);
+ } else if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+ insn->imm == BPF_STORE_REL) {
+ verbose(cbs->private_data, "(%02x) store_release((%s *)(r%d %+d), r%d)\n",
+ insn->code,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->dst_reg, insn->off, insn->src_reg);
} else {
verbose(cbs->private_data, "BUG_%02x\n", insn->code);
}
@@ -369,7 +381,7 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
insn->code, class == BPF_JMP32 ? 'w' : 'r',
insn->dst_reg,
bpf_jmp_string[BPF_OP(insn->code) >> 4],
- insn->imm, insn->off);
+ (u32)insn->imm, insn->off);
}
} else {
verbose(cbs->private_data, "(%02x) %s\n",
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4a9eeb7aef85..877298133fda 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -198,12 +198,12 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
void __percpu *pptr)
{
- *(void __percpu **)(l->key + key_size) = pptr;
+ *(void __percpu **)(l->key + roundup(key_size, 8)) = pptr;
}
static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
- return *(void __percpu **)(l->key + key_size);
+ return *(void __percpu **)(l->key + roundup(key_size, 8));
}
static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
@@ -787,6 +787,9 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
static void check_and_free_fields(struct bpf_htab *htab,
struct htab_elem *elem)
{
+ if (IS_ERR_OR_NULL(htab->map.record))
+ return;
+
if (htab_is_percpu(htab)) {
void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
int cpu;
@@ -2354,7 +2357,7 @@ static int htab_percpu_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn
*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3);
*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0,
- offsetof(struct htab_elem, key) + map->key_size);
+ offsetof(struct htab_elem, key) + roundup(map->key_size, 8));
*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
*insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 672abe111282..e3a2662f4e33 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1758,8 +1758,8 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE,
};
-BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
- u32, offset, u64, flags)
+static int __bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr_kern *src,
+ u32 offset, u64 flags)
{
enum bpf_dynptr_type type;
int err;
@@ -1792,6 +1792,12 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern
}
}
+BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
+ u32, offset, u64, flags)
+{
+ return __bpf_dynptr_read(dst, len, src, offset, flags);
+}
+
static const struct bpf_func_proto bpf_dynptr_read_proto = {
.func = bpf_dynptr_read,
.gpl_only = false,
@@ -1803,8 +1809,8 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
.arg5_type = ARG_ANYTHING,
};
-BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
- u32, len, u64, flags)
+static int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, void *src,
+ u32 len, u64 flags)
{
enum bpf_dynptr_type type;
int err;
@@ -1842,6 +1848,12 @@ BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, v
}
}
+BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
+ u32, len, u64, flags)
+{
+ return __bpf_dynptr_write(dst, offset, src, len, flags);
+}
+
static const struct bpf_func_proto bpf_dynptr_write_proto = {
.func = bpf_dynptr_write,
.gpl_only = false,
@@ -2043,6 +2055,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_task_pt_regs_proto;
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto();
+ case BPF_FUNC_perf_event_read_value:
+ return bpf_get_perf_event_read_value_proto();
default:
return NULL;
}
@@ -2757,6 +2771,61 @@ __bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p,
return 0;
}
+/**
+ * bpf_dynptr_copy() - Copy data from one dynptr to another.
+ * @dst_ptr: Destination dynptr - where data should be copied to
+ * @dst_off: Offset into the destination dynptr
+ * @src_ptr: Source dynptr - where data should be copied from
+ * @src_off: Offset into the source dynptr
+ * @size: Length of the data to copy from source to destination
+ *
+ * Copies data from source dynptr to destination dynptr.
+ * Returns 0 on success; a negative error otherwise.
+ */
+__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off,
+ struct bpf_dynptr *src_ptr, u32 src_off, u32 size)
+{
+ struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr;
+ struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr;
+ void *src_slice, *dst_slice;
+ char buf[256];
+ u32 off;
+
+ src_slice = bpf_dynptr_slice(src_ptr, src_off, NULL, size);
+ dst_slice = bpf_dynptr_slice_rdwr(dst_ptr, dst_off, NULL, size);
+
+ if (src_slice && dst_slice) {
+ memmove(dst_slice, src_slice, size);
+ return 0;
+ }
+
+ if (src_slice)
+ return __bpf_dynptr_write(dst, dst_off, src_slice, size, 0);
+
+ if (dst_slice)
+ return __bpf_dynptr_read(dst_slice, size, src, src_off, 0);
+
+ if (bpf_dynptr_check_off_len(dst, dst_off, size) ||
+ bpf_dynptr_check_off_len(src, src_off, size))
+ return -E2BIG;
+
+ off = 0;
+ while (off < size) {
+ u32 chunk_sz = min_t(u32, sizeof(buf), size - off);
+ int err;
+
+ err = __bpf_dynptr_read(buf, chunk_sz, src, src_off + off, 0);
+ if (err)
+ return err;
+ err = __bpf_dynptr_write(dst, dst_off + off, buf, chunk_sz, 0);
+ if (err)
+ return err;
+
+ off += chunk_sz;
+ }
+ return 0;
+}
+
__bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
{
return obj;
@@ -3066,6 +3135,50 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user
return ret + 1;
}
+/**
+ * bpf_copy_from_user_task_str() - Copy a string from a task's address space
+ * @dst: Destination address, in kernel space. This buffer must be
+ * at least @dst__sz bytes long.
+ * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL.
+ * @unsafe_ptr__ign: Source address in the task's address space.
+ * @tsk: The task whose address space will be used
+ * @flags: The only supported flag is BPF_F_PAD_ZEROS
+ *
+ * Copies a NUL terminated string from a task's address space to the @dst
+ * buffer. If the user string is too long, this will still ensure zero
+ * termination in the @dst buffer unless the buffer size is 0.
+ *
+ * If the BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success
+ * and memset all of @dst on failure.
+ *
+ * Return: The number of copied bytes on success including the NUL terminator.
+ * A negative error code on failure.
+ */
+__bpf_kfunc int bpf_copy_from_user_task_str(void *dst, u32 dst__sz,
+ const void __user *unsafe_ptr__ign,
+ struct task_struct *tsk, u64 flags)
+{
+ int ret;
+
+ if (unlikely(flags & ~BPF_F_PAD_ZEROS))
+ return -EINVAL;
+
+ if (unlikely(dst__sz == 0))
+ return 0;
+
+ ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_ptr__ign, dst, dst__sz, 0);
+ if (ret < 0) {
+ if (flags & BPF_F_PAD_ZEROS)
+ memset(dst, 0, dst__sz);
+ return ret;
+ }
+
+ if (flags & BPF_F_PAD_ZEROS)
+ memset(dst + ret, 0, dst__sz - ret);
+
+ return ret + 1;
+}
+
/* Keep unsigned long in prototype so that kfunc is usable when emitted to
* vmlinux.h in BPF programs directly, but note that while in BPF prog, the
* unsigned long always points to 8-byte region on stack, the kernel may only
@@ -3161,6 +3274,7 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null)
BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
BTF_ID_FLAGS(func, bpf_dynptr_size)
BTF_ID_FLAGS(func, bpf_dynptr_clone)
+BTF_ID_FLAGS(func, bpf_dynptr_copy)
#ifdef CONFIG_NET
BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
#endif
@@ -3173,6 +3287,7 @@ BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_copy_from_user_task_str, KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_get_kmem_cache)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE)
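A minimal BPF-side sketch of the new dynptr copy kfunc (illustrative only; the dynptrs are assumed to have been initialized elsewhere, e.g. via bpf_dynptr_from_mem or a ringbuf reserve, and the offsets are arbitrary):

static int copy_between(struct bpf_dynptr *dst, struct bpf_dynptr *src, __u32 len)
{
	/* Copy len bytes from src[0..len) into dst[16..16+len).
	 * Returns 0 on success, -E2BIG if either range is out of bounds. */
	return bpf_dynptr_copy(dst, 16, src, 0, len);
}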
diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c
index 0c63bc2cd895..2fdf3c978db1 100644
--- a/kernel/bpf/preload/bpf_preload_kern.c
+++ b/kernel/bpf/preload/bpf_preload_kern.c
@@ -90,3 +90,4 @@ static void __exit fini(void)
late_initcall(load);
module_exit(fini);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Embedded BPF programs for introspection in bpffs");
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e1e42e918ba7..380b445a304c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1315,7 +1315,7 @@ static bool bpf_net_capable(void)
#define BPF_MAP_CREATE_LAST_FIELD map_token_fd
/* called via syscall */
-static int map_create(union bpf_attr *attr)
+static int map_create(union bpf_attr *attr, bool kernel)
{
const struct bpf_map_ops *ops;
struct bpf_token *token = NULL;
@@ -1505,7 +1505,7 @@ static int map_create(union bpf_attr *attr)
attr->btf_vmlinux_value_type_id;
}
- err = security_bpf_map_create(map, attr, token);
+ err = security_bpf_map_create(map, attr, token, kernel);
if (err)
goto free_map_sec;
@@ -1593,11 +1593,8 @@ struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
{
- spin_lock_bh(&map_idr_lock);
- map = __bpf_map_inc_not_zero(map, false);
- spin_unlock_bh(&map_idr_lock);
-
- return map;
+ lockdep_assert(rcu_read_lock_held());
+ return __bpf_map_inc_not_zero(map, false);
}
EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
@@ -2314,6 +2311,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
kvfree(prog->aux->jited_linfo);
kvfree(prog->aux->linfo);
kfree(prog->aux->kfunc_tab);
+ kfree(prog->aux->ctx_arg_info);
if (prog->aux->attach_btf)
btf_put(prog->aux->attach_btf);
@@ -2944,7 +2942,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (err < 0)
goto free_prog;
- err = security_bpf_prog_load(prog, attr, token);
+ err = security_bpf_prog_load(prog, attr, token, uattr.is_kernel);
if (err)
goto free_prog_sec;
@@ -4169,7 +4167,8 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
#define BPF_F_ATTACH_MASK_BASE \
(BPF_F_ALLOW_OVERRIDE | \
BPF_F_ALLOW_MULTI | \
- BPF_F_REPLACE)
+ BPF_F_REPLACE | \
+ BPF_F_PREORDER)
#define BPF_F_ATTACH_MASK_MPROG \
(BPF_F_REPLACE | \
@@ -4733,6 +4732,8 @@ static int bpf_prog_get_info_by_fd(struct file *file,
info.recursion_misses = stats.misses;
info.verified_insns = prog->aux->verified_insns;
+ if (prog->aux->btf)
+ info.btf_id = btf_obj_id(prog->aux->btf);
if (!bpf_capable()) {
info.jited_prog_len = 0;
@@ -4879,8 +4880,6 @@ static int bpf_prog_get_info_by_fd(struct file *file,
}
}
- if (prog->aux->btf)
- info.btf_id = btf_obj_id(prog->aux->btf);
info.attach_btf_id = prog->aux->attach_btf_id;
if (attach_btf)
info.attach_btf_obj_id = btf_obj_id(attach_btf);
@@ -5121,15 +5120,34 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_
return btf_new_fd(attr, uattr, uattr_size);
}
-#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD btf_id
+#define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd
static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
{
+ struct bpf_token *token = NULL;
+
if (CHECK_ATTR(BPF_BTF_GET_FD_BY_ID))
return -EINVAL;
- if (!capable(CAP_SYS_ADMIN))
+ if (attr->open_flags & ~BPF_F_TOKEN_FD)
+ return -EINVAL;
+
+ if (attr->open_flags & BPF_F_TOKEN_FD) {
+ token = bpf_token_get_from_fd(attr->fd_by_id_token_fd);
+ if (IS_ERR(token))
+ return PTR_ERR(token);
+ if (!bpf_token_allow_cmd(token, BPF_BTF_GET_FD_BY_ID)) {
+ bpf_token_put(token);
+ token = NULL;
+ }
+ }
+
+ if (!bpf_token_capable(token, CAP_SYS_ADMIN)) {
+ bpf_token_put(token);
return -EPERM;
+ }
+
+ bpf_token_put(token);
return btf_get_fd_by_id(attr->btf_id);
}
@@ -5768,13 +5786,13 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size)
if (copy_from_bpfptr(&attr, uattr, size) != 0)
return -EFAULT;
- err = security_bpf(cmd, &attr, size);
+ err = security_bpf(cmd, &attr, size, uattr.is_kernel);
if (err < 0)
return err;
switch (cmd) {
case BPF_MAP_CREATE:
- err = map_create(&attr);
+ err = map_create(&attr, uattr.is_kernel);
break;
case BPF_MAP_LOOKUP_ELEM:
err = map_lookup_elem(&attr);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6e604caa870c..5ae0df7a855a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -579,6 +579,13 @@ static bool is_cmpxchg_insn(const struct bpf_insn *insn)
insn->imm == BPF_CMPXCHG;
}
+static bool is_atomic_load_insn(const struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_STX &&
+ BPF_MODE(insn->code) == BPF_ATOMIC &&
+ insn->imm == BPF_LOAD_ACQ;
+}
+
static int __get_spi(s32 off)
{
return (-off - 1) / BPF_REG_SIZE;
@@ -1545,6 +1552,17 @@ static void release_reference_state(struct bpf_verifier_state *state, int idx)
return;
}
+static bool find_reference_state(struct bpf_verifier_state *state, int ptr_id)
+{
+ int i;
+
+ for (i = 0; i < state->acquired_refs; i++)
+ if (state->refs[i].id == ptr_id)
+ return true;
+
+ return false;
+}
+
static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
{
int i;
@@ -1600,6 +1618,14 @@ static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *st
return NULL;
}
+static void update_peak_states(struct bpf_verifier_env *env)
+{
+ u32 cur_states;
+
+ cur_states = env->explored_states_size + env->free_list_size;
+ env->peak_states = max(env->peak_states, cur_states);
+}
+
static void free_func_state(struct bpf_func_state *state)
{
if (!state)
@@ -1622,6 +1648,50 @@ static void free_verifier_state(struct bpf_verifier_state *state,
kfree(state);
}
+/* struct bpf_verifier_state->{parent,loop_entry} refer to states
+ * that are in either of env->{explored_states,free_list}.
+ * In both cases the state is contained in struct bpf_verifier_state_list.
+ */
+static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
+{
+ if (st->parent)
+ return container_of(st->parent, struct bpf_verifier_state_list, state);
+ return NULL;
+}
+
+static struct bpf_verifier_state_list *state_loop_entry_as_list(struct bpf_verifier_state *st)
+{
+ if (st->loop_entry)
+ return container_of(st->loop_entry, struct bpf_verifier_state_list, state);
+ return NULL;
+}
+
+/* A state can be freed if it is no longer referenced:
+ * - is in the env->free_list;
+ * - has no children states;
+ * - is not used as loop_entry.
+ *
+ * Freeing a state can make its loop_entry free-able.
+ */
+static void maybe_free_verifier_state(struct bpf_verifier_env *env,
+ struct bpf_verifier_state_list *sl)
+{
+ struct bpf_verifier_state_list *loop_entry_sl;
+
+ while (sl && sl->in_free_list &&
+ sl->state.branches == 0 &&
+ sl->state.used_as_loop_entry == 0) {
+ loop_entry_sl = state_loop_entry_as_list(&sl->state);
+ if (loop_entry_sl)
+ loop_entry_sl->state.used_as_loop_entry--;
+ list_del(&sl->node);
+ free_verifier_state(&sl->state, false);
+ kfree(sl);
+ env->free_list_size--;
+ sl = loop_entry_sl;
+ }
+}
+
/* copy verifier state from src to dst growing dst stack space
* when necessary to accommodate larger src stack
*/
@@ -1661,6 +1731,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
dst_state->callback_unroll_depth = src->callback_unroll_depth;
dst_state->used_as_loop_entry = src->used_as_loop_entry;
dst_state->may_goto_depth = src->may_goto_depth;
+ dst_state->loop_entry = src->loop_entry;
for (i = 0; i <= src->curframe; i++) {
dst = dst_state->frame[i];
if (!dst) {
@@ -1681,7 +1752,7 @@ static u32 state_htab_size(struct bpf_verifier_env *env)
return env->prog->len;
}
-static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
+static struct list_head *explored_state(struct bpf_verifier_env *env, int idx)
{
struct bpf_verifier_state *cur = env->cur_state;
struct bpf_func_state *state = cur->frame[cur->curframe];
@@ -1789,16 +1860,13 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
* # Find outermost loop entry known for n
* def get_loop_entry(n):
* h = entries.get(n, None)
- * while h in entries and entries[h] != h:
+ * while h in entries:
* h = entries[h]
* return h
*
- * # Update n's loop entry if h's outermost entry comes
- * # before n's outermost entry in current DFS path.
+ * # Update n's loop entry if h comes before n in current DFS path.
* def update_loop_entry(n, h):
- * n1 = get_loop_entry(n) or n
- * h1 = get_loop_entry(h) or h
- * if h1 in path and depths[h1] <= depths[n1]:
+ * if h in path and depths[entries.get(n, n)] < depths[n]:
* entries[n] = h
*
* def dfs(n, depth):
@@ -1810,7 +1878,7 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
* # Case A: explore succ and update cur's loop entry
* # only if succ's entry is in current DFS path.
* dfs(succ, depth + 1)
- * h = get_loop_entry(succ)
+ * h = entries.get(succ, None)
* update_loop_entry(n, h)
* else:
* # Case B or C depending on `h1 in path` check in update_loop_entry().
@@ -1822,46 +1890,49 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
* and cur's loop entry has to be updated (case A), handle this in
* update_branch_counts();
* - use st->branch > 0 as a signal that st is in the current DFS path;
- * - handle cases B and C in is_state_visited();
- * - update topmost loop entry for intermediate states in get_loop_entry().
+ * - handle cases B and C in is_state_visited().
*/
-static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st)
+static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st)
{
- struct bpf_verifier_state *topmost = st->loop_entry, *old;
+ struct bpf_verifier_state *topmost = st->loop_entry;
+ u32 steps = 0;
- while (topmost && topmost->loop_entry && topmost != topmost->loop_entry)
+ while (topmost && topmost->loop_entry) {
+ if (steps++ > st->dfs_depth) {
+ WARN_ONCE(true, "verifier bug: infinite loop in get_loop_entry\n");
+ verbose(env, "verifier bug: infinite loop in get_loop_entry()\n");
+ return ERR_PTR(-EFAULT);
+ }
topmost = topmost->loop_entry;
- /* Update loop entries for intermediate states to avoid this
- * traversal in future get_loop_entry() calls.
- */
- while (st && st->loop_entry != topmost) {
- old = st->loop_entry;
- st->loop_entry = topmost;
- st = old;
}
return topmost;
}
-static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
+static void update_loop_entry(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
{
- struct bpf_verifier_state *cur1, *hdr1;
-
- cur1 = get_loop_entry(cur) ?: cur;
- hdr1 = get_loop_entry(hdr) ?: hdr;
- /* The head1->branches check decides between cases B and C in
- * comment for get_loop_entry(). If hdr1->branches == 0 then
+ /* The hdr->branches check decides between cases B and C in
+ * comment for get_loop_entry(). If hdr->branches == 0 then
* head's topmost loop entry is not in current DFS path,
* hence 'cur' and 'hdr' are not in the same loop and there is
* no need to update cur->loop_entry.
*/
- if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) {
+ if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) {
+ if (cur->loop_entry) {
+ cur->loop_entry->used_as_loop_entry--;
+ maybe_free_verifier_state(env, state_loop_entry_as_list(cur));
+ }
cur->loop_entry = hdr;
- hdr->used_as_loop_entry = true;
+ hdr->used_as_loop_entry++;
}
}
static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
+ struct bpf_verifier_state_list *sl = NULL, *parent_sl;
+ struct bpf_verifier_state *parent;
+
while (st) {
u32 br = --st->branches;
@@ -1871,7 +1942,7 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
* This is a part of 'case A' in get_loop_entry() comment.
*/
if (br == 0 && st->parent && st->loop_entry)
- update_loop_entry(st->parent, st->loop_entry);
+ update_loop_entry(env, st->parent, st->loop_entry);
/* WARN_ON(br > 1) technically makes sense here,
* but see comment in push_stack(), hence:
@@ -1881,7 +1952,12 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
br);
if (br)
break;
- st = st->parent;
+ parent = st->parent;
+ parent_sl = state_parent_as_list(st);
+ if (sl)
+ maybe_free_verifier_state(env, sl);
+ st = parent;
+ sl = parent_sl;
}
}
@@ -3206,6 +3282,21 @@ bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
return res ? &res->func_model : NULL;
}
+static int add_kfunc_in_insns(struct bpf_verifier_env *env,
+ struct bpf_insn *insn, int cnt)
+{
+ int i, ret;
+
+ for (i = 0; i < cnt; i++, insn++) {
+ if (bpf_pseudo_kfunc_call(insn)) {
+ ret = add_kfunc_call(env, insn->imm, insn->off);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
{
struct bpf_subprog_info *subprog = env->subprog_info;
@@ -3269,6 +3360,15 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
return 0;
}
+static int jmp_offset(struct bpf_insn *insn)
+{
+ u8 code = insn->code;
+
+ if (code == (BPF_JMP32 | BPF_JA))
+ return insn->imm;
+ return insn->off;
+}
+
static int check_subprogs(struct bpf_verifier_env *env)
{
int i, subprog_start, subprog_end, off, cur_subprog = 0;
@@ -3295,10 +3395,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
goto next;
if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
goto next;
- if (code == (BPF_JMP32 | BPF_JA))
- off = i + insn[i].imm + 1;
- else
- off = i + insn[i].off + 1;
+ off = i + jmp_offset(&insn[i]) + 1;
if (off < subprog_start || off >= subprog_end) {
verbose(env, "jump out of range from insn %d to %d\n", i, off);
return -EINVAL;
@@ -3483,7 +3580,7 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
if (class == BPF_STX) {
- /* BPF_STX (including atomic variants) has multiple source
+ /* BPF_STX (including atomic variants) has one or more source
* operands, one of which is a ptr. Check whether the caller is
* asking about it.
*/
@@ -3828,6 +3925,17 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
return btf_name_by_offset(desc_btf, func->name_off);
}
+static void verbose_insn(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_call = disasm_kfunc_name,
+ .cb_print = verbose,
+ .private_data = env,
+ };
+
+ print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
+}
+
static inline void bt_init(struct backtrack_state *bt, u32 frame)
{
bt->frame = frame;
@@ -4028,11 +4136,6 @@ static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
struct bpf_insn_hist_entry *hist, struct backtrack_state *bt)
{
- const struct bpf_insn_cbs cbs = {
- .cb_call = disasm_kfunc_name,
- .cb_print = verbose,
- .private_data = env,
- };
struct bpf_insn *insn = env->prog->insnsi + idx;
u8 class = BPF_CLASS(insn->code);
u8 opcode = BPF_OP(insn->code);
@@ -4050,7 +4153,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
verbose(env, "stack=%s before ", env->tmp_str_buf);
verbose(env, "%d: ", idx);
- print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
+ verbose_insn(env, insn);
}
/* If there is a history record that some registers gained range at this insn,
@@ -4097,7 +4200,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* dreg still needs precision before this insn
*/
}
- } else if (class == BPF_LDX) {
+ } else if (class == BPF_LDX || is_atomic_load_insn(insn)) {
if (!bt_is_reg_set(bt, dreg))
return 0;
bt_clear_reg(bt, dreg);
@@ -5982,18 +6085,10 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
- enum bpf_access_type t, enum bpf_reg_type *reg_type,
- struct btf **btf, u32 *btf_id, bool *is_retval, bool is_ldsx)
+ enum bpf_access_type t, struct bpf_insn_access_aux *info)
{
- struct bpf_insn_access_aux info = {
- .reg_type = *reg_type,
- .log = &env->log,
- .is_retval = false,
- .is_ldsx = is_ldsx,
- };
-
if (env->ops->is_valid_access &&
- env->ops->is_valid_access(off, size, t, env->prog, &info)) {
+ env->ops->is_valid_access(off, size, t, env->prog, info)) {
/* A non zero info.ctx_field_size indicates that this field is a
* candidate for later verifier transformation to load the whole
* field and then apply a mask when accessed with a narrower
@@ -6001,14 +6096,15 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
* will only allow for whole field access and rejects any other
* type of narrower access.
*/
- *reg_type = info.reg_type;
- *is_retval = info.is_retval;
-
- if (base_type(*reg_type) == PTR_TO_BTF_ID) {
- *btf = info.btf;
- *btf_id = info.btf_id;
+ if (base_type(info->reg_type) == PTR_TO_BTF_ID) {
+ if (info->ref_obj_id &&
+ !find_reference_state(env->cur_state, info->ref_obj_id)) {
+ verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n",
+ off);
+ return -EACCES;
+ }
} else {
- env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
+ env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size;
}
/* remember the offset of last byte accessed in ctx */
if (env->prog->aux->max_ctx_offset < off + size)
@@ -6118,6 +6214,26 @@ static bool is_arena_reg(struct bpf_verifier_env *env, int regno)
return reg->type == PTR_TO_ARENA;
}
+/* Return false if @regno contains a pointer whose type isn't supported for
+ * atomic instruction @insn.
+ */
+static bool atomic_ptr_type_ok(struct bpf_verifier_env *env, int regno,
+ struct bpf_insn *insn)
+{
+ if (is_ctx_reg(env, regno))
+ return false;
+ if (is_pkt_reg(env, regno))
+ return false;
+ if (is_flow_key_reg(env, regno))
+ return false;
+ if (is_sk_reg(env, regno))
+ return false;
+ if (is_arena_reg(env, regno))
+ return bpf_jit_supports_insn(insn, true);
+
+ return true;
+}
+
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
@@ -7365,11 +7481,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
- bool is_retval = false;
struct bpf_retval_range range;
- enum bpf_reg_type reg_type = SCALAR_VALUE;
- struct btf *btf = NULL;
- u32 btf_id = 0;
+ struct bpf_insn_access_aux info = {
+ .reg_type = SCALAR_VALUE,
+ .is_ldsx = is_ldsx,
+ .log = &env->log,
+ };
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
@@ -7381,8 +7498,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err < 0)
return err;
- err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
- &btf_id, &is_retval, is_ldsx);
+ err = check_ctx_access(env, insn_idx, off, size, t, &info);
if (err)
verbose_linfo(env, insn_idx, "; ");
if (!err && t == BPF_READ && value_regno >= 0) {
@@ -7390,8 +7506,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
* PTR_TO_PACKET[_META,_END]. In the latter
* case, we know the offset is zero.
*/
- if (reg_type == SCALAR_VALUE) {
- if (is_retval && get_func_retval_range(env->prog, &range)) {
+ if (info.reg_type == SCALAR_VALUE) {
+ if (info.is_retval && get_func_retval_range(env->prog, &range)) {
err = __mark_reg_s32_range(env, regs, value_regno,
range.minval, range.maxval);
if (err)
@@ -7402,7 +7518,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
} else {
mark_reg_known_zero(env, regs,
value_regno);
- if (type_may_be_null(reg_type))
+ if (type_may_be_null(info.reg_type))
regs[value_regno].id = ++env->id_gen;
/* A load of ctx field could have different
* actual load size with the one encoded in the
@@ -7410,12 +7526,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
* a sub-register.
*/
regs[value_regno].subreg_def = DEF_NOT_SUBREG;
- if (base_type(reg_type) == PTR_TO_BTF_ID) {
- regs[value_regno].btf = btf;
- regs[value_regno].btf_id = btf_id;
+ if (base_type(info.reg_type) == PTR_TO_BTF_ID) {
+ regs[value_regno].btf = info.btf;
+ regs[value_regno].btf_id = info.btf_id;
+ regs[value_regno].ref_obj_id = info.ref_obj_id;
}
}
- regs[value_regno].type = reg_type;
+ regs[value_regno].type = info.reg_type;
}
} else if (reg->type == PTR_TO_STACK) {
@@ -7518,27 +7635,72 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
bool allow_trust_mismatch);
-static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
+static int check_load_mem(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ bool strict_alignment_once, bool is_ldsx,
+ bool allow_trust_mismatch, const char *ctx)
{
- int load_reg;
+ struct bpf_reg_state *regs = cur_regs(env);
+ enum bpf_reg_type src_reg_type;
int err;
- switch (insn->imm) {
- case BPF_ADD:
- case BPF_ADD | BPF_FETCH:
- case BPF_AND:
- case BPF_AND | BPF_FETCH:
- case BPF_OR:
- case BPF_OR | BPF_FETCH:
- case BPF_XOR:
- case BPF_XOR | BPF_FETCH:
- case BPF_XCHG:
- case BPF_CMPXCHG:
- break;
- default:
- verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
- return -EINVAL;
- }
+ /* check src operand */
+ err = check_reg_arg(env, insn->src_reg, SRC_OP);
+ if (err)
+ return err;
+
+ /* check dst operand */
+ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
+ if (err)
+ return err;
+
+ src_reg_type = regs[insn->src_reg].type;
+
+ /* Check if (src_reg + off) is readable. The state of dst_reg will be
+ * updated by this call.
+ */
+ err = check_mem_access(env, env->insn_idx, insn->src_reg, insn->off,
+ BPF_SIZE(insn->code), BPF_READ, insn->dst_reg,
+ strict_alignment_once, is_ldsx);
+ err = err ?: save_aux_ptr_type(env, src_reg_type,
+ allow_trust_mismatch);
+ err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], ctx);
+
+ return err;
+}
+
+static int check_store_reg(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ bool strict_alignment_once)
+{
+ struct bpf_reg_state *regs = cur_regs(env);
+ enum bpf_reg_type dst_reg_type;
+ int err;
+
+ /* check src1 operand */
+ err = check_reg_arg(env, insn->src_reg, SRC_OP);
+ if (err)
+ return err;
+
+ /* check src2 operand */
+ err = check_reg_arg(env, insn->dst_reg, SRC_OP);
+ if (err)
+ return err;
+
+ dst_reg_type = regs[insn->dst_reg].type;
+
+ /* Check if (dst_reg + off) is writeable. */
+ err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
+ BPF_SIZE(insn->code), BPF_WRITE, insn->src_reg,
+ strict_alignment_once, false);
+ err = err ?: save_aux_ptr_type(env, dst_reg_type, false);
+
+ return err;
+}
+
+static int check_atomic_rmw(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
+{
+ int load_reg;
+ int err;
if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
verbose(env, "invalid atomic operand size\n");
@@ -7574,11 +7736,7 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
return -EACCES;
}
- if (is_ctx_reg(env, insn->dst_reg) ||
- is_pkt_reg(env, insn->dst_reg) ||
- is_flow_key_reg(env, insn->dst_reg) ||
- is_sk_reg(env, insn->dst_reg) ||
- (is_arena_reg(env, insn->dst_reg) && !bpf_jit_supports_insn(insn, true))) {
+ if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
insn->dst_reg,
reg_type_str(env, reg_state(env, insn->dst_reg)->type));
@@ -7605,12 +7763,12 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
/* Check whether we can read the memory, with second call for fetch
* case to simulate the register fill.
*/
- err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+ err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ, -1, true, false);
if (!err && load_reg >= 0)
- err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
- BPF_SIZE(insn->code), BPF_READ, load_reg,
- true, false);
+ err = check_mem_access(env, env->insn_idx, insn->dst_reg,
+ insn->off, BPF_SIZE(insn->code),
+ BPF_READ, load_reg, true, false);
if (err)
return err;
@@ -7620,13 +7778,86 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
return err;
}
/* Check whether we can write into the same memory. */
- err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
+ err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
if (err)
return err;
return 0;
}
+static int check_atomic_load(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
+{
+ int err;
+
+ err = check_load_mem(env, insn, true, false, false, "atomic_load");
+ if (err)
+ return err;
+
+ if (!atomic_ptr_type_ok(env, insn->src_reg, insn)) {
+ verbose(env, "BPF_ATOMIC loads from R%d %s is not allowed\n",
+ insn->src_reg,
+ reg_type_str(env, reg_state(env, insn->src_reg)->type));
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+static int check_atomic_store(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
+{
+ int err;
+
+ err = check_store_reg(env, insn, true);
+ if (err)
+ return err;
+
+ if (!atomic_ptr_type_ok(env, insn->dst_reg, insn)) {
+ verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
+ insn->dst_reg,
+ reg_type_str(env, reg_state(env, insn->dst_reg)->type));
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+static int check_atomic(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+ switch (insn->imm) {
+ case BPF_ADD:
+ case BPF_ADD | BPF_FETCH:
+ case BPF_AND:
+ case BPF_AND | BPF_FETCH:
+ case BPF_OR:
+ case BPF_OR | BPF_FETCH:
+ case BPF_XOR:
+ case BPF_XOR | BPF_FETCH:
+ case BPF_XCHG:
+ case BPF_CMPXCHG:
+ return check_atomic_rmw(env, insn);
+ case BPF_LOAD_ACQ:
+ if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
+ verbose(env,
+ "64-bit load-acquires are only supported on 64-bit arches\n");
+ return -EOPNOTSUPP;
+ }
+ return check_atomic_load(env, insn);
+ case BPF_STORE_REL:
+ if (BPF_SIZE(insn->code) == BPF_DW && BITS_PER_LONG != 64) {
+ verbose(env,
+ "64-bit store-releases are only supported on 64-bit arches\n");
+ return -EOPNOTSUPP;
+ }
+ return check_atomic_store(env, insn);
+ default:
+ verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n",
+ insn->imm);
+ return -EINVAL;
+ }
+}
+
/* When register 'regno' is used to read the stack (either directly or through
* a helper function) make sure that it's within stack boundary and, depending
* on the access type and privileges, that all elements of the stack are
@@ -8431,10 +8662,12 @@ static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
{
struct bpf_verifier_state_list *sl;
struct bpf_verifier_state *st;
+ struct list_head *pos, *head;
/* Explored states are pushed in stack order, most recent states come first */
- sl = *explored_state(env, insn_idx);
- for (; sl; sl = sl->next) {
+ head = explored_state(env, insn_idx);
+ list_for_each(pos, head) {
+ sl = container_of(pos, struct bpf_verifier_state_list, node);
/* If st->branches != 0 state is a part of current DFS verification path,
* hence cur & st for a loop.
*/
@@ -9666,7 +9899,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
goto error;
if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
- verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
+ verbose(env, "mixing of tail_calls and bpf-to-bpf calls is not supported\n");
return -EINVAL;
}
break;
@@ -10237,23 +10470,18 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (subprog_is_global(env, subprog)) {
const char *sub_name = subprog_name(env, subprog);
- /* Only global subprogs cannot be called with a lock held. */
if (env->cur_state->active_locks) {
verbose(env, "global function calls are not allowed while holding a lock,\n"
"use static function instead\n");
return -EINVAL;
}
- /* Only global subprogs cannot be called with preemption disabled. */
- if (env->cur_state->active_preempt_locks) {
- verbose(env, "global function calls are not allowed with preemption disabled,\n"
- "use static function instead\n");
- return -EINVAL;
- }
-
- if (env->cur_state->active_irq_id) {
- verbose(env, "global function calls are not allowed with IRQs disabled,\n"
- "use static function instead\n");
+ if (env->subprog_info[subprog].might_sleep &&
+ (env->cur_state->active_rcu_lock || env->cur_state->active_preempt_locks ||
+ env->cur_state->active_irq_id || !in_sleepable(env))) {
+ verbose(env, "global functions that may sleep are not allowed in non-sleepable context,\n"
+ "i.e., in a RCU/IRQ/preempt-disabled section, or in\n"
+ "a non-sleepable BPF program context\n");
return -EINVAL;
}
@@ -10752,6 +10980,8 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
{
struct bpf_verifier_state *state = env->cur_state;
+ enum bpf_prog_type type = resolve_prog_type(env->prog);
+ struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
bool refs_lingering = false;
int i;
@@ -10761,6 +10991,12 @@ static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exi
for (i = 0; i < state->acquired_refs; i++) {
if (state->refs[i].type != REF_TYPE_PTR)
continue;
+ /* Allow struct_ops programs to return a referenced kptr back to
+ * kernel. Type checks are performed later in check_return_code.
+ */
+ if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit &&
+ reg->ref_obj_id == state->refs[i].id)
+ continue;
verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
state->refs[i].id, state->refs[i].insn_idx);
refs_lingering = true;
@@ -11781,6 +12017,8 @@ enum special_kfunc_type {
KF_bpf_iter_num_new,
KF_bpf_iter_num_next,
KF_bpf_iter_num_destroy,
+ KF_bpf_set_dentry_xattr,
+ KF_bpf_remove_dentry_xattr,
};
BTF_SET_START(special_kfunc_set)
@@ -11810,6 +12048,10 @@ BTF_ID(func, bpf_wq_set_callback_impl)
#ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new)
#endif
+#ifdef CONFIG_BPF_LSM
+BTF_ID(func, bpf_set_dentry_xattr)
+BTF_ID(func, bpf_remove_dentry_xattr)
+#endif
BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
@@ -11859,6 +12101,13 @@ BTF_ID(func, bpf_local_irq_restore)
BTF_ID(func, bpf_iter_num_new)
BTF_ID(func, bpf_iter_num_next)
BTF_ID(func, bpf_iter_num_destroy)
+#ifdef CONFIG_BPF_LSM
+BTF_ID(func, bpf_set_dentry_xattr)
+BTF_ID(func, bpf_remove_dentry_xattr)
+#else
+BTF_ID_UNUSED
+BTF_ID_UNUSED
+#endif
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -16399,13 +16648,14 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
const char *exit_ctx = "At program exit";
struct tnum enforce_attach_type_range = tnum_unknown;
const struct bpf_prog *prog = env->prog;
- struct bpf_reg_state *reg;
+ struct bpf_reg_state *reg = reg_state(env, regno);
struct bpf_retval_range range = retval_range(0, 1);
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
int err;
struct bpf_func_state *frame = env->cur_state->frame[0];
const bool is_subprog = frame->subprogno;
bool return_32bit = false;
+ const struct btf_type *reg_type, *ret_type = NULL;
/* LSM and struct_ops func-ptr's return type could be "void" */
if (!is_subprog || frame->in_exception_callback_fn) {
@@ -16414,10 +16664,26 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
if (prog->expected_attach_type == BPF_LSM_CGROUP)
/* See below, can be 0 or 0-1 depending on hook. */
break;
- fallthrough;
+ if (!prog->aux->attach_func_proto->type)
+ return 0;
+ break;
case BPF_PROG_TYPE_STRUCT_OPS:
if (!prog->aux->attach_func_proto->type)
return 0;
+
+ if (frame->in_exception_callback_fn)
+ break;
+
+ /* Allow a struct_ops program to return a referenced kptr if it
+ * matches the operator's return type and is in its unmodified
+ * form. A scalar zero (i.e., a null pointer) is also allowed.
+ */
+ reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
+ ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
+ prog->aux->attach_func_proto->type,
+ NULL);
+ if (ret_type && ret_type == reg_type && reg->ref_obj_id)
+ return __check_ptr_off_reg(env, reg, regno, false);
break;
default:
break;
@@ -16439,8 +16705,6 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
return -EACCES;
}
- reg = cur_regs(env) + regno;
-
if (frame->in_async_callback_fn) {
/* enforce return zero from async callbacks like timer */
exit_ctx = "At async callback return";
@@ -16539,6 +16803,11 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
case BPF_PROG_TYPE_NETFILTER:
range = retval_range(NF_DROP, NF_ACCEPT);
break;
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ if (!ret_type)
+ return 0;
+ range = retval_range(0, 0);
+ break;
case BPF_PROG_TYPE_EXT:
/* freplace program can return anything as its return value
* depends on the to-be-replaced kernel func or bpf program.
@@ -16582,6 +16851,14 @@ static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off)
subprog->changes_pkt_data = true;
}
+static void mark_subprog_might_sleep(struct bpf_verifier_env *env, int off)
+{
+ struct bpf_subprog_info *subprog;
+
+ subprog = find_containing_subprog(env, off);
+ subprog->might_sleep = true;
+}
+
/* 't' is an index of a call-site.
* 'w' is a callee entry point.
* Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED.
@@ -16595,6 +16872,7 @@ static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w)
caller = find_containing_subprog(env, t);
callee = find_containing_subprog(env, w);
caller->changes_pkt_data |= callee->changes_pkt_data;
+ caller->might_sleep |= callee->might_sleep;
}
/* non-recursive DFS pseudo code
@@ -16753,27 +17031,6 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
/* Bitmask with 1s for all caller saved registers */
#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
-/* Return a bitmask specifying which caller saved registers are
- * clobbered by a call to a helper *as if* this helper follows
- * bpf_fastcall contract:
- * - includes R0 if function is non-void;
- * - includes R1-R5 if corresponding parameter has is described
- * in the function prototype.
- */
-static u32 helper_fastcall_clobber_mask(const struct bpf_func_proto *fn)
-{
- u32 mask;
- int i;
-
- mask = 0;
- if (fn->ret_type != RET_VOID)
- mask |= BIT(BPF_REG_0);
- for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i)
- if (fn->arg_type[i] != ARG_DONTCARE)
- mask |= BIT(BPF_REG_1 + i);
- return mask;
-}
-
/* True if do_misc_fixups() replaces calls to helper number 'imm',
* replacement patch is presumed to follow bpf_fastcall contract
* (see mark_fastcall_pattern_for_call() below).
@@ -16790,24 +17047,54 @@ static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm)
}
}
-/* Same as helper_fastcall_clobber_mask() but for kfuncs, see comment above */
-static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta)
+struct call_summary {
+ u8 num_params;
+ bool is_void;
+ bool fastcall;
+};
+
+/* If @call is a kfunc or helper call, fills @cs and returns true,
+ * otherwise returns false.
+ */
+static bool get_call_summary(struct bpf_verifier_env *env, struct bpf_insn *call,
+ struct call_summary *cs)
{
- u32 vlen, i, mask;
+ struct bpf_kfunc_call_arg_meta meta;
+ const struct bpf_func_proto *fn;
+ int i;
- vlen = btf_type_vlen(meta->func_proto);
- mask = 0;
- if (!btf_type_is_void(btf_type_by_id(meta->btf, meta->func_proto->type)))
- mask |= BIT(BPF_REG_0);
- for (i = 0; i < vlen; ++i)
- mask |= BIT(BPF_REG_1 + i);
- return mask;
-}
+ if (bpf_helper_call(call)) {
-/* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */
-static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta)
-{
- return meta->kfunc_flags & KF_FASTCALL;
+ if (get_helper_proto(env, call->imm, &fn) < 0)
+ /* error would be reported later */
+ return false;
+ cs->fastcall = fn->allow_fastcall &&
+ (verifier_inlines_helper_call(env, call->imm) ||
+ bpf_jit_inlines_helper_call(call->imm));
+ cs->is_void = fn->ret_type == RET_VOID;
+ cs->num_params = 0;
+ for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) {
+ if (fn->arg_type[i] == ARG_DONTCARE)
+ break;
+ cs->num_params++;
+ }
+ return true;
+ }
+
+ if (bpf_pseudo_kfunc_call(call)) {
+ int err;
+
+ err = fetch_kfunc_meta(env, call, &meta, NULL);
+ if (err < 0)
+ /* error would be reported later */
+ return false;
+ cs->num_params = btf_type_vlen(meta.func_proto);
+ cs->fastcall = meta.kfunc_flags & KF_FASTCALL;
+ cs->is_void = btf_type_is_void(btf_type_by_id(meta.btf, meta.func_proto->type));
+ return true;
+ }
+
+ return false;
}
/* LLVM define a bpf_fastcall function attribute.
@@ -16890,39 +17177,23 @@ static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
{
struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx;
struct bpf_insn *call = &env->prog->insnsi[insn_idx];
- const struct bpf_func_proto *fn;
- u32 clobbered_regs_mask = ALL_CALLER_SAVED_REGS;
+ u32 clobbered_regs_mask;
+ struct call_summary cs;
u32 expected_regs_mask;
- bool can_be_inlined = false;
s16 off;
int i;
- if (bpf_helper_call(call)) {
- if (get_helper_proto(env, call->imm, &fn) < 0)
- /* error would be reported later */
- return;
- clobbered_regs_mask = helper_fastcall_clobber_mask(fn);
- can_be_inlined = fn->allow_fastcall &&
- (verifier_inlines_helper_call(env, call->imm) ||
- bpf_jit_inlines_helper_call(call->imm));
- }
-
- if (bpf_pseudo_kfunc_call(call)) {
- struct bpf_kfunc_call_arg_meta meta;
- int err;
-
- err = fetch_kfunc_meta(env, call, &meta, NULL);
- if (err < 0)
- /* error would be reported later */
- return;
-
- clobbered_regs_mask = kfunc_fastcall_clobber_mask(&meta);
- can_be_inlined = is_fastcall_kfunc_call(&meta);
- }
-
- if (clobbered_regs_mask == ALL_CALLER_SAVED_REGS)
+ if (!get_call_summary(env, call, &cs))
return;
+ /* A bitmask specifying which caller saved registers are clobbered
+ * by a call to a helper/kfunc *as if* this helper/kfunc follows
+ * bpf_fastcall contract:
+ * - includes R0 if function is non-void;
+ * - includes R1-R5 if corresponding parameter is described
+ * in the function prototype.
+ */
+ clobbered_regs_mask = GENMASK(cs.num_params, cs.is_void ? 1 : 0);
/* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */
expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS;
@@ -16980,7 +17251,7 @@ static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env,
* don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills()
* does not remove spill/fill pair {4,6}.
*/
- if (can_be_inlined)
+ if (cs.fastcall)
env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1;
else
subprog->keep_fastcall_stack = 1;
@@ -17062,9 +17333,20 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
mark_prune_point(env, t);
mark_jmp_point(env, t);
}
- if (bpf_helper_call(insn) && bpf_helper_changes_pkt_data(insn->imm))
- mark_subprog_changes_pkt_data(env, t);
- if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ if (bpf_helper_call(insn)) {
+ const struct bpf_func_proto *fp;
+
+ ret = get_helper_proto(env, insn->imm, &fp);
+ /* If called in a non-sleepable context program will be
+ * rejected anyway, so we should end up with precise
+ * sleepable marks on subprogs, except for dead code
+ * elimination.
+ */
+ if (ret == 0 && fp->might_sleep)
+ mark_subprog_might_sleep(env, t);
+ if (bpf_helper_changes_pkt_data(insn->imm))
+ mark_subprog_changes_pkt_data(env, t);
+ } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
struct bpf_kfunc_call_arg_meta meta;
ret = fetch_kfunc_meta(env, insn, &meta, NULL);
@@ -17083,6 +17365,13 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
*/
mark_force_checkpoint(env, t);
}
+ /* Same as helpers, if called in a non-sleepable context
+ * program will be rejected anyway, so we should end up
+ * with precise sleepable marks on subprogs, except for
+ * dead code elimination.
+ */
+ if (ret == 0 && is_kfunc_sleepable(&meta))
+ mark_subprog_might_sleep(env, t);
}
return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
@@ -17125,9 +17414,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
static int check_cfg(struct bpf_verifier_env *env)
{
int insn_cnt = env->prog->len;
- int *insn_stack, *insn_state;
+ int *insn_stack, *insn_state, *insn_postorder;
int ex_insn_beg, i, ret = 0;
- bool ex_done = false;
insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
if (!insn_state)
@@ -17139,6 +17427,17 @@ static int check_cfg(struct bpf_verifier_env *env)
return -ENOMEM;
}
+ insn_postorder = env->cfg.insn_postorder = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ if (!insn_postorder) {
+ kvfree(insn_state);
+ kvfree(insn_stack);
+ return -ENOMEM;
+ }
+
+ ex_insn_beg = env->exception_callback_subprog
+ ? env->subprog_info[env->exception_callback_subprog].start
+ : 0;
+
insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
insn_stack[0] = 0; /* 0 is the first instruction */
env->cfg.cur_stack = 1;
@@ -17152,6 +17451,7 @@ walk_cfg:
case DONE_EXPLORING:
insn_state[t] = EXPLORED;
env->cfg.cur_stack--;
+ insn_postorder[env->cfg.cur_postorder++] = t;
break;
case KEEP_EXPLORING:
break;
@@ -17170,13 +17470,10 @@ walk_cfg:
goto err_free;
}
- if (env->exception_callback_subprog && !ex_done) {
- ex_insn_beg = env->subprog_info[env->exception_callback_subprog].start;
-
+ if (ex_insn_beg && insn_state[ex_insn_beg] != EXPLORED) {
insn_state[ex_insn_beg] = DISCOVERED;
insn_stack[0] = ex_insn_beg;
env->cfg.cur_stack = 1;
- ex_done = true;
goto walk_cfg;
}
@@ -17199,6 +17496,7 @@ walk_cfg:
}
ret = 0; /* cfg looks good */
env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data;
+ env->prog->aux->might_sleep = env->subprog_info[0].might_sleep;
err_free:
kvfree(insn_state);
@@ -17815,18 +18113,22 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
static void clean_live_states(struct bpf_verifier_env *env, int insn,
struct bpf_verifier_state *cur)
{
+ struct bpf_verifier_state *loop_entry;
struct bpf_verifier_state_list *sl;
+ struct list_head *pos, *head;
- sl = *explored_state(env, insn);
- while (sl) {
+ head = explored_state(env, insn);
+ list_for_each(pos, head) {
+ sl = container_of(pos, struct bpf_verifier_state_list, node);
if (sl->state.branches)
- goto next;
+ continue;
+ loop_entry = get_loop_entry(env, &sl->state);
+ if (!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches)
+ continue;
if (sl->state.insn_idx != insn ||
!same_callsites(&sl->state, cur))
- goto next;
+ continue;
clean_verifier_state(env, &sl->state);
-next:
- sl = sl->next;
}
}
@@ -18210,15 +18512,17 @@ static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *c
* the current state will reach 'bpf_exit' instruction safely
*/
static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
- struct bpf_func_state *cur, enum exact_level exact)
+ struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact)
{
- int i;
+ u16 live_regs = env->insn_aux_data[insn_idx].live_regs_before;
+ u16 i;
if (old->callback_depth > cur->callback_depth)
return false;
for (i = 0; i < MAX_BPF_REG; i++)
- if (!regsafe(env, &old->regs[i], &cur->regs[i],
+ if (((1 << i) & live_regs) &&
+ !regsafe(env, &old->regs[i], &cur->regs[i],
&env->idmap_scratch, exact))
return false;
@@ -18239,6 +18543,7 @@ static bool states_equal(struct bpf_verifier_env *env,
struct bpf_verifier_state *cur,
enum exact_level exact)
{
+ u32 insn_idx;
int i;
if (old->curframe != cur->curframe)
@@ -18262,9 +18567,12 @@ static bool states_equal(struct bpf_verifier_env *env,
* and all frame states need to be equivalent
*/
for (i = 0; i <= old->curframe; i++) {
+ insn_idx = i == old->curframe
+ ? env->insn_idx
+ : old->frame[i + 1]->callsite;
if (old->frame[i]->callsite != cur->frame[i]->callsite)
return false;
- if (!func_states_equal(env, old->frame[i], cur->frame[i], exact))
+ if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
return false;
}
return true;
@@ -18517,10 +18825,11 @@ static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
struct bpf_verifier_state_list *new_sl;
- struct bpf_verifier_state_list *sl, **pprev;
+ struct bpf_verifier_state_list *sl;
struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
int i, j, n, err, states_cnt = 0;
bool force_new_state, add_new_state, force_exact;
+ struct list_head *pos, *tmp, *head;
force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
/* Avoid accumulating infinitely long jmp history */
@@ -18539,15 +18848,14 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
env->insn_processed - env->prev_insn_processed >= 8)
add_new_state = true;
- pprev = explored_state(env, insn_idx);
- sl = *pprev;
-
clean_live_states(env, insn_idx, cur);
- while (sl) {
+ head = explored_state(env, insn_idx);
+ list_for_each_safe(pos, tmp, head) {
+ sl = container_of(pos, struct bpf_verifier_state_list, node);
states_cnt++;
if (sl->state.insn_idx != insn_idx)
- goto next;
+ continue;
if (sl->state.branches) {
struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
@@ -18621,7 +18929,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
- update_loop_entry(cur, &sl->state);
+ update_loop_entry(env, cur, &sl->state);
goto hit;
}
}
@@ -18630,7 +18938,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
if (is_may_goto_insn_at(env, insn_idx)) {
if (sl->state.may_goto_depth != cur->may_goto_depth &&
states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
- update_loop_entry(cur, &sl->state);
+ update_loop_entry(env, cur, &sl->state);
goto hit;
}
}
@@ -18697,11 +19005,13 @@ skip_inf_loop_check:
*
* Additional details are in the comment before get_loop_entry().
*/
- loop_entry = get_loop_entry(&sl->state);
+ loop_entry = get_loop_entry(env, &sl->state);
+ if (IS_ERR(loop_entry))
+ return PTR_ERR(loop_entry);
force_exact = loop_entry && loop_entry->branches > 0;
if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) {
if (force_exact)
- update_loop_entry(cur, loop_entry);
+ update_loop_entry(env, cur, loop_entry);
hit:
sl->hit_cnt++;
/* reached equivalent register/stack state,
@@ -18750,31 +19060,13 @@ miss:
/* the state is unlikely to be useful. Remove it to
* speed up verification
*/
- *pprev = sl->next;
- if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE &&
- !sl->state.used_as_loop_entry) {
- u32 br = sl->state.branches;
-
- WARN_ONCE(br,
- "BUG live_done but branches_to_explore %d\n",
- br);
- free_verifier_state(&sl->state, false);
- kfree(sl);
- env->peak_states--;
- } else {
- /* cannot free this state, since parentage chain may
- * walk it later. Add it for free_list instead to
- * be freed at the end of verification
- */
- sl->next = env->free_list;
- env->free_list = sl;
- }
- sl = *pprev;
- continue;
+ sl->in_free_list = true;
+ list_del(&sl->node);
+ list_add(&sl->node, &env->free_list);
+ env->free_list_size++;
+ env->explored_states_size--;
+ maybe_free_verifier_state(env, sl);
}
-next:
- pprev = &sl->next;
- sl = *pprev;
}
if (env->max_states_per_insn < states_cnt)
@@ -18799,7 +19091,8 @@ next:
if (!new_sl)
return -ENOMEM;
env->total_states++;
- env->peak_states++;
+ env->explored_states_size++;
+ update_peak_states(env);
env->prev_jmps_processed = env->jmps_processed;
env->prev_insn_processed = env->insn_processed;
@@ -18823,8 +19116,8 @@ next:
cur->first_insn_idx = insn_idx;
cur->insn_hist_start = cur->insn_hist_end;
cur->dfs_depth = new->dfs_depth + 1;
- new_sl->next = *explored_state(env, insn_idx);
- *explored_state(env, insn_idx) = new_sl;
+ list_add(&new_sl->node, head);
+
/* connect new state to parentage chain. Current frame needs all
* registers connected. Only r6 - r9 of the callers are alive (pushed
* to the stack implicitly by JITs) so in callers' frames connect just
@@ -19011,19 +19304,13 @@ static int do_check(struct bpf_verifier_env *env)
}
if (env->log.level & BPF_LOG_LEVEL) {
- const struct bpf_insn_cbs cbs = {
- .cb_call = disasm_kfunc_name,
- .cb_print = verbose,
- .private_data = env,
- };
-
if (verifier_state_scratched(env))
print_insn_state(env, state, state->curframe);
verbose_linfo(env, env->insn_idx, "; ");
env->prev_log_pos = env->log.end_pos;
verbose(env, "%d: ", env->insn_idx);
- print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
+ verbose_insn(env, insn);
env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
env->prev_log_pos = env->log.end_pos;
}
@@ -19045,37 +19332,18 @@ static int do_check(struct bpf_verifier_env *env)
return err;
} else if (class == BPF_LDX) {
- enum bpf_reg_type src_reg_type;
-
- /* check for reserved fields is already done */
-
- /* check src operand */
- err = check_reg_arg(env, insn->src_reg, SRC_OP);
- if (err)
- return err;
-
- err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
- if (err)
- return err;
+ bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX;
- src_reg_type = regs[insn->src_reg].type;
-
- /* check that memory (src_reg + off) is readable,
- * the state of dst_reg will be updated by this func
+ /* Check for reserved fields is already done in
+ * resolve_pseudo_ldimm64().
*/
- err = check_mem_access(env, env->insn_idx, insn->src_reg,
- insn->off, BPF_SIZE(insn->code),
- BPF_READ, insn->dst_reg, false,
- BPF_MODE(insn->code) == BPF_MEMSX);
- err = err ?: save_aux_ptr_type(env, src_reg_type, true);
- err = err ?: reg_bounds_sanity_check(env, &regs[insn->dst_reg], "ldx");
+ err = check_load_mem(env, insn, false, is_ldsx, true,
+ "ldx");
if (err)
return err;
} else if (class == BPF_STX) {
- enum bpf_reg_type dst_reg_type;
-
if (BPF_MODE(insn->code) == BPF_ATOMIC) {
- err = check_atomic(env, env->insn_idx, insn);
+ err = check_atomic(env, insn);
if (err)
return err;
env->insn_idx++;
@@ -19087,25 +19355,7 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
- /* check src1 operand */
- err = check_reg_arg(env, insn->src_reg, SRC_OP);
- if (err)
- return err;
- /* check src2 operand */
- err = check_reg_arg(env, insn->dst_reg, SRC_OP);
- if (err)
- return err;
-
- dst_reg_type = regs[insn->dst_reg].type;
-
- /* check that memory (dst_reg + off) is writeable */
- err = check_mem_access(env, env->insn_idx, insn->dst_reg,
- insn->off, BPF_SIZE(insn->code),
- BPF_WRITE, insn->src_reg, false, false);
- if (err)
- return err;
-
- err = save_aux_ptr_type(env, dst_reg_type, false);
+ err = check_store_reg(env, insn, false);
if (err)
return err;
} else if (class == BPF_ST) {
@@ -19245,6 +19495,10 @@ process_bpf_exit:
return err;
break;
} else {
+ if (WARN_ON_ONCE(env->cur_state->loop_entry)) {
+ verbose(env, "verifier bug: env->cur_state->loop_entry != NULL\n");
+ return -EFAULT;
+ }
do_print_state = true;
continue;
}
@@ -20334,7 +20588,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
struct bpf_subprog_info *subprogs = env->subprog_info;
const struct bpf_verifier_ops *ops = env->ops;
- int i, cnt, size, ctx_field_size, delta = 0, epilogue_cnt = 0;
+ int i, cnt, size, ctx_field_size, ret, delta = 0, epilogue_cnt = 0;
const int insn_cnt = env->prog->len;
struct bpf_insn *epilogue_buf = env->epilogue_buf;
struct bpf_insn *insn_buf = env->insn_buf;
@@ -20363,6 +20617,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
return -ENOMEM;
env->prog = new_prog;
delta += cnt - 1;
+
+ ret = add_kfunc_in_insns(env, epilogue_buf, epilogue_cnt - 1);
+ if (ret < 0)
+ return ret;
}
}
@@ -20383,6 +20641,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
env->prog = new_prog;
delta += cnt - 1;
+
+ ret = add_kfunc_in_insns(env, insn_buf, cnt - 1);
+ if (ret < 0)
+ return ret;
}
}
@@ -20415,7 +20677,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
type = BPF_WRITE;
- } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
+ } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_B) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_H) ||
+ insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) ||
insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) &&
env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) {
insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code);
@@ -20723,6 +20987,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb;
func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data;
+ func[i]->aux->might_sleep = env->subprog_info[i].might_sleep;
if (!i)
func[i]->aux->exception_boundary = env->seen_exception;
func[i] = bpf_int_jit_compile(func[i]);
@@ -20939,6 +21204,14 @@ static void specialize_kfunc(struct bpf_verifier_env *env,
*/
env->seen_direct_write = seen_direct_write;
}
+
+ if (func_id == special_kfunc_list[KF_bpf_set_dentry_xattr] &&
+ bpf_lsm_has_d_inode_locked(prog))
+ *addr = (unsigned long)bpf_set_dentry_xattr_locked;
+
+ if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr] &&
+ bpf_lsm_has_d_inode_locked(prog))
+ *addr = (unsigned long)bpf_remove_dentry_xattr_locked;
}
static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
@@ -21373,7 +21646,50 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto next_insn;
}
- if (is_may_goto_insn(insn)) {
+ if (is_may_goto_insn(insn) && bpf_jit_supports_timed_may_goto()) {
+ int stack_off_cnt = -stack_depth - 16;
+
+ /*
+ * Two 8 byte slots, depth-16 stores the count, and
+ * depth-8 stores the start timestamp of the loop.
+ *
+ * The starting value of count is BPF_MAX_TIMED_LOOPS
+ * (0xffff). Every iteration loads it and subs it by 1,
+ * until the value becomes 0 in AX (thus, 1 in stack),
+ * after which we call arch_bpf_timed_may_goto, which
+ * either sets AX to 0xffff to keep looping, or to 0
+ * upon timeout. AX is then stored into the stack. In
+ * the next iteration, we either see 0 and break out, or
+ * continue iterating until the next time value is 0
+ * after subtraction, rinse and repeat.
+ */
+ stack_depth_extra = 16;
+ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off_cnt);
+ if (insn->off >= 0)
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 5);
+ else
+ insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1);
+ insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1);
+ insn_buf[3] = BPF_JMP_IMM(BPF_JNE, BPF_REG_AX, 0, 2);
+ /*
+ * AX is used as an argument to pass in stack_off_cnt
+ * (to add to r10/fp), and also as the return value of
+ * the call to arch_bpf_timed_may_goto.
+ */
+ insn_buf[4] = BPF_MOV64_IMM(BPF_REG_AX, stack_off_cnt);
+ insn_buf[5] = BPF_EMIT_CALL(arch_bpf_timed_may_goto);
+ insn_buf[6] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off_cnt);
+ cnt = 7;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ goto next_insn;
+ } else if (is_may_goto_insn(insn)) {
int stack_off = -stack_depth - 8;
stack_depth_extra = 8;
@@ -21897,6 +22213,13 @@ next_insn:
if (subprogs[cur_subprog + 1].start == i + delta + 1) {
subprogs[cur_subprog].stack_depth += stack_depth_extra;
subprogs[cur_subprog].stack_extra = stack_depth_extra;
+
+ stack_depth = subprogs[cur_subprog].stack_depth;
+ if (stack_depth > MAX_BPF_STACK && !prog->jit_requested) {
+ verbose(env, "stack size %d(extra %d) is too large\n",
+ stack_depth, stack_depth_extra);
+ return -EINVAL;
+ }
cur_subprog++;
stack_depth = subprogs[cur_subprog].stack_depth;
stack_depth_extra = 0;
@@ -21907,23 +22230,33 @@ next_insn:
env->prog->aux->stack_depth = subprogs[0].stack_depth;
for (i = 0; i < env->subprog_cnt; i++) {
+ int delta = bpf_jit_supports_timed_may_goto() ? 2 : 1;
int subprog_start = subprogs[i].start;
int stack_slots = subprogs[i].stack_extra / 8;
+ int slots = delta, cnt = 0;
if (!stack_slots)
continue;
- if (stack_slots > 1) {
+ /* We need two slots in case timed may_goto is supported. */
+ if (stack_slots > slots) {
verbose(env, "verifier bug: stack_slots supports may_goto only\n");
return -EFAULT;
}
- /* Add ST insn to subprog prologue to init extra stack */
- insn_buf[0] = BPF_ST_MEM(BPF_DW, BPF_REG_FP,
- -subprogs[i].stack_depth, BPF_MAX_LOOPS);
+ stack_depth = subprogs[i].stack_depth;
+ if (bpf_jit_supports_timed_may_goto()) {
+ insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
+ BPF_MAX_TIMED_LOOPS);
+ insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth + 8, 0);
+ } else {
+ /* Add ST insn to subprog prologue to init extra stack */
+ insn_buf[cnt++] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, -stack_depth,
+ BPF_MAX_LOOPS);
+ }
/* Copy first actual insn to preserve it */
- insn_buf[1] = env->prog->insnsi[subprog_start];
+ insn_buf[cnt++] = env->prog->insnsi[subprog_start];
- new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, 2);
+ new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
env->prog = prog = new_prog;
@@ -21933,7 +22266,7 @@ next_insn:
* to insn after BPF_ST that inits may_goto count.
* Adjustment will succeed because bpf_patch_insn_data() didn't fail.
*/
- WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1));
+ WARN_ON(adjust_jmp_off(env->prog, subprog_start, delta));
}
/* Since poke tab is now finalized, publish aux to tracker. */
@@ -22131,31 +22464,29 @@ static int remove_fastcall_spills_fills(struct bpf_verifier_env *env)
static void free_states(struct bpf_verifier_env *env)
{
- struct bpf_verifier_state_list *sl, *sln;
+ struct bpf_verifier_state_list *sl;
+ struct list_head *head, *pos, *tmp;
int i;
- sl = env->free_list;
- while (sl) {
- sln = sl->next;
+ list_for_each_safe(pos, tmp, &env->free_list) {
+ sl = container_of(pos, struct bpf_verifier_state_list, node);
free_verifier_state(&sl->state, false);
kfree(sl);
- sl = sln;
}
- env->free_list = NULL;
+ INIT_LIST_HEAD(&env->free_list);
if (!env->explored_states)
return;
for (i = 0; i < state_htab_size(env); i++) {
- sl = env->explored_states[i];
+ head = &env->explored_states[i];
- while (sl) {
- sln = sl->next;
+ list_for_each_safe(pos, tmp, head) {
+ sl = container_of(pos, struct bpf_verifier_state_list, node);
free_verifier_state(&sl->state, false);
kfree(sl);
- sl = sln;
}
- env->explored_states[i] = NULL;
+ INIT_LIST_HEAD(&env->explored_states[i]);
}
}
@@ -22163,6 +22494,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
{
bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
struct bpf_subprog_info *sub = subprog_info(env, subprog);
+ struct bpf_prog_aux *aux = env->prog->aux;
struct bpf_verifier_state *state;
struct bpf_reg_state *regs;
int ret, i;
@@ -22270,6 +22602,13 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
mark_reg_known_zero(env, regs, BPF_REG_1);
}
+ /* Acquire references for struct_ops program arguments tagged with "__ref" */
+ if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+ for (i = 0; i < aux->ctx_arg_info_size; i++)
+ aux->ctx_arg_info[i].ref_obj_id = aux->ctx_arg_info[i].refcounted ?
+ acquire_reference(env, 0) : 0;
+ }
+
ret = do_check(env);
out:
/* check for NULL is necessary, since cur_state can be freed inside
@@ -22392,6 +22731,15 @@ static void print_verification_stats(struct bpf_verifier_env *env)
env->peak_states, env->longest_mark_read_walk);
}
+int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
+ const struct bpf_ctx_arg_aux *info, u32 cnt)
+{
+ prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL);
+ prog->aux->ctx_arg_info_size = cnt;
+
+ return prog->aux->ctx_arg_info ? 0 : -ENOMEM;
+}
+
static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
{
const struct btf_type *t, *func_proto;
@@ -22399,10 +22747,11 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
const struct bpf_struct_ops *st_ops;
const struct btf_member *member;
struct bpf_prog *prog = env->prog;
- u32 btf_id, member_idx;
+ bool has_refcounted_arg = false;
+ u32 btf_id, member_idx, member_off;
struct btf *btf;
const char *mname;
- int err;
+ int i, err;
if (!prog->gpl_compatible) {
verbose(env, "struct ops programs must have a GPL compatible license\n");
@@ -22450,7 +22799,8 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
return -EINVAL;
}
- err = bpf_struct_ops_supported(st_ops, __btf_member_bit_offset(t, member) / 8);
+ member_off = __btf_member_bit_offset(t, member) / 8;
+ err = bpf_struct_ops_supported(st_ops, member_off);
if (err) {
verbose(env, "attach to unsupported member %s of struct %s\n",
mname, st_ops->name);
@@ -22472,17 +22822,32 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
return -EACCES;
}
- /* btf_ctx_access() used this to provide argument type info */
- prog->aux->ctx_arg_info =
- st_ops_desc->arg_info[member_idx].info;
- prog->aux->ctx_arg_info_size =
- st_ops_desc->arg_info[member_idx].cnt;
+ for (i = 0; i < st_ops_desc->arg_info[member_idx].cnt; i++) {
+ if (st_ops_desc->arg_info[member_idx].info->refcounted) {
+ has_refcounted_arg = true;
+ break;
+ }
+ }
+
+ /* Tail call is not allowed for programs with refcounted arguments since we
+ * cannot guarantee that valid refcounted kptrs will be passed to the callee.
+ */
+ for (i = 0; i < env->subprog_cnt; i++) {
+ if (has_refcounted_arg && env->subprog_info[i].has_tail_call) {
+ verbose(env, "program with __ref argument cannot tail call\n");
+ return -EINVAL;
+ }
+ }
+
+ prog->aux->st_ops = st_ops;
+ prog->aux->attach_st_ops_member_off = member_off;
prog->aux->attach_func_proto = func_proto;
prog->aux->attach_func_name = mname;
env->ops = st_ops->verifier_ops;
- return 0;
+ return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
+ st_ops_desc->arg_info[member_idx].cnt);
}
#define SECURITY_PREFIX "security_"
@@ -22558,6 +22923,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
if (tgt_prog) {
struct bpf_prog_aux *aux = tgt_prog->aux;
bool tgt_changes_pkt_data;
+ bool tgt_might_sleep;
if (bpf_prog_is_dev_bound(prog->aux) &&
!bpf_prog_dev_bound_match(prog, tgt_prog)) {
@@ -22600,6 +22966,15 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
"Extension program changes packet data, while original does not\n");
return -EINVAL;
}
+
+ tgt_might_sleep = aux->func
+ ? aux->func[subprog]->aux->might_sleep
+ : aux->might_sleep;
+ if (prog->aux->might_sleep && !tgt_might_sleep) {
+ bpf_log(log,
+ "Extension program may sleep, while original does not\n");
+ return -EINVAL;
+ }
}
if (!tgt_prog->jited) {
bpf_log(log, "Can attach to only JITed progs\n");
@@ -22856,6 +23231,33 @@ BTF_ID(func, __rcu_read_unlock)
#endif
BTF_SET_END(btf_id_deny)
+/* fexit and fmod_ret can't be used to attach to __noreturn functions.
+ * Currently, we must manually list all __noreturn functions here. Once a more
+ * robust solution is implemented, this workaround can be removed.
+ */
+BTF_SET_START(noreturn_deny)
+#ifdef CONFIG_IA32_EMULATION
+BTF_ID(func, __ia32_sys_exit)
+BTF_ID(func, __ia32_sys_exit_group)
+#endif
+#ifdef CONFIG_KUNIT
+BTF_ID(func, __kunit_abort)
+BTF_ID(func, kunit_try_catch_throw)
+#endif
+#ifdef CONFIG_MODULES
+BTF_ID(func, __module_put_and_kthread_exit)
+#endif
+#ifdef CONFIG_X86_64
+BTF_ID(func, __x64_sys_exit)
+BTF_ID(func, __x64_sys_exit_group)
+#endif
+BTF_ID(func, do_exit)
+BTF_ID(func, do_group_exit)
+BTF_ID(func, kthread_complete_and_exit)
+BTF_ID(func, kthread_exit)
+BTF_ID(func, make_task_dead)
+BTF_SET_END(noreturn_deny)
+
static bool can_be_sleepable(struct bpf_prog *prog)
{
if (prog->type == BPF_PROG_TYPE_TRACING) {
@@ -22932,9 +23334,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
prog->aux->attach_btf_trace = true;
return 0;
} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
- if (!bpf_iter_prog_supported(prog))
- return -EINVAL;
- return 0;
+ return bpf_iter_prog_supported(prog);
}
if (prog->type == BPF_PROG_TYPE_LSM) {
@@ -22944,6 +23344,11 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
} else if (prog->type == BPF_PROG_TYPE_TRACING &&
btf_id_set_contains(&btf_id_deny, btf_id)) {
return -EINVAL;
+ } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
+ prog->expected_attach_type == BPF_MODIFY_RETURN) &&
+ btf_id_set_contains(&noreturn_deny, btf_id)) {
+ verbose(env, "Attaching fexit/fmod_ret to __noreturn functions is rejected.\n");
+ return -EINVAL;
}
key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
@@ -23036,6 +23441,302 @@ static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr,
return 0;
}
+static bool can_fallthrough(struct bpf_insn *insn)
+{
+ u8 class = BPF_CLASS(insn->code);
+ u8 opcode = BPF_OP(insn->code);
+
+ if (class != BPF_JMP && class != BPF_JMP32)
+ return true;
+
+ if (opcode == BPF_EXIT || opcode == BPF_JA)
+ return false;
+
+ return true;
+}
+
+static bool can_jump(struct bpf_insn *insn)
+{
+ u8 class = BPF_CLASS(insn->code);
+ u8 opcode = BPF_OP(insn->code);
+
+ if (class != BPF_JMP && class != BPF_JMP32)
+ return false;
+
+ switch (opcode) {
+ case BPF_JA:
+ case BPF_JEQ:
+ case BPF_JNE:
+ case BPF_JLT:
+ case BPF_JLE:
+ case BPF_JGT:
+ case BPF_JGE:
+ case BPF_JSGT:
+ case BPF_JSGE:
+ case BPF_JSLT:
+ case BPF_JSLE:
+ case BPF_JCOND:
+ return true;
+ }
+
+ return false;
+}
+
+static int insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2])
+{
+ struct bpf_insn *insn = &prog->insnsi[idx];
+ int i = 0, insn_sz;
+ u32 dst;
+
+ insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
+ if (can_fallthrough(insn) && idx + 1 < prog->len)
+ succ[i++] = idx + insn_sz;
+
+ if (can_jump(insn)) {
+ dst = idx + jmp_offset(insn) + 1;
+ if (i == 0 || succ[0] != dst)
+ succ[i++] = dst;
+ }
+
+ return i;
+}
+
+/* Each field is a register bitmask */
+struct insn_live_regs {
+ u16 use; /* registers read by instruction */
+ u16 def; /* registers written by instruction */
+ u16 in; /* registers that may be alive before instruction */
+ u16 out; /* registers that may be alive after instruction */
+};
+
+/* Bitmask with 1s for all caller saved registers */
+#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1)
+
+/* Compute info->{use,def} fields for the instruction */
+static void compute_insn_live_regs(struct bpf_verifier_env *env,
+ struct bpf_insn *insn,
+ struct insn_live_regs *info)
+{
+ struct call_summary cs;
+ u8 class = BPF_CLASS(insn->code);
+ u8 code = BPF_OP(insn->code);
+ u8 mode = BPF_MODE(insn->code);
+ u16 src = BIT(insn->src_reg);
+ u16 dst = BIT(insn->dst_reg);
+ u16 r0 = BIT(0);
+ u16 def = 0;
+ u16 use = 0xffff;
+
+ switch (class) {
+ case BPF_LD:
+ switch (mode) {
+ case BPF_IMM:
+ if (BPF_SIZE(insn->code) == BPF_DW) {
+ def = dst;
+ use = 0;
+ }
+ break;
+ case BPF_LD | BPF_ABS:
+ case BPF_LD | BPF_IND:
+ /* stick with defaults */
+ break;
+ }
+ break;
+ case BPF_LDX:
+ switch (mode) {
+ case BPF_MEM:
+ case BPF_MEMSX:
+ def = dst;
+ use = src;
+ break;
+ }
+ break;
+ case BPF_ST:
+ switch (mode) {
+ case BPF_MEM:
+ def = 0;
+ use = dst;
+ break;
+ }
+ break;
+ case BPF_STX:
+ switch (mode) {
+ case BPF_MEM:
+ def = 0;
+ use = dst | src;
+ break;
+ case BPF_ATOMIC:
+ switch (insn->imm) {
+ case BPF_CMPXCHG:
+ use = r0 | dst | src;
+ def = r0;
+ break;
+ case BPF_LOAD_ACQ:
+ def = dst;
+ use = src;
+ break;
+ case BPF_STORE_REL:
+ def = 0;
+ use = dst | src;
+ break;
+ default:
+ use = dst | src;
+ if (insn->imm & BPF_FETCH)
+ def = src;
+ else
+ def = 0;
+ }
+ break;
+ }
+ break;
+ case BPF_ALU:
+ case BPF_ALU64:
+ switch (code) {
+ case BPF_END:
+ use = dst;
+ def = dst;
+ break;
+ case BPF_MOV:
+ def = dst;
+ if (BPF_SRC(insn->code) == BPF_K)
+ use = 0;
+ else
+ use = src;
+ break;
+ default:
+ def = dst;
+ if (BPF_SRC(insn->code) == BPF_K)
+ use = dst;
+ else
+ use = dst | src;
+ }
+ break;
+ case BPF_JMP:
+ case BPF_JMP32:
+ switch (code) {
+ case BPF_JA:
+ case BPF_JCOND:
+ def = 0;
+ use = 0;
+ break;
+ case BPF_EXIT:
+ def = 0;
+ use = r0;
+ break;
+ case BPF_CALL:
+ def = ALL_CALLER_SAVED_REGS;
+ use = def & ~BIT(BPF_REG_0);
+ if (get_call_summary(env, insn, &cs))
+ use = GENMASK(cs.num_params, 1);
+ break;
+ default:
+ def = 0;
+ if (BPF_SRC(insn->code) == BPF_K)
+ use = dst;
+ else
+ use = dst | src;
+ }
+ break;
+ }
+
+ info->def = def;
+ info->use = use;
+}
+
+/* Compute may-live registers after each instruction in the program.
+ * The register is live after the instruction I if it is read by some
+ * instruction S following I during program execution and is not
+ * overwritten between I and S.
+ *
+ * Store result in env->insn_aux_data[i].live_regs.
+ */
+static int compute_live_registers(struct bpf_verifier_env *env)
+{
+ struct bpf_insn_aux_data *insn_aux = env->insn_aux_data;
+ struct bpf_insn *insns = env->prog->insnsi;
+ struct insn_live_regs *state;
+ int insn_cnt = env->prog->len;
+ int err = 0, i, j;
+ bool changed;
+
+ /* Use the following algorithm:
+ * - define the following:
+ * - I.use : a set of all registers read by instruction I;
+ * - I.def : a set of all registers written by instruction I;
+ * - I.in : a set of all registers that may be alive before I execution;
+ * - I.out : a set of all registers that may be alive after I execution;
+ * - insn_successors(I): a set of instructions S that might immediately
+ * follow I for some program execution;
+ * - associate separate empty sets 'I.in' and 'I.out' with each instruction;
+ * - visit each instruction in a postorder and update
+ * state[i].in, state[i].out as follows:
+ *
+ * state[i].out = U [state[s].in for S in insn_successors(i)]
+ * state[i].in = (state[i].out / state[i].def) U state[i].use
+ *
+ * (where U stands for set union, / stands for set difference)
+ * - repeat the computation while the {in,out} fields change for
+ * any instruction.
+ */
+ state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL);
+ if (!state) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < insn_cnt; ++i)
+ compute_insn_live_regs(env, &insns[i], &state[i]);
+
+ changed = true;
+ while (changed) {
+ changed = false;
+ for (i = 0; i < env->cfg.cur_postorder; ++i) {
+ int insn_idx = env->cfg.insn_postorder[i];
+ struct insn_live_regs *live = &state[insn_idx];
+ int succ_num;
+ u32 succ[2];
+ u16 new_out = 0;
+ u16 new_in = 0;
+
+ succ_num = insn_successors(env->prog, insn_idx, succ);
+ for (int s = 0; s < succ_num; ++s)
+ new_out |= state[succ[s]].in;
+ new_in = (new_out & ~live->def) | live->use;
+ if (new_out != live->out || new_in != live->in) {
+ live->in = new_in;
+ live->out = new_out;
+ changed = true;
+ }
+ }
+ }
+
+ for (i = 0; i < insn_cnt; ++i)
+ insn_aux[i].live_regs_before = state[i].in;
+
+ if (env->log.level & BPF_LOG_LEVEL2) {
+ verbose(env, "Live regs before insn:\n");
+ for (i = 0; i < insn_cnt; ++i) {
+ verbose(env, "%3d: ", i);
+ for (j = BPF_REG_0; j < BPF_REG_10; ++j)
+ if (insn_aux[i].live_regs_before & BIT(j))
+ verbose(env, "%d", j);
+ else
+ verbose(env, ".");
+ verbose(env, " ");
+ verbose_insn(env, &insns[i]);
+ if (bpf_is_ldimm64(&insns[i]))
+ i++;
+ }
+ }
+
+out:
+ kvfree(state);
+ kvfree(env->cfg.insn_postorder);
+ env->cfg.insn_postorder = NULL;
+ env->cfg.cur_postorder = 0;
+ return err;
+}
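
To make the in/out equations in the comment above concrete, here is a minimal, self-contained user-space sketch of the same round-robin fixpoint over a toy four-instruction CFG (an illustration only; the toy program and names are made up):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_SUCC 2

struct node {
	uint16_t use, def;	/* registers read / written by the insn */
	uint16_t in, out;	/* computed live-in / live-out masks */
	int succ[MAX_SUCC];	/* successor indices, -1 when unused */
};

/* out[i] = U in[s] over successors; in[i] = (out[i] & ~def[i]) | use[i] */
static void solve(struct node *n, int cnt, const int *postorder)
{
	bool changed = true;

	while (changed) {
		changed = false;
		for (int i = 0; i < cnt; i++) {
			struct node *v = &n[postorder[i]];
			uint16_t out = 0, in;

			for (int s = 0; s < MAX_SUCC; s++)
				if (v->succ[s] >= 0)
					out |= n[v->succ[s]].in;
			in = (out & ~v->def) | v->use;
			if (in != v->in || out != v->out) {
				v->in = in;
				v->out = out;
				changed = true;
			}
		}
	}
}

int main(void)
{
	/* 0: r0 = r2;  1: if r0 > 0 goto 3;  2: r0 = 1;  3: exit (reads r0) */
	struct node prog[] = {
		{ .use = 1 << 2, .def = 1 << 0, .succ = { 1, -1 } },
		{ .use = 1 << 0, .def = 0,      .succ = { 2, 3 } },
		{ .use = 0,      .def = 1 << 0, .succ = { 3, -1 } },
		{ .use = 1 << 0, .def = 0,      .succ = { -1, -1 } },
	};
	int postorder[] = { 3, 2, 1, 0 };

	solve(prog, 4, postorder);
	for (int i = 0; i < 4; i++)
		printf("insn %d: live before = 0x%x\n", i, (unsigned int)prog[i].in);
	return 0;	/* prints 0x4, 0x1, 0x0, 0x1: only r2 is live at entry */
}
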
+
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
u64 start_time = ktime_get_ns();
@@ -23113,12 +23814,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
env->explored_states = kvcalloc(state_htab_size(env),
- sizeof(struct bpf_verifier_state_list *),
+ sizeof(struct list_head),
GFP_USER);
ret = -ENOMEM;
if (!env->explored_states)
goto skip_full_check;
+ for (i = 0; i < state_htab_size(env); i++)
+ INIT_LIST_HEAD(&env->explored_states[i]);
+ INIT_LIST_HEAD(&env->free_list);
+
ret = check_btf_info_early(env, attr, uattr);
if (ret < 0)
goto skip_full_check;
@@ -23153,6 +23858,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (ret)
goto skip_full_check;
+ ret = compute_live_registers(env);
+ if (ret < 0)
+ goto skip_full_check;
+
ret = mark_fastcall_patterns(env);
if (ret < 0)
goto skip_full_check;
@@ -23291,6 +24000,7 @@ err_unlock:
vfree(env->insn_aux_data);
kvfree(env->insn_hist);
err_free_env:
+ kvfree(env->cfg.insn_postorder);
kvfree(env);
return ret;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 997fb2a47c92..d23dc3d489e0 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -392,7 +392,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
.arg2_type = ARG_CONST_SIZE,
};
-static void __set_printk_clr_event(void)
+static void __set_printk_clr_event(struct work_struct *work)
{
/*
* This program might be calling bpf_trace_printk,
@@ -405,10 +405,11 @@ static void __set_printk_clr_event(void)
if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
pr_warn_ratelimited("could not enable bpf_trace_printk events");
}
+static DECLARE_WORK(set_printk_work, __set_printk_clr_event);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
- __set_printk_clr_event();
+ schedule_work(&set_printk_work);
return &bpf_trace_printk_proto;
}
@@ -451,7 +452,7 @@ static const struct bpf_func_proto bpf_trace_vprintk_proto = {
const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
{
- __set_printk_clr_event();
+ schedule_work(&set_printk_work);
return &bpf_trace_vprintk_proto;
}
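
The two printk hunks above defer trace_set_clr_event() to a workqueue instead of calling it inline, so the potentially sleeping event enable no longer runs in whatever context asked for the helper proto. A minimal sketch of the same DECLARE_WORK/schedule_work idiom, with hypothetical names:

#include <linux/workqueue.h>

static void slow_setup_fn(struct work_struct *work)
{
	/* runs later in process context; may sleep and take mutexes */
}
static DECLARE_WORK(slow_setup_work, slow_setup_fn);

static void hot_path(void)
{
	/* cheap and safe from restricted contexts: just queue the work */
	schedule_work(&slow_setup_work);
}
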
@@ -606,6 +607,11 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void)
+{
+ return &bpf_perf_event_read_value_proto;
+}
+
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
u64 flags, struct perf_raw_record *raw,
@@ -843,7 +849,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struc
if (unlikely(is_global_init(task)))
return -EPERM;
- if (!preemptible()) {
+ if (preempt_count() != 0 || irqs_disabled()) {
/* Do an early check on signal validity. Otherwise,
* the error is lost in deferred irq_work.
*/
diff --git a/mm/memory.c b/mm/memory.c
index b43428ae76cd..bea87c082f27 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6807,6 +6807,124 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr,
}
EXPORT_SYMBOL_GPL(access_process_vm);
+#ifdef CONFIG_BPF_SYSCALL
+/*
+ * Copy a string from another process's address space as given in mm.
+ * If there is any error return -EFAULT.
+ */
+static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ void *old_buf = buf;
+ int err = 0;
+
+ *(char *)buf = '\0';
+
+ if (mmap_read_lock_killable(mm))
+ return -EFAULT;
+
+ addr = untagged_addr_remote(mm, addr);
+
+ /* Avoid triggering the temporary warning in __get_user_pages */
+ if (!vma_lookup(mm, addr)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ while (len) {
+ int bytes, offset, retval;
+ void *maddr;
+ struct page *page;
+ struct vm_area_struct *vma = NULL;
+
+ page = get_user_page_vma_remote(mm, addr, gup_flags, &vma);
+ if (IS_ERR(page)) {
+ /*
+ * Treat as a total failure for now until we decide how
+ * to handle the CONFIG_HAVE_IOREMAP_PROT case and
+ * stack expansion.
+ */
+ *(char *)buf = '\0';
+ err = -EFAULT;
+ goto out;
+ }
+
+ bytes = len;
+ offset = addr & (PAGE_SIZE - 1);
+ if (bytes > PAGE_SIZE - offset)
+ bytes = PAGE_SIZE - offset;
+
+ maddr = kmap_local_page(page);
+ retval = strscpy(buf, maddr + offset, bytes);
+ if (retval >= 0) {
+ /* Found the end of the string */
+ buf += retval;
+ unmap_and_put_page(page, maddr);
+ break;
+ }
+
+ buf += bytes - 1;
+ /*
+ * Because strscpy always NUL terminates we need to
+ * copy the last byte in the page if we are going to
+ * load more pages
+ */
+ if (bytes != len) {
+ addr += bytes - 1;
+ copy_from_user_page(vma, page, addr, buf, maddr + (PAGE_SIZE - 1), 1);
+ buf += 1;
+ addr += 1;
+ }
+ len -= bytes;
+
+ unmap_and_put_page(page, maddr);
+ }
+
+out:
+ mmap_read_unlock(mm);
+ if (err)
+ return err;
+ return buf - old_buf;
+}
+
+/**
+ * copy_remote_vm_str - copy a string from another process's address space.
+ * @tsk: the task of the target address space
+ * @addr: start address to read from
+ * @buf: destination buffer
+ * @len: number of bytes to copy
+ * @gup_flags: flags modifying lookup behaviour
+ *
+ * The caller must hold a reference on @tsk.
+ *
+ * Return: number of bytes copied from @addr (source) to @buf (destination),
+ * not including the trailing NUL. The destination buffer is always left
+ * NUL-terminated. On any error, return -EFAULT.
+ */
+int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ struct mm_struct *mm;
+ int ret;
+
+ if (unlikely(len == 0))
+ return 0;
+
+ mm = get_task_mm(tsk);
+ if (!mm) {
+ *(char *)buf = '\0';
+ return -EFAULT;
+ }
+
+ ret = __copy_remote_vm_str(mm, addr, buf, len, gup_flags);
+
+ mmput(mm);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(copy_remote_vm_str);
+#endif /* CONFIG_BPF_SYSCALL */
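
A sketch of a hypothetical kernel-side caller of the new helper (the function, buffer size, and log message here are made up for illustration):

/* Hypothetical caller, for illustration only. */
static int print_remote_str(struct task_struct *task, unsigned long user_addr)
{
	char buf[64];
	int n;

	n = copy_remote_vm_str(task, user_addr, buf, sizeof(buf), 0);
	if (n < 0)
		return n;	/* -EFAULT; buf[] is still NUL-terminated ("") */

	pr_info("read %d bytes: %s\n", n, buf);
	return 0;
}
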
+
/*
* Print the name of a VMA.
*/
diff --git a/mm/nommu.c b/mm/nommu.c
index acc18ce611ed..753384666bae 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1714,6 +1714,85 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
}
EXPORT_SYMBOL_GPL(access_process_vm);
+#ifdef CONFIG_BPF_SYSCALL
+/*
+ * Copy a string from another process's address space as given in mm.
+ * If there is any error return -EFAULT.
+ */
+static int __copy_remote_vm_str(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len)
+{
+ unsigned long addr_end;
+ struct vm_area_struct *vma;
+ int ret = -EFAULT;
+
+ *(char *)buf = '\0';
+
+ if (mmap_read_lock_killable(mm))
+ return ret;
+
+ /* the access must start within one of the target process's mappings */
+ vma = find_vma(mm, addr);
+ if (!vma)
+ goto out;
+
+ if (check_add_overflow(addr, len, &addr_end))
+ goto out;
+
+ /* don't overrun this mapping */
+ if (addr_end > vma->vm_end)
+ len = vma->vm_end - addr;
+
+ /* only read mappings where it is permitted */
+ if (vma->vm_flags & VM_MAYREAD) {
+ ret = strscpy(buf, (char *)addr, len);
+ if (ret < 0)
+ ret = len - 1;
+ }
+
+out:
+ mmap_read_unlock(mm);
+ return ret;
+}
+
+/**
+ * copy_remote_vm_str - copy a string from another process's address space.
+ * @tsk: the task of the target address space
+ * @addr: start address to read from
+ * @buf: destination buffer
+ * @len: number of bytes to copy
+ * @gup_flags: flags modifying lookup behaviour (unused)
+ *
+ * The caller must hold a reference on @tsk.
+ *
+ * Return: number of bytes copied from @addr (source) to @buf (destination),
+ * not including the trailing NUL. The destination buffer is always left
+ * NUL-terminated. On any error, return -EFAULT.
+ */
+int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ struct mm_struct *mm;
+ int ret;
+
+ if (unlikely(len == 0))
+ return 0;
+
+ mm = get_task_mm(tsk);
+ if (!mm) {
+ *(char *)buf = '\0';
+ return -EFAULT;
+ }
+
+ ret = __copy_remote_vm_str(mm, addr, buf, len);
+
+ mmput(mm);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(copy_remote_vm_str);
+#endif /* CONFIG_BPF_SYSCALL */
+
/**
* nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
* @inode: The inode to check
diff --git a/net/core/filter.c b/net/core/filter.c
index 46ae8eb7a03c..bc6828761a47 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8137,6 +8137,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skb_load_bytes_relative_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_proto;
+ case BPF_FUNC_get_netns_cookie:
+ return &bpf_get_netns_cookie_proto;
case BPF_FUNC_get_socket_uid:
return &bpf_get_socket_uid_proto;
case BPF_FUNC_perf_event_output:
@@ -9697,7 +9699,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, queue_mapping):
if (type == BPF_WRITE) {
- u32 off = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size);
+ u32 offset = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size);
if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) {
*insn++ = BPF_JMP_A(0); /* noop */
@@ -9706,7 +9708,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
if (BPF_CLASS(si->code) == BPF_STX)
*insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
- *insn++ = BPF_EMIT_STORE(BPF_H, si, off);
+ *insn++ = BPF_EMIT_STORE(BPF_H, si, offset);
} else {
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff,
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index dd9944a97b7e..5b632635e00d 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -307,7 +307,7 @@ $(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check=
VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \
$(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \
- $(abspath ./vmlinux)
+ $(abspath $(objtree)/vmlinux)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
$(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
diff --git a/scripts/Makefile.btf b/scripts/Makefile.btf
index c3cbeb13de50..fbaaec2187e5 100644
--- a/scripts/Makefile.btf
+++ b/scripts/Makefile.btf
@@ -24,7 +24,7 @@ else
pahole-flags-$(call test-ge, $(pahole-ver), 126) = -j$(JOBS) --btf_features=encode_force,var,float,enum64,decl_tag,type_tag,optimized_func,consistent_func,decl_tag_kfuncs
ifneq ($(KBUILD_EXTMOD),)
-module-pahole-flags-$(call test-ge, $(pahole-ver), 126) += --btf_features=distilled_base
+module-pahole-flags-$(call test-ge, $(pahole-ver), 128) += --btf_features=distilled_base
endif
endif
diff --git a/security/security.c b/security/security.c
index 8aa839232c73..fb57e8fddd91 100644
--- a/security/security.c
+++ b/security/security.c
@@ -5627,6 +5627,7 @@ int security_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op,
* @cmd: command
* @attr: bpf attribute
* @size: size
+ * @kernel: whether or not the call originated from the kernel
*
* Do an initial check for all bpf syscalls after the attribute is copied into
* the kernel. The actual security module can implement its own rules to
@@ -5634,9 +5635,9 @@ int security_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op,
*
* Return: Returns 0 if permission is granted.
*/
-int security_bpf(int cmd, union bpf_attr *attr, unsigned int size)
+int security_bpf(int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
- return call_int_hook(bpf, cmd, attr, size);
+ return call_int_hook(bpf, cmd, attr, size, kernel);
}
/**
@@ -5673,6 +5674,7 @@ int security_bpf_prog(struct bpf_prog *prog)
* @map: BPF map object
* @attr: BPF syscall attributes used to create BPF map
* @token: BPF token used to grant user access
+ * @kernel: whether or not the call originated from the kernel
*
* Do a check when the kernel creates a new BPF map. This is also the
* point where LSM blob is allocated for LSMs that need them.
@@ -5680,9 +5682,9 @@ int security_bpf_prog(struct bpf_prog *prog)
* Return: Returns 0 on success, error on failure.
*/
int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
- struct bpf_token *token)
+ struct bpf_token *token, bool kernel)
{
- return call_int_hook(bpf_map_create, map, attr, token);
+ return call_int_hook(bpf_map_create, map, attr, token, kernel);
}
/**
@@ -5690,6 +5692,7 @@ int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
* @prog: BPF program object
* @attr: BPF syscall attributes used to create BPF program
* @token: BPF token used to grant user access to BPF subsystem
+ * @kernel: whether or not the call originated from the kernel
*
* Perform an access control check when the kernel loads a BPF program and
* allocates associated BPF program object. This hook is also responsible for
@@ -5698,9 +5701,9 @@ int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
* Return: Returns 0 on success, error on failure.
*/
int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
- struct bpf_token *token)
+ struct bpf_token *token, bool kernel)
{
- return call_int_hook(bpf_prog_load, prog, attr, token);
+ return call_int_hook(bpf_prog_load, prog, attr, token, kernel);
}
/**
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 7150c953fec3..fb66ed2e97cf 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6907,7 +6907,7 @@ static int selinux_ib_alloc_security(void *ib_sec)
#ifdef CONFIG_BPF_SYSCALL
static int selinux_bpf(int cmd, union bpf_attr *attr,
- unsigned int size)
+ unsigned int size, bool kernel)
{
u32 sid = current_sid();
int ret;
@@ -6994,7 +6994,7 @@ static int selinux_bpf_prog(struct bpf_prog *prog)
}
static int selinux_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
- struct bpf_token *token)
+ struct bpf_token *token, bool kernel)
{
struct bpf_security_struct *bpfsec;
@@ -7017,7 +7017,7 @@ static void selinux_bpf_map_free(struct bpf_map *map)
}
static int selinux_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
- struct bpf_token *token)
+ struct bpf_token *token, bool kernel)
{
struct bpf_security_struct *bpfsec;
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 6ea4823b770c..9e9a5f006cd2 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -65,7 +65,12 @@ prefix ?= /usr/local
bash_compdir ?= /usr/share/bash-completion/completions
CFLAGS += -O2
-CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
+CFLAGS += -W
+CFLAGS += -Wall
+CFLAGS += -Wextra
+CFLAGS += -Wformat-signedness
+CFLAGS += -Wno-unused-parameter
+CFLAGS += -Wno-missing-field-initializers
CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS))
CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
-I$(or $(OUTPUT),.) \
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 2636655ac180..6b14cbfa58aa 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -253,7 +253,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
if (btf_kflag(t))
printf("\n\t'%s' val=%d", name, v->val);
else
- printf("\n\t'%s' val=%u", name, v->val);
+ printf("\n\t'%s' val=%u", name, (__u32)v->val);
}
}
if (json_output)
@@ -1022,7 +1022,7 @@ static int do_dump(int argc, char **argv)
for (i = 0; i < root_type_cnt; i++) {
if (root_type_ids[i] == root_id) {
err = -EINVAL;
- p_err("duplicate root_id %d supplied", root_id);
+ p_err("duplicate root_id %u supplied", root_id);
goto done;
}
}
@@ -1132,7 +1132,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type,
break;
default:
err = -1;
- p_err("unexpected object type: %d", type);
+ p_err("unexpected object type: %u", type);
goto err_free;
}
if (err) {
@@ -1155,7 +1155,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type,
break;
default:
err = -1;
- p_err("unexpected object type: %d", type);
+ p_err("unexpected object type: %u", type);
goto err_free;
}
if (fd < 0) {
@@ -1188,7 +1188,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type,
break;
default:
err = -1;
- p_err("unexpected object type: %d", type);
+ p_err("unexpected object type: %u", type);
goto err_free;
}
if (!btf_id)
@@ -1254,12 +1254,12 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
n = 0;
hashmap__for_each_key_entry(btf_prog_table, entry, info->id) {
- printf("%s%lu", n++ == 0 ? " prog_ids " : ",", entry->value);
+ printf("%s%lu", n++ == 0 ? " prog_ids " : ",", (unsigned long)entry->value);
}
n = 0;
hashmap__for_each_key_entry(btf_map_table, entry, info->id) {
- printf("%s%lu", n++ == 0 ? " map_ids " : ",", entry->value);
+ printf("%s%lu", n++ == 0 ? " map_ids " : ",", (unsigned long)entry->value);
}
emit_obj_refs_plain(refs_table, info->id, "\n\tpids ");
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 527fe867a8fb..4e896d8a2416 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -653,7 +653,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
case BTF_KIND_ARRAY:
array = (struct btf_array *)(t + 1);
BTF_PRINT_TYPE(array->type);
- BTF_PRINT_ARG("[%d]", array->nelems);
+ BTF_PRINT_ARG("[%u]", array->nelems);
break;
case BTF_KIND_PTR:
BTF_PRINT_TYPE(t->type);
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 9af426d43299..93b139bfb988 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -191,7 +191,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
if (attach_btf_name)
printf(" %-15s", attach_btf_name);
else if (info.attach_btf_id)
- printf(" attach_btf_obj_id=%d attach_btf_id=%d",
+ printf(" attach_btf_obj_id=%u attach_btf_id=%u",
info.attach_btf_obj_id, info.attach_btf_id);
printf("\n");
}
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 9b75639434b8..ecfa790adc13 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -461,10 +461,11 @@ int get_fd_type(int fd)
p_err("can't read link type: %s", strerror(errno));
return -1;
}
- if (n == sizeof(path)) {
+ if (n == sizeof(buf)) {
p_err("can't read link type: path too long!");
return -1;
}
+ buf[n] = '\0';
if (strstr(buf, "bpf-map"))
return BPF_OBJ_MAP;
@@ -713,7 +714,7 @@ ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt)
int vendor_id;
if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) {
- p_err("Can't get net device name for ifindex %d: %s", ifindex,
+ p_err("Can't get net device name for ifindex %u: %s", ifindex,
strerror(errno));
return NULL;
}
@@ -738,7 +739,7 @@ ifindex_to_arch(__u32 ifindex, __u64 ns_dev, __u64 ns_ino, const char **opt)
/* No NFP support in LLVM, we have no valid triple to return. */
default:
p_err("Can't get arch name for device vendor id 0x%04x",
- vendor_id);
+ (unsigned int)vendor_id);
return NULL;
}
}
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 5a4d3240689e..67a60114368f 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -670,7 +670,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
continue;
if (bpf_map__is_internal(map) &&
(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
- printf("\tskel_free_map_data(skel->%1$s, skel->maps.%1$s.initial_value, %2$zd);\n",
+ printf("\tskel_free_map_data(skel->%1$s, skel->maps.%1$s.initial_value, %2$zu);\n",
ident, bpf_map_mmap_sz(map));
codegen("\
\n\
@@ -984,7 +984,7 @@ static int walk_st_ops_shadow_vars(struct btf *btf, const char *ident,
offset = m->offset / 8;
if (next_offset < offset)
- printf("\t\t\tchar __padding_%d[%d];\n", i, offset - next_offset);
+ printf("\t\t\tchar __padding_%d[%u];\n", i, offset - next_offset);
switch (btf_kind(member_type)) {
case BTF_KIND_INT:
@@ -1052,7 +1052,7 @@ static int walk_st_ops_shadow_vars(struct btf *btf, const char *ident,
/* Cannot fail since it must be a struct type */
size = btf__resolve_size(btf, map_type_id);
if (next_offset < (__u32)size)
- printf("\t\t\tchar __padding_end[%d];\n", size - next_offset);
+ printf("\t\t\tchar __padding_end[%u];\n", size - next_offset);
out:
btf_dump__free(d);
@@ -2095,7 +2095,7 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi
break;
/* tells if some other type needs to be handled */
default:
- p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id);
+ p_err("unsupported kind: %s (%u)", btf_kind_str(btf_type), type_id);
return -EINVAL;
}
@@ -2147,7 +2147,7 @@ static int btfgen_record_field_relo(struct btfgen_info *info, struct bpf_core_sp
btf_type = btf__type_by_id(btf, type_id);
break;
default:
- p_err("unsupported kind: %s (%d)",
+ p_err("unsupported kind: %s (%u)",
btf_kind_str(btf_type), btf_type->type);
return -EINVAL;
}
@@ -2246,7 +2246,7 @@ static int btfgen_mark_type_match(struct btfgen_info *info, __u32 type_id, bool
}
/* tells if some other type needs to be handled */
default:
- p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id);
+ p_err("unsupported kind: %s (%u)", btf_kind_str(btf_type), type_id);
return -EINVAL;
}
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index c032d2c6ab6d..8895b4e1f690 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -343,7 +343,8 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
{
const struct bpf_line_info *linfo = NULL;
unsigned int nr_skip = 0;
- int count, i, pc = 0;
+ int count, i;
+ unsigned int pc = 0;
disasm_ctx_t ctx;
if (!len)
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 5cd503b763d7..52fd2c9fac56 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -107,7 +107,7 @@ static int link_parse_fd(int *argc, char ***argv)
fd = bpf_link_get_fd_by_id(id);
if (fd < 0)
- p_err("failed to get link with ID %d: %s", id, strerror(errno));
+ p_err("failed to get link with ID %u: %s", id, strerror(errno));
return fd;
} else if (is_prefix(**argv, "pinned")) {
char *path;
@@ -404,7 +404,7 @@ static char *perf_config_hw_cache_str(__u64 config)
if (hw_cache)
snprintf(str, PERF_HW_CACHE_LEN, "%s-", hw_cache);
else
- snprintf(str, PERF_HW_CACHE_LEN, "%lld-", config & 0xff);
+ snprintf(str, PERF_HW_CACHE_LEN, "%llu-", config & 0xff);
op = perf_event_name(evsel__hw_cache_op, (config >> 8) & 0xff);
if (op)
@@ -412,7 +412,7 @@ static char *perf_config_hw_cache_str(__u64 config)
"%s-", op);
else
snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str),
- "%lld-", (config >> 8) & 0xff);
+ "%llu-", (config >> 8) & 0xff);
result = perf_event_name(evsel__hw_cache_result, config >> 16);
if (result)
@@ -420,7 +420,7 @@ static char *perf_config_hw_cache_str(__u64 config)
"%s", result);
else
snprintf(str + strlen(str), PERF_HW_CACHE_LEN - strlen(str),
- "%lld", config >> 16);
+ "%llu", config >> 16);
return str;
}
@@ -623,7 +623,7 @@ static void show_link_ifindex_plain(__u32 ifindex)
else
snprintf(devname, sizeof(devname), "(detached)");
if (ret)
- snprintf(devname, sizeof(devname), "%s(%d)",
+ snprintf(devname, sizeof(devname), "%s(%u)",
tmpname, ifindex);
printf("ifindex %s ", devname);
}
@@ -699,7 +699,7 @@ void netfilter_dump_plain(const struct bpf_link_info *info)
if (pfname)
printf("\n\t%s", pfname);
else
- printf("\n\tpf: %d", pf);
+ printf("\n\tpf: %u", pf);
if (hookname)
printf(" %s", hookname);
@@ -773,7 +773,7 @@ static void show_uprobe_multi_plain(struct bpf_link_info *info)
printf("func_cnt %u ", info->uprobe_multi.count);
if (info->uprobe_multi.pid)
- printf("pid %d ", info->uprobe_multi.pid);
+ printf("pid %u ", info->uprobe_multi.pid);
printf("\n\t%-16s %-16s %-16s", "offset", "ref_ctr_offset", "cookies");
for (i = 0; i < info->uprobe_multi.count; i++) {
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 08d0ac543c67..cd5963cb6058 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -152,7 +152,7 @@ static int do_version(int argc, char **argv)
BPFTOOL_MINOR_VERSION, BPFTOOL_PATCH_VERSION);
#endif
jsonw_name(json_wtr, "libbpf_version");
- jsonw_printf(json_wtr, "\"%d.%d\"",
+ jsonw_printf(json_wtr, "\"%u.%u\"",
libbpf_major_version(), libbpf_minor_version());
jsonw_name(json_wtr, "features");
@@ -370,7 +370,7 @@ static int do_batch(int argc, char **argv)
while ((cp = strstr(buf, "\\\n")) != NULL) {
if (!fgets(contline, sizeof(contline), fp) ||
strlen(contline) == 0) {
- p_err("missing continuation line on command %d",
+ p_err("missing continuation line on command %u",
lines);
err = -1;
goto err_close;
@@ -381,7 +381,7 @@ static int do_batch(int argc, char **argv)
*cp = '\0';
if (strlen(buf) + strlen(contline) + 1 > sizeof(buf)) {
- p_err("command %d is too long", lines);
+ p_err("command %u is too long", lines);
err = -1;
goto err_close;
}
@@ -423,7 +423,7 @@ static int do_batch(int argc, char **argv)
err = -1;
} else {
if (!json_output)
- printf("processed %d commands\n", lines);
+ printf("processed %u commands\n", lines);
}
err_close:
if (fp != stdin)
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index b89bd792c1d5..81cc668b4b05 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -285,7 +285,7 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
}
if (info->value_size) {
for (i = 0; i < n; i++) {
- printf("value (CPU %02d):%c",
+ printf("value (CPU %02u):%c",
i, info->value_size > 16 ? '\n' : ' ');
fprint_hex(stdout, value + i * step,
info->value_size, " ");
@@ -316,7 +316,7 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val,
}
if (i != n) {
- p_err("%s expected %d bytes got %d", name, n, i);
+ p_err("%s expected %u bytes got %u", name, n, i);
return NULL;
}
@@ -462,7 +462,7 @@ static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr)
jsonw_string_field(wtr, "name", info->name);
jsonw_name(wtr, "flags");
- jsonw_printf(wtr, "%d", info->map_flags);
+ jsonw_printf(wtr, "%u", info->map_flags);
}
static int show_map_close_json(int fd, struct bpf_map_info *info)
@@ -588,7 +588,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
if (prog_type_str)
printf("owner_prog_type %s ", prog_type_str);
else
- printf("owner_prog_type %d ", prog_type);
+ printf("owner_prog_type %u ", prog_type);
}
if (owner_jited)
printf("owner%s jited",
@@ -615,7 +615,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
printf("\n\t");
if (info->btf_id)
- printf("btf_id %d", info->btf_id);
+ printf("btf_id %u", info->btf_id);
if (frozen)
printf("%sfrozen", info->btf_id ? " " : "");
@@ -1270,6 +1270,10 @@ static int do_create(int argc, char **argv)
} else if (is_prefix(*argv, "name")) {
NEXT_ARG();
map_name = GET_ARG();
+ if (strlen(map_name) > BPF_OBJ_NAME_LEN - 1) {
+ p_info("Warning: map name is longer than %u characters, it will be truncated.",
+ BPF_OBJ_NAME_LEN - 1);
+ }
} else if (is_prefix(*argv, "key")) {
if (parse_u32_arg(&argc, &argv, &key_size,
"key size"))
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index 21d7d447e1f3..552b4ca40c27 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -91,15 +91,15 @@ print_bpf_output(void *private_data, int cpu, struct perf_event_header *event)
jsonw_end_object(json_wtr);
} else {
if (e->header.type == PERF_RECORD_SAMPLE) {
- printf("== @%lld.%09lld CPU: %d index: %d =====\n",
+ printf("== @%llu.%09llu CPU: %d index: %d =====\n",
e->time / 1000000000ULL, e->time % 1000000000ULL,
cpu, idx);
fprint_hex(stdout, e->data, e->size, " ");
printf("\n");
} else if (e->header.type == PERF_RECORD_LOST) {
- printf("lost %lld events\n", lost->lost);
+ printf("lost %llu events\n", lost->lost);
} else {
- printf("unknown event type=%d size=%d\n",
+ printf("unknown event type=%u size=%u\n",
e->header.type, e->header.size);
}
}
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index d2242d9f8441..64f958f437b0 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -476,7 +476,7 @@ static void __show_dev_tc_bpf(const struct ip_devname_ifindex *dev,
for (i = 0; i < optq.count; i++) {
NET_START_OBJECT;
NET_DUMP_STR("devname", "%s", dev->devname);
- NET_DUMP_UINT("ifindex", "(%u)", dev->ifindex);
+ NET_DUMP_UINT("ifindex", "(%u)", (unsigned int)dev->ifindex);
NET_DUMP_STR("kind", " %s", attach_loc_strings[loc]);
ret = __show_dev_tc_bpf_name(prog_ids[i], prog_name,
sizeof(prog_name));
@@ -831,7 +831,7 @@ static void show_link_netfilter(void)
if (err) {
if (errno == ENOENT)
break;
- p_err("can't get next link: %s (id %d)", strerror(errno), id);
+ p_err("can't get next link: %s (id %u)", strerror(errno), id);
break;
}
diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c
index 5f65140b003b..0a3c7e96c797 100644
--- a/tools/bpf/bpftool/netlink_dumper.c
+++ b/tools/bpf/bpftool/netlink_dumper.c
@@ -45,7 +45,7 @@ static int do_xdp_dump_one(struct nlattr *attr, unsigned int ifindex,
NET_START_OBJECT;
if (name)
NET_DUMP_STR("devname", "%s", name);
- NET_DUMP_UINT("ifindex", "(%d)", ifindex);
+ NET_DUMP_UINT("ifindex", "(%u)", ifindex);
if (mode == XDP_ATTACHED_MULTI) {
if (json_output) {
@@ -74,7 +74,7 @@ int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb)
if (!tb[IFLA_XDP])
return 0;
- return do_xdp_dump_one(tb[IFLA_XDP], ifinfo->ifi_index,
+ return do_xdp_dump_one(tb[IFLA_XDP], (unsigned int)ifinfo->ifi_index,
libbpf_nla_getattr_str(tb[IFLA_IFNAME]));
}
@@ -168,7 +168,7 @@ int do_filter_dump(struct tcmsg *info, struct nlattr **tb, const char *kind,
NET_START_OBJECT;
if (devname[0] != '\0')
NET_DUMP_STR("devname", "%s", devname);
- NET_DUMP_UINT("ifindex", "(%u)", ifindex);
+ NET_DUMP_UINT("ifindex", "(%u)", (unsigned int)ifindex);
NET_DUMP_STR("kind", " %s", kind);
ret = do_bpf_filter_dump(tb[TCA_OPTIONS]);
NET_END_OBJECT_FINAL;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index e71be67f1d86..f010295350be 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -521,10 +521,10 @@ static void print_prog_header_plain(struct bpf_prog_info *info, int fd)
print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
printf("%s", info->gpl_compatible ? " gpl" : "");
if (info->run_time_ns)
- printf(" run_time_ns %lld run_cnt %lld",
+ printf(" run_time_ns %llu run_cnt %llu",
info->run_time_ns, info->run_cnt);
if (info->recursion_misses)
- printf(" recursion_misses %lld", info->recursion_misses);
+ printf(" recursion_misses %llu", info->recursion_misses);
printf("\n");
}
@@ -569,7 +569,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd, bool orphaned)
}
if (info->btf_id)
- printf("\n\tbtf_id %d", info->btf_id);
+ printf("\n\tbtf_id %u", info->btf_id);
emit_obj_refs_plain(refs_table, info->id, "\n\tpids ");
@@ -1164,7 +1164,7 @@ static int get_run_data(const char *fname, void **data_ptr, unsigned int *size)
}
if (nb_read > buf_size - block_size) {
if (buf_size == UINT32_MAX) {
- p_err("data_in/ctx_in is too long (max: %d)",
+ p_err("data_in/ctx_in is too long (max: %u)",
UINT32_MAX);
goto err_free;
}
@@ -1928,6 +1928,7 @@ static int do_loader(int argc, char **argv)
obj = bpf_object__open_file(file, &open_opts);
if (!obj) {
+ err = -1;
p_err("failed to open object file");
goto err_close_obj;
}
@@ -2251,7 +2252,7 @@ static char *profile_target_name(int tgt_fd)
t = btf__type_by_id(btf, func_info.type_id);
if (!t) {
- p_err("btf %d doesn't have type %d",
+ p_err("btf %u doesn't have type %u",
info.btf_id, func_info.type_id);
goto out;
}
@@ -2329,7 +2330,7 @@ static int profile_open_perf_events(struct profiler_bpf *obj)
continue;
for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) {
if (profile_open_perf_event(m, cpu, map_fd)) {
- p_err("failed to create event %s on cpu %d",
+ p_err("failed to create event %s on cpu %u",
metrics[m].name, cpu);
return -1;
}
diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c
index bf1f02212797..31d806e3bdaa 100644
--- a/tools/bpf/bpftool/tracelog.c
+++ b/tools/bpf/bpftool/tracelog.c
@@ -78,7 +78,7 @@ static bool get_tracefs_pipe(char *mnt)
return false;
/* Allow room for NULL terminating byte and pipe file name */
- snprintf(format, sizeof(format), "%%*s %%%zds %%99s %%*s %%*d %%*d\\n",
+ snprintf(format, sizeof(format), "%%*s %%%zus %%99s %%*s %%*d %%*d\\n",
PATH_MAX - strlen(pipe_name) - 1);
while (fscanf(fp, format, mnt, type) == 2)
if (strcmp(type, fstype) == 0) {
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index d0094345fb2b..5e7cb8b36fef 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -199,13 +199,13 @@ static const char *print_imm(void *private_data,
if (insn->src_reg == BPF_PSEUDO_MAP_FD)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "map[id:%u]", insn->imm);
+ "map[id:%d]", insn->imm);
else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
+ "map[id:%d][0]+%d", insn->imm, (insn + 1)->imm);
else if (insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
- "map[idx:%u]+%u", insn->imm, (insn + 1)->imm);
+ "map[idx:%d]+%d", insn->imm, (insn + 1)->imm);
else if (insn->src_reg == BPF_PSEUDO_FUNC)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"subprog[%+d]", insn->imm);
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index e49203ebd48c..78a436c4072e 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -6,6 +6,7 @@ OUTPUT ?= $(abspath .output)/
BPFTOOL_OUTPUT := $(OUTPUT)bpftool/
DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
+BPF_TARGET_ENDIAN ?= --target=bpf
LIBBPF_SRC := $(abspath ../../lib/bpf)
BPFOBJ_OUTPUT := $(OUTPUT)libbpf/
BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a
@@ -60,7 +61,7 @@ $(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
$(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@
$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
- $(QUIET_GEN)$(CLANG) -g -O2 --target=bpf $(INCLUDES) \
+ $(QUIET_GEN)$(CLANG) -g -O2 $(BPF_TARGET_ENDIAN) $(INCLUDES) \
-c $(filter %.c,$^) -o $@ && \
$(LLVM_STRIP) -g $@
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index defa5bb881f4..28705ae67784 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -51,6 +51,9 @@
#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+#define BPF_LOAD_ACQ 0x100 /* load-acquire */
+#define BPF_STORE_REL 0x110 /* store-release */
+
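
Based on the BPF_STX | BPF_ATOMIC handling these values receive in the verifier changes above, a load-acquire is expected to be encoded roughly as follows (illustrative only, not taken from the patch):

/* dst_reg = load_acquire(*(u64 *)(src_reg + off)) */
struct bpf_insn load_acq = {
	.code    = BPF_STX | BPF_ATOMIC | BPF_DW,
	.dst_reg = BPF_REG_0,
	.src_reg = BPF_REG_1,
	.off     = 0,
	.imm     = BPF_LOAD_ACQ,
};
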
enum bpf_cond_pseudo_jmp {
BPF_MAY_GOTO = 0,
};
@@ -1207,6 +1210,7 @@ enum bpf_perf_event_type {
#define BPF_F_BEFORE (1U << 3)
#define BPF_F_AFTER (1U << 4)
#define BPF_F_ID (1U << 5)
+#define BPF_F_PREORDER (1U << 6)
#define BPF_F_LINK BPF_F_LINK /* 1 << 13 */
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
@@ -1648,6 +1652,7 @@ union bpf_attr {
};
__u32 next_id;
__u32 open_flags;
+ __s32 fd_by_id_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
@@ -6019,7 +6024,10 @@ union bpf_attr {
FN(user_ringbuf_drain, 209, ##ctx) \
FN(cgrp_storage_get, 210, ##ctx) \
FN(cgrp_storage_delete, 211, ##ctx) \
- /* */
+ /* This helper list is effectively frozen. If you are trying to \
+ * add a new helper, you should add a kfunc instead which has \
+ * less stability guarantees. See Documentation/bpf/kfuncs.rst \
+ */
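
For readers pointed at kfuncs by this comment, a minimal registration sketch along the lines of Documentation/bpf/kfuncs.rst (all names here are hypothetical):

#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/init.h>
#include <linux/module.h>

__bpf_kfunc_start_defs();

__bpf_kfunc u64 bpf_example_sum(u64 a, u64 b)
{
	return a + b;
}

__bpf_kfunc_end_defs();

BTF_KFUNCS_START(example_kfunc_ids)
BTF_ID_FLAGS(func, bpf_example_sum)
BTF_KFUNCS_END(example_kfunc_ids)

static const struct btf_kfunc_id_set example_kfunc_set = {
	.owner = THIS_MODULE,
	.set   = &example_kfunc_ids,
};

static int __init example_kfunc_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
					 &example_kfunc_set);
}
late_initcall(example_kfunc_init);
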
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
* know or care about integer value that is now passed as second argument
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index ec1798b6d3ff..266d4ffa6c07 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -36,7 +36,8 @@ struct btf_type {
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bit 31: kind_flag, currently used by
- * struct, union, enum, fwd and enum64
+ * struct, union, enum, fwd, enum64,
+ * decl_tag and type_tag
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT, UNION, DATASEC and ENUM64.
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 359f73ead613..a9c3e33d0f8a 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -1097,7 +1097,7 @@ int bpf_map_get_fd_by_id(__u32 id)
int bpf_btf_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
+ const size_t attr_sz = offsetofend(union bpf_attr, fd_by_id_token_fd);
union bpf_attr attr;
int fd;
@@ -1107,6 +1107,7 @@ int bpf_btf_get_fd_by_id_opts(__u32 id,
memset(&attr, 0, attr_sz);
attr.btf_id = id;
attr.open_flags = OPTS_GET(opts, open_flags, 0);
+ attr.fd_by_id_token_fd = OPTS_GET(opts, token_fd, 0);
fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz);
return libbpf_err_errno(fd);
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 435da95d2058..777627d33d25 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -487,9 +487,10 @@ LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id);
struct bpf_get_fd_by_id_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 open_flags; /* permissions requested for the operation on fd */
+ __u32 token_fd;
size_t :0;
};
-#define bpf_get_fd_by_id_opts__last_field open_flags
+#define bpf_get_fd_by_id_opts__last_field token_fd
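
With the new token_fd field, a caller can route the by-id lookup through a BPF token; a minimal sketch (id and token_fd are assumed to come from elsewhere), mirroring how libbpf itself uses it further down in btf.c:

LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts,
	.open_flags = BPF_F_TOKEN_FD,
	.token_fd   = token_fd,
);
int btf_fd = bpf_btf_get_fd_by_id_opts(id, &opts);
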
LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id,
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 48c66f3a9200..38bc6b14b066 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1619,12 +1619,18 @@ exit_free:
return btf;
}
-struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
+struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd)
{
struct btf *btf;
int btf_fd;
+ LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts);
+
+ if (token_fd) {
+ opts.open_flags |= BPF_F_TOKEN_FD;
+ opts.token_fd = token_fd;
+ }
- btf_fd = bpf_btf_get_fd_by_id(id);
+ btf_fd = bpf_btf_get_fd_by_id_opts(id, &opts);
if (btf_fd < 0)
return libbpf_err_ptr(-errno);
@@ -1634,6 +1640,11 @@ struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
return libbpf_ptr(btf);
}
+struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
+{
+ return btf_load_from_kernel(id, base_btf, 0);
+}
+
struct btf *btf__load_from_kernel_by_id(__u32 id)
{
return btf__load_from_kernel_by_id_split(id, NULL);
@@ -2090,7 +2101,7 @@ static int validate_type_id(int id)
}
/* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */
-static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
+static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id, int kflag)
{
struct btf_type *t;
int sz, name_off = 0;
@@ -2113,7 +2124,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
}
t->name_off = name_off;
- t->info = btf_type_info(kind, 0, 0);
+ t->info = btf_type_info(kind, 0, kflag);
t->type = ref_type_id;
return btf_commit_type(btf, sz);
@@ -2128,7 +2139,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
*/
int btf__add_ptr(struct btf *btf, int ref_type_id)
{
- return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id, 0);
}
/*
@@ -2506,7 +2517,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
struct btf_type *t;
int id;
- id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0);
+ id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0, 0);
if (id <= 0)
return id;
t = btf_type_by_id(btf, id);
@@ -2536,7 +2547,7 @@ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
if (!name || !name[0])
return libbpf_err(-EINVAL);
- return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id, 0);
}
/*
@@ -2548,7 +2559,7 @@ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
*/
int btf__add_volatile(struct btf *btf, int ref_type_id)
{
- return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id, 0);
}
/*
@@ -2560,7 +2571,7 @@ int btf__add_volatile(struct btf *btf, int ref_type_id)
*/
int btf__add_const(struct btf *btf, int ref_type_id)
{
- return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id, 0);
}
/*
@@ -2572,7 +2583,7 @@ int btf__add_const(struct btf *btf, int ref_type_id)
*/
int btf__add_restrict(struct btf *btf, int ref_type_id)
{
- return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id, 0);
}
/*
@@ -2588,7 +2599,24 @@ int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id)
if (!value || !value[0])
return libbpf_err(-EINVAL);
- return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id);
+ return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 0);
+}
+
+/*
+ * Append new BTF_KIND_TYPE_TAG type with:
+ * - *value*, non-empty/non-NULL tag value;
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Set info->kflag to 1, indicating this tag is an __attribute__
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id)
+{
+ if (!value || !value[0])
+ return libbpf_err(-EINVAL);
+
+ return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 1);
}
/*
@@ -2610,7 +2638,7 @@ int btf__add_func(struct btf *btf, const char *name,
linkage != BTF_FUNC_EXTERN)
return libbpf_err(-EINVAL);
- id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
+ id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id, 0);
if (id > 0) {
struct btf_type *t = btf_type_by_id(btf, id);
@@ -2845,18 +2873,8 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
return 0;
}
-/*
- * Append new BTF_KIND_DECL_TAG type with:
- * - *value* - non-empty/non-NULL string;
- * - *ref_type_id* - referenced type ID, it might not exist yet;
- * - *component_idx* - -1 for tagging reference type, otherwise struct/union
- * member or function argument index;
- * Returns:
- * - >0, type ID of newly added BTF type;
- * - <0, on error.
- */
-int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
- int component_idx)
+static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
+ int component_idx, int kflag)
{
struct btf_type *t;
int sz, value_off;
@@ -2880,13 +2898,46 @@ int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
return value_off;
t->name_off = value_off;
- t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false);
+ t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, kflag);
t->type = ref_type_id;
btf_decl_tag(t)->component_idx = component_idx;
return btf_commit_type(btf, sz);
}
+/*
+ * Append new BTF_KIND_DECL_TAG type with:
+ * - *value* - non-empty/non-NULL string;
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * - *component_idx* - -1 for tagging reference type, otherwise struct/union
+ * member or function argument index;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
+ int component_idx)
+{
+ return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 0);
+}
+
+/*
+ * Append new BTF_KIND_DECL_TAG type with:
+ * - *value* - non-empty/non-NULL string;
+ * - *ref_type_id* - referenced type ID, it might not exist yet;
+ * - *component_idx* - -1 for tagging reference type, otherwise struct/union
+ * member or function argument index;
+ * Set info->kflag to 1, indicating this tag is an __attribute__
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id,
+ int component_idx)
+{
+ return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 1);
+}
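
A short sketch of the new attribute-flavoured tag APIs in use (error handling trimmed; the tag values are only examples):

int int_id  = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
/* kflag = 1 type tag: dumped as __attribute__((aligned(16))) */
int attr_id = btf__add_type_attr(btf, "aligned(16)", int_id);
int var_id  = btf__add_var(btf, "x", BTF_VAR_GLOBAL_ALLOCATED, attr_id);

/* component_idx == -1 tags the variable itself, not a member/argument */
btf__add_decl_attr(btf, "used", var_id, -1);
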
+
struct btf_ext_sec_info_param {
__u32 off;
__u32 len;
@@ -3015,8 +3066,6 @@ static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native)
.desc = "line_info",
};
struct btf_ext_sec_info_param core_relo = {
- .off = btf_ext->hdr->core_relo_off,
- .len = btf_ext->hdr->core_relo_len,
.min_rec_size = sizeof(struct bpf_core_relo),
.ext_info = &btf_ext->core_relo_info,
.desc = "core_relo",
@@ -3034,6 +3083,8 @@ static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native)
if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
return 0; /* skip core relos parsing */
+ core_relo.off = btf_ext->hdr->core_relo_off;
+ core_relo.len = btf_ext->hdr->core_relo_len;
err = btf_ext_parse_sec_info(btf_ext, &core_relo, is_native);
if (err)
return err;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 47ee8f6ac489..4392451d634b 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -227,6 +227,7 @@ LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id);
+LIBBPF_API int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id);
/* func and func_proto construction APIs */
LIBBPF_API int btf__add_func(struct btf *btf, const char *name,
@@ -243,6 +244,8 @@ LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id,
/* tag construction API */
LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
int component_idx);
+LIBBPF_API int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id,
+ int component_idx);
struct btf_dedup_opts {
size_t sz;
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index a3fc6908f6c9..460c3e57fadb 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -1494,7 +1494,10 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
case BTF_KIND_TYPE_TAG:
btf_dump_emit_mods(d, decls);
name = btf_name_of(d, t->name_off);
- btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name);
+ if (btf_kflag(t))
+ btf_dump_printf(d, " __attribute__((%s))", name);
+ else
+ btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name);
break;
case BTF_KIND_ARRAY: {
const struct btf_array *a = btf_array(t);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 194809da5172..6b85060f07b3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -670,11 +670,18 @@ struct elf_state {
struct usdt_manager;
+enum bpf_object_state {
+ OBJ_OPEN,
+ OBJ_PREPARED,
+ OBJ_LOADED,
+};
+
struct bpf_object {
char name[BPF_OBJ_NAME_LEN];
char license[64];
__u32 kern_version;
+ enum bpf_object_state state;
struct bpf_program *programs;
size_t nr_programs;
struct bpf_map *maps;
@@ -686,7 +693,6 @@ struct bpf_object {
int nr_extern;
int kconfig_map_idx;
- bool loaded;
bool has_subcalls;
bool has_rodata;
@@ -1511,7 +1517,7 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->kconfig_map_idx = -1;
obj->kern_version = get_kernel_version();
- obj->loaded = false;
+ obj->state = OBJ_OPEN;
return obj;
}
@@ -2106,7 +2112,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
}
len = strlen(value);
- if (value[len - 1] != '"') {
+ if (len < 2 || value[len - 1] != '"') {
pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
ext->name, value);
return -EINVAL;
@@ -4845,6 +4851,11 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
return 0;
}
+static bool map_is_created(const struct bpf_map *map)
+{
+ return map->obj->state >= OBJ_PREPARED || map->reused;
+}
+
bool bpf_map__autocreate(const struct bpf_map *map)
{
return map->autocreate;
@@ -4852,7 +4863,7 @@ bool bpf_map__autocreate(const struct bpf_map *map)
int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
{
- if (map->obj->loaded)
+ if (map_is_created(map))
return libbpf_err(-EBUSY);
map->autocreate = autocreate;
@@ -4946,7 +4957,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
{
- if (map->obj->loaded)
+ if (map_is_created(map))
return libbpf_err(-EBUSY);
map->def.max_entries = max_entries;
@@ -5191,11 +5202,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
static void bpf_map__destroy(struct bpf_map *map);
-static bool map_is_created(const struct bpf_map *map)
-{
- return map->obj->loaded || map->reused;
-}
-
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
{
LIBBPF_OPTS(bpf_map_create_opts, create_attr);
@@ -7897,13 +7903,6 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
for (i = 0; i < obj->nr_programs; i++) {
prog = &obj->programs[i];
- err = bpf_object__sanitize_prog(obj, prog);
- if (err)
- return err;
- }
-
- for (i = 0; i < obj->nr_programs; i++) {
- prog = &obj->programs[i];
if (prog_is_subprog(obj, prog))
continue;
if (!prog->autoload) {
@@ -7927,6 +7926,21 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
return 0;
}
+static int bpf_object_prepare_progs(struct bpf_object *obj)
+{
+ struct bpf_program *prog;
+ size_t i;
+ int err;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ err = bpf_object__sanitize_prog(obj, prog);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
static const struct bpf_sec_def *find_sec_def(const char *sec_name);
static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
@@ -8543,14 +8557,77 @@ static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
return 0;
}
+static void bpf_object_unpin(struct bpf_object *obj)
+{
+ int i;
+
+ /* unpin any maps that were auto-pinned during load */
+ for (i = 0; i < obj->nr_maps; i++)
+ if (obj->maps[i].pinned && !obj->maps[i].reused)
+ bpf_map__unpin(&obj->maps[i], NULL);
+}
+
+static void bpf_object_post_load_cleanup(struct bpf_object *obj)
+{
+ int i;
+
+ /* clean up fd_array */
+ zfree(&obj->fd_array);
+
+ /* clean up module BTFs */
+ for (i = 0; i < obj->btf_module_cnt; i++) {
+ close(obj->btf_modules[i].fd);
+ btf__free(obj->btf_modules[i].btf);
+ free(obj->btf_modules[i].name);
+ }
+ obj->btf_module_cnt = 0;
+ zfree(&obj->btf_modules);
+
+ /* clean up vmlinux BTF */
+ btf__free(obj->btf_vmlinux);
+ obj->btf_vmlinux = NULL;
+}
+
+static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path)
+{
+ int err;
+
+ if (obj->state >= OBJ_PREPARED) {
+ pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name);
+ return -EINVAL;
+ }
+
+ err = bpf_object_prepare_token(obj);
+ err = err ? : bpf_object__probe_loading(obj);
+ err = err ? : bpf_object__load_vmlinux_btf(obj, false);
+ err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
+ err = err ? : bpf_object__sanitize_maps(obj);
+ err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
+ err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
+ err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
+ err = err ? : bpf_object__sanitize_and_load_btf(obj);
+ err = err ? : bpf_object__create_maps(obj);
+ err = err ? : bpf_object_prepare_progs(obj);
+
+ if (err) {
+ bpf_object_unpin(obj);
+ bpf_object_unload(obj);
+ obj->state = OBJ_LOADED;
+ return err;
+ }
+
+ obj->state = OBJ_PREPARED;
+ return 0;
+}
+
static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
{
- int err, i;
+ int err;
if (!obj)
return libbpf_err(-EINVAL);
- if (obj->loaded) {
+ if (obj->state >= OBJ_LOADED) {
pr_warn("object '%s': load can't be attempted twice\n", obj->name);
return libbpf_err(-EINVAL);
}
@@ -8565,17 +8642,12 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
return libbpf_err(-LIBBPF_ERRNO__ENDIAN);
}
- err = bpf_object_prepare_token(obj);
- err = err ? : bpf_object__probe_loading(obj);
- err = err ? : bpf_object__load_vmlinux_btf(obj, false);
- err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
- err = err ? : bpf_object__sanitize_maps(obj);
- err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
- err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
- err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
- err = err ? : bpf_object__sanitize_and_load_btf(obj);
- err = err ? : bpf_object__create_maps(obj);
- err = err ? : bpf_object__load_progs(obj, extra_log_level);
+ if (obj->state < OBJ_PREPARED) {
+ err = bpf_object_prepare(obj, target_btf_path);
+ if (err)
+ return libbpf_err(err);
+ }
+ err = bpf_object__load_progs(obj, extra_log_level);
err = err ? : bpf_object_init_prog_arrays(obj);
err = err ? : bpf_object_prepare_struct_ops(obj);
@@ -8587,36 +8659,22 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
}
- /* clean up fd_array */
- zfree(&obj->fd_array);
+ bpf_object_post_load_cleanup(obj);
+ obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */
- /* clean up module BTFs */
- for (i = 0; i < obj->btf_module_cnt; i++) {
- close(obj->btf_modules[i].fd);
- btf__free(obj->btf_modules[i].btf);
- free(obj->btf_modules[i].name);
+ if (err) {
+ bpf_object_unpin(obj);
+ bpf_object_unload(obj);
+ pr_warn("failed to load object '%s'\n", obj->path);
+ return libbpf_err(err);
}
- free(obj->btf_modules);
-
- /* clean up vmlinux BTF */
- btf__free(obj->btf_vmlinux);
- obj->btf_vmlinux = NULL;
-
- obj->loaded = true; /* doesn't matter if successfully or not */
-
- if (err)
- goto out;
return 0;
-out:
- /* unpin any maps that were auto-pinned during load */
- for (i = 0; i < obj->nr_maps; i++)
- if (obj->maps[i].pinned && !obj->maps[i].reused)
- bpf_map__unpin(&obj->maps[i], NULL);
+}
- bpf_object_unload(obj);
- pr_warn("failed to load object '%s'\n", obj->path);
- return libbpf_err(err);
+int bpf_object__prepare(struct bpf_object *obj)
+{
+ return libbpf_err(bpf_object_prepare(obj, NULL));
}
int bpf_object__load(struct bpf_object *obj)
@@ -8866,7 +8924,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
if (!obj)
return libbpf_err(-ENOENT);
- if (!obj->loaded) {
+ if (obj->state < OBJ_PREPARED) {
pr_warn("object not yet loaded; load it first\n");
return libbpf_err(-ENOENT);
}
@@ -8945,7 +9003,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
if (!obj)
return libbpf_err(-ENOENT);
- if (!obj->loaded) {
+ if (obj->state < OBJ_LOADED) {
pr_warn("object not yet loaded; load it first\n");
return libbpf_err(-ENOENT);
}
@@ -9064,6 +9122,13 @@ void bpf_object__close(struct bpf_object *obj)
if (IS_ERR_OR_NULL(obj))
return;
+ /*
+ * if user called bpf_object__prepare() without ever getting to
+ * bpf_object__load(), we need to clean up stuff that is normally
+ * cleaned up at the end of loading step
+ */
+ bpf_object_post_load_cleanup(obj);
+
usdt_manager_free(obj->usdt_man);
obj->usdt_man = NULL;
@@ -9132,7 +9197,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
{
- if (obj->loaded)
+ if (obj->state >= OBJ_LOADED)
return libbpf_err(-EINVAL);
obj->kern_version = kern_version;
@@ -9145,12 +9210,12 @@ int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
struct bpf_gen *gen;
if (!opts)
- return -EFAULT;
+ return libbpf_err(-EFAULT);
if (!OPTS_VALID(opts, gen_loader_opts))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
gen = calloc(sizeof(*gen), 1);
if (!gen)
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
gen->opts = opts;
gen->swapped_endian = !is_native_endianness(obj);
obj->gen_loader = gen;
@@ -9229,7 +9294,7 @@ bool bpf_program__autoload(const struct bpf_program *prog)
int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
{
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EINVAL);
prog->autoload = autoload;
@@ -9261,14 +9326,14 @@ int bpf_program__set_insns(struct bpf_program *prog,
{
struct bpf_insn *insns;
- if (prog->obj->loaded)
- return -EBUSY;
+ if (prog->obj->state >= OBJ_LOADED)
+ return libbpf_err(-EBUSY);
insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
/* NULL is a valid return from reallocarray if the new count is zero */
if (!insns && new_insn_cnt) {
pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
- return -ENOMEM;
+ return libbpf_err(-ENOMEM);
}
memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
@@ -9304,7 +9369,7 @@ static int last_custom_sec_def_handler_id;
int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
{
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EBUSY);
/* if type is not changed, do nothing */
@@ -9335,7 +9400,7 @@ enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program
int bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type)
{
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EBUSY);
prog->expected_attach_type = type;
@@ -9349,7 +9414,7 @@ __u32 bpf_program__flags(const struct bpf_program *prog)
int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
{
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EBUSY);
prog->prog_flags = flags;
@@ -9363,7 +9428,7 @@ __u32 bpf_program__log_level(const struct bpf_program *prog)
int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
{
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EBUSY);
prog->log_level = log_level;
@@ -9379,11 +9444,11 @@ const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_siz
int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
{
if (log_size && !log_buf)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (prog->log_size > UINT_MAX)
- return -EINVAL;
- if (prog->obj->loaded)
- return -EBUSY;
+ return libbpf_err(-EINVAL);
+ if (prog->obj->state >= OBJ_LOADED)
+ return libbpf_err(-EBUSY);
prog->log_buf = log_buf;
prog->log_size = log_size;
@@ -9959,7 +10024,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
return libbpf_err(err);
}
-static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
+static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd)
{
struct bpf_prog_info info;
__u32 info_len = sizeof(info);
@@ -9979,7 +10044,7 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
pr_warn("The target program doesn't have BTF\n");
goto out;
}
- btf = btf__load_from_kernel_by_id(info.btf_id);
+ btf = btf_load_from_kernel(info.btf_id, NULL, token_fd);
err = libbpf_get_error(btf);
if (err) {
pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err));
@@ -10062,7 +10127,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
pr_warn("prog '%s': attach program FD is not set\n", prog->name);
return -EINVAL;
}
- err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
+ err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd);
if (err < 0) {
pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n",
prog->name, attach_prog_fd, attach_name, errstr(err));
@@ -10299,7 +10364,7 @@ static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
{
- if (map->obj->loaded || map->reused)
+ if (map_is_created(map))
return libbpf_err(-EBUSY);
if (map->mmaped) {
@@ -10307,7 +10372,7 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
int err;
if (map->def.type != BPF_MAP_TYPE_ARRAY)
- return -EOPNOTSUPP;
+ return libbpf_err(-EOPNOTSUPP);
mmap_old_sz = bpf_map_mmap_sz(map);
mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
@@ -10315,7 +10380,7 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
if (err) {
pr_warn("map '%s': failed to resize memory-mapped region: %s\n",
bpf_map__name(map), errstr(err));
- return err;
+ return libbpf_err(err);
}
err = map_btf_datasec_resize(map, size);
if (err && err != -ENOENT) {
@@ -10345,7 +10410,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
{
size_t actual_sz;
- if (map->obj->loaded || map->reused)
+ if (map_is_created(map))
return libbpf_err(-EBUSY);
if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
@@ -12858,7 +12923,7 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
if (target_fd) {
LIBBPF_OPTS(bpf_link_create_opts, target_opts);
- btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
+ btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd);
if (btf_id < 0)
return libbpf_err_ptr(btf_id);
@@ -13070,17 +13135,17 @@ int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
int err;
if (!bpf_map__is_struct_ops(map))
- return -EINVAL;
+ return libbpf_err(-EINVAL);
if (map->fd < 0) {
pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
- return -EINVAL;
+ return libbpf_err(-EINVAL);
}
st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
/* Ensure the type of a link is correct */
if (st_ops_link->map_fd < 0)
- return -EINVAL;
+ return libbpf_err(-EINVAL);
err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
/* It can be EBUSY if the map has been used to create or
@@ -13666,7 +13731,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
if (!prog || attach_prog_fd < 0)
return libbpf_err(-EINVAL);
- if (prog->obj->loaded)
+ if (prog->obj->state >= OBJ_LOADED)
return libbpf_err(-EINVAL);
if (attach_prog_fd && !attach_func_name) {
@@ -13679,7 +13744,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
if (attach_prog_fd) {
btf_id = libbpf_find_prog_btf_id(attach_func_name,
- attach_prog_fd);
+ attach_prog_fd, prog->obj->token_fd);
if (btf_id < 0)
return libbpf_err(btf_id);
} else {
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3020ee45303a..e0605403f977 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -242,6 +242,19 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts);
/**
+ * @brief **bpf_object__prepare()** prepares BPF object for loading:
+ * performs ELF processing, relocations, prepares final state of BPF program
+ * instructions (accessible with bpf_program__insns()), creates and
+ * (potentially) pins maps. Leaves BPF object in the state ready for program
+ * loading.
+ * @param obj Pointer to a valid BPF object instance returned by
+ * **bpf_object__open*()** API
+ * @return 0, on success; negative error code, otherwise, error code is
+ * stored in errno
+ */
+int bpf_object__prepare(struct bpf_object *obj);
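For orientation, a minimal call sequence under the new split (sketch only; the object path and error handling here are illustrative, not part of this patch):

    struct bpf_object *obj;
    int err;

    obj = bpf_object__open("prog.bpf.o");    /* hypothetical object file */
    if (!obj)
            return -errno;
    err = bpf_object__prepare(obj);          /* ELF processing, relocations, map creation */
    if (!err)
            err = bpf_object__load(obj);     /* only program loading is left to do */
    bpf_object__close(obj);
    return err;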
+
+/**
* @brief **bpf_object__load()** loads BPF object into kernel.
* @param obj Pointer to a valid BPF object instance returned by
* **bpf_object__open*()** APIs
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index a8b2936a1646..d8b71f22f197 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -436,4 +436,7 @@ LIBBPF_1.6.0 {
bpf_linker__add_buf;
bpf_linker__add_fd;
bpf_linker__new_fd;
+ bpf_object__prepare;
+ btf__add_decl_attr;
+ btf__add_type_attr;
} LIBBPF_1.5.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index de498e2dd6b0..76669c73dcd1 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -409,6 +409,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
int btf_load_into_kernel(struct btf *btf,
char *log_buf, size_t log_sz, __u32 log_level,
int token_fd);
+struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd);
struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index b52f71c59616..800e0ef09c37 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -2163,7 +2163,7 @@ add_sym:
obj->sym_map[src_sym_idx] = dst_sym_idx;
- if (sym_type == STT_SECTION && dst_sym) {
+ if (sym_type == STT_SECTION && dst_sec) {
dst_sec->sec_sym_idx = dst_sym_idx;
dst_sym->st_value = 0;
}
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index 7632e9d41827..2b83c98a1137 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -683,7 +683,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
{
const struct bpf_core_accessor *acc;
const struct btf_type *t;
- __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+ __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id, elem_id;
const struct btf_member *m;
const struct btf_type *mt;
bool bitfield;
@@ -706,8 +706,14 @@ static int bpf_core_calc_field_relo(const char *prog_name,
if (!acc->name) {
if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) {
*val = spec->bit_offset / 8;
- /* remember field size for load/store mem size */
- sz = btf__resolve_size(spec->btf, acc->type_id);
+ /* remember field size for load/store mem size;
+ * note, for arrays we care about individual element
+ * sizes, not the overall array size
+ */
+ t = skip_mods_and_typedefs(spec->btf, acc->type_id, &elem_id);
+ while (btf_is_array(t))
+ t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id);
+ sz = btf__resolve_size(spec->btf, elem_id);
if (sz < 0)
return -EINVAL;
*field_sz = sz;
@@ -767,7 +773,17 @@ static int bpf_core_calc_field_relo(const char *prog_name,
case BPF_CORE_FIELD_BYTE_OFFSET:
*val = byte_off;
if (!bitfield) {
- *field_sz = byte_sz;
+ /* remember field size for load/store mem size;
+ * note, for arrays we care about individual element
+ * sizes, not the overall array size
+ */
+ t = skip_mods_and_typedefs(spec->btf, field_type_id, &elem_id);
+ while (btf_is_array(t))
+ t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id);
+ sz = btf__resolve_size(spec->btf, elem_id);
+ if (sz < 0)
+ return -EINVAL;
+ *field_sz = sz;
*type_id = field_type_id;
}
break;
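Concretely, per the comments in both hunks above (the type is illustrative, not from the patch): if the relocated field resolves to an array such as

    struct s { int arr[4]; };

the size remembered for the generated load/store is now sizeof(int) == 4, one element, where it could previously end up as sizeof(((struct s *)0)->arr) == 16, the whole array.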
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
index 8743049e32b7..9a541762f54c 100644
--- a/tools/lib/bpf/str_error.c
+++ b/tools/lib/bpf/str_error.c
@@ -36,7 +36,7 @@ char *libbpf_strerror_r(int err, char *dst, int len)
return dst;
}
-const char *errstr(int err)
+const char *libbpf_errstr(int err)
{
static __thread char buf[12];
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
index 66ffebde0684..53e7fbffc13e 100644
--- a/tools/lib/bpf/str_error.h
+++ b/tools/lib/bpf/str_error.h
@@ -7,10 +7,13 @@
char *libbpf_strerror_r(int err, char *dst, int len);
/**
- * @brief **errstr()** returns string corresponding to numeric errno
+ * @brief **libbpf_errstr()** returns string corresponding to numeric errno
* @param err negative numeric errno
* @return pointer to string representation of the errno, that is invalidated
* upon the next call.
*/
-const char *errstr(int err);
+const char *libbpf_errstr(int err);
+
+#define errstr(err) libbpf_errstr(err)
+
#endif /* __LIBBPF_STR_ERROR_H */
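The compatibility macro keeps existing call sites unchanged, e.g. the pattern used throughout libbpf:

    pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err));

The rename itself presumably avoids clashing with identically named symbols in projects that compile these sources directly.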
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
index b811f754939f..2a7865c8e3fe 100644
--- a/tools/lib/bpf/usdt.bpf.h
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -108,6 +108,38 @@ int bpf_usdt_arg_cnt(struct pt_regs *ctx)
return spec->arg_cnt;
}
+/* Returns the size in bytes of the #*arg_num* (zero-indexed) USDT argument.
+ * Returns negative error if argument is not found or arg_num is invalid.
+ */
+static __always_inline
+int bpf_usdt_arg_size(struct pt_regs *ctx, __u64 arg_num)
+{
+ struct __bpf_usdt_arg_spec *arg_spec;
+ struct __bpf_usdt_spec *spec;
+ int spec_id;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return -ESRCH;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return -ESRCH;
+
+ if (arg_num >= BPF_USDT_MAX_ARG_CNT)
+ return -ENOENT;
+ barrier_var(arg_num);
+ if (arg_num >= spec->arg_cnt)
+ return -ENOENT;
+
+ arg_spec = &spec->args[arg_num];
+
+ /* arg_spec->arg_bitshift = 64 - arg_sz * 8
+ * so: arg_sz = (64 - arg_spec->arg_bitshift) / 8
+ */
+ return (unsigned int)(64 - arg_spec->arg_bitshift) / 8;
+}
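A short usage sketch inside a SEC("usdt") program (variable names are illustrative):

    long val;
    int sz;

    sz = bpf_usdt_arg_size(ctx, 0);
    if (sz < 0)
            return 0;                /* no such argument */
    if (bpf_usdt_arg(ctx, 0, &val))
            return 0;
    /* 'sz' is the width in bytes of the original USDT argument */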
+
/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res.
* Returns 0 on success; negative error, otherwise.
* On error *res is guaranteed to be set to zero.
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 901349da680f..6d8feda27ce9 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,12 +1,3 @@
-bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-kprobe_multi_bench_attach # needs CONFIG_FPROBE
-kprobe_multi_test # needs CONFIG_FPROBE
-module_attach # prog 'kprobe_multi': failed to auto-attach: -95
fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524
-fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 87551628e112..66bb50356be0 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -95,18 +95,12 @@ TEST_GEN_PROGS += test_progs-cpuv4
TEST_INST_SUBDIRS += cpuv4
endif
-TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o
+TEST_GEN_FILES = test_tc_edt.bpf.o
TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
- test_xdp_redirect_multi.sh \
- test_tunnel.sh \
- test_lwt_seg6local.sh \
test_lirc_mode2.sh \
- test_xdp_vlan_mode_generic.sh \
- test_xdp_vlan_mode_native.sh \
- test_lwt_ip_encap.sh \
test_tc_tunnel.sh \
test_tc_edt.sh \
test_xdping.sh \
@@ -117,9 +111,9 @@ TEST_PROGS := test_kmod.sh \
test_xsk.sh \
test_xdp_features.sh
-TEST_PROGS_EXTENDED := with_addr.sh \
- with_tunnels.sh ima_setup.sh verify_sig_setup.sh \
- test_xdp_vlan.sh test_bpftool.py
+TEST_PROGS_EXTENDED := \
+ ima_setup.sh verify_sig_setup.sh \
+ test_bpftool.py
TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
bpf_test_modorder_y.ko
@@ -135,7 +129,6 @@ TEST_GEN_PROGS_EXTENDED = \
veristat \
xdp_features \
xdp_hw_metadata \
- xdp_redirect_multi \
xdp_synproxy \
xdping \
xskxceiver
@@ -184,9 +177,14 @@ ifeq ($(feature-llvm),1)
LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets
# both llvm-config and lib.mk add -D_GNU_SOURCE, which ends up as conflict
LLVM_CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags))
- LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
- LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))
- LLVM_LDLIBS += -lstdc++
+ # Prefer linking statically if it's available, otherwise fall back to shared

+ ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static)
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ LLVM_LDLIBS += -lstdc++
+ else
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ endif
LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
endif
@@ -306,6 +304,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \
+ BPF_TARGET_ENDIAN=$(BPF_TARGET_ENDIAN) \
EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \
EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' && \
cp $(RUNQSLOWER_OUTPUT)runqslower $@
@@ -684,6 +683,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
$(RESOLVE_BTFIDS) \
$(TRUNNER_BPFTOOL) \
+ $(OUTPUT)/veristat \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@
diff --git a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/bpf_arena_spin_lock.h
new file mode 100644
index 000000000000..fb8dc0768999
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_spin_lock.h
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef BPF_ARENA_SPIN_LOCK_H
+#define BPF_ARENA_SPIN_LOCK_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_atomic.h"
+
+#define arch_mcs_spin_lock_contended_label(l, label) smp_cond_load_acquire_label(l, VAL, label)
+#define arch_mcs_spin_unlock_contended(l) smp_store_release((l), 1)
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+
+#define EBUSY 16
+#define EOPNOTSUPP 95
+#define ETIMEDOUT 110
+
+#ifndef __arena
+#define __arena __attribute__((address_space(1)))
+#endif
+
+extern unsigned long CONFIG_NR_CPUS __kconfig;
+
+/*
+ * Typically, we'd just rely on the definition in vmlinux.h for qspinlock, but
+ * PowerPC overrides the definition to define lock->val as u32 instead of
+ * atomic_t, leading to compilation errors. Import a local definition below so
+ * that we don't depend on the vmlinux.h version.
+ */
+
+struct __qspinlock {
+ union {
+ atomic_t val;
+ struct {
+ u8 locked;
+ u8 pending;
+ };
+ struct {
+ u16 locked_pending;
+ u16 tail;
+ };
+ };
+};
+
+#define arena_spinlock_t struct __qspinlock
+/* FIXME: Using typedef causes CO-RE relocation error */
+/* typedef struct qspinlock arena_spinlock_t; */
+
+struct arena_mcs_spinlock {
+ struct arena_mcs_spinlock __arena *next;
+ int locked;
+ int count;
+};
+
+struct arena_qnode {
+ struct arena_mcs_spinlock mcs;
+};
+
+#define _Q_MAX_NODES 4
+#define _Q_PENDING_LOOPS 1
+
+/*
+ * Bitfields in the atomic value:
+ *
+ * 0- 7: locked byte
+ * 8: pending
+ * 9-15: not used
+ * 16-17: tail index
+ * 18-31: tail cpu (+1)
+ */
+#define _Q_MAX_CPUS 1024
+
+#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\
+ << _Q_ ## type ## _OFFSET)
+#define _Q_LOCKED_OFFSET 0
+#define _Q_LOCKED_BITS 8
+#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED)
+
+#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_BITS 8
+#define _Q_PENDING_MASK _Q_SET_MASK(PENDING)
+
+#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
+#define _Q_TAIL_IDX_BITS 2
+#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX)
+
+#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)
+#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET)
+#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)
+
+#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET
+#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
+
+#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
+#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES];
+
+static inline u32 encode_tail(int cpu, int idx)
+{
+ u32 tail;
+
+ tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+ tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
+
+ return tail;
+}
+
+static inline struct arena_mcs_spinlock __arena *decode_tail(u32 tail)
+{
+ u32 cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
+ u32 idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+
+ return &qnodes[cpu][idx].mcs;
+}
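A worked example of the encoding (not part of the header): with cpu == 2 and idx == 1, encode_tail() yields (2 + 1) << 18 | 1 << 16 == 0xd0000, and decode_tail(0xd0000) recovers cpu 2, idx 1, i.e. &qnodes[2][1].mcs.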
+
+static inline
+struct arena_mcs_spinlock __arena *grab_mcs_node(struct arena_mcs_spinlock __arena *base, int idx)
+{
+ return &((struct arena_qnode __arena *)base + idx)->mcs;
+}
+
+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
+/**
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(arena_spinlock_t __arena *lock, u32 tail)
+{
+ u32 old, new;
+
+ old = atomic_read(&lock->val);
+ do {
+ new = (old & _Q_LOCKED_PENDING_MASK) | tail;
+ /*
+ * We can use relaxed semantics since the caller ensures that
+ * the MCS node is properly initialized before updating the
+ * tail.
+ */
+ /* These loops are not expected to stall, but we still need to
+ * prove to the verifier they will terminate eventually.
+ */
+ cond_break_label(out);
+ } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
+
+ return old;
+out:
+ bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__);
+ return old;
+}
+
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->pending, 0);
+}
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ *
+ * Lock stealing is not allowed if this function is used.
+ */
+static __always_inline void clear_pending_set_locked(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
+}
+
+/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+}
+
+static __always_inline
+u32 arena_fetch_set_pending_acquire(arena_spinlock_t __arena *lock)
+{
+ u32 old, new;
+
+ old = atomic_read(&lock->val);
+ do {
+ new = old | _Q_PENDING_VAL;
+ /*
+ * These loops are not expected to stall, but we still need to
+ * prove to the verifier they will terminate eventually.
+ */
+ cond_break_label(out);
+ } while (!atomic_try_cmpxchg_acquire(&lock->val, &old, new));
+
+ return old;
+out:
+ bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__);
+ return old;
+}
+
+/**
+ * arena_spin_trylock - try to acquire the queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static __always_inline int arena_spin_trylock(arena_spinlock_t __arena *lock)
+{
+ int val = atomic_read(&lock->val);
+
+ if (unlikely(val))
+ return 0;
+
+ return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
+}
+
+__noinline
+int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val)
+{
+ struct arena_mcs_spinlock __arena *prev, *next, *node0, *node;
+ int ret = -ETIMEDOUT;
+ u32 old, tail;
+ int idx;
+
+ /*
+ * Wait for in-progress pending->locked hand-overs with a bounded
+ * number of spins so that we guarantee forward progress.
+ *
+ * 0,1,0 -> 0,0,1
+ */
+ if (val == _Q_PENDING_VAL) {
+ int cnt = _Q_PENDING_LOOPS;
+ val = atomic_cond_read_relaxed_label(&lock->val,
+ (VAL != _Q_PENDING_VAL) || !cnt--,
+ release_err);
+ }
+
+ /*
+ * If we observe any contention; queue.
+ */
+ if (val & ~_Q_LOCKED_MASK)
+ goto queue;
+
+ /*
+ * trylock || pending
+ *
+ * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
+ */
+ val = arena_fetch_set_pending_acquire(lock);
+
+ /*
+ * If we observe contention, there is a concurrent locker.
+ *
+ * Undo and queue; our setting of PENDING might have made the
+ * n,0,0 -> 0,0,0 transition fail and it will now be waiting
+ * on @next to become !NULL.
+ */
+ if (unlikely(val & ~_Q_LOCKED_MASK)) {
+
+ /* Undo PENDING if we set it. */
+ if (!(val & _Q_PENDING_MASK))
+ clear_pending(lock);
+
+ goto queue;
+ }
+
+ /*
+ * We're pending, wait for the owner to go away.
+ *
+ * 0,1,1 -> *,1,0
+ *
+ * this wait loop must be a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because not all
+ * clear_pending_set_locked() implementations imply full
+ * barriers.
+ */
+ if (val & _Q_LOCKED_MASK)
+ smp_cond_load_acquire_label(&lock->locked, !VAL, release_err);
+
+ /*
+ * take ownership and clear the pending bit.
+ *
+ * 0,1,0 -> 0,0,1
+ */
+ clear_pending_set_locked(lock);
+ return 0;
+
+ /*
+ * End of pending bit optimistic spinning and beginning of MCS
+ * queuing.
+ */
+queue:
+ node0 = &(qnodes[bpf_get_smp_processor_id()])[0].mcs;
+ idx = node0->count++;
+ tail = encode_tail(bpf_get_smp_processor_id(), idx);
+
+ /*
+ * 4 nodes are allocated based on the assumption that there will not be
+ * nested NMIs taking spinlocks. That may not be true in some
+ * architectures even though the chance of needing more than 4 nodes
+ * will still be extremely unlikely. When that happens, we simply return
+ * an error. Original qspinlock has a trylock fallback in this case.
+ */
+ if (unlikely(idx >= _Q_MAX_NODES)) {
+ ret = -EBUSY;
+ goto release_node_err;
+ }
+
+ node = grab_mcs_node(node0, idx);
+
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+
+ node->locked = 0;
+ node->next = NULL;
+
+ /*
+ * We touched a (possibly) cold cacheline in the per-cpu queue node;
+ * attempt the trylock once more in the hope someone let go while we
+ * weren't watching.
+ */
+ if (arena_spin_trylock(lock))
+ goto release;
+
+ /*
+ * Ensure that the initialisation of @node is complete before we
+ * publish the updated tail via xchg_tail() and potentially link
+ * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
+ */
+ smp_wmb();
+
+ /*
+ * Publish the updated tail.
+ * We have already touched the queueing cacheline; don't bother with
+ * pending stuff.
+ *
+ * p,*,* -> n,*,*
+ */
+ old = xchg_tail(lock, tail);
+ next = NULL;
+
+ /*
+ * if there was a previous node; link it and wait until reaching the
+ * head of the waitqueue.
+ */
+ if (old & _Q_TAIL_MASK) {
+ prev = decode_tail(old);
+
+ /* Link @node into the waitqueue. */
+ WRITE_ONCE(prev->next, node);
+
+ arch_mcs_spin_lock_contended_label(&node->locked, release_node_err);
+
+ /*
+ * While waiting for the MCS lock, the next pointer may have
+ * been set by another lock waiter. We cannot prefetch here
+ * due to lack of equivalent instruction in BPF ISA.
+ */
+ next = READ_ONCE(node->next);
+ }
+
+ /*
+ * we're at the head of the waitqueue, wait for the owner & pending to
+ * go away.
+ *
+ * *,x,y -> *,0,0
+ *
+ * this wait loop must use a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because the set_locked() function below
+ * does not imply a full barrier.
+ */
+ val = atomic_cond_read_acquire_label(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK),
+ release_node_err);
+
+ /*
+ * claim the lock:
+ *
+ * n,0,0 -> 0,0,1 : lock, uncontended
+ * *,*,0 -> *,*,1 : lock, contended
+ *
+ * If the queue head is the only one in the queue (lock value == tail)
+ * and nobody is pending, clear the tail code and grab the lock.
+ * Otherwise, we only need to grab the lock.
+ */
+
+ /*
+ * In the PV case we might already have _Q_LOCKED_VAL set, because
+ * of lock stealing; therefore we must also allow:
+ *
+ * n,0,1 -> 0,0,1
+ *
+ * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
+ * above wait condition, therefore any concurrent setting of
+ * PENDING will make the uncontended transition fail.
+ */
+ if ((val & _Q_TAIL_MASK) == tail) {
+ if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+ goto release; /* No contention */
+ }
+
+ /*
+ * Either somebody is queued behind us or _Q_PENDING_VAL got set
+ * which will then detect the remaining tail and queue behind us
+ * ensuring we'll see a @next.
+ */
+ set_locked(lock);
+
+ /*
+ * contended path; wait for next if not observed yet, release.
+ */
+ if (!next)
+ next = smp_cond_load_relaxed_label(&node->next, (VAL), release_node_err);
+
+ arch_mcs_spin_unlock_contended(&next->locked);
+
+release:;
+ /*
+ * release the node
+ *
+ * Doing a normal dec vs this_cpu_dec is fine. An upper context always
+ * decrements the count it incremented before returning, thus we're fine.
+ * For contexts interrupting us, they either observe our dec or not.
+ * Just ensure the compiler doesn't reorder this statement, as
+ * this_cpu_dec would have implied that implicitly.
+ */
+ barrier();
+ node0->count--;
+ return 0;
+release_node_err:
+ barrier();
+ node0->count--;
+ goto release_err;
+release_err:
+ return ret;
+}
+
+/**
+ * arena_spin_lock - acquire a queued spinlock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * On error, returned value will be negative.
+ * On success, zero is returned.
+ *
+ * The return value _must_ be tested against zero for success,
+ * instead of checking it against negative, for passing the
+ * BPF verifier.
+ *
+ * The user should do:
+ * if (arena_spin_lock(...) != 0) // failure
+ * or
+ * if (arena_spin_lock(...) == 0) // success
+ * or
+ * if (arena_spin_lock(...)) // failure
+ * or
+ * if (!arena_spin_lock(...)) // success
+ * instead of:
+ * if (arena_spin_lock(...) < 0) // failure
+ *
+ * The return value can still be inspected later.
+ */
+static __always_inline int arena_spin_lock(arena_spinlock_t __arena *lock)
+{
+ int val = 0;
+
+ if (CONFIG_NR_CPUS > 1024)
+ return -EOPNOTSUPP;
+
+ bpf_preempt_disable();
+ if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
+ return 0;
+
+ val = arena_spin_lock_slowpath(lock, val);
+ /* FIXME: bpf_assert_range(-MAX_ERRNO, 0) once we have it working for all cases. */
+ if (val)
+ bpf_preempt_enable();
+ return val;
+}
+
+/**
+ * arena_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ */
+static __always_inline void arena_spin_unlock(arena_spinlock_t __arena *lock)
+{
+ /*
+ * unlock() needs release semantics:
+ */
+ smp_store_release(&lock->locked, 0);
+ bpf_preempt_enable();
+}
+
+#define arena_spin_lock_irqsave(lock, flags) \
+ ({ \
+ int __ret; \
+ bpf_local_irq_save(&(flags)); \
+ __ret = arena_spin_lock((lock)); \
+ if (__ret) \
+ bpf_local_irq_restore(&(flags)); \
+ (__ret); \
+ })
+
+#define arena_spin_unlock_irqrestore(lock, flags) \
+ ({ \
+ arena_spin_unlock((lock)); \
+ bpf_local_irq_restore(&(flags)); \
+ })
+
+#endif
+
+#endif /* BPF_ARENA_SPIN_LOCK_H */
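A hedged usage sketch (the arena map, the placement of the lock and the program section are illustrative, not part of this header):

    struct {
            __uint(type, BPF_MAP_TYPE_ARENA);
            __uint(map_flags, BPF_F_MMAPABLE);
            __uint(max_entries, 100);        /* number of pages */
    } arena SEC(".maps");

    arena_spinlock_t __arena lock;

    SEC("tc")
    int prog(void *ctx)
    {
            unsigned long flags;

            if (arena_spin_lock_irqsave(&lock, flags))
                    return 0;        /* lock not acquired; do not spin forever */
            /* ... critical section over __arena shared data ... */
            arena_spin_unlock_irqrestore(&lock, flags);
            return 0;
    }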
diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h
new file mode 100644
index 000000000000..a9674e544322
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_atomic.h
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef BPF_ATOMIC_H
+#define BPF_ATOMIC_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+extern bool CONFIG_X86_64 __kconfig __weak;
+
+/*
+ * __unqual_typeof(x) - Declare an unqualified scalar type, leaving
+ * non-scalar types unchanged,
+ *
+ * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char'
+ * is not type-compatible with 'signed char', and we define a separate case.
+ *
+ * This is copied verbatim from kernel's include/linux/compiler_types.h, but
+ * with default expression (for pointers) changed from (x) to (typeof(x)0).
+ *
+ * This is because LLVM has a bug where for lvalue (x), it does not get rid of
+ * an extra address_space qualifier, but does in case of rvalue (typeof(x)0).
+ * Hence, for pointers, we need to create an rvalue expression to get the
+ * desired type. See https://github.com/llvm/llvm-project/issues/53400.
+ */
+#define __scalar_type_to_expr_cases(type) \
+ unsigned type : (unsigned type)0, signed type : (signed type)0
+
+#define __unqual_typeof(x) \
+ typeof(_Generic((x), \
+ char: (char)0, \
+ __scalar_type_to_expr_cases(char), \
+ __scalar_type_to_expr_cases(short), \
+ __scalar_type_to_expr_cases(int), \
+ __scalar_type_to_expr_cases(long), \
+ __scalar_type_to_expr_cases(long long), \
+ default: (typeof(x))0))
+
+/* No-op for BPF */
+#define cpu_relax() ({})
+
+#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = (val))
+
+#define cmpxchg(p, old, new) __sync_val_compare_and_swap((p), old, new)
+
+#define try_cmpxchg(p, pold, new) \
+ ({ \
+ __unqual_typeof(*(pold)) __o = *(pold); \
+ __unqual_typeof(*(p)) __r = cmpxchg(p, __o, new); \
+ if (__r != __o) \
+ *(pold) = __r; \
+ __r == __o; \
+ })
+
+#define try_cmpxchg_relaxed(p, pold, new) try_cmpxchg(p, pold, new)
+
+#define try_cmpxchg_acquire(p, pold, new) try_cmpxchg(p, pold, new)
+
+#define smp_mb() \
+ ({ \
+ unsigned long __val; \
+ __sync_fetch_and_add(&__val, 0); \
+ })
+
+#define smp_rmb() \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ else \
+ barrier(); \
+ })
+
+#define smp_wmb() \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ else \
+ barrier(); \
+ })
+
+/* Control dependency provides LOAD->STORE, provide LOAD->LOAD */
+#define smp_acquire__after_ctrl_dep() ({ smp_rmb(); })
+
+#define smp_load_acquire(p) \
+ ({ \
+ __unqual_typeof(*(p)) __v = READ_ONCE(*(p)); \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ barrier(); \
+ __v; \
+ })
+
+#define smp_store_release(p, val) \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ barrier(); \
+ WRITE_ONCE(*(p), val); \
+ })
+
+#define smp_cond_load_relaxed_label(p, cond_expr, label) \
+ ({ \
+ typeof(p) __ptr = (p); \
+ __unqual_typeof(*(p)) VAL; \
+ for (;;) { \
+ VAL = (__unqual_typeof(*(p)))READ_ONCE(*__ptr); \
+ if (cond_expr) \
+ break; \
+ cond_break_label(label); \
+ cpu_relax(); \
+ } \
+ (typeof(*(p)))VAL; \
+ })
+
+#define smp_cond_load_acquire_label(p, cond_expr, label) \
+ ({ \
+ __unqual_typeof(*p) __val = \
+ smp_cond_load_relaxed_label(p, cond_expr, label); \
+ smp_acquire__after_ctrl_dep(); \
+ (typeof(*(p)))__val; \
+ })
+
+#define atomic_read(p) READ_ONCE((p)->counter)
+
+#define atomic_cond_read_relaxed_label(p, cond_expr, label) \
+ smp_cond_load_relaxed_label(&(p)->counter, cond_expr, label)
+
+#define atomic_cond_read_acquire_label(p, cond_expr, label) \
+ smp_cond_load_acquire_label(&(p)->counter, cond_expr, label)
+
+#define atomic_try_cmpxchg_relaxed(p, pold, new) \
+ try_cmpxchg_relaxed(&(p)->counter, pold, new)
+
+#define atomic_try_cmpxchg_acquire(p, pold, new) \
+ try_cmpxchg_acquire(&(p)->counter, pold, new)
+
+#endif /* BPF_ATOMIC_H */
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index cd8ecd39c3f3..6535c8ae3c46 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -368,12 +368,12 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("may_goto %l[l_break]" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#else
@@ -392,7 +392,7 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
@@ -400,7 +400,7 @@ l_true: \
.short 0" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#else
@@ -418,7 +418,7 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
@@ -426,12 +426,15 @@ l_true: \
.short 0" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#endif
#endif
+#define cond_break __cond_break(break)
+#define cond_break_label(label) __cond_break(goto label)
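The labeled form is what the arena spin-lock header above relies on; a minimal sketch of the difference (loop bodies omitted):

    int i;

    for (i = 0; i < 100000; i++) {
            cond_break;                  /* old behaviour: simply leave the loop */
            /* ... */
    }

    for (i = 0; i < 100000; i++) {
            cond_break_label(timeout);   /* new: jump to an error path instead */
            /* ... */
    }
    return 0;
timeout:
    return -ETIMEDOUT;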
+
#ifndef bpf_nop_mov
#define bpf_nop_mov(var) \
asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 2eb3483f2fb0..8215c9b3115e 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -87,4 +87,9 @@ struct dentry;
*/
extern int bpf_get_dentry_xattr(struct dentry *dentry, const char *name,
struct bpf_dynptr *value_ptr) __ksym __weak;
+
+extern int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str,
+ const struct bpf_dynptr *value_p, int flags) __ksym __weak;
+extern int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str) __ksym __weak;
+
#endif
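A heavily hedged sketch of a caller; the hook and xattr name are illustrative, and the kernel side of this series decides which sleepable LSM hooks may call these kfuncs and which name prefixes (e.g. "security.bpf.") are accepted:

    SEC("lsm.s/inode_getxattr")
    int BPF_PROG(tag_dentry, struct dentry *dentry, const char *name)
    {
            struct bpf_dynptr value;
            char buf[] = "tagged";

            bpf_dynptr_from_mem(buf, sizeof(buf), 0, &value);
            bpf_set_dentry_xattr(dentry, "security.bpf.tag", &value, 0);
            return 0;
    }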
diff --git a/tools/testing/selftests/bpf/cap_helpers.c b/tools/testing/selftests/bpf/cap_helpers.c
index d5ac507401d7..98f840c3a38f 100644
--- a/tools/testing/selftests/bpf/cap_helpers.c
+++ b/tools/testing/selftests/bpf/cap_helpers.c
@@ -19,7 +19,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps)
err = capget(&hdr, data);
if (err)
- return err;
+ return -errno;
if (old_caps)
*old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
@@ -32,7 +32,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps)
data[1].effective |= cap1;
err = capset(&hdr, data);
if (err)
- return err;
+ return -errno;
return 0;
}
@@ -49,7 +49,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps)
err = capget(&hdr, data);
if (err)
- return err;
+ return -errno;
if (old_caps)
*old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
@@ -61,7 +61,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps)
data[1].effective &= ~cap1;
err = capset(&hdr, data);
if (err)
- return err;
+ return -errno;
return 0;
}
diff --git a/tools/testing/selftests/bpf/cap_helpers.h b/tools/testing/selftests/bpf/cap_helpers.h
index 6d163530cb0f..8dcb28557f76 100644
--- a/tools/testing/selftests/bpf/cap_helpers.h
+++ b/tools/testing/selftests/bpf/cap_helpers.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/capability.h>
+#include <errno.h>
#ifndef CAP_PERFMON
#define CAP_PERFMON 38
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 29541d486c5e..72b5c174ab3b 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -446,6 +446,23 @@ char *ping_command(int family)
return "ping";
}
+int append_tid(char *str, size_t sz)
+{
+ size_t end;
+
+ if (!str)
+ return -1;
+
+ end = strlen(str);
+ if (end + 8 > sz)
+ return -1;
+
+ sprintf(&str[end], "%07d", gettid());
+ str[end + 7] = '\0';
+
+ return 0;
+}
+
int remove_netns(const char *name)
{
char *cmd;
@@ -761,6 +778,36 @@ struct tmonitor_ctx {
int pcap_fd;
};
+static int __base_pr(const char *format, va_list args)
+{
+ return vfprintf(stdout, format, args);
+}
+
+static tm_print_fn_t __tm_pr = __base_pr;
+
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+ tm_print_fn_t old_print_fn;
+
+ old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED);
+
+ return old_print_fn;
+}
+
+void tm_print(const char *format, ...)
+{
+ tm_print_fn_t print_fn;
+ va_list args;
+
+ print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED);
+ if (!print_fn)
+ return;
+
+ va_start(args, format);
+ print_fn(format, args);
+ va_end(args);
+}
+
/* Is this packet captured with a Ethernet protocol type? */
static bool is_ethernet(const u_char *packet)
{
@@ -778,7 +825,7 @@ static bool is_ethernet(const u_char *packet)
case 770: /* ARPHRD_FRAD */
case 778: /* ARPHDR_IPGRE */
case 803: /* ARPHRD_IEEE80211_RADIOTAP */
- printf("Packet captured: arphdr_type=%d\n", arphdr_type);
+ tm_print("Packet captured: arphdr_type=%d\n", arphdr_type);
return false;
}
return true;
@@ -799,12 +846,13 @@ static const char *pkt_type_str(u16 pkt_type)
return "Unknown";
}
+#define MAX_FLAGS_STRLEN 21
/* Show the information of the transport layer in the packet */
static void show_transport(const u_char *packet, u16 len, u32 ifindex,
const char *src_addr, const char *dst_addr,
u16 proto, bool ipv6, u8 pkt_type)
{
- char *ifname, _ifname[IF_NAMESIZE];
+ char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = "";
const char *transport_str;
u16 src_port, dst_port;
struct udphdr *udp;
@@ -827,47 +875,39 @@ static void show_transport(const u_char *packet, u16 len, u32 ifindex,
dst_port = ntohs(tcp->dest);
transport_str = "TCP";
} else if (proto == IPPROTO_ICMP) {
- printf("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
- ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
- packet[0], packet[1]);
+ tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
+ ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+ packet[0], packet[1]);
return;
} else if (proto == IPPROTO_ICMPV6) {
- printf("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
- ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
- packet[0], packet[1]);
+ tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
+ ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+ packet[0], packet[1]);
return;
} else {
- printf("%-7s %-3s %s %s > %s: protocol %d\n",
- ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
- src_addr, dst_addr, proto);
+ tm_print("%-7s %-3s %s %s > %s: protocol %d\n",
+ ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
+ src_addr, dst_addr, proto);
return;
}
/* TCP or UDP*/
- flockfile(stdout);
+ if (proto == IPPROTO_TCP)
+ snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s",
+ tcp->fin ? ", FIN" : "",
+ tcp->syn ? ", SYN" : "",
+ tcp->rst ? ", RST" : "",
+ tcp->ack ? ", ACK" : "");
+
if (ipv6)
- printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d",
- ifname, pkt_type_str(pkt_type), src_addr, src_port,
- dst_addr, dst_port, transport_str, len);
+ tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n",
+ ifname, pkt_type_str(pkt_type), src_addr, src_port,
+ dst_addr, dst_port, transport_str, len, flags);
else
- printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d",
- ifname, pkt_type_str(pkt_type), src_addr, src_port,
- dst_addr, dst_port, transport_str, len);
-
- if (proto == IPPROTO_TCP) {
- if (tcp->fin)
- printf(", FIN");
- if (tcp->syn)
- printf(", SYN");
- if (tcp->rst)
- printf(", RST");
- if (tcp->ack)
- printf(", ACK");
- }
-
- printf("\n");
- funlockfile(stdout);
+ tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n",
+ ifname, pkt_type_str(pkt_type), src_addr, src_port,
+ dst_addr, dst_port, transport_str, len, flags);
}
static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
@@ -982,8 +1022,8 @@ static void *traffic_monitor_thread(void *arg)
ifname = _ifname;
}
- printf("%-7s %-3s Unknown network protocol type 0x%x\n",
- ifname, pkt_type_str(ptype), proto);
+ tm_print("%-7s %-3s Unknown network protocol type 0x%x\n",
+ ifname, pkt_type_str(ptype), proto);
}
}
@@ -1183,8 +1223,9 @@ void traffic_monitor_stop(struct tmonitor_ctx *ctx)
write(ctx->wake_fd, &w, sizeof(w));
pthread_join(ctx->thread, NULL);
- printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
+ tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
traffic_monitor_release(ctx);
}
+
#endif /* TRAFFIC_MONITOR */
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 9235976d0c50..ef208eefd571 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -18,6 +18,7 @@ typedef __u16 __sum16;
#include <netinet/udp.h>
#include <bpf/bpf_endian.h>
#include <net/if.h>
+#include <stdio.h>
#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
@@ -101,6 +102,18 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes);
int make_netns(const char *name);
int remove_netns(const char *name);
+/**
+ * append_tid() - Append thread ID to the given string.
+ *
+ * @str: string to extend
+ * @sz: string's size
+ *
+ * 8 characters are used to append the thread ID (7 digits + '\0')
+ *
+ * Returns -1 on errors, 0 otherwise
+ */
+int append_tid(char *str, size_t sz);
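For example (the buffer name is illustrative):

    char ns_name[32] = "ns_test_";

    if (append_tid(ns_name, sizeof(ns_name)))
            return -1;       /* 8 more characters would not fit */
    /* ns_name is now e.g. "ns_test_0001234", unique per thread */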
+
static __u16 csum_fold(__u32 csum)
{
csum = (csum & 0xffff) + (csum >> 16);
@@ -240,10 +253,13 @@ static inline __sum16 build_udp_v6_csum(const struct ipv6hdr *ip6h,
struct tmonitor_ctx;
+typedef int (*tm_print_fn_t)(const char *format, va_list args);
+
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name);
void traffic_monitor_stop(struct tmonitor_ctx *ctx);
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn);
#else
static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name)
@@ -254,6 +270,11 @@ static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, cons
static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
{
}
+
+static inline tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+ return NULL;
+}
#endif
#endif
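A sketch of swapping the monitor's printer in a test (the replacement function is illustrative):

    static int print_to_stderr(const char *format, va_list args)
    {
            return vfprintf(stderr, format, args);
    }

    ...
    tm_print_fn_t old = traffic_monitor_set_print(print_to_stderr);
    /* run the networking test; captured packets are reported on stderr */
    traffic_monitor_set_print(old);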
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 4ebd0da898f5..1d53a8561ee2 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -610,9 +610,11 @@ static int do_test_single(struct bpf_align_test *test)
.log_size = sizeof(bpf_vlog),
.log_level = 2,
);
+ const char *main_pass_start = "0: R1=ctx() R10=fp0";
const char *line_ptr;
int cur_line = -1;
int prog_len, i;
+ char *start;
int fd_prog;
int ret;
@@ -632,7 +634,13 @@ static int do_test_single(struct bpf_align_test *test)
ret = 0;
/* We make a local copy so that we can strtok() it */
strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
- line_ptr = strtok(bpf_vlog_copy, "\n");
+ start = strstr(bpf_vlog_copy, main_pass_start);
+ if (!start) {
+ ret = 1;
+ printf("Can't find initial line '%s'\n", main_pass_start);
+ goto out;
+ }
+ line_ptr = strtok(start, "\n");
for (i = 0; i < MAX_MATCHES; i++) {
struct bpf_reg_match m = test->matches[i];
const char *p;
@@ -682,6 +690,7 @@ static int do_test_single(struct bpf_align_test *test)
break;
}
}
+out:
if (fd_prog >= 0)
close(fd_prog);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
index 26e7c06c6cb4..d98577a6babc 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
@@ -162,6 +162,66 @@ static void test_uaf(struct arena_atomics *skel)
ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails");
}
+static void test_load_acquire(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ if (skel->data->skip_lacq_srel_tests) {
+ printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support load-acquire\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.load_acquire);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->load_acquire8_result, 0x12,
+ "load_acquire8_result");
+ ASSERT_EQ(skel->arena->load_acquire16_result, 0x1234,
+ "load_acquire16_result");
+ ASSERT_EQ(skel->arena->load_acquire32_result, 0x12345678,
+ "load_acquire32_result");
+ ASSERT_EQ(skel->arena->load_acquire64_result, 0x1234567890abcdef,
+ "load_acquire64_result");
+}
+
+static void test_store_release(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ if (skel->data->skip_lacq_srel_tests) {
+ printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support store-release\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.store_release);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->store_release8_result, 0x12,
+ "store_release8_result");
+ ASSERT_EQ(skel->arena->store_release16_result, 0x1234,
+ "store_release16_result");
+ ASSERT_EQ(skel->arena->store_release32_result, 0x12345678,
+ "store_release32_result");
+ ASSERT_EQ(skel->arena->store_release64_result, 0x1234567890abcdef,
+ "store_release64_result");
+}
+
void test_arena_atomics(void)
{
struct arena_atomics *skel;
@@ -171,7 +231,7 @@ void test_arena_atomics(void)
if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open"))
return;
- if (skel->data->skip_tests) {
+ if (skel->data->skip_all_tests) {
printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang",
__func__);
test__skip();
@@ -198,6 +258,10 @@ void test_arena_atomics(void)
test_xchg(skel);
if (test__start_subtest("uaf"))
test_uaf(skel);
+ if (test__start_subtest("load_acquire"))
+ test_load_acquire(skel);
+ if (test__start_subtest("store_release"))
+ test_store_release(skel);
cleanup:
arena_atomics__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
new file mode 100644
index 000000000000..7565fc7690c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/sysinfo.h>
+
+struct __qspinlock { int val; };
+typedef struct __qspinlock arena_spinlock_t;
+
+struct arena_qnode {
+ unsigned long next;
+ int count;
+ int locked;
+};
+
+#include "arena_spin_lock.skel.h"
+
+static long cpu;
+static int repeat;
+
+pthread_barrier_t barrier;
+
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *)arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = repeat,
+ );
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+ ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset), "cpu affinity");
+
+ err = pthread_barrier_wait(&barrier);
+ if (err != PTHREAD_BARRIER_SERIAL_THREAD && err != 0)
+ ASSERT_FALSE(true, "pthread_barrier");
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run err");
+ ASSERT_EQ((int)topts.retval, 0, "test_run retval");
+
+ pthread_exit(arg);
+}
+
+static void test_arena_spin_lock_size(int size)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct arena_spin_lock *skel;
+ pthread_t thread_id[16];
+ int prog_fd, i, err;
+ void *ret;
+
+ if (get_nprocs() < 2) {
+ test__skip();
+ return;
+ }
+
+ skel = arena_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load"))
+ return;
+ if (skel->data->test_skip == 2) {
+ test__skip();
+ goto end;
+ }
+ skel->bss->cs_count = size;
+ skel->bss->limit = repeat * 16;
+
+ ASSERT_OK(pthread_barrier_init(&barrier, NULL, 16), "barrier init");
+
+ prog_fd = bpf_program__fd(skel->progs.prog);
+ for (i = 0; i < 16; i++) {
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end_barrier;
+ }
+
+ for (i = 0; i < 16; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end_barrier;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end_barrier;
+ }
+
+ ASSERT_EQ(skel->bss->counter, repeat * 16, "check counter value");
+
+end_barrier:
+ pthread_barrier_destroy(&barrier);
+end:
+ arena_spin_lock__destroy(skel);
+ return;
+}
+
+void test_arena_spin_lock(void)
+{
+ repeat = 1000;
+ if (test__start_subtest("arena_spin_lock_1"))
+ test_arena_spin_lock_size(1);
+ cpu = 0;
+ if (test__start_subtest("arena_spin_lock_1000"))
+ test_arena_spin_lock_size(1000);
+ cpu = 0;
+ repeat = 100;
+ if (test__start_subtest("arena_spin_lock_50000"))
+ test_arena_spin_lock_size(50000);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
index cc184e4420f6..67557cda2208 100644
--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -6,6 +6,10 @@
#include <test_progs.h>
#include "bloom_filter_map.skel.h"
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE (-1)
+#endif
+
static void test_fail_cases(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts);
@@ -69,6 +73,7 @@ static void test_success_cases(void)
/* Create a map */
opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE;
+ opts.numa_node = NUMA_NO_NODE;
fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts);
if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 6f1bfacd7375..add4a18c33bd 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -323,19 +323,87 @@ static void test_task_pidfd(void)
static void test_task_sleepable(void)
{
struct bpf_iter_tasks *skel;
+ int pid, status, err, data_pipe[2], finish_pipe[2], c;
+ char *test_data = NULL;
+ char *test_data_long = NULL;
+ char *data[2];
+
+ if (!ASSERT_OK(pipe(data_pipe), "data_pipe") ||
+ !ASSERT_OK(pipe(finish_pipe), "finish_pipe"))
+ return;
skel = bpf_iter_tasks__open_and_load();
if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
return;
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork"))
+ return;
+
+ if (pid == 0) {
+ /* child */
+ close(data_pipe[0]);
+ close(finish_pipe[1]);
+
+ test_data = malloc(sizeof(char) * 10);
+ strncpy(test_data, "test_data", 10);
+ test_data[9] = '\0';
+
+ test_data_long = malloc(sizeof(char) * 5000);
+ for (int i = 0; i < 5000; ++i) {
+ if (i % 2 == 0)
+ test_data_long[i] = 'b';
+ else
+ test_data_long[i] = 'a';
+ }
+ test_data_long[4999] = '\0';
+
+ data[0] = test_data;
+ data[1] = test_data_long;
+
+ write(data_pipe[1], &data, sizeof(data));
+
+ /* keep child alive until after the test */
+ err = read(finish_pipe[0], &c, 1);
+ if (err != 1)
+ exit(-1);
+
+ close(data_pipe[1]);
+ close(finish_pipe[0]);
+ _exit(0);
+ }
+
+ /* parent */
+ close(data_pipe[1]);
+ close(finish_pipe[0]);
+
+ err = read(data_pipe[0], &data, sizeof(data));
+ ASSERT_EQ(err, sizeof(data), "read_check");
+
+ skel->bss->user_ptr = data[0];
+ skel->bss->user_ptr_long = data[1];
+ skel->bss->pid = pid;
+
do_dummy_read(skel->progs.dump_task_sleepable);
ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
"num_expected_failure_copy_from_user_task");
ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
"num_success_copy_from_user_task");
+ ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task_str, 0,
+ "num_expected_failure_copy_from_user_task_str");
+ ASSERT_GT(skel->bss->num_success_copy_from_user_task_str, 0,
+ "num_success_copy_from_user_task_str");
bpf_iter_tasks__destroy(skel);
+
+ write(finish_pipe[1], &c, 1);
+ err = waitpid(pid, &status, 0);
+ ASSERT_EQ(err, pid, "waitpid");
+ ASSERT_EQ(status, 0, "zero_child_exit");
+
+ close(data_pipe[0]);
+ close(finish_pipe[1]);
}
static void test_task_stack(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index a4a1f93878d4..dbd13f8e42a7 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -72,11 +72,14 @@ static void test_bpf_nf_ct(int mode)
if (!ASSERT_OK(system(cmd), cmd))
goto end;
- srv_port = (mode == TEST_XDP) ? 5005 : 5006;
- srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS);
+ srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS);
if (!ASSERT_GE(srv_fd, 0, "start_server"))
goto end;
+ srv_port = get_socket_local_port(srv_fd);
+ if (!ASSERT_GE(srv_port, 0, "get_sock_local_port"))
+ goto end;
+
client_fd = connect_to_server(srv_fd);
if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
goto end;
@@ -91,7 +94,7 @@ static void test_bpf_nf_ct(int mode)
skel->bss->saddr = peer_addr.sin_addr.s_addr;
skel->bss->sport = peer_addr.sin_port;
skel->bss->daddr = peer_addr.sin_addr.s_addr;
- skel->bss->dport = htons(srv_port);
+ skel->bss->dport = srv_port;
if (mode == TEST_XDP)
prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
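
The hunk above drops the htons() conversion because get_socket_local_port() (a helper in network_helpers.c, not shown in this diff) is expected to return the bound port already in network byte order. A minimal sketch of such a helper, under that assumption, is:

/* Hedged sketch of a getsockname()-based helper; the real
 * network_helpers.c implementation may differ in details.
 * Returns the local port in network byte order, or a negative
 * value on error, which is why the caller can both ASSERT_GE(.., 0)
 * and assign it to dport directly.
 */
static int get_socket_local_port_sketch(int sock_fd)
{
	struct sockaddr_storage addr;
	socklen_t addrlen = sizeof(addr);

	if (getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen))
		return -errno;

	if (addr.ss_family == AF_INET)
		return ((struct sockaddr_in *)&addr)->sin_port;
	if (addr.ss_family == AF_INET6)
		return ((struct sockaddr_in6 *)&addr)->sin6_port;

	return -EAFNOSUPPORT;
}
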
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index e63d74ce046f..8a9ba4292109 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3866,11 +3866,11 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "vlen != 0",
},
{
- .descr = "decl_tag test #8, invalid kflag",
+ .descr = "decl_tag test #8, tag with kflag",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
- BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1),
+ BTF_DECL_ATTR_ENC(NAME_TBD, 2, -1),
BTF_END_RAW,
},
BTF_STR_SEC("\0local\0tag1"),
@@ -3881,8 +3881,6 @@ static struct btf_raw_test raw_tests[] = {
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 1,
- .btf_load_err = true,
- .err_str = "Invalid btf_info kind_flag",
},
{
.descr = "decl_tag test #9, var, invalid component_idx",
@@ -4207,6 +4205,23 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "Type tags don't precede modifiers",
},
{
+ .descr = "type_tag test #7, tag with kflag",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ATTR_ENC(NAME_TBD, 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
.descr = "enum64 test #1, unsigned, size 8",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index b293b8501fd6..c0a776feec23 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -126,26 +126,69 @@ done:
return err;
}
-static char *dump_buf;
-static size_t dump_buf_sz;
-static FILE *dump_buf_file;
+struct test_ctx {
+ struct btf *btf;
+ struct btf_dump *d;
+ char *dump_buf;
+ size_t dump_buf_sz;
+ FILE *dump_buf_file;
+};
-static void test_btf_dump_incremental(void)
+static void test_ctx__free(struct test_ctx *t)
{
- struct btf *btf = NULL;
- struct btf_dump *d = NULL;
- int id, err, i;
+ fclose(t->dump_buf_file);
+ free(t->dump_buf);
+ btf_dump__free(t->d);
+ btf__free(t->btf);
+}
- dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
- if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
- return;
- btf = btf__new_empty();
- if (!ASSERT_OK_PTR(btf, "new_empty"))
+static int test_ctx__init(struct test_ctx *t)
+{
+ t->dump_buf_file = open_memstream(&t->dump_buf, &t->dump_buf_sz);
+ if (!ASSERT_OK_PTR(t->dump_buf_file, "dump_memstream"))
+ return -1;
+ t->btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(t->btf, "new_empty"))
goto err_out;
- d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL);
- if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new"))
+ t->d = btf_dump__new(t->btf, btf_dump_printf, t->dump_buf_file, NULL);
+ if (!ASSERT_OK(libbpf_get_error(t->d), "btf_dump__new"))
goto err_out;
+ return 0;
+
+err_out:
+ test_ctx__free(t);
+ return -1;
+}
+
+static void test_ctx__dump_and_compare(struct test_ctx *t,
+ const char *expected_output,
+ const char *message)
+{
+ int i, err;
+
+ for (i = 1; i < btf__type_cnt(t->btf); i++) {
+ err = btf_dump__dump_type(t->d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+
+ fflush(t->dump_buf_file);
+ t->dump_buf[t->dump_buf_sz] = 0; /* some libc implementations don't do this */
+
+ ASSERT_STREQ(t->dump_buf, expected_output, message);
+}
+
+static void test_btf_dump_incremental(void)
+{
+ struct test_ctx t = {};
+ struct btf *btf;
+ int id, err;
+
+ if (test_ctx__init(&t))
+ return;
+
+ btf = t.btf;
+
/* First, generate BTF corresponding to the following C code:
*
* enum x;
@@ -182,15 +225,7 @@ static void test_btf_dump_incremental(void)
err = btf__add_field(btf, "x", 4, 0, 0);
ASSERT_OK(err, "field_ok");
- for (i = 1; i < btf__type_cnt(btf); i++) {
- err = btf_dump__dump_type(d, i);
- ASSERT_OK(err, "dump_type_ok");
- }
-
- fflush(dump_buf_file);
- dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
-
- ASSERT_STREQ(dump_buf,
+ test_ctx__dump_and_compare(&t,
"enum x;\n"
"\n"
"enum x {\n"
@@ -221,7 +256,7 @@ static void test_btf_dump_incremental(void)
* enum values don't conflict;
*
*/
- fseek(dump_buf_file, 0, SEEK_SET);
+ fseek(t.dump_buf_file, 0, SEEK_SET);
id = btf__add_struct(btf, "s", 4);
ASSERT_EQ(id, 7, "struct_id");
@@ -232,14 +267,7 @@ static void test_btf_dump_incremental(void)
err = btf__add_field(btf, "s", 6, 64, 0);
ASSERT_OK(err, "field_ok");
- for (i = 1; i < btf__type_cnt(btf); i++) {
- err = btf_dump__dump_type(d, i);
- ASSERT_OK(err, "dump_type_ok");
- }
-
- fflush(dump_buf_file);
- dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
- ASSERT_STREQ(dump_buf,
+ test_ctx__dump_and_compare(&t,
"struct s___2 {\n"
" enum x x;\n"
" enum {\n"
@@ -248,11 +276,53 @@ static void test_btf_dump_incremental(void)
" struct s s;\n"
"};\n\n" , "c_dump1");
-err_out:
- fclose(dump_buf_file);
- free(dump_buf);
- btf_dump__free(d);
- btf__free(btf);
+ test_ctx__free(&t);
+}
+
+static void test_btf_dump_type_tags(void)
+{
+ struct test_ctx t = {};
+ struct btf *btf;
+ int id, err;
+
+ if (test_ctx__init(&t))
+ return;
+
+ btf = t.btf;
+
+ /* Generate BTF corresponding to the following C code:
+ *
+ * struct s {
+ * void __attribute__((btf_type_tag(\"void_tag\"))) *p1;
+ * void __attribute__((void_attr)) *p2;
+ * };
+ *
+ */
+
+ id = btf__add_type_tag(btf, "void_tag", 0);
+ ASSERT_EQ(id, 1, "type_tag_id");
+ id = btf__add_ptr(btf, id);
+ ASSERT_EQ(id, 2, "void_ptr_id1");
+
+ id = btf__add_type_attr(btf, "void_attr", 0);
+ ASSERT_EQ(id, 3, "type_attr_id");
+ id = btf__add_ptr(btf, id);
+ ASSERT_EQ(id, 4, "void_ptr_id2");
+
+ id = btf__add_struct(btf, "s", 8);
+ ASSERT_EQ(id, 5, "struct_id");
+ err = btf__add_field(btf, "p1", 2, 0, 0);
+ ASSERT_OK(err, "field_ok1");
+ err = btf__add_field(btf, "p2", 4, 0, 0);
+ ASSERT_OK(err, "field_ok2");
+
+ test_ctx__dump_and_compare(&t,
+"struct s {\n"
+" void __attribute__((btf_type_tag(\"void_tag\"))) *p1;\n"
+" void __attribute__((void_attr)) *p2;\n"
+"};\n\n", "dump_and_compare");
+
+ test_ctx__free(&t);
}
#define STRSIZE 4096
@@ -874,6 +944,9 @@ void test_btf_dump() {
if (test__start_subtest("btf_dump: incremental"))
test_btf_dump_incremental();
+ if (test__start_subtest("btf_dump: type_tags"))
+ test_btf_dump_type_tags();
+
btf = libbpf_find_kernel_btf();
if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
new file mode 100644
index 000000000000..d4d583872fa2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_preorder.skel.h"
+
+static int run_getsockopt_test(int cg_parent, int cg_child, int sock_fd, bool all_preorder)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opts);
+ enum bpf_attach_type prog_c_atype, prog_c2_atype, prog_p_atype, prog_p2_atype;
+ int prog_c_fd, prog_c2_fd, prog_p_fd, prog_p2_fd;
+ struct cgroup_preorder *skel = NULL;
+ struct bpf_program *prog;
+ __u8 *result, buf;
+ socklen_t optlen;
+ int err = 0;
+
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return 0;
+
+ buf = 0x00;
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (!ASSERT_OK(err, "setsockopt"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI;
+ if (all_preorder)
+ opts.flags |= BPF_F_PREORDER;
+ prog = skel->progs.child;
+ prog_c_fd = bpf_program__fd(prog);
+ prog_c_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_c_fd, cg_child, prog_c_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-child"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER;
+ prog = skel->progs.child_2;
+ prog_c2_fd = bpf_program__fd(prog);
+ prog_c2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_c2_fd, cg_child, prog_c2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-child_2"))
+ goto detach_child;
+
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_child_2;
+
+ result = skel->bss->result;
+ if (all_preorder)
+ ASSERT_TRUE(result[0] == 1 && result[1] == 2, "child only");
+ else
+ ASSERT_TRUE(result[0] == 2 && result[1] == 1, "child only");
+
+ skel->bss->idx = 0;
+ memset(result, 0, 4);
+
+ opts.flags = BPF_F_ALLOW_MULTI;
+ if (all_preorder)
+ opts.flags |= BPF_F_PREORDER;
+ prog = skel->progs.parent;
+ prog_p_fd = bpf_program__fd(prog);
+ prog_p_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent"))
+ goto detach_child_2;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER;
+ prog = skel->progs.parent_2;
+ prog_p2_fd = bpf_program__fd(prog);
+ prog_p2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2"))
+ goto detach_parent;
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_parent_2;
+
+ if (all_preorder)
+ ASSERT_TRUE(result[0] == 3 && result[1] == 4 && result[2] == 1 && result[3] == 2,
+ "parent and child");
+ else
+ ASSERT_TRUE(result[0] == 4 && result[1] == 2 && result[2] == 1 && result[3] == 3,
+ "parent and child");
+
+detach_parent_2:
+ ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype),
+ "bpf_prog_detach2-parent_2");
+detach_parent:
+ ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype),
+ "bpf_prog_detach2-parent");
+detach_child_2:
+ ASSERT_OK(bpf_prog_detach2(prog_c2_fd, cg_child, prog_c2_atype),
+ "bpf_prog_detach2-child_2");
+detach_child:
+ ASSERT_OK(bpf_prog_detach2(prog_c_fd, cg_child, prog_c_atype),
+ "bpf_prog_detach2-child");
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
+void test_cgroup_preorder(void)
+{
+ int cg_parent = -1, cg_child = -1, sock_fd = -1;
+
+ cg_parent = test__join_cgroup("/parent");
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
+ goto out;
+
+ cg_child = test__join_cgroup("/parent/child");
+ if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child"))
+ goto out;
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1");
+ ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2");
+
+out:
+ close(sock_fd);
+ close(cg_child);
+ close(cg_parent);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
index 64abba72ac10..37c1cc52ed98 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -10,12 +10,18 @@
static int run_test(int cgroup_fd, int server_fd, bool classid)
{
struct connect4_dropper *skel;
- int fd, err = 0;
+ int fd, err = 0, port;
skel = connect4_dropper__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return -1;
+ port = get_socket_local_port(server_fd);
+ if (!ASSERT_GE(port, 0, "get_socket_local_port"))
+ return -1;
+
+ skel->bss->port = ntohs(port);
+
skel->links.connect_v4_dropper =
bpf_program__attach_cgroup(skel->progs.connect_v4_dropper,
cgroup_fd);
@@ -48,10 +54,9 @@ void test_cgroup_v1v2(void)
{
struct network_helper_opts opts = {};
int server_fd, client_fd, cgroup_fd;
- static const int port = 60120;
/* Step 1: Check base connectivity works without any BPF. */
- server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "server_fd"))
return;
client_fd = connect_to_fd_opts(server_fd, &opts);
@@ -66,7 +71,7 @@ void test_cgroup_v1v2(void)
cgroup_fd = test__join_cgroup("/connect_dropper");
if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
return;
- server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "server_fd")) {
close(cgroup_fd);
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
deleted file mode 100644
index 7526de379081..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c
+++ /dev/null
@@ -1,107 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include "bpf/libbpf.h"
-#include "changes_pkt_data_freplace.skel.h"
-#include "changes_pkt_data.skel.h"
-#include <test_progs.h>
-
-static void print_verifier_log(const char *log)
-{
- if (env.verbosity >= VERBOSE_VERY)
- fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log);
-}
-
-static void test_aux(const char *main_prog_name,
- const char *to_be_replaced,
- const char *replacement,
- bool expect_load)
-{
- struct changes_pkt_data_freplace *freplace = NULL;
- struct bpf_program *freplace_prog = NULL;
- struct bpf_program *main_prog = NULL;
- LIBBPF_OPTS(bpf_object_open_opts, opts);
- struct changes_pkt_data *main = NULL;
- char log[16*1024];
- int err;
-
- opts.kernel_log_buf = log;
- opts.kernel_log_size = sizeof(log);
- if (env.verbosity >= VERBOSE_SUPER)
- opts.kernel_log_level = 1 | 2 | 4;
- main = changes_pkt_data__open_opts(&opts);
- if (!ASSERT_OK_PTR(main, "changes_pkt_data__open"))
- goto out;
- main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name);
- if (!ASSERT_OK_PTR(main_prog, "main_prog"))
- goto out;
- bpf_program__set_autoload(main_prog, true);
- err = changes_pkt_data__load(main);
- print_verifier_log(log);
- if (!ASSERT_OK(err, "changes_pkt_data__load"))
- goto out;
- freplace = changes_pkt_data_freplace__open_opts(&opts);
- if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open"))
- goto out;
- freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement);
- if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog"))
- goto out;
- bpf_program__set_autoload(freplace_prog, true);
- bpf_program__set_autoattach(freplace_prog, true);
- bpf_program__set_attach_target(freplace_prog,
- bpf_program__fd(main_prog),
- to_be_replaced);
- err = changes_pkt_data_freplace__load(freplace);
- print_verifier_log(log);
- if (expect_load) {
- ASSERT_OK(err, "changes_pkt_data_freplace__load");
- } else {
- ASSERT_ERR(err, "changes_pkt_data_freplace__load");
- ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log");
- }
-
-out:
- changes_pkt_data_freplace__destroy(freplace);
- changes_pkt_data__destroy(main);
-}
-
-/* There are two global subprograms in both changes_pkt_data.skel.h:
- * - one changes packet data;
- * - another does not.
- * It is ok to freplace subprograms that change packet data with those
- * that either do or do not. It is only ok to freplace subprograms
- * that do not change packet data with those that do not as well.
- * The below tests check outcomes for each combination of such freplace.
- * Also test a case when main subprogram itself is replaced and is a single
- * subprogram in a program.
- */
-void test_changes_pkt_data_freplace(void)
-{
- struct {
- const char *main;
- const char *to_be_replaced;
- bool changes;
- } mains[] = {
- { "main_with_subprogs", "changes_pkt_data", true },
- { "main_with_subprogs", "does_not_change_pkt_data", false },
- { "main_changes", "main_changes", true },
- { "main_does_not_change", "main_does_not_change", false },
- };
- struct {
- const char *func;
- bool changes;
- } replacements[] = {
- { "changes_pkt_data", true },
- { "does_not_change_pkt_data", false }
- };
- char buf[64];
-
- for (int i = 0; i < ARRAY_SIZE(mains); ++i) {
- for (int j = 0; j < ARRAY_SIZE(replacements); ++j) {
- snprintf(buf, sizeof(buf), "%s_with_%s",
- mains[i].to_be_replaced, replacements[j].func);
- if (!test__start_subtest(buf))
- continue;
- test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func,
- mains[i].changes || !replacements[j].changes);
- }
- }
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c
new file mode 100644
index 000000000000..285f20241fe1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "compute_live_registers.skel.h"
+#include "test_progs.h"
+
+void test_compute_live_registers(void)
+{
+ RUN_TESTS(compute_live_registers);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index e10ea92c3fe2..08963c82f30b 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -85,11 +85,11 @@ static int duration = 0;
#define NESTING_ERR_CASE(name) { \
NESTING_CASE_COMMON(name), \
.fails = true, \
- .run_btfgen_fails = true, \
+ .run_btfgen_fails = true, \
}
#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
- .a = { [2] = 1 }, \
+ .a = { [2] = 1, [3] = 11 }, \
.b = { [1] = { [2] = { [3] = 2 } } }, \
.c = { [1] = { .c = 3 } }, \
.d = { [0] = { [0] = { .d = 4 } } }, \
@@ -108,6 +108,7 @@ static int duration = 0;
.input_len = sizeof(struct core_reloc_##name), \
.output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \
.a2 = 1, \
+ .a3 = 12, \
.b123 = 2, \
.c1c = 3, \
.d00d = 4, \
@@ -602,6 +603,7 @@ static const struct core_reloc_test_case test_cases[] = {
ARRAYS_ERR_CASE(arrays___err_non_array),
ARRAYS_ERR_CASE(arrays___err_wrong_val_type),
ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
+ ARRAYS_ERR_CASE(arrays___err_bad_signed_arr_elem_sz),
/* enum/ptr/int handling scenarios */
PRIMITIVES_CASE(primitives),
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
index e58a04654238..6c45330a5ca3 100644
--- a/tools/testing/selftests/bpf/prog_tests/cpumask.c
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -25,6 +25,10 @@ static const char * const cpumask_success_testcases[] = {
"test_global_mask_nested_deep_rcu",
"test_global_mask_nested_deep_array_rcu",
"test_cpumask_weight",
+ "test_refcount_null_tracking",
+ "test_populate_reject_small_mask",
+ "test_populate_reject_unaligned",
+ "test_populate",
};
static void verify_success(const char *prog_name)
@@ -78,6 +82,5 @@ void test_cpumask(void)
verify_success(cpumask_success_testcases[i]);
}
- RUN_TESTS(cpumask_success);
RUN_TESTS(cpumask_failure);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index b614a5272dfd..e29cc16124c2 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -10,6 +10,7 @@ enum test_setup_type {
SETUP_SYSCALL_SLEEP,
SETUP_SKB_PROG,
SETUP_SKB_PROG_TP,
+ SETUP_XDP_PROG,
};
static struct {
@@ -18,6 +19,8 @@ static struct {
} success_tests[] = {
{"test_read_write", SETUP_SYSCALL_SLEEP},
{"test_dynptr_data", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_copy", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_copy_xdp", SETUP_XDP_PROG},
{"test_ringbuf", SETUP_SYSCALL_SLEEP},
{"test_skb_readonly", SETUP_SKB_PROG},
{"test_dynptr_skb_data", SETUP_SKB_PROG},
@@ -120,6 +123,24 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
break;
}
+ case SETUP_XDP_PROG:
+ {
+ char data[5000];
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .repeat = 1,
+ );
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
}
ASSERT_EQ(skel->bss->err, 0, "err");
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
index a1d52e73fb16..9add890c2d37 100644
--- a/tools/testing/selftests/bpf/prog_tests/fd_array.c
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -83,8 +83,8 @@ static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *ma
int err;
memset(&info, 0, len);
- info.nr_map_ids = *nr_map_ids,
- info.map_ids = ptr_to_u64(map_ids),
+ info.nr_map_ids = *nr_map_ids;
+ info.map_ids = ptr_to_u64(map_ids);
err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c b/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c
new file mode 100644
index 000000000000..568d3aa48a78
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "fexit_noreturns.skel.h"
+
+void test_fexit_noreturns(void)
+{
+ RUN_TESTS(fexit_noreturns);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
index 5a0b51157451..43a26ec69a8e 100644
--- a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
+++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
@@ -8,11 +8,12 @@
#include <unistd.h>
#include <test_progs.h>
#include "test_get_xattr.skel.h"
+#include "test_set_remove_xattr.skel.h"
#include "test_fsverity.skel.h"
static const char testfile[] = "/tmp/test_progs_fs_kfuncs";
-static void test_xattr(void)
+static void test_get_xattr(const char *name, const char *value, bool allow_access)
{
struct test_get_xattr *skel = NULL;
int fd = -1, err;
@@ -25,7 +26,7 @@ static void test_xattr(void)
close(fd);
fd = -1;
- err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0);
+ err = setxattr(testfile, name, value, strlen(value) + 1, 0);
if (err && errno == EOPNOTSUPP) {
printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
"To run this test, make sure /tmp filesystem supports xattr.\n",
@@ -48,16 +49,23 @@ static void test_xattr(void)
goto out;
fd = open(testfile, O_RDONLY, 0644);
+
if (!ASSERT_GE(fd, 0, "open_file"))
goto out;
- ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file");
-
/* Trigger security_inode_getxattr */
- err = getxattr(testfile, "user.kfuncs", v, sizeof(v));
- ASSERT_EQ(err, -1, "getxattr_return");
- ASSERT_EQ(errno, EINVAL, "getxattr_errno");
- ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry");
+ err = getxattr(testfile, name, v, sizeof(v));
+
+ if (allow_access) {
+ ASSERT_EQ(err, -1, "getxattr_return");
+ ASSERT_EQ(errno, EINVAL, "getxattr_errno");
+ ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file");
+ ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry");
+ } else {
+ ASSERT_EQ(err, strlen(value) + 1, "getxattr_return");
+ ASSERT_EQ(skel->bss->found_xattr_from_file, 0, "found_xattr_from_file");
+ ASSERT_EQ(skel->bss->found_xattr_from_dentry, 0, "found_xattr_from_dentry");
+ }
out:
close(fd);
@@ -65,6 +73,127 @@ out:
remove(testfile);
}
+/* xattr value we will set to security.bpf.foo */
+static const char value_foo[] = "hello";
+
+static void read_and_validate_foo(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_foo, value_out, sizeof(value_out));
+ ASSERT_EQ(err, sizeof(value_foo), "getxattr size foo");
+ ASSERT_EQ(strncmp(value_out, value_foo, sizeof(value_foo)), 0, "strncmp value_foo");
+}
+
+static void set_foo(struct test_set_remove_xattr *skel)
+{
+ ASSERT_OK(setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0),
+ "setxattr foo");
+}
+
+static void validate_bar_match(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out));
+ ASSERT_EQ(err, sizeof(skel->data->value_bar), "getxattr size bar");
+ ASSERT_EQ(strncmp(value_out, skel->data->value_bar, sizeof(skel->data->value_bar)), 0,
+ "strncmp value_bar");
+}
+
+static void validate_bar_removed(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out));
+ ASSERT_LT(err, 0, "getxattr size bar should fail");
+}
+
+static void test_set_remove_xattr(void)
+{
+ struct test_set_remove_xattr *skel = NULL;
+ int fd = -1, err;
+
+ fd = open(testfile, O_CREAT | O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return;
+
+ close(fd);
+ fd = -1;
+
+ skel = test_set_remove_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_set_remove_xattr__open_and_load"))
+ return;
+
+ /* Set security.bpf.foo to "hello" */
+ err = setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0);
+ if (err && errno == EOPNOTSUPP) {
+ printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
+ "To run this test, make sure /tmp filesystem supports xattr.\n",
+ __func__, errno);
+ test__skip();
+ goto out;
+ }
+
+ if (!ASSERT_OK(err, "setxattr"))
+ goto out;
+
+ skel->bss->monitored_pid = getpid();
+ err = test_set_remove_xattr__attach(skel);
+ if (!ASSERT_OK(err, "test_set_remove_xattr__attach"))
+ goto out;
+
+ /* First, test the non-_locked version of the kfuncs with getxattr. */
+
+ /* Read security.bpf.foo and trigger test_inode_getxattr. This
+ * bpf program will set security.bpf.bar to "world".
+ */
+ read_and_validate_foo(skel);
+ validate_bar_match(skel);
+
+ /* Read security.bpf.foo and trigger test_inode_getxattr again.
+ * This will remove xattr security.bpf.bar.
+ */
+ read_and_validate_foo(skel);
+ validate_bar_removed(skel);
+
+ ASSERT_TRUE(skel->bss->set_security_bpf_bar_success, "set_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->remove_security_bpf_bar_success, "remove_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->set_security_selinux_fail, "set_security_selinux_fail");
+ ASSERT_TRUE(skel->bss->remove_security_selinux_fail, "remove_security_selinux_fail");
+
+ /* Second, test the _locked version of the kfuncs, with setxattr */
+
+ /* Set security.bpf.foo and trigger test_inode_setxattr. This
+ * bpf program will set security.bpf.bar to "world".
+ */
+ set_foo(skel);
+ validate_bar_match(skel);
+
+ /* Set security.bpf.foo and trigger test_inode_setxattr again.
+ * This will remove xattr security.bpf.bar.
+ */
+ set_foo(skel);
+ validate_bar_removed(skel);
+
+ ASSERT_TRUE(skel->bss->locked_set_security_bpf_bar_success,
+ "locked_set_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->locked_remove_security_bpf_bar_success,
+ "locked_remove_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->locked_set_security_selinux_fail,
+ "locked_set_security_selinux_fail");
+ ASSERT_TRUE(skel->bss->locked_remove_security_selinux_fail,
+ "locked_remove_security_selinux_fail");
+
+out:
+ close(fd);
+ test_set_remove_xattr__destroy(skel);
+ remove(testfile);
+}
+
#ifndef SHA256_DIGEST_SIZE
#define SHA256_DIGEST_SIZE 32
#endif
@@ -141,8 +270,21 @@ out:
void test_fs_kfuncs(void)
{
- if (test__start_subtest("xattr"))
- test_xattr();
+ /* Matches xattr_names in progs/test_get_xattr.c */
+ if (test__start_subtest("user_xattr"))
+ test_get_xattr("user.kfuncs", "hello", true);
+
+ if (test__start_subtest("security_bpf_xattr"))
+ test_get_xattr("security.bpf.xxx", "hello", true);
+
+ if (test__start_subtest("security_bpf_xattr_error"))
+ test_get_xattr("security.bpf", "hello", false);
+
+ if (test__start_subtest("security_selinux_xattr_error"))
+ test_get_xattr("security.selinux", "hello", false);
+
+ if (test__start_subtest("set_remove_xattr"))
+ test_set_remove_xattr();
if (test__start_subtest("fsverity"))
test_fsverity();
diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
new file mode 100644
index 000000000000..a133354ac9bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Microsoft */
+#include <test_progs.h>
+#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test.lskel.h"
+#include "test_kernel_flag.skel.h"
+
+void test_kernel_flag(void)
+{
+ struct test_kernel_flag *lsm_skel;
+ struct kfunc_call_test *skel = NULL;
+ struct kfunc_call_test_lskel *lskel = NULL;
+ int ret;
+
+ lsm_skel = test_kernel_flag__open_and_load();
+ if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel"))
+ return;
+
+ lsm_skel->bss->monitored_tid = gettid();
+
+ ret = test_kernel_flag__attach(lsm_skel);
+ if (!ASSERT_OK(ret, "test_kernel_flag__attach"))
+ goto close_prog;
+
+ /* Test with skel. This should pass the gatekeeper */
+ skel = kfunc_call_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto close_prog;
+
+ /* Test with lskel. This should fail due to blocking kernel-based bpf() invocations */
+ lskel = kfunc_call_test_lskel__open_and_load();
+ if (!ASSERT_ERR_PTR(lskel, "lskel"))
+ goto close_prog;
+
+close_prog:
+ if (skel)
+ kfunc_call_test__destroy(skel);
+ if (lskel)
+ kfunc_call_test_lskel__destroy(lskel);
+
+ lsm_skel->bss->monitored_tid = 0;
+ test_kernel_flag__destroy(lsm_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c
new file mode 100644
index 000000000000..b6391af5f6f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <netinet/in.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+
+#define BPF_FILE "test_lwt_ip_encap.bpf.o"
+
+#define NETNS_NAME_SIZE 32
+#define NETNS_BASE "ns-lwt-ip-encap"
+
+#define IP4_ADDR_1 "172.16.1.100"
+#define IP4_ADDR_2 "172.16.2.100"
+#define IP4_ADDR_3 "172.16.3.100"
+#define IP4_ADDR_4 "172.16.4.100"
+#define IP4_ADDR_5 "172.16.5.100"
+#define IP4_ADDR_6 "172.16.6.100"
+#define IP4_ADDR_7 "172.16.7.100"
+#define IP4_ADDR_8 "172.16.8.100"
+#define IP4_ADDR_GRE "172.16.16.100"
+
+#define IP4_ADDR_SRC IP4_ADDR_1
+#define IP4_ADDR_DST IP4_ADDR_4
+
+#define IP6_ADDR_1 "fb01::1"
+#define IP6_ADDR_2 "fb02::1"
+#define IP6_ADDR_3 "fb03::1"
+#define IP6_ADDR_4 "fb04::1"
+#define IP6_ADDR_5 "fb05::1"
+#define IP6_ADDR_6 "fb06::1"
+#define IP6_ADDR_7 "fb07::1"
+#define IP6_ADDR_8 "fb08::1"
+#define IP6_ADDR_GRE "fb10::1"
+
+#define IP6_ADDR_SRC IP6_ADDR_1
+#define IP6_ADDR_DST IP6_ADDR_4
+
+/* Setup/topology:
+ *
+ * NS1 NS2 NS3
+ * veth1 <---> veth2 veth3 <---> veth4 (the top route)
+ * veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
+ *
+ * Each vethN gets IP[4|6]_ADDR_N address.
+ *
+ * IP*_ADDR_SRC = IP*_ADDR_1
+ * IP*_ADDR_DST = IP*_ADDR_4
+ *
+ * All tests ping from IP*_ADDR_SRC to IP*_ADDR_DST.
+ *
+ * By default, routes are configured to allow packets to go
+ * IP*_ADDR_1 <=> IP*_ADDR_2 <=> IP*_ADDR_3 <=> IP*_ADDR_4 (the top route).
+ *
+ * A GRE device is installed in NS3 with IP*_ADDR_GRE, and
+ * NS1/NS2 are configured to route packets to IP*_ADDR_GRE via IP*_ADDR_8
+ * (the bottom route).
+ *
+ * Tests:
+ *
+ * 1. Routes NS2->IP*_ADDR_DST are brought down, so the only way a ping
+ * from IP*_ADDR_SRC to IP*_ADDR_DST can work is via IP*_ADDR_GRE.
+ *
+ * 2a. In an egress test, a bpf LWT_XMIT program is installed on veth1
+ * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE.
+ *
+ * ping: SRC->[encap at veth1:egress]->GRE:decap->DST
+ * ping replies go DST->SRC directly
+ *
+ * 2b. In an ingress test, a bpf LWT_IN program is installed on veth2
+ * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE.
+ *
+ * ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
+ * ping replies go DST->SRC directly
+ */
+
+static int create_ns(char *name, size_t name_sz)
+{
+ if (!name)
+ goto fail;
+
+ if (!ASSERT_OK(append_tid(name, name_sz), "append TID"))
+ goto fail;
+
+ SYS(fail, "ip netns add %s", name);
+
+ /* rp_filter gets confused by what these tests are doing, so disable it */
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.all.rp_filter=0", name);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.default.rp_filter=0", name);
+ /* Disable IPv6 DAD because it sometimes takes too long and fails tests */
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.accept_dad=0", name);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.default.accept_dad=0", name);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int set_top_addr(const char *ns1, const char *ns2, const char *ns3)
+{
+ SYS(fail, "ip -n %s a add %s/24 dev veth1", ns1, IP4_ADDR_1);
+ SYS(fail, "ip -n %s a add %s/24 dev veth2", ns2, IP4_ADDR_2);
+ SYS(fail, "ip -n %s a add %s/24 dev veth3", ns2, IP4_ADDR_3);
+ SYS(fail, "ip -n %s a add %s/24 dev veth4", ns3, IP4_ADDR_4);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth1", ns1, IP6_ADDR_1);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth2", ns2, IP6_ADDR_2);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth3", ns2, IP6_ADDR_3);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth4", ns3, IP6_ADDR_4);
+
+ SYS(fail, "ip -n %s link set dev veth1 up", ns1);
+ SYS(fail, "ip -n %s link set dev veth2 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth3 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth4 up", ns3);
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int set_bottom_addr(const char *ns1, const char *ns2, const char *ns3)
+{
+ SYS(fail, "ip -n %s a add %s/24 dev veth5", ns1, IP4_ADDR_5);
+ SYS(fail, "ip -n %s a add %s/24 dev veth6", ns2, IP4_ADDR_6);
+ SYS(fail, "ip -n %s a add %s/24 dev veth7", ns2, IP4_ADDR_7);
+ SYS(fail, "ip -n %s a add %s/24 dev veth8", ns3, IP4_ADDR_8);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth5", ns1, IP6_ADDR_5);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth6", ns2, IP6_ADDR_6);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth7", ns2, IP6_ADDR_7);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth8", ns3, IP6_ADDR_8);
+
+ SYS(fail, "ip -n %s link set dev veth5 up", ns1);
+ SYS(fail, "ip -n %s link set dev veth6 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth7 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth8 up", ns3);
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int configure_vrf(const char *ns1, const char *ns2)
+{
+ if (!ns1 || !ns2)
+ goto fail;
+
+ SYS(fail, "ip -n %s link add red type vrf table 1001", ns1);
+ SYS(fail, "ip -n %s link set red up", ns1);
+ SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns1);
+ SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns1);
+ SYS(fail, "ip -n %s link set veth1 vrf red", ns1);
+ SYS(fail, "ip -n %s link set veth5 vrf red", ns1);
+
+ SYS(fail, "ip -n %s link add red type vrf table 1001", ns2);
+ SYS(fail, "ip -n %s link set red up", ns2);
+ SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns2);
+ SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns2);
+ SYS(fail, "ip -n %s link set veth2 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth3 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth6 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth7 vrf red", ns2);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int configure_ns1(const char *ns1, const char *vrf)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns1 || !vrf)
+ goto fail;
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto fail;
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth1 %s", IP4_ADDR_2, vrf);
+ SYS(fail, "ip route add default dev veth1 via %s %s", IP4_ADDR_2, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth1 %s", IP6_ADDR_2, vrf);
+ SYS(fail, "ip -6 route add default dev veth1 via %s %s", IP6_ADDR_2, vrf);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth5 %s", IP4_ADDR_6, vrf);
+ SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_7, IP4_ADDR_6, vrf);
+ SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_8, IP4_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 %s", IP6_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_7, IP6_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_8, IP6_ADDR_6, vrf);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int configure_ns2(const char *ns2, const char *vrf)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns2 || !vrf)
+ goto fail;
+
+ nstoken = open_netns(ns2);
+ if (!ASSERT_OK_PTR(nstoken, "open ns2"))
+ goto fail;
+
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.ip_forward=1", ns2);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.forwarding=1", ns2);
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth2 %s", IP4_ADDR_1, vrf);
+ SYS(fail, "ip route add %s/32 dev veth3 %s", IP4_ADDR_4, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth2 %s", IP6_ADDR_1, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth3 %s", IP6_ADDR_4, vrf);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth6 %s", IP4_ADDR_5, vrf);
+ SYS(fail, "ip route add %s/32 dev veth7 %s", IP4_ADDR_8, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth6 %s", IP6_ADDR_5, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth7 %s", IP6_ADDR_8, vrf);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int configure_ns3(const char *ns3)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns3)
+ goto fail;
+
+ nstoken = open_netns(ns3);
+ if (!ASSERT_OK_PTR(nstoken, "open ns3"))
+ goto fail;
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth4", IP4_ADDR_3);
+ SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_1, IP4_ADDR_3);
+ SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_2, IP4_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4", IP6_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_1, IP6_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_2, IP6_ADDR_3);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth8", IP4_ADDR_7);
+ SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_5, IP4_ADDR_7);
+ SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_6, IP4_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8", IP6_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_5, IP6_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_6, IP6_ADDR_7);
+
+ /* Configure IPv4 GRE device */
+ SYS(fail, "ip tunnel add gre_dev mode gre remote %s local %s ttl 255",
+ IP4_ADDR_1, IP4_ADDR_GRE);
+ SYS(fail, "ip link set gre_dev up");
+ SYS(fail, "ip a add %s dev gre_dev", IP4_ADDR_GRE);
+
+ /* Configure IPv6 GRE device */
+ SYS(fail, "ip tunnel add gre6_dev mode ip6gre remote %s local %s ttl 255",
+ IP6_ADDR_1, IP6_ADDR_GRE);
+ SYS(fail, "ip link set gre6_dev up");
+ SYS(fail, "ip a add %s dev gre6_dev", IP6_ADDR_GRE);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int setup_network(char *ns1, char *ns2, char *ns3, const char *vrf)
+{
+ if (!ns1 || !ns2 || !ns3 || !vrf)
+ goto fail;
+
+ SYS(fail, "ip -n %s link add veth1 type veth peer name veth2 netns %s", ns1, ns2);
+ SYS(fail, "ip -n %s link add veth3 type veth peer name veth4 netns %s", ns2, ns3);
+ SYS(fail, "ip -n %s link add veth5 type veth peer name veth6 netns %s", ns1, ns2);
+ SYS(fail, "ip -n %s link add veth7 type veth peer name veth8 netns %s", ns2, ns3);
+
+ if (vrf[0]) {
+ if (!ASSERT_OK(configure_vrf(ns1, ns2), "configure vrf"))
+ goto fail;
+ }
+ if (!ASSERT_OK(set_top_addr(ns1, ns2, ns3), "set top addresses"))
+ goto fail;
+
+ if (!ASSERT_OK(set_bottom_addr(ns1, ns2, ns3), "set bottom addresses"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns1(ns1, vrf), "configure ns1 routes"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns2(ns2, vrf), "configure ns2 routes"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns3(ns3), "configure ns3 routes"))
+ goto fail;
+
+ /* Link bottom route to the GRE tunnels */
+ SYS(fail, "ip -n %s route add %s/32 dev veth5 via %s %s",
+ ns1, IP4_ADDR_GRE, IP4_ADDR_6, vrf);
+ SYS(fail, "ip -n %s route add %s/32 dev veth7 via %s %s",
+ ns2, IP4_ADDR_GRE, IP4_ADDR_8, vrf);
+ SYS(fail, "ip -n %s -6 route add %s/128 dev veth5 via %s %s",
+ ns1, IP6_ADDR_GRE, IP6_ADDR_6, vrf);
+ SYS(fail, "ip -n %s -6 route add %s/128 dev veth7 via %s %s",
+ ns2, IP6_ADDR_GRE, IP6_ADDR_8, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int remove_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf)
+{
+ SYS(fail, "ip -n %s route del %s dev veth5 %s", ns1, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s route del %s dev veth7 %s", ns2, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route del %s/128 dev veth5 %s", ns1, IP6_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route del %s/128 dev veth7 %s", ns2, IP6_ADDR_GRE, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int add_unreachable_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf)
+{
+ SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns1, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns2, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns1, IP6_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns2, IP6_ADDR_GRE, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+#define GSO_SIZE 5000
+#define GSO_TCP_PORT 9000
+/* This tests the fix from commit ea0371f78799 ("net: fix GSO in bpf_lwt_push_ip_encap") */
+static int test_gso_fix(const char *ns1, const char *ns3, int family)
+{
+ const char *ip_addr = family == AF_INET ? IP4_ADDR_DST : IP6_ADDR_DST;
+ char gso_packet[GSO_SIZE] = {};
+ struct nstoken *nstoken = NULL;
+ int sfd, cfd, afd;
+ ssize_t bytes;
+ int ret = -1;
+
+ if (!ns1 || !ns3)
+ return ret;
+
+ nstoken = open_netns(ns3);
+ if (!ASSERT_OK_PTR(nstoken, "open ns3"))
+ return ret;
+
+ sfd = start_server_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL);
+ if (!ASSERT_OK_FD(sfd, "start server"))
+ goto close_netns;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto close_server;
+
+ cfd = connect_to_addr_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL);
+ if (!ASSERT_OK_FD(cfd, "connect to server"))
+ goto close_server;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ afd = accept(sfd, NULL, NULL);
+ if (!ASSERT_OK_FD(afd, "accept"))
+ goto close_client;
+
+ /* Send a packet larger than MTU */
+ bytes = send(cfd, gso_packet, GSO_SIZE, 0);
+ if (!ASSERT_EQ(bytes, GSO_SIZE, "send packet"))
+ goto close_accept;
+
+ /* Verify we received all expected bytes */
+ bytes = read(afd, gso_packet, GSO_SIZE);
+ if (!ASSERT_EQ(bytes, GSO_SIZE, "receive packet"))
+ goto close_accept;
+
+ ret = 0;
+
+close_accept:
+ close(afd);
+close_client:
+ close(cfd);
+close_server:
+ close(sfd);
+close_netns:
+ close_netns(nstoken);
+
+ return ret;
+}
+
+static int check_ping_ok(const char *ns1)
+{
+ SYS(fail, "ip netns exec %s ping -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP4_ADDR_DST);
+ SYS(fail, "ip netns exec %s ping6 -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP6_ADDR_DST);
+ return 0;
+fail:
+ return -1;
+}
+
+static int check_ping_fails(const char *ns1)
+{
+ int ret;
+
+ ret = SYS_NOFAIL("ip netns exec %s ping -c 1 -W1 -I veth1 %s", ns1, IP4_ADDR_DST);
+ if (!ret)
+ return -1;
+
+ ret = SYS_NOFAIL("ip netns exec %s ping6 -c 1 -W1 -I veth1 %s", ns1, IP6_ADDR_DST);
+ if (!ret)
+ return -1;
+
+ return 0;
+}
+
+#define EGRESS true
+#define INGRESS false
+#define IPV4_ENCAP true
+#define IPV6_ENCAP false
+static void lwt_ip_encap(bool ipv4_encap, bool egress, const char *vrf)
+{
+ char ns1[NETNS_NAME_SIZE] = NETNS_BASE "-1-";
+ char ns2[NETNS_NAME_SIZE] = NETNS_BASE "-2-";
+ char ns3[NETNS_NAME_SIZE] = NETNS_BASE "-3-";
+ char *sec = ipv4_encap ? "encap_gre" : "encap_gre6";
+
+ if (!vrf)
+ return;
+
+ if (!ASSERT_OK(create_ns(ns1, NETNS_NAME_SIZE), "create ns1"))
+ goto out;
+ if (!ASSERT_OK(create_ns(ns2, NETNS_NAME_SIZE), "create ns2"))
+ goto out;
+ if (!ASSERT_OK(create_ns(ns3, NETNS_NAME_SIZE), "create ns3"))
+ goto out;
+
+ if (!ASSERT_OK(setup_network(ns1, ns2, ns3, vrf), "setup network"))
+ goto out;
+
+ /* By default, pings work */
+ if (!ASSERT_OK(check_ping_ok(ns1), "ping OK"))
+ goto out;
+
+ /* Remove NS2->DST routes, ping fails */
+ SYS(out, "ip -n %s route del %s/32 dev veth3 %s", ns2, IP4_ADDR_DST, vrf);
+ SYS(out, "ip -n %s -6 route del %s/128 dev veth3 %s", ns2, IP6_ADDR_DST, vrf);
+ if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail"))
+ goto out;
+
+ /* Install replacement routes (LWT/eBPF), pings succeed */
+ if (egress) {
+ SYS(out, "ip -n %s route add %s encap bpf xmit obj %s sec %s dev veth1 %s",
+ ns1, IP4_ADDR_DST, BPF_FILE, sec, vrf);
+ SYS(out, "ip -n %s -6 route add %s encap bpf xmit obj %s sec %s dev veth1 %s",
+ ns1, IP6_ADDR_DST, BPF_FILE, sec, vrf);
+ } else {
+ SYS(out, "ip -n %s route add %s encap bpf in obj %s sec %s dev veth2 %s",
+ ns2, IP4_ADDR_DST, BPF_FILE, sec, vrf);
+ SYS(out, "ip -n %s -6 route add %s encap bpf in obj %s sec %s dev veth2 %s",
+ ns2, IP6_ADDR_DST, BPF_FILE, sec, vrf);
+ }
+
+ if (!ASSERT_OK(check_ping_ok(ns1), "ping OK"))
+ goto out;
+
+ /* Skip GSO tests with VRF: VRF routing needs properly assigned
+ * source IP/device, which is easy to do with ping but hard with TCP.
+ */
+ if (egress && !vrf[0]) {
+ if (!ASSERT_OK(test_gso_fix(ns1, ns3, AF_INET), "test GSO"))
+ goto out;
+ }
+
+ /* Negative test: remove routes to GRE devices: ping fails */
+ if (!ASSERT_OK(remove_routes_to_gredev(ns1, ns2, vrf), "remove routes to gredev"))
+ goto out;
+ if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail"))
+ goto out;
+
+ /* Another negative test */
+ if (!ASSERT_OK(add_unreachable_routes_to_gredev(ns1, ns2, vrf),
+ "add unreachable routes"))
+ goto out;
+ ASSERT_OK(check_ping_fails(ns1), "ping expected fail");
+
+out:
+ SYS_NOFAIL("ip netns del %s", ns1);
+ SYS_NOFAIL("ip netns del %s", ns2);
+ SYS_NOFAIL("ip netns del %s", ns3);
+}
+
+void test_lwt_ip_encap_vrf_ipv6(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV6_ENCAP, EGRESS, "vrf red");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV6_ENCAP, INGRESS, "vrf red");
+}
+
+void test_lwt_ip_encap_vrf_ipv4(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV4_ENCAP, EGRESS, "vrf red");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV4_ENCAP, INGRESS, "vrf red");
+}
+
+void test_lwt_ip_encap_ipv6(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV6_ENCAP, EGRESS, "");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV6_ENCAP, INGRESS, "");
+}
+
+void test_lwt_ip_encap_ipv4(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV4_ENCAP, EGRESS, "");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV4_ENCAP, INGRESS, "");
+}
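
The BPF side lives in test_lwt_ip_encap.bpf.o and is not part of this diff. A rough sketch of what the encap_gre section referenced by the `ip route add ... encap bpf` commands might look like — assuming it wraps the packet in an outer IPv4+GRE header via bpf_lwt_push_encap() and asks the stack to re-route the result; the addresses below are illustrative, and the in-tree progs/test_lwt_ip_encap.c may differ:

/* Hedged sketch of an LWT encap program: pushes an outer IPv4 + GRE
 * header so the packet is steered to the GRE device (IP4_ADDR_GRE)
 * and re-routed by the stack.
 */
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

struct grehdr {
	__be16 flags;
	__be16 protocol;
};

SEC("encap_gre")
int lwt_encap_gre(struct __sk_buff *skb)
{
	struct {
		struct iphdr ip;
		struct grehdr gre;
	} __attribute__((packed)) hdr = {};

	hdr.ip.version = 4;
	hdr.ip.ihl = sizeof(hdr.ip) >> 2;
	hdr.ip.ttl = 64;
	hdr.ip.protocol = IPPROTO_GRE;
	hdr.ip.saddr = bpf_htonl(0xac100164);	/* 172.16.1.100, illustrative */
	hdr.ip.daddr = bpf_htonl(0xac101064);	/* 172.16.16.100, illustrative */
	hdr.ip.tot_len = bpf_htons(skb->len + sizeof(hdr));

	hdr.gre.protocol = skb->protocol;	/* inner protocol, e.g. ETH_P_IP */

	if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, sizeof(hdr)))
		return BPF_DROP;

	return BPF_LWT_REROUTE;
}

char _license[] SEC("license") = "GPL";
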
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
new file mode 100644
index 000000000000..3bc730b7c7fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Connects 6 network namespaces through veths.
+ * Each NS may have different IPv6 global scope addresses:
+ *
+ * NS1 NS2 NS3 NS4 NS5 NS6
+ * lo veth1 <-> veth2 veth3 <-> veth4 veth5 <-> veth6 lo veth7 <-> veth8 veth9 <-> veth10 lo
+ * fb00 ::1 ::12 ::21 ::34 ::43 ::56 ::65 ::78 ::87 ::910 ::109 ::6
+ * fd00 ::4
+ * fc42 ::1
+ *
+ * All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in an
+ * IPv6 header with a Segment Routing Header, with segments:
+ * fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
+ *
+ * Three fd00::/16 IPv6 addresses are bound to seg6local End.BPF actions:
+ * - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
+ * - fd00::2 : remove the TLV, change the flags, add a tag
+ * - fd00::3 : apply an End.T action to fd00::4, through routing table 117
+ *
+ * fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
+ * Each End.BPF action will validate the operations applied on the SRH by the
+ * previous BPF program in the chain, otherwise the packet is dropped.
+ *
+ * A UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
+ * datagram can be read on NS6 when binding to fb00::6.
+ */
+
+#include "network_helpers.h"
+#include "test_progs.h"
+
+#define NETNS_BASE "lwt-seg6local-"
+#define BPF_FILE "test_lwt_seg6local.bpf.o"
+
+static void cleanup(void)
+{
+ int ns;
+
+ for (ns = 1; ns < 7; ns++)
+ SYS_NOFAIL("ip netns del %s%d", NETNS_BASE, ns);
+}
+
+static int setup(void)
+{
+ int ns;
+
+ for (ns = 1; ns < 7; ns++)
+ SYS(fail, "ip netns add %s%d", NETNS_BASE, ns);
+
+ SYS(fail, "ip -n %s6 link set dev lo up", NETNS_BASE);
+
+ for (ns = 1; ns < 6; ns++) {
+ int local_id = ns * 2 - 1;
+ int peer_id = ns * 2;
+ int next_ns = ns + 1;
+
+ SYS(fail, "ip -n %s%d link add veth%d type veth peer name veth%d netns %s%d",
+ NETNS_BASE, ns, local_id, peer_id, NETNS_BASE, next_ns);
+
+ SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, ns, local_id);
+ SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, next_ns, peer_id);
+
+ /* All link scope addresses to veths */
+ SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link",
+ NETNS_BASE, ns, local_id, peer_id, local_id);
+ SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link",
+ NETNS_BASE, next_ns, peer_id, local_id, peer_id);
+ }
+
+
+ SYS(fail, "ip -n %s5 -6 route add fb00::109 table 117 dev veth9 scope link", NETNS_BASE);
+
+ SYS(fail, "ip -n %s1 -6 addr add fb00::1/16 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s1 -6 route add fb00::6 dev veth1 via fb00::21", NETNS_BASE);
+
+ SYS(fail, "ip -n %s2 -6 route add fb00::6 encap bpf in obj %s sec encap_srh dev veth2",
+ NETNS_BASE, BPF_FILE);
+ SYS(fail, "ip -n %s2 -6 route add fd00::1 dev veth3 via fb00::43 scope link", NETNS_BASE);
+
+ SYS(fail, "ip -n %s3 -6 route add fc42::1 dev veth5 via fb00::65", NETNS_BASE);
+ SYS(fail,
+ "ip -n %s3 -6 route add fd00::1 encap seg6local action End.BPF endpoint obj %s sec add_egr_x dev veth4",
+ NETNS_BASE, BPF_FILE);
+
+ SYS(fail,
+ "ip -n %s4 -6 route add fd00::2 encap seg6local action End.BPF endpoint obj %s sec pop_egr dev veth6",
+ NETNS_BASE, BPF_FILE);
+ SYS(fail, "ip -n %s4 -6 addr add fc42::1 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s4 -6 route add fd00::3 dev veth7 via fb00::87", NETNS_BASE);
+
+ SYS(fail, "ip -n %s5 -6 route add fd00::4 table 117 dev veth9 via fb00::109", NETNS_BASE);
+ SYS(fail,
+ "ip -n %s5 -6 route add fd00::3 encap seg6local action End.BPF endpoint obj %s sec inspect_t dev veth8",
+ NETNS_BASE, BPF_FILE);
+
+ SYS(fail, "ip -n %s6 -6 addr add fb00::6/16 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s6 -6 addr add fd00::4/16 dev lo", NETNS_BASE);
+
+ for (ns = 1; ns < 6; ns++)
+ SYS(fail, "ip netns exec %s%d sysctl -wq net.ipv6.conf.all.forwarding=1",
+ NETNS_BASE, ns);
+
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.all.seg6_enabled=1", NETNS_BASE);
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.lo.seg6_enabled=1", NETNS_BASE);
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.veth10.seg6_enabled=1", NETNS_BASE);
+
+ return 0;
+fail:
+ return -1;
+}
+
+#define SERVER_PORT 7330
+#define CLIENT_PORT 2121
+void test_lwt_seg6local(void)
+{
+ struct sockaddr_in6 server_addr = {};
+ const char *ns1 = NETNS_BASE "1";
+ const char *ns6 = NETNS_BASE "6";
+ struct nstoken *nstoken = NULL;
+ const char *foobar = "foobar";
+ ssize_t bytes;
+ int sfd, cfd;
+ char buf[7];
+
+ if (!ASSERT_OK(setup(), "setup"))
+ goto out;
+
+ nstoken = open_netns(ns6);
+ if (!ASSERT_OK_PTR(nstoken, "open ns6"))
+ goto out;
+
+ sfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::6", SERVER_PORT, NULL);
+ if (!ASSERT_OK_FD(sfd, "start server"))
+ goto close_netns;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto close_server;
+
+ cfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::1", CLIENT_PORT, NULL);
+ if (!ASSERT_OK_FD(cfd, "start client"))
+ goto close_server;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ /* Send a small UDP datagram from fb00::1 to fb00::6 */
+ server_addr.sin6_family = AF_INET6;
+ server_addr.sin6_port = htons(SERVER_PORT);
+ if (!ASSERT_EQ(inet_pton(AF_INET6, "fb00::6", &server_addr.sin6_addr), 1,
+ "build target addr"))
+ goto close_client;
+
+ bytes = sendto(cfd, foobar, sizeof(foobar), 0,
+ (struct sockaddr *)&server_addr, sizeof(server_addr));
+ if (!ASSERT_EQ(bytes, sizeof(foobar), "send packet"))
+ goto close_client;
+
+ /* Verify we received all expected bytes */
+ bytes = read(sfd, buf, sizeof(buf));
+ if (!ASSERT_EQ(bytes, sizeof(buf), "receive packet"))
+ goto close_client;
+ ASSERT_STREQ(buf, foobar, "check udp packet");
+
+close_client:
+ close(cfd);
+close_server:
+ close(sfd);
+close_netns:
+ close_netns(nstoken);
+
+out:
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
index ac3c3c097c0e..e00cd34586dd 100644
--- a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
@@ -33,20 +33,25 @@ void test_netns_cookie(void)
skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup(
skel->progs.get_netns_cookie_sockops, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach"))
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach_sockops"))
goto done;
verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg);
map = bpf_map__fd(skel->maps.sock_map);
err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
- if (!ASSERT_OK(err, "prog_attach"))
+ if (!ASSERT_OK(err, "prog_attach_sk_msg"))
goto done;
tc_fd = bpf_program__fd(skel->progs.get_netns_cookie_tcx);
err = bpf_prog_attach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &opta);
- if (!ASSERT_OK(err, "prog_attach"))
+ if (!ASSERT_OK(err, "prog_attach_tcx"))
goto done;
+ skel->links.get_netns_cookie_cgroup_skb = bpf_program__attach_cgroup(
+ skel->progs.get_netns_cookie_cgroup_skb, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_cgroup_skb, "prog_attach_cgroup_skb"))
+ goto cleanup_tc;
+
server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
goto cleanup_tc;
@@ -69,16 +74,18 @@ void test_netns_cookie(void)
if (!ASSERT_OK(err, "getsockopt"))
goto cleanup_tc;
- ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value_sockops");
err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies),
&client_fd, &val);
if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)"))
goto cleanup_tc;
- ASSERT_EQ(val, cookie_expected_value, "cookie_value");
- ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value");
- ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value");
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value_sk_msg");
+ ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value_init_tcx");
+ ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value_tcx");
+ ASSERT_EQ(skel->bss->cgroup_skb_init_netns_cookie, cookie_expected_value, "cookie_value_init_cgroup_skb");
+ ASSERT_EQ(skel->bss->cgroup_skb_netns_cookie, cookie_expected_value, "cookie_value_cgroup_skb");
cleanup_tc:
err = bpf_prog_detach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &optd);
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 761ce24bce38..99c953f2be21 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -200,41 +200,28 @@ static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg)
return;
}
-static void test_in_netns(int (*fn)(void *), void *arg)
-{
- struct nstoken *nstoken = NULL;
-
- SYS(cleanup, "ip netns add ns_current_pid_tgid");
- SYS(cleanup, "ip -net ns_current_pid_tgid link set dev lo up");
-
- nstoken = open_netns("ns_current_pid_tgid");
- if (!ASSERT_OK_PTR(nstoken, "open_netns"))
- goto cleanup;
-
- test_ns_current_pid_tgid_new_ns(fn, arg);
-
-cleanup:
- if (nstoken)
- close_netns(nstoken);
- SYS_NOFAIL("ip netns del ns_current_pid_tgid");
-}
-
/* TODO: use a different tracepoint */
-void serial_test_ns_current_pid_tgid(void)
+void serial_test_current_pid_tgid(void)
{
if (test__start_subtest("root_ns_tp"))
test_current_pid_tgid_tp(NULL);
if (test__start_subtest("new_ns_tp"))
test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL);
- if (test__start_subtest("new_ns_cgrp")) {
- int cgroup_fd = -1;
-
- cgroup_fd = test__join_cgroup("/sock_addr");
- if (ASSERT_GE(cgroup_fd, 0, "join_cgroup")) {
- test_in_netns(test_current_pid_tgid_cgrp, &cgroup_fd);
- close(cgroup_fd);
- }
+}
+
+void test_ns_current_pid_tgid_cgrp(void)
+{
+ int cgroup_fd = test__join_cgroup("/sock_addr");
+
+ if (ASSERT_OK_FD(cgroup_fd, "join_cgroup")) {
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_cgrp, &cgroup_fd);
+ close(cgroup_fd);
}
- if (test__start_subtest("new_ns_sk_msg"))
- test_in_netns(test_current_pid_tgid_sk_msg, NULL);
}
+
+void test_ns_current_pid_tgid_sk_msg(void)
+{
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_sk_msg, NULL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prepare.c b/tools/testing/selftests/bpf/prog_tests/prepare.c
new file mode 100644
index 000000000000..fb5cdad97116
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prepare.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "prepare.skel.h"
+
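+/* bpf_object__prepare() creates the object's maps without loading any of its
+ * programs, so after a successful prepare every map fd is expected to be
+ * valid, while before prepare none of them are.
+ */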
+static bool check_prepared(struct bpf_object *obj)
+{
+ bool is_prepared = true;
+ const struct bpf_map *map;
+
+ bpf_object__for_each_map(map, obj) {
+ if (bpf_map__fd(map) < 0)
+ is_prepared = false;
+ }
+
+ return is_prepared;
+}
+
+static void test_prepare_no_load(void)
+{
+ struct prepare *skel;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ );
+
+ skel = prepare__open();
+ if (!ASSERT_OK_PTR(skel, "prepare__open"))
+ return;
+
+ if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared"))
+ goto cleanup;
+
+ err = bpf_object__prepare(skel->obj);
+
+ if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared"))
+ goto cleanup;
+
+ if (!ASSERT_OK(err, "bpf_object__prepare"))
+ goto cleanup;
+
+cleanup:
+ prepare__destroy(skel);
+}
+
+static void test_prepare_load(void)
+{
+ struct prepare *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ );
+
+ skel = prepare__open();
+ if (!ASSERT_OK_PTR(skel, "prepare__open"))
+ return;
+
+ if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared"))
+ goto cleanup;
+
+ err = bpf_object__prepare(skel->obj);
+ if (!ASSERT_OK(err, "bpf_object__prepare"))
+ goto cleanup;
+
+ err = prepare__load(skel);
+ if (!ASSERT_OK(err, "prepare__load"))
+ goto cleanup;
+
+ if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.program);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
+
+cleanup:
+ prepare__destroy(skel);
+}
+
+void test_prepare(void)
+{
+ if (test__start_subtest("prepare_load"))
+ test_prepare_load();
+ if (test__start_subtest("prepare_no_load"))
+ test_prepare_no_load();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
index 509883e6823a..5d3c00a08a88 100644
--- a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
+++ b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
@@ -6,6 +6,7 @@
#include "epilogue_tailcall.skel.h"
#include "pro_epilogue_goto_start.skel.h"
#include "epilogue_exit.skel.h"
+#include "pro_epilogue_with_kfunc.skel.h"
struct st_ops_args {
__u64 a;
@@ -55,6 +56,7 @@ void test_pro_epilogue(void)
RUN_TESTS(pro_epilogue);
RUN_TESTS(pro_epilogue_goto_start);
RUN_TESTS(epilogue_exit);
+ RUN_TESTS(pro_epilogue_with_kfunc);
if (test__start_subtest("tailcall"))
test_tailcall();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
index ebe0c12b5536..c9f855e5da24 100644
--- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -81,6 +81,9 @@ static const char * const inproper_region_tests[] = {
"nested_rcu_region",
"rcu_read_lock_global_subprog_lock",
"rcu_read_lock_global_subprog_unlock",
+ "rcu_read_lock_sleepable_helper_global_subprog",
+ "rcu_read_lock_sleepable_kfunc_global_subprog",
+ "rcu_read_lock_sleepable_global_subprog_indirect",
};
static void test_inproper_region(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
index c7b9ba8b1d06..a8d1eaa67020 100644
--- a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
@@ -24,6 +24,7 @@ struct read_ret_desc {
{ .name = "copy_from_user", .ret = -EFAULT },
{ .name = "copy_from_user_task", .ret = -EFAULT },
{ .name = "copy_from_user_str", .ret = -EFAULT },
+ { .name = "copy_from_user_task_str", .ret = -EFAULT },
};
void test_read_vsyscall(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
index e12255121c15..e4dac529d424 100644
--- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -202,7 +202,7 @@ err_out:
void test_setget_sockopt(void)
{
cg_fd = test__join_cgroup(CG_NAME);
- if (cg_fd < 0)
+ if (!ASSERT_OK_FD(cg_fd, "join cgroup"))
return;
if (create_netns())
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index 2b0068742ef9..e3ea5dc2f697 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -50,6 +50,9 @@ static struct {
{ "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" },
{ "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" },
{ "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_helper_subprog", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_kfunc_subprog", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_subprog_indirect", "global function calls are not allowed while holding a lock" },
};
static int match_regex(const char *pattern, const char *string)
diff --git a/tools/testing/selftests/bpf/prog_tests/summarization.c b/tools/testing/selftests/bpf/prog_tests/summarization.c
new file mode 100644
index 000000000000..5dd6c120a838
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/summarization.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bpf/libbpf.h"
+#include "summarization_freplace.skel.h"
+#include "summarization.skel.h"
+#include <test_progs.h>
+
+static void print_verifier_log(const char *log)
+{
+ if (env.verbosity >= VERBOSE_VERY)
+ fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log);
+}
+
+static void test_aux(const char *main_prog_name,
+ const char *to_be_replaced,
+ const char *replacement,
+ bool expect_load,
+ const char *err_msg)
+{
+ struct summarization_freplace *freplace = NULL;
+ struct bpf_program *freplace_prog = NULL;
+ struct bpf_program *main_prog = NULL;
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct summarization *main = NULL;
+ char log[16*1024];
+ int err;
+
+ opts.kernel_log_buf = log;
+ opts.kernel_log_size = sizeof(log);
+ if (env.verbosity >= VERBOSE_SUPER)
+ opts.kernel_log_level = 1 | 2 | 4;
+ main = summarization__open_opts(&opts);
+ if (!ASSERT_OK_PTR(main, "summarization__open"))
+ goto out;
+ main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name);
+ if (!ASSERT_OK_PTR(main_prog, "main_prog"))
+ goto out;
+ bpf_program__set_autoload(main_prog, true);
+ err = summarization__load(main);
+ print_verifier_log(log);
+ if (!ASSERT_OK(err, "summarization__load"))
+ goto out;
+ freplace = summarization_freplace__open_opts(&opts);
+ if (!ASSERT_OK_PTR(freplace, "summarization_freplace__open"))
+ goto out;
+ freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement);
+ if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog"))
+ goto out;
+ bpf_program__set_autoload(freplace_prog, true);
+ bpf_program__set_autoattach(freplace_prog, true);
+ bpf_program__set_attach_target(freplace_prog,
+ bpf_program__fd(main_prog),
+ to_be_replaced);
+ err = summarization_freplace__load(freplace);
+ print_verifier_log(log);
+
+ /* The might_sleep extension doesn't work yet because sleepable calls are
+ * not allowed in it, but preserve the check so that this particular
+ * combination can be enabled once such calls are supported.
+ */
+ if (!strcmp("might_sleep", replacement) && err) {
+ ASSERT_HAS_SUBSTR(log, "helper call might sleep in a non-sleepable prog", "error log");
+ ASSERT_EQ(err, -EINVAL, "err");
+ test__skip();
+ goto out;
+ }
+
+ if (expect_load) {
+ ASSERT_OK(err, "summarization_freplace__load");
+ } else {
+ ASSERT_ERR(err, "summarization_freplace__load");
+ ASSERT_HAS_SUBSTR(log, err_msg, "error log");
+ }
+
+out:
+ summarization_freplace__destroy(freplace);
+ summarization__destroy(main);
+}
+
+/* There are two global subprograms in both summarization.skel.h and
+ * summarization_freplace.skel.h:
+ * - one changes packet data;
+ * - another does not.
+ * It is ok to freplace subprograms that change packet data with those
+ * that either do or do not. It is only ok to freplace subprograms that
+ * do not change packet data with those that do not either.
+ * The tests below check the outcome for each combination of such freplace.
+ * Also test the case when the main subprogram itself is replaced and is
+ * the only subprogram in a program.
+ *
+ * The same rules hold for might_sleep programs: it is ok to replace
+ * might_sleep with might_sleep or with does_not_sleep, but does_not_sleep
+ * cannot be replaced with might_sleep.
+ */
+void test_summarization_freplace(void)
+{
+ struct {
+ const char *main;
+ const char *to_be_replaced;
+ bool has_side_effect;
+ } mains[2][4] = {
+ {
+ { "main_changes_with_subprogs", "changes_pkt_data", true },
+ { "main_changes_with_subprogs", "does_not_change_pkt_data", false },
+ { "main_changes", "main_changes", true },
+ { "main_does_not_change", "main_does_not_change", false },
+ },
+ {
+ { "main_might_sleep_with_subprogs", "might_sleep", true },
+ { "main_might_sleep_with_subprogs", "does_not_sleep", false },
+ { "main_might_sleep", "main_might_sleep", true },
+ { "main_does_not_sleep", "main_does_not_sleep", false },
+ },
+ };
+ const char *pkt_err = "Extension program changes packet data";
+ const char *slp_err = "Extension program may sleep";
+ struct {
+ const char *func;
+ bool has_side_effect;
+ const char *err_msg;
+ } replacements[2][2] = {
+ {
+ { "changes_pkt_data", true, pkt_err },
+ { "does_not_change_pkt_data", false, pkt_err },
+ },
+ {
+ { "might_sleep", true, slp_err },
+ { "does_not_sleep", false, slp_err },
+ },
+ };
+ char buf[64];
+
+ for (int t = 0; t < 2; t++) {
+ for (int i = 0; i < ARRAY_SIZE(mains[t]); ++i) {
+ for (int j = 0; j < ARRAY_SIZE(replacements[t]); ++j) {
+ snprintf(buf, sizeof(buf), "%s_with_%s",
+ mains[t][i].to_be_replaced, replacements[t][j].func);
+ if (!test__start_subtest(buf))
+ continue;
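+ /* Loading the extension succeeds unless a replacement with a
+ * side effect (changes packet data / may sleep) is attached to
+ * a slot whose original subprogram has no such side effect.
+ */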
+ test_aux(mains[t][i].main, mains[t][i].to_be_replaced, replacements[t][j].func,
+ mains[t][i].has_side_effect || !replacements[t][j].has_side_effect,
+ replacements[t][j].err_msg);
+ }
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 544144620ca6..66a900327f91 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1600,6 +1600,7 @@ static void test_tailcall_bpf2bpf_freplace(void)
goto out;
err = bpf_link__destroy(freplace_link);
+ freplace_link = NULL;
if (!ASSERT_OK(err, "destroy link"))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c
index 1af9ec1149aa..2186a24e7d8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_links.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c
@@ -13,7 +13,7 @@
#include "netlink_helpers.h"
#include "tc_helpers.h"
-void serial_test_tc_links_basic(void)
+void test_ns_tc_links_basic(void)
{
LIBBPF_OPTS(bpf_prog_query_opts, optq);
LIBBPF_OPTS(bpf_tcx_opts, optl);
@@ -260,7 +260,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_before(void)
+void test_ns_tc_links_before(void)
{
test_tc_links_before_target(BPF_TCX_INGRESS);
test_tc_links_before_target(BPF_TCX_EGRESS);
@@ -414,7 +414,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_after(void)
+void test_ns_tc_links_after(void)
{
test_tc_links_after_target(BPF_TCX_INGRESS);
test_tc_links_after_target(BPF_TCX_EGRESS);
@@ -514,7 +514,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_revision(void)
+void test_ns_tc_links_revision(void)
{
test_tc_links_revision_target(BPF_TCX_INGRESS);
test_tc_links_revision_target(BPF_TCX_EGRESS);
@@ -618,7 +618,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_chain_classic(void)
+void test_ns_tc_links_chain_classic(void)
{
test_tc_chain_classic(BPF_TCX_INGRESS, false);
test_tc_chain_classic(BPF_TCX_EGRESS, false);
@@ -846,7 +846,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_replace(void)
+void test_ns_tc_links_replace(void)
{
test_tc_links_replace_target(BPF_TCX_INGRESS);
test_tc_links_replace_target(BPF_TCX_EGRESS);
@@ -1158,7 +1158,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_invalid(void)
+void test_ns_tc_links_invalid(void)
{
test_tc_links_invalid_target(BPF_TCX_INGRESS);
test_tc_links_invalid_target(BPF_TCX_EGRESS);
@@ -1314,7 +1314,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_prepend(void)
+void test_ns_tc_links_prepend(void)
{
test_tc_links_prepend_target(BPF_TCX_INGRESS);
test_tc_links_prepend_target(BPF_TCX_EGRESS);
@@ -1470,7 +1470,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_append(void)
+void test_ns_tc_links_append(void)
{
test_tc_links_append_target(BPF_TCX_INGRESS);
test_tc_links_append_target(BPF_TCX_EGRESS);
@@ -1568,7 +1568,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_links_dev_cleanup(void)
+void test_ns_tc_links_dev_cleanup(void)
{
test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS);
test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS);
@@ -1672,7 +1672,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_links_chain_mixed(void)
+void test_ns_tc_links_chain_mixed(void)
{
test_tc_chain_mixed(BPF_TCX_INGRESS);
test_tc_chain_mixed(BPF_TCX_EGRESS);
@@ -1782,7 +1782,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_ingress(void)
+void test_ns_tc_links_ingress(void)
{
test_tc_links_ingress(BPF_TCX_INGRESS, true, true);
test_tc_links_ingress(BPF_TCX_INGRESS, true, false);
@@ -1823,7 +1823,7 @@ static int qdisc_replace(int ifindex, const char *kind, bool block)
return err;
}
-void serial_test_tc_links_dev_chain0(void)
+void test_ns_tc_links_dev_chain0(void)
{
int err, ifindex;
@@ -1955,7 +1955,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_links_dev_mixed(void)
+void test_ns_tc_links_dev_mixed(void)
{
test_tc_links_dev_mixed(BPF_TCX_INGRESS);
test_tc_links_dev_mixed(BPF_TCX_EGRESS);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
index f77f604389aa..dd7a138d8c3d 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -10,7 +10,7 @@
#include "test_tc_link.skel.h"
#include "tc_helpers.h"
-void serial_test_tc_opts_basic(void)
+void test_ns_tc_opts_basic(void)
{
LIBBPF_OPTS(bpf_prog_attach_opts, opta);
LIBBPF_OPTS(bpf_prog_detach_opts, optd);
@@ -254,7 +254,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_before(void)
+void test_ns_tc_opts_before(void)
{
test_tc_opts_before_target(BPF_TCX_INGRESS);
test_tc_opts_before_target(BPF_TCX_EGRESS);
@@ -445,7 +445,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_after(void)
+void test_ns_tc_opts_after(void)
{
test_tc_opts_after_target(BPF_TCX_INGRESS);
test_tc_opts_after_target(BPF_TCX_EGRESS);
@@ -554,7 +554,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_revision(void)
+void test_ns_tc_opts_revision(void)
{
test_tc_opts_revision_target(BPF_TCX_INGRESS);
test_tc_opts_revision_target(BPF_TCX_EGRESS);
@@ -655,7 +655,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_chain_classic(void)
+void test_ns_tc_opts_chain_classic(void)
{
test_tc_chain_classic(BPF_TCX_INGRESS, false);
test_tc_chain_classic(BPF_TCX_EGRESS, false);
@@ -864,7 +864,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_replace(void)
+void test_ns_tc_opts_replace(void)
{
test_tc_opts_replace_target(BPF_TCX_INGRESS);
test_tc_opts_replace_target(BPF_TCX_EGRESS);
@@ -1017,7 +1017,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_invalid(void)
+void test_ns_tc_opts_invalid(void)
{
test_tc_opts_invalid_target(BPF_TCX_INGRESS);
test_tc_opts_invalid_target(BPF_TCX_EGRESS);
@@ -1157,7 +1157,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_prepend(void)
+void test_ns_tc_opts_prepend(void)
{
test_tc_opts_prepend_target(BPF_TCX_INGRESS);
test_tc_opts_prepend_target(BPF_TCX_EGRESS);
@@ -1297,7 +1297,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_append(void)
+void test_ns_tc_opts_append(void)
{
test_tc_opts_append_target(BPF_TCX_INGRESS);
test_tc_opts_append_target(BPF_TCX_EGRESS);
@@ -1387,7 +1387,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_opts_dev_cleanup(void)
+void test_ns_tc_opts_dev_cleanup(void)
{
test_tc_opts_dev_cleanup_target(BPF_TCX_INGRESS);
test_tc_opts_dev_cleanup_target(BPF_TCX_EGRESS);
@@ -1563,7 +1563,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_mixed(void)
+void test_ns_tc_opts_mixed(void)
{
test_tc_opts_mixed_target(BPF_TCX_INGRESS);
test_tc_opts_mixed_target(BPF_TCX_EGRESS);
@@ -1642,7 +1642,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_demixed(void)
+void test_ns_tc_opts_demixed(void)
{
test_tc_opts_demixed_target(BPF_TCX_INGRESS);
test_tc_opts_demixed_target(BPF_TCX_EGRESS);
@@ -1813,7 +1813,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach(void)
+void test_ns_tc_opts_detach(void)
{
test_tc_opts_detach_target(BPF_TCX_INGRESS);
test_tc_opts_detach_target(BPF_TCX_EGRESS);
@@ -2020,7 +2020,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach_before(void)
+void test_ns_tc_opts_detach_before(void)
{
test_tc_opts_detach_before_target(BPF_TCX_INGRESS);
test_tc_opts_detach_before_target(BPF_TCX_EGRESS);
@@ -2236,7 +2236,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach_after(void)
+void test_ns_tc_opts_detach_after(void)
{
test_tc_opts_detach_after_target(BPF_TCX_INGRESS);
test_tc_opts_detach_after_target(BPF_TCX_EGRESS);
@@ -2265,7 +2265,7 @@ static void test_tc_opts_delete_empty(int target, bool chain_tc_old)
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_delete_empty(void)
+void test_ns_tc_opts_delete_empty(void)
{
test_tc_opts_delete_empty(BPF_TCX_INGRESS, false);
test_tc_opts_delete_empty(BPF_TCX_EGRESS, false);
@@ -2372,7 +2372,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_chain_mixed(void)
+void test_ns_tc_opts_chain_mixed(void)
{
test_tc_chain_mixed(BPF_TCX_INGRESS);
test_tc_chain_mixed(BPF_TCX_EGRESS);
@@ -2446,7 +2446,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_opts_max(void)
+void test_ns_tc_opts_max(void)
{
test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false);
test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false);
@@ -2748,7 +2748,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_query(void)
+void test_ns_tc_opts_query(void)
{
test_tc_opts_query_target(BPF_TCX_INGRESS);
test_tc_opts_query_target(BPF_TCX_EGRESS);
@@ -2807,7 +2807,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_query_attach(void)
+void test_ns_tc_opts_query_attach(void)
{
test_tc_opts_query_attach_target(BPF_TCX_INGRESS);
test_tc_opts_query_attach_target(BPF_TCX_EGRESS);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c
new file mode 100644
index 000000000000..467cc72a3588
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c
@@ -0,0 +1,16 @@
+#include <test_progs.h>
+
+#include "struct_ops_kptr_return.skel.h"
+#include "struct_ops_kptr_return_fail__wrong_type.skel.h"
+#include "struct_ops_kptr_return_fail__invalid_scalar.skel.h"
+#include "struct_ops_kptr_return_fail__nonzero_offset.skel.h"
+#include "struct_ops_kptr_return_fail__local_kptr.skel.h"
+
+void test_struct_ops_kptr_return(void)
+{
+ RUN_TESTS(struct_ops_kptr_return);
+ RUN_TESTS(struct_ops_kptr_return_fail__wrong_type);
+ RUN_TESTS(struct_ops_kptr_return_fail__invalid_scalar);
+ RUN_TESTS(struct_ops_kptr_return_fail__nonzero_offset);
+ RUN_TESTS(struct_ops_kptr_return_fail__local_kptr);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c
new file mode 100644
index 000000000000..da60c715fc59
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c
@@ -0,0 +1,14 @@
+#include <test_progs.h>
+
+#include "struct_ops_refcounted.skel.h"
+#include "struct_ops_refcounted_fail__ref_leak.skel.h"
+#include "struct_ops_refcounted_fail__global_subprog.skel.h"
+#include "struct_ops_refcounted_fail__tail_call.skel.h"
+
+void test_struct_ops_refcounted(void)
+{
+ RUN_TESTS(struct_ops_refcounted);
+ RUN_TESTS(struct_ops_refcounted_fail__ref_leak);
+ RUN_TESTS(struct_ops_refcounted_fail__global_subprog);
+ RUN_TESTS(struct_ops_refcounted_fail__tail_call);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index cec746e77cd3..bae0e9de277d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -71,6 +71,8 @@
#define IP4_ADDR2_VETH1 "172.16.1.20"
#define IP4_ADDR_TUNL_DEV0 "10.1.1.100"
#define IP4_ADDR_TUNL_DEV1 "10.1.1.200"
+#define IP6_ADDR_TUNL_DEV0 "fc80::100"
+#define IP6_ADDR_TUNL_DEV1 "fc80::200"
#define IP6_ADDR_VETH0 "::11"
#define IP6_ADDR1_VETH1 "::22"
@@ -98,6 +100,27 @@
#define XFRM_SPI_IN_TO_OUT 0x1
#define XFRM_SPI_OUT_TO_IN 0x2
+#define GRE_TUNL_DEV0 "gre00"
+#define GRE_TUNL_DEV1 "gre11"
+
+#define IP6GRE_TUNL_DEV0 "ip6gre00"
+#define IP6GRE_TUNL_DEV1 "ip6gre11"
+
+#define ERSPAN_TUNL_DEV0 "erspan00"
+#define ERSPAN_TUNL_DEV1 "erspan11"
+
+#define IP6ERSPAN_TUNL_DEV0 "ip6erspan00"
+#define IP6ERSPAN_TUNL_DEV1 "ip6erspan11"
+
+#define GENEVE_TUNL_DEV0 "geneve00"
+#define GENEVE_TUNL_DEV1 "geneve11"
+
+#define IP6GENEVE_TUNL_DEV0 "ip6geneve00"
+#define IP6GENEVE_TUNL_DEV1 "ip6geneve11"
+
+#define IP6TNL_TUNL_DEV0 "ip6tnl00"
+#define IP6TNL_TUNL_DEV1 "ip6tnl11"
+
#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
static int config_device(void)
@@ -216,6 +239,18 @@ fail:
return -1;
}
+static int set_ipv4_addr(const char *dev0, const char *dev1)
+{
+ SYS(fail, "ip -n at_ns0 link set dev %s up", dev0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip link set dev %s up", dev1);
+ SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1);
+
+ return 0;
+fail:
+ return 1;
+}
+
static int add_ipip_tunnel(enum ipip_encap encap)
{
int err;
@@ -356,6 +391,99 @@ static void delete_xfrm_tunnel(void)
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN);
}
+static int add_ipv4_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s",
+ dev0, type, opt, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s external", dev1, type);
+
+ return set_ipv4_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static void delete_tunnel(const char *dev0, const char *dev1)
+{
+ if (!dev0 || !dev1)
+ return;
+
+ SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", dev0);
+ SYS_NOFAIL("ip link delete dev %s", dev1);
+}
+
+static int set_ipv6_addr(const char *dev0, const char *dev1)
+{
+ /* disable IPv6 DAD because it might take too long and fail tests */
+ SYS(fail, "ip -n at_ns0 addr add %s/96 dev veth0 nodad", IP6_ADDR_VETH0);
+ SYS(fail, "ip -n at_ns0 link set dev veth0 up");
+ SYS(fail, "ip addr add %s/96 dev veth1 nodad", IP6_ADDR1_VETH1);
+ SYS(fail, "ip link set dev veth1 up");
+
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/96 nodad", dev0, IP6_ADDR_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 link set dev %s up", dev0);
+
+ SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/96 nodad", dev1, IP6_ADDR_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s up", dev1);
+ return 0;
+fail:
+ return 1;
+}
+
+static int add_ipv6_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s",
+ dev0, type, opt, IP6_ADDR_VETH0, IP6_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s external", dev1, type);
+
+ return set_ipv6_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static int add_geneve_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s id 2 %s remote %s",
+ dev0, type, opt, IP4_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt);
+
+ return set_ipv4_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static int add_ip6geneve_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s id 22 %s remote %s",
+ dev0, type, opt, IP6_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt);
+
+ return set_ipv6_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
static int test_ping(int family, const char *addr)
{
SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
@@ -364,32 +492,76 @@ fail:
return -1;
}
-static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
+static void ping_dev0(void)
{
+ /* ping from root namespace test */
+ test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+}
+
+static void ping_dev1(void)
+{
+ struct nstoken *nstoken;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ return;
+
+ test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
+}
+
+static void ping6_veth0(void)
+{
+ test_ping(AF_INET6, IP6_ADDR_VETH0);
+}
+
+static void ping6_dev0(void)
+{
+ test_ping(AF_INET6, IP6_ADDR_TUNL_DEV0);
+}
+
+static void ping6_dev1(void)
+{
+ struct nstoken *nstoken;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ return;
+
+ test_ping(AF_INET6, IP6_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
+}
+
+static int attach_tc_prog(int ifindex, int igr_fd, int egr_fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
.priority = 1, .prog_fd = igr_fd);
DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
.priority = 1, .prog_fd = egr_fd);
int ret;
- ret = bpf_tc_hook_create(hook);
+ ret = bpf_tc_hook_create(&hook);
if (!ASSERT_OK(ret, "create tc hook"))
return ret;
if (igr_fd >= 0) {
- hook->attach_point = BPF_TC_INGRESS;
- ret = bpf_tc_attach(hook, &opts1);
+ hook.attach_point = BPF_TC_INGRESS;
+ ret = bpf_tc_attach(&hook, &opts1);
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
- bpf_tc_hook_destroy(hook);
+ bpf_tc_hook_destroy(&hook);
return ret;
}
}
if (egr_fd >= 0) {
- hook->attach_point = BPF_TC_EGRESS;
- ret = bpf_tc_attach(hook, &opts2);
+ hook.attach_point = BPF_TC_EGRESS;
+ ret = bpf_tc_attach(&hook, &opts2);
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
- bpf_tc_hook_destroy(hook);
+ bpf_tc_hook_destroy(&hook);
return ret;
}
}
@@ -397,6 +569,50 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
return 0;
}
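+/* Helpers resolving a device name to an ifindex before attaching BPF
+ * programs to its tc hook: generic_attach() handles both directions,
+ * generic_attach_igr() attaches an ingress-only program and
+ * generic_attach_egr() an egress-only one.
+ */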
+static int generic_attach(const char *dev, int igr_fd, int egr_fd)
+{
+ int ifindex;
+
+ if (!ASSERT_OK_FD(igr_fd, "check ingress fd"))
+ return -1;
+ if (!ASSERT_OK_FD(egr_fd, "check egress fd"))
+ return -1;
+
+ ifindex = if_nametoindex(dev);
+ if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+ return -1;
+
+ return attach_tc_prog(ifindex, igr_fd, egr_fd);
+}
+
+static int generic_attach_igr(const char *dev, int igr_fd)
+{
+ int ifindex;
+
+ if (!ASSERT_OK_FD(igr_fd, "check ingress fd"))
+ return -1;
+
+ ifindex = if_nametoindex(dev);
+ if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+ return -1;
+
+ return attach_tc_prog(ifindex, igr_fd, -1);
+}
+
+static int generic_attach_egr(const char *dev, int egr_fd)
+{
+ int ifindex;
+
+ if (!ASSERT_OK_FD(egr_fd, "check egress fd"))
+ return -1;
+
+ ifindex = if_nametoindex(dev);
+ if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+ return -1;
+
+ return attach_tc_prog(ifindex, -1, egr_fd);
+}
+
static void test_vxlan_tunnel(void)
{
struct test_tunnel_kern *skel = NULL;
@@ -404,11 +620,9 @@ static void test_vxlan_tunnel(void)
int local_ip_map_fd = -1;
int set_src_prog_fd, get_src_prog_fd;
int set_dst_prog_fd;
- int key = 0, ifindex = -1;
+ int key = 0;
uint local_ip;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add vxlan tunnel */
err = add_vxlan_tunnel();
@@ -419,42 +633,22 @@ static void test_vxlan_tunnel(void)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(VXLAN_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src);
set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src);
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ if (generic_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
/* load and attach bpf prog to veth dev tc hook point */
- ifindex = if_nametoindex("veth1");
- if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1))
+ if (generic_attach_igr("veth1", set_dst_prog_fd))
goto done;
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
nstoken = open_netns("at_ns0");
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
- ifindex = if_nametoindex(VXLAN_TUNL_DEV0);
- if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+ if (generic_attach_egr(VXLAN_TUNL_DEV0, set_dst_prog_fd))
goto done;
close_netns(nstoken);
@@ -468,9 +662,7 @@ static void test_vxlan_tunnel(void)
goto done;
/* ping test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev0();
done:
/* delete vxlan tunnel */
@@ -488,11 +680,9 @@ static void test_ip6vxlan_tunnel(void)
int local_ip_map_fd = -1;
int set_src_prog_fd, get_src_prog_fd;
int set_dst_prog_fd;
- int key = 0, ifindex = -1;
+ int key = 0;
uint local_ip;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add vxlan tunnel */
err = add_ip6vxlan_tunnel();
@@ -503,31 +693,17 @@ static void test_ip6vxlan_tunnel(void)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src);
set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src);
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ if (generic_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
nstoken = open_netns("at_ns0");
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
- ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0);
- if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+ if (generic_attach_egr(IP6VXLAN_TUNL_DEV0, set_dst_prog_fd))
goto done;
close_netns(nstoken);
@@ -541,9 +717,7 @@ static void test_ip6vxlan_tunnel(void)
goto done;
/* ping test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev0();
done:
/* delete ipv6 vxlan tunnel */
@@ -557,12 +731,8 @@ done:
static void test_ipip_tunnel(enum ipip_encap encap)
{
struct test_tunnel_kern *skel = NULL;
- struct nstoken *nstoken;
int set_src_prog_fd, get_src_prog_fd;
- int ifindex = -1;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add ipip tunnel */
err = add_ipip_tunnel(encap);
@@ -573,10 +743,6 @@ static void test_ipip_tunnel(enum ipip_encap encap)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(IPIP_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "ipip11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
switch (encap) {
case FOU:
@@ -598,26 +764,11 @@ static void test_ipip_tunnel(enum ipip_encap encap)
skel->progs.ipip_set_tunnel);
}
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ if (generic_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
- /* ping from root namespace test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
-
- /* ping from at_ns0 namespace test */
- nstoken = open_netns("at_ns0");
- if (!ASSERT_OK_PTR(nstoken, "setns"))
- goto done;
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
- close_netns(nstoken);
+ ping_dev0();
+ ping_dev1();
done:
/* delete ipip tunnel */
@@ -628,11 +779,8 @@ done:
static void test_xfrm_tunnel(void)
{
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
struct test_tunnel_kern *skel = NULL;
- struct nstoken *nstoken;
int xdp_prog_fd;
int tc_prog_fd;
int ifindex;
@@ -646,19 +794,16 @@ static void test_xfrm_tunnel(void)
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex("veth1");
- if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
- goto done;
/* attach tc prog to tunnel dev */
- tc_hook.ifindex = ifindex;
tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state);
- if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, tc_prog_fd, -1))
+ if (generic_attach_igr("veth1", tc_prog_fd))
goto done;
/* attach xdp prog to tunnel dev */
+ ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
+ goto done;
xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp);
if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd"))
goto done;
@@ -666,14 +811,7 @@ static void test_xfrm_tunnel(void)
if (!ASSERT_OK(err, "bpf_xdp_attach"))
goto done;
- /* ping from at_ns0 namespace test */
- nstoken = open_netns("at_ns0");
- if (!ASSERT_OK_PTR(nstoken, "setns"))
- goto done;
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
- close_netns(nstoken);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev1();
if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id"))
goto done;
@@ -690,6 +828,281 @@ done:
test_tunnel_kern__destroy(skel);
}
+enum gre_test {
+ GRE,
+ GRE_NOKEY,
+ GRETAP,
+ GRETAP_NOKEY,
+};
+
+static void test_gre_tunnel(enum gre_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
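+ /* dev0 in at_ns0 is a classic gre device configured with the options
+ * below, while dev1 is created with "external" (collect_md), so the
+ * tunnel key set by the BPF program on dev1 has to match dev0's
+ * configuration for the pings to go through.
+ */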
+ switch (test) {
+ case GRE:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq key 2");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRE_NOKEY:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRETAP:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq key 2");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRETAP_NOKEY:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ if (generic_attach(GRE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+
+done:
+ delete_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum ip6gre_test {
+ IP6GRE,
+ IP6GRETAP
+};
+
+static void test_ip6gre_tunnel(enum ip6gre_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case IP6GRE:
+ err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1,
+ "ip6gre", "flowlabel 0xbcdef key 2");
+ break;
+ case IP6GRETAP:
+ err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1,
+ "ip6gretap", "flowlabel 0xbcdef key 2");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel);
+ if (generic_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ ping6_dev1();
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum erspan_test {
+ V1,
+ V2
+};
+
+static void test_erspan_tunnel(enum erspan_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case V1:
+ err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1,
+ "erspan", "seq key 2 erspan_ver 1 erspan 123");
+ break;
+ case V2:
+ err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1,
+ "erspan",
+ "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 3");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel);
+ if (generic_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6erspan_tunnel(enum erspan_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case V1:
+ err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1,
+ "ip6erspan", "seq key 2 erspan_ver 1 erspan 123");
+ break;
+ case V2:
+ err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1,
+ "ip6erspan",
+ "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 7");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel);
+ if (generic_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_geneve_tunnel(void)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_geneve_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1,
+ "geneve", "dstport 6081");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel);
+ if (generic_attach(GENEVE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6geneve_tunnel(void)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_ip6geneve_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1,
+ "geneve", "");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel);
+ if (generic_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum ip6tnl_test {
+ IPIP6,
+ IP6IP6
+};
+
+static void test_ip6tnl_tunnel(enum ip6tnl_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_ipv6_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1, "ip6tnl", "");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ switch (test) {
+ case IPIP6:
+ set_fd = bpf_program__fd(skel->progs.ipip6_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ipip6_get_tunnel);
+ break;
+ case IP6IP6:
+ set_fd = bpf_program__fd(skel->progs.ip6ip6_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel);
+ break;
+ }
+ if (generic_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ switch (test) {
+ case IPIP6:
+ ping_dev0();
+ ping_dev1();
+ break;
+ case IP6IP6:
+ ping6_dev0();
+ ping6_dev1();
+ break;
+ }
+
+done:
+ delete_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
#define RUN_TEST(name, ...) \
({ \
if (test__start_subtest(#name)) { \
@@ -707,6 +1120,20 @@ static void *test_tunnel_run_tests(void *arg)
RUN_TEST(ipip_tunnel, FOU);
RUN_TEST(ipip_tunnel, GUE);
RUN_TEST(xfrm_tunnel);
+ RUN_TEST(gre_tunnel, GRE);
+ RUN_TEST(gre_tunnel, GRE_NOKEY);
+ RUN_TEST(gre_tunnel, GRETAP);
+ RUN_TEST(gre_tunnel, GRETAP_NOKEY);
+ RUN_TEST(ip6gre_tunnel, IP6GRE);
+ RUN_TEST(ip6gre_tunnel, IP6GRETAP);
+ RUN_TEST(erspan_tunnel, V1);
+ RUN_TEST(erspan_tunnel, V2);
+ RUN_TEST(ip6erspan_tunnel, V1);
+ RUN_TEST(ip6erspan_tunnel, V2);
+ RUN_TEST(geneve_tunnel);
+ RUN_TEST(ip6geneve_tunnel);
+ RUN_TEST(ip6tnl_tunnel, IPIP6);
+ RUN_TEST(ip6tnl_tunnel, IP6IP6);
return NULL;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
new file mode 100644
index 000000000000..a95b42bf744a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+
+#define __CHECK_STR(str, name) \
+ do { \
+ if (!ASSERT_HAS_SUBSTR(fix->output, (str), (name))) \
+ goto out; \
+ } while (0)
+
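+/* Each test spawns veristat through SYS(), redirects its output to a unique
+ * temporary file and then searches that file for the expected verifier log
+ * lines.
+ */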
+struct fixture {
+ char tmpfile[80];
+ int fd;
+ char *output;
+ size_t sz;
+ char veristat[80];
+};
+
+static struct fixture *init_fixture(void)
+{
+ struct fixture *fix = malloc(sizeof(struct fixture));
+
+ /* for no_alu32 and cpuv4 veristat is in parent folder */
+ if (access("./veristat", F_OK) == 0)
+ strcpy(fix->veristat, "./veristat");
+ else if (access("../veristat", F_OK) == 0)
+ strcpy(fix->veristat, "../veristat");
+ else
+ PRINT_FAIL("Can't find veristat binary");
+
+ snprintf(fix->tmpfile, sizeof(fix->tmpfile), "/tmp/test_veristat.XXXXXX");
+ fix->fd = mkstemp(fix->tmpfile);
+ fix->sz = 1000000;
+ /* zero-fill so the captured output is always NUL-terminated for strstr() */
+ fix->output = calloc(1, fix->sz);
+ return fix;
+}
+
+static void teardown_fixture(struct fixture *fix)
+{
+ free(fix->output);
+ close(fix->fd);
+ remove(fix->tmpfile);
+ free(fix);
+}
+
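+/* veristat's -G option overrides the named global variables before
+ * verification; with -vl2 the verifier log shows the written values as
+ * "_w=<value>", which is what the checks below look for.
+ */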
+static void test_set_global_vars_succeeds(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS(out,
+ "%s set_global_vars.bpf.o"\
+ " -G \"var_s64 = 0xf000000000000001\" "\
+ " -G \"var_u64 = 0xfedcba9876543210\" "\
+ " -G \"var_s32 = -0x80000000\" "\
+ " -G \"var_u32 = 0x76543210\" "\
+ " -G \"var_s16 = -32768\" "\
+ " -G \"var_u16 = 60652\" "\
+ " -G \"var_s8 = -128\" "\
+ " -G \"var_u8 = 255\" "\
+ " -G \"var_ea = EA2\" "\
+ " -G \"var_eb = EB2\" "\
+ " -G \"var_ec = EC2\" "\
+ " -G \"var_b = 1\" "\
+ "-vl2 > %s", fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001");
+ __CHECK_STR("_w=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210");
+ __CHECK_STR("_w=0x80000000 ", "var_s32 = -0x80000000");
+ __CHECK_STR("_w=0x76543210 ", "var_u32 = 0x76543210");
+ __CHECK_STR("_w=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("_w=0xecec ", "var_u16 = 60652");
+ __CHECK_STR("_w=128 ", "var_s8 = -128");
+ __CHECK_STR("_w=255 ", "var_u8 = 255");
+ __CHECK_STR("_w=11 ", "var_ea = EA2");
+ __CHECK_STR("_w=12 ", "var_eb = EB2");
+ __CHECK_STR("_w=13 ", "var_ec = EC2");
+ __CHECK_STR("_w=1 ", "var_b = 1");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_set_global_vars_from_file_succeeds(void)
+{
+ struct fixture *fix = init_fixture();
+ char input_file[80];
+ const char *vars = "var_s16 = -32768\nvar_u16 = 60652";
+ int fd;
+
+ snprintf(input_file, sizeof(input_file), "/tmp/veristat_input.XXXXXX");
+ fd = mkstemp(input_file);
+ if (!ASSERT_GE(fd, 0, "valid fd"))
+ goto out;
+
+ write(fd, vars, strlen(vars));
+ syncfs(fd);
+ SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s",
+ fix->veristat, input_file, fix->tmpfile);
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("_w=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("_w=0xecec ", "var_u16 = 60652");
+
+out:
+ close(fd);
+ remove(input_file);
+ teardown_fixture(fix);
+}
+
+static void test_set_global_vars_out_of_range(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"var_s32 = 2147483648\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("is out of range [-2147483648; 2147483647]", "out of range");
+
+out:
+ teardown_fixture(fix);
+}
+
+void test_veristat(void)
+{
+ if (test__start_subtest("set_global_vars_succeeds"))
+ test_set_global_vars_succeeds();
+
+ if (test__start_subtest("set_global_vars_out_of_range"))
+ test_set_global_vars_out_of_range();
+
+ if (test__start_subtest("set_global_vars_from_file_succeeds"))
+ test_set_global_vars_from_file_succeeds();
+}
+
+#undef __CHECK_STR
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
index 8d75424fe6bc..3e98a1665936 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
@@ -3,17 +3,50 @@
/* Create 3 namespaces with 3 veth peers, and forward packets in-between using
* native XDP
*
- * XDP_TX
- * NS1(veth11) NS2(veth22) NS3(veth33)
- * | | |
- * | | |
- * (veth1, (veth2, (veth3,
- * id:111) id:122) id:133)
- * ^ | ^ | ^ |
- * | | XDP_REDIRECT | | XDP_REDIRECT | |
- * | ------------------ ------------------ |
- * -----------------------------------------
- * XDP_REDIRECT
+ * Network topology:
+ * ---------- ---------- ----------
+ * | NS1 | | NS2 | | NS3 |
+ * | veth11 | | veth22 | | veth33 |
+ * ----|----- -----|---- -----|----
+ * | | |
+ * ----|------------------|----------------|----
+ * | veth1 veth2 veth3 |
+ * | |
+ * | NS0 |
+ * ---------------------------------------------
+ *
+ * Test cases:
+ * - [test_xdp_veth_redirect] : ping veth33 from veth11
+ *
+ * veth11 veth22 veth33
+ * (XDP_PASS) (XDP_TX) (XDP_PASS)
+ * | | |
+ * | | |
+ * veth1 veth2 veth3
+ * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT)
+ * ^ | ^ | ^ |
+ * | | | | | |
+ * | ------------------ ------------------ |
+ * -----------------------------------------
+ *
+ * - [test_xdp_veth_broadcast_redirect]: broadcast from veth11
+ * - IPv4 ping : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS
+ * -> echo request received by all except veth11
+ * - IPv4 ping : BPF_F_BROADCAST
+ * -> echo request received by all veth
+ * - [test_xdp_veth_egress]:
+ * - all src mac should be the magic mac
+ *
+ * veth11 veth22 veth33
+ * (XDP_PASS) (XDP_PASS) (XDP_PASS)
+ * | | |
+ * | | |
+ * veth1 veth2 veth3
+ * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT)
+ * | ^ ^
+ * | | |
+ * ----------------------------------------
+ *
*/
#define _GNU_SOURCE
@@ -22,192 +55,545 @@
#include "network_helpers.h"
#include "xdp_dummy.skel.h"
#include "xdp_redirect_map.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
#include "xdp_tx.skel.h"
+#include <uapi/linux/if_link.h>
#define VETH_PAIRS_COUNT 3
-#define NS_SUFFIX_LEN 6
-#define VETH_NAME_MAX_LEN 16
+#define VETH_NAME_MAX_LEN 32
+#define IP_MAX_LEN 16
#define IP_SRC "10.1.1.11"
#define IP_DST "10.1.1.33"
-#define IP_CMD_MAX_LEN 128
-
-struct skeletons {
- struct xdp_dummy *xdp_dummy;
- struct xdp_tx *xdp_tx;
- struct xdp_redirect_map *xdp_redirect_maps;
-};
+#define IP_NEIGH "10.1.1.253"
+#define PROG_NAME_MAX_LEN 128
+#define NS_NAME_MAX_LEN 32
struct veth_configuration {
char local_veth[VETH_NAME_MAX_LEN]; /* Interface in main namespace */
char remote_veth[VETH_NAME_MAX_LEN]; /* Peer interface in dedicated namespace*/
- const char *namespace; /* Namespace for the remote veth */
- char next_veth[VETH_NAME_MAX_LEN]; /* Local interface to redirect traffic to */
- char *remote_addr; /* IP address of the remote veth */
+ char namespace[NS_NAME_MAX_LEN]; /* Namespace for the remote veth */
+ int next_veth; /* Local interface to redirect traffic to */
+ char remote_addr[IP_MAX_LEN]; /* IP address of the remote veth */
};
-static struct veth_configuration config[VETH_PAIRS_COUNT] = {
- {
- .local_veth = "veth1",
- .remote_veth = "veth11",
- .next_veth = "veth2",
- .remote_addr = IP_SRC,
- .namespace = "ns-veth11"
- },
- {
- .local_veth = "veth2",
- .remote_veth = "veth22",
- .next_veth = "veth3",
- .remote_addr = NULL,
- .namespace = "ns-veth22"
- },
+struct net_configuration {
+ char ns0_name[NS_NAME_MAX_LEN];
+ struct veth_configuration veth_cfg[VETH_PAIRS_COUNT];
+};
+
+static const struct net_configuration default_config = {
+ .ns0_name = "ns0-",
{
- .local_veth = "veth3",
- .remote_veth = "veth33",
- .next_veth = "veth1",
- .remote_addr = IP_DST,
- .namespace = "ns-veth33"
+ {
+ .local_veth = "veth1-",
+ .remote_veth = "veth11",
+ .next_veth = 1,
+ .remote_addr = IP_SRC,
+ .namespace = "ns-veth11-"
+ },
+ {
+ .local_veth = "veth2-",
+ .remote_veth = "veth22",
+ .next_veth = 2,
+ .remote_addr = "",
+ .namespace = "ns-veth22-"
+ },
+ {
+ .local_veth = "veth3-",
+ .remote_veth = "veth33",
+ .next_veth = 0,
+ .remote_addr = IP_DST,
+ .namespace = "ns-veth33-"
+ }
}
};
-static int attach_programs_to_veth_pair(struct skeletons *skeletons, int index)
+struct prog_configuration {
+ char local_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to local_veth */
+ char remote_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to remote_veth */
+ u32 local_flags; /* XDP flags to use on local_veth */
+ u32 remote_flags; /* XDP flags to use on remote_veth */
+};
+
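+/* Look up the configured local and remote programs by name across all
+ * provided BPF objects, then attach them to both ends of the veth pair with
+ * the requested XDP flags.
+ */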
+static int attach_programs_to_veth_pair(struct bpf_object **objs, size_t nb_obj,
+ struct net_configuration *net_config,
+ struct prog_configuration *prog, int index)
{
struct bpf_program *local_prog, *remote_prog;
- struct bpf_link **local_link, **remote_link;
struct nstoken *nstoken;
- struct bpf_link *link;
- int interface;
-
- switch (index) {
- case 0:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_0;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_0;
- remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog;
- remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog;
- break;
- case 1:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_1;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_1;
- remote_prog = skeletons->xdp_tx->progs.xdp_tx;
- remote_link = &skeletons->xdp_tx->links.xdp_tx;
- break;
- case 2:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_2;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_2;
- remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog;
- remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog;
- break;
+ int interface, ret, i;
+
+ for (i = 0; i < nb_obj; i++) {
+ local_prog = bpf_object__find_program_by_name(objs[i], prog[index].local_name);
+ if (local_prog)
+ break;
}
- interface = if_nametoindex(config[index].local_veth);
+ if (!ASSERT_OK_PTR(local_prog, "find local program"))
+ return -1;
+
+ for (i = 0; i < nb_obj; i++) {
+ remote_prog = bpf_object__find_program_by_name(objs[i], prog[index].remote_name);
+ if (remote_prog)
+ break;
+ }
+ if (!ASSERT_OK_PTR(remote_prog, "find remote program"))
+ return -1;
+
+ interface = if_nametoindex(net_config->veth_cfg[index].local_veth);
if (!ASSERT_NEQ(interface, 0, "non zero interface index"))
return -1;
- link = bpf_program__attach_xdp(local_prog, interface);
- if (!ASSERT_OK_PTR(link, "attach xdp program to local veth"))
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(local_prog),
+ prog[index].local_flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp program to local veth"))
return -1;
- *local_link = link;
- nstoken = open_netns(config[index].namespace);
+
+ nstoken = open_netns(net_config->veth_cfg[index].namespace);
if (!ASSERT_OK_PTR(nstoken, "switch to remote veth namespace"))
return -1;
- interface = if_nametoindex(config[index].remote_veth);
+
+ interface = if_nametoindex(net_config->veth_cfg[index].remote_veth);
if (!ASSERT_NEQ(interface, 0, "non zero interface index")) {
close_netns(nstoken);
return -1;
}
- link = bpf_program__attach_xdp(remote_prog, interface);
- *remote_link = link;
- close_netns(nstoken);
- if (!ASSERT_OK_PTR(link, "attach xdp program to remote veth"))
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(remote_prog),
+ prog[index].remote_flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp program to remote veth")) {
+ close_netns(nstoken);
return -1;
+ }
+ close_netns(nstoken);
return 0;
}
-static int configure_network(struct skeletons *skeletons)
+static int create_network(struct net_configuration *net_config)
{
- int interface_id;
- int map_fd;
- int err;
- int i = 0;
+ struct nstoken *nstoken = NULL;
+ int i, err;
+
+ memcpy(net_config, &default_config, sizeof(struct net_configuration));
+
+ /* Create unique namespaces */
+ err = append_tid(net_config->ns0_name, NS_NAME_MAX_LEN);
+ if (!ASSERT_OK(err, "append TID to ns0 name"))
+ goto fail;
+ SYS(fail, "ip netns add %s", net_config->ns0_name);
- /* First create and configure all interfaces */
for (i = 0; i < VETH_PAIRS_COUNT; i++) {
- SYS(fail, "ip netns add %s", config[i].namespace);
- SYS(fail, "ip link add %s type veth peer name %s netns %s",
- config[i].local_veth, config[i].remote_veth, config[i].namespace);
- SYS(fail, "ip link set dev %s up", config[i].local_veth);
- if (config[i].remote_addr)
- SYS(fail, "ip -n %s addr add %s/24 dev %s", config[i].namespace,
- config[i].remote_addr, config[i].remote_veth);
- SYS(fail, "ip -n %s link set dev %s up", config[i].namespace,
- config[i].remote_veth);
+ err = append_tid(net_config->veth_cfg[i].namespace, NS_NAME_MAX_LEN);
+ if (!ASSERT_OK(err, "append TID to ns name"))
+ goto fail;
+ SYS(fail, "ip netns add %s", net_config->veth_cfg[i].namespace);
}
- /* Then configure the redirect map and attach programs to interfaces */
- map_fd = bpf_map__fd(skeletons->xdp_redirect_maps->maps.tx_port);
- if (!ASSERT_GE(map_fd, 0, "open redirect map"))
+ /* Create interfaces */
+ nstoken = open_netns(net_config->ns0_name);
+ if (!nstoken)
goto fail;
+
for (i = 0; i < VETH_PAIRS_COUNT; i++) {
- interface_id = if_nametoindex(config[i].next_veth);
- if (!ASSERT_NEQ(interface_id, 0, "non zero interface index"))
- goto fail;
- err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY);
- if (!ASSERT_OK(err, "configure interface redirection through map"))
- goto fail;
- if (attach_programs_to_veth_pair(skeletons, i))
- goto fail;
+ SYS(fail, "ip link add %s type veth peer name %s netns %s",
+ net_config->veth_cfg[i].local_veth, net_config->veth_cfg[i].remote_veth,
+ net_config->veth_cfg[i].namespace);
+ SYS(fail, "ip link set dev %s up", net_config->veth_cfg[i].local_veth);
+ if (net_config->veth_cfg[i].remote_addr[0])
+ SYS(fail, "ip -n %s addr add %s/24 dev %s",
+ net_config->veth_cfg[i].namespace,
+ net_config->veth_cfg[i].remote_addr,
+ net_config->veth_cfg[i].remote_veth);
+ SYS(fail, "ip -n %s link set dev %s up", net_config->veth_cfg[i].namespace,
+ net_config->veth_cfg[i].remote_veth);
}
+ close_netns(nstoken);
return 0;
fail:
+ close_netns(nstoken);
return -1;
}
-static void cleanup_network(void)
+static void cleanup_network(struct net_configuration *net_config)
{
int i;
- /* Deleting namespaces is enough to automatically remove veth pairs as well
- */
+ SYS_NOFAIL("ip netns del %s", net_config->ns0_name);
for (i = 0; i < VETH_PAIRS_COUNT; i++)
- SYS_NOFAIL("ip netns del %s", config[i].namespace);
+ SYS_NOFAIL("ip netns del %s", net_config->veth_cfg[i].namespace);
}
-static int check_ping(struct skeletons *skeletons)
+#define VETH_REDIRECT_SKEL_NB 3
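+/* Create the veth chain, program the tx_port map so each local veth redirects
+ * to the next one, attach the XDP programs and check that a ping from the
+ * first remote namespace reaches IP_DST.
+ */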
+static void xdp_veth_redirect(u32 flags)
{
+ struct prog_configuration ping_config[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_0",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_1",
+ .remote_name = "xdp_tx",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_2",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ struct bpf_object *bpf_objs[VETH_REDIRECT_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct net_configuration net_config;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ struct xdp_tx *xdp_tx;
+ int map_fd;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_tx = xdp_tx__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_tx, "xdp_tx__open_and_load"))
+ goto destroy_xdp_dummy;
+
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_tx;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ /* Then configure the redirect map and attach programs to interfaces */
+ map_fd = bpf_map__fd(xdp_redirect_map->maps.tx_port);
+ if (!ASSERT_OK_FD(map_fd, "open redirect map"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_tx->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int next_veth = net_config.veth_cfg[i].next_veth;
+ int interface_id;
+ int err;
+
+ interface_id = if_nametoindex(net_config.veth_cfg[next_veth].local_veth);
+ if (!ASSERT_NEQ(interface_id, 0, "non zero interface index"))
+ goto destroy_xdp_redirect_map;
+ err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY);
+ if (!ASSERT_OK(err, "configure interface redirection through map"))
+ goto destroy_xdp_redirect_map;
+ if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB,
+ &net_config, ping_config, i))
+ goto destroy_xdp_redirect_map;
+ }
+
/* Test: if all interfaces are properly configured, we must be able to ping
* veth33 from veth11
*/
- return SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null",
- config[0].namespace, IP_DST);
+ ASSERT_OK(SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null",
+ net_config.veth_cfg[0].namespace, IP_DST), "ping");
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_tx:
+ xdp_tx__destroy(xdp_tx);
+destroy_xdp_dummy:
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
}
-void test_xdp_veth_redirect(void)
+#define BROADCAST_REDIRECT_SKEL_NB 2
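+/* Broadcast ICMP echo requests through the map_all devmap and check, via the
+ * rxcnt map, which remote veths received them: all of them, or all but the
+ * ingress one when BPF_F_EXCLUDE_INGRESS is set.
+ */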
+static void xdp_veth_broadcast_redirect(u32 attach_flags, u64 redirect_flags)
{
- struct skeletons skeletons = {};
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_0",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_1",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_2",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ }
+ };
+ struct bpf_object *bpf_objs[BROADCAST_REDIRECT_SKEL_NB];
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct bpf_devmap_val devmap_val = {};
+ struct net_configuration net_config;
+ struct nstoken *nstoken = NULL;
+ u16 protocol = ETH_P_IP;
+ int group_map;
+ int flags_map;
+ int cnt_map;
+ u64 cnt = 0;
+ int i, err;
- skeletons.xdp_dummy = xdp_dummy__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
return;
- skeletons.xdp_tx = xdp_tx__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ group_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_all);
+ if (!ASSERT_OK_FD(group_map, "open map_all"))
+ goto destroy_xdp_redirect_map;
+
+ flags_map = bpf_map__fd(xdp_redirect_multi_kern->maps.redirect_flags);
+	if (!ASSERT_OK_FD(flags_map, "open redirect_flags map"))
+ goto destroy_xdp_redirect_map;
+
+ err = bpf_map_update_elem(flags_map, &protocol, &redirect_flags, BPF_NOEXIST);
+	if (!ASSERT_OK(err, "init redirect flags"))
+ goto destroy_xdp_redirect_map;
+
+ cnt_map = bpf_map__fd(xdp_redirect_map->maps.rxcnt);
+ if (!ASSERT_OK_FD(cnt_map, "open rxcnt map"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_redirect_multi_kern->obj;
+ bpf_objs[1] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+
+ if (attach_programs_to_veth_pair(bpf_objs, BROADCAST_REDIRECT_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth);
+
+ devmap_val.ifindex = ifindex;
+ err = bpf_map_update_elem(group_map, &ifindex, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+
+ }
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ err = bpf_map_lookup_elem(cnt_map, &i, &cnt);
+ if (!ASSERT_OK(err, "get IP cnt"))
+ goto destroy_xdp_redirect_map;
+
+ if (redirect_flags & BPF_F_EXCLUDE_INGRESS)
+ /* veth11 shouldn't receive the ICMP requests;
+ * others should
+ */
+ ASSERT_EQ(cnt, i ? 4 : 0, "compare IP cnt");
+ else
+ /* All remote veth should receive the ICMP requests */
+ ASSERT_EQ(cnt, 4, "compare IP cnt");
+ }
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
+
+ cleanup_network(&net_config);
+}
+
+#define VETH_EGRESS_SKEL_NB 3
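+/* Broadcast pings through map_egress with a devmap egress program that sets
+ * the Ethernet source MAC from mac_map, then check via the rx_mac map that
+ * the remote veths observed the magic MAC.
+ */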
+static void xdp_veth_egress(u32 flags)
+{
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_1",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_2",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF};
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct bpf_devmap_val devmap_val = {};
+ struct net_configuration net_config;
+ int mac_map, egress_map, res_map;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ int err;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
goto destroy_xdp_dummy;
- skeletons.xdp_redirect_maps = xdp_redirect_map__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_redirect_maps, "xdp_redirect_map__open_and_load"))
- goto destroy_xdp_tx;
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
- if (configure_network(&skeletons))
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
goto destroy_xdp_redirect_map;
- ASSERT_OK(check_ping(&skeletons), "ping");
+ mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map);
+ if (!ASSERT_OK_FD(mac_map, "open mac_map"))
+ goto destroy_xdp_redirect_map;
+
+ egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress);
+ if (!ASSERT_OK_FD(egress_map, "open map_egress"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.bpf_prog.fd = bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog);
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_redirect_multi_kern->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth);
+
+		if (attach_programs_to_veth_pair(bpf_objs, VETH_EGRESS_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.ifindex = ifindex;
+ err = bpf_map_update_elem(egress_map, &ifindex, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+ }
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac);
+ if (!ASSERT_OK_FD(res_map, "open rx_map"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < 2; i++) {
+ u32 key = i;
+ u64 res;
+
+ err = bpf_map_lookup_elem(res_map, &key, &res);
+ if (!ASSERT_OK(err, "get MAC res"))
+ goto destroy_xdp_redirect_map;
+
+ ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac");
+ }
destroy_xdp_redirect_map:
- xdp_redirect_map__destroy(skeletons.xdp_redirect_maps);
-destroy_xdp_tx:
- xdp_tx__destroy(skeletons.xdp_tx);
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
destroy_xdp_dummy:
- xdp_dummy__destroy(skeletons.xdp_dummy);
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
+}
+
+void test_xdp_veth_redirect(void)
+{
+ if (test__start_subtest("0"))
+ xdp_veth_redirect(0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_veth_redirect(XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_veth_redirect(XDP_FLAGS_SKB_MODE);
+}
+
+void test_xdp_veth_broadcast_redirect(void)
+{
+ if (test__start_subtest("0/BROADCAST"))
+ xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST);
+
+ if (test__start_subtest("0/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+
+ if (test__start_subtest("DRV_MODE/BROADCAST"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE, BPF_F_BROADCAST);
+
+ if (test__start_subtest("DRV_MODE/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+
+ if (test__start_subtest("SKB_MODE/BROADCAST"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE, BPF_F_BROADCAST);
+
+ if (test__start_subtest("SKB_MODE/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+void test_xdp_veth_egress(void)
+{
+ if (test__start_subtest("0/egress"))
+ xdp_veth_egress(0);
+
+ if (test__start_subtest("DRV_MODE/egress"))
+ xdp_veth_egress(XDP_FLAGS_DRV_MODE);
- cleanup_network();
+ if (test__start_subtest("SKB_MODE/egress"))
+ xdp_veth_egress(XDP_FLAGS_SKB_MODE);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c
index c3ab9b6fb069..f9392df23f8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/token.c
+++ b/tools/testing/selftests/bpf/prog_tests/token.c
@@ -19,6 +19,7 @@
#include "priv_prog.skel.h"
#include "dummy_st_ops_success.skel.h"
#include "token_lsm.skel.h"
+#include "priv_freplace_prog.skel.h"
static inline int sys_mount(const char *dev_name, const char *dir_name,
const char *type, unsigned long flags,
@@ -788,6 +789,84 @@ static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel)
return 0;
}
+static int userns_obj_priv_freplace_setup(int mnt_fd, struct priv_freplace_prog **fr_skel,
+ struct priv_prog **skel, int *tgt_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ int err;
+ char buf[256];
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ *skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(*skel, "priv_prog__open_opts"))
+ return -EINVAL;
+ err = priv_prog__load(*skel);
+ if (!ASSERT_OK(err, "priv_prog__load"))
+ return -EINVAL;
+
+ *fr_skel = priv_freplace_prog__open_opts(&opts);
+	if (!ASSERT_OK_PTR(*fr_skel, "priv_freplace_prog__open_opts"))
+ return -EINVAL;
+
+ *tgt_fd = bpf_program__fd((*skel)->progs.xdp_prog1);
+ return 0;
+}
+
+/* Verify that freplace works from a user namespace, because the bpf token is
+ * loaded in bpf_object__prepare()
+ */
+static int userns_obj_priv_freplace_prog(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ struct priv_freplace_prog *fr_skel = NULL;
+ struct priv_prog *skel = NULL;
+ int err, tgt_fd;
+
+ err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd);
+ if (!ASSERT_OK(err, "setup"))
+ goto out;
+
+ err = bpf_object__prepare(fr_skel->obj);
+ if (!ASSERT_OK(err, "freplace__prepare"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1");
+ if (!ASSERT_OK(err, "set_attach_target"))
+ goto out;
+
+ err = priv_freplace_prog__load(fr_skel);
+ ASSERT_OK(err, "priv_freplace_prog__load");
+
+out:
+ priv_freplace_prog__destroy(fr_skel);
+ priv_prog__destroy(skel);
+ return err;
+}
+
+/* Verify that freplace fails to set the attach target from a user namespace without a bpf token */
+static int userns_obj_priv_freplace_prog_fail(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ struct priv_freplace_prog *fr_skel = NULL;
+ struct priv_prog *skel = NULL;
+ int err, tgt_fd;
+
+ err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd);
+ if (!ASSERT_OK(err, "setup"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1");
+ if (ASSERT_ERR(err, "attach fails"))
+ err = 0;
+ else
+ err = -EINVAL;
+
+out:
+ priv_freplace_prog__destroy(fr_skel);
+ priv_prog__destroy(skel);
+ return err;
+}
+
/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
* which should cause struct_ops application to fail, as BTF won't be uploaded
* into the kernel, even if STRUCT_OPS programs themselves are allowed
@@ -1004,12 +1083,28 @@ void test_token(void)
if (test__start_subtest("obj_priv_prog")) {
struct bpffs_opts opts = {
.cmds = bit(BPF_PROG_LOAD),
- .progs = bit(BPF_PROG_TYPE_KPROBE),
+ .progs = bit(BPF_PROG_TYPE_XDP),
.attachs = ~0ULL,
};
subtest_userns(&opts, userns_obj_priv_prog);
}
+ if (test__start_subtest("obj_priv_freplace_prog")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID),
+ .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+ subtest_userns(&opts, userns_obj_priv_freplace_prog);
+ }
+ if (test__start_subtest("obj_priv_freplace_prog_fail")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID),
+ .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+ subtest_userns(&opts, userns_obj_priv_freplace_prog_fail);
+ }
if (test__start_subtest("obj_priv_btf_fail")) {
struct bpffs_opts opts = {
/* disallow BTF loading */
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 56ed1eb9b527..495d66414b57 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -45,7 +45,7 @@ static void subtest_basic_usdt(void)
LIBBPF_OPTS(bpf_usdt_opts, opts);
struct test_usdt *skel;
struct test_usdt__bss *bss;
- int err;
+ int err, i;
skel = test_usdt__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -75,6 +75,7 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
+ ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");
/* auto-attached usdt3 gets default zero cookie value */
ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie");
@@ -86,6 +87,9 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1");
ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+ ASSERT_EQ(bss->usdt3_arg_sizes[0], 4, "usdt3_arg1_size");
+ ASSERT_EQ(bss->usdt3_arg_sizes[1], 8, "usdt3_arg2_size");
+ ASSERT_EQ(bss->usdt3_arg_sizes[2], 8, "usdt3_arg3_size");
/* auto-attached usdt12 gets default zero cookie value */
ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie");
@@ -104,6 +108,11 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11");
ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12");
+ int usdt12_expected_arg_sizes[12] = { 4, 4, 8, 8, 4, 8, 8, 8, 4, 2, 2, 1 };
+
+ for (i = 0; i < 12; i++)
+ ASSERT_EQ(bss->usdt12_arg_sizes[i], usdt12_expected_arg_sizes[i], "usdt12_arg_size");
+
/* trigger_func() is marked __always_inline, so USDT invocations will be
* inlined in two different places, meaning that each USDT will have
* at least 2 different places to be attached to. This verifies that
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 8a0e1ff8a2dc..e66a57970d28 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -45,6 +45,7 @@
#include "verifier_ldsx.skel.h"
#include "verifier_leak_ptr.skel.h"
#include "verifier_linked_scalars.skel.h"
+#include "verifier_load_acquire.skel.h"
#include "verifier_loops1.skel.h"
#include "verifier_lwt.skel.h"
#include "verifier_map_in_map.skel.h"
@@ -80,6 +81,7 @@
#include "verifier_spill_fill.skel.h"
#include "verifier_spin_lock.skel.h"
#include "verifier_stack_ptr.skel.h"
+#include "verifier_store_release.skel.h"
#include "verifier_subprog_precision.skel.h"
#include "verifier_subreg.skel.h"
#include "verifier_tailcall_jit.skel.h"
@@ -121,7 +123,7 @@ static void run_tests_aux(const char *skel_name,
/* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */
err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps);
if (err) {
- PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err));
return;
}
@@ -131,7 +133,7 @@ static void run_tests_aux(const char *skel_name,
err = cap_enable_effective(old_caps, NULL);
if (err)
- PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err));
}
#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
@@ -173,6 +175,7 @@ void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); }
void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); }
void test_verifier_jit_convergence(void) { RUN(verifier_jit_convergence); }
+void test_verifier_load_acquire(void) { RUN(verifier_load_acquire); }
void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
@@ -211,6 +214,7 @@ void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); }
void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); }
void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
+void test_verifier_store_release(void) { RUN(verifier_store_release); }
void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); }
void test_verifier_subreg(void) { RUN(verifier_subreg); }
void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c
new file mode 100644
index 000000000000..18dd25344de7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Network topology:
+ * ----------- -----------
+ * | NS1 | | NS2 |
+ * | veth0 -|--------|- veth0 |
+ * ----------- -----------
+ *
+ */
+
+#define _GNU_SOURCE
+#include <net/if.h>
+#include <uapi/linux/if_link.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_xdp_vlan.skel.h"
+
+
+#define VETH_NAME "veth0"
+#define NS_MAX_SIZE 32
+#define NS1_NAME "ns-xdp-vlan-1-"
+#define NS2_NAME "ns-xdp-vlan-2-"
+#define NS1_IP_ADDR "100.64.10.1"
+#define NS2_IP_ADDR "100.64.10.2"
+#define VLAN_ID 4011
+
+static int setup_network(char *ns1, char *ns2)
+{
+ if (!ASSERT_OK(append_tid(ns1, NS_MAX_SIZE), "create ns1 name"))
+ goto fail;
+ if (!ASSERT_OK(append_tid(ns2, NS_MAX_SIZE), "create ns2 name"))
+ goto fail;
+
+ SYS(fail, "ip netns add %s", ns1);
+ SYS(fail, "ip netns add %s", ns2);
+ SYS(fail, "ip -n %s link add %s type veth peer name %s netns %s",
+ ns1, VETH_NAME, VETH_NAME, ns2);
+
+	/* NOTICE: XDP requires the VLAN header to be inside the packet payload
+ * - Thus, disable VLAN offloading driver features
+ */
+ SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns1, VETH_NAME);
+ SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns2, VETH_NAME);
+
+ /* NS1 configuration */
+ SYS(fail, "ip -n %s addr add %s/24 dev %s", ns1, NS1_IP_ADDR, VETH_NAME);
+ SYS(fail, "ip -n %s link set %s up", ns1, VETH_NAME);
+
+ /* NS2 configuration */
+ SYS(fail, "ip -n %s link add link %s name %s.%d type vlan id %d",
+ ns2, VETH_NAME, VETH_NAME, VLAN_ID, VLAN_ID);
+ SYS(fail, "ip -n %s addr add %s/24 dev %s.%d", ns2, NS2_IP_ADDR, VETH_NAME, VLAN_ID);
+ SYS(fail, "ip -n %s link set %s up", ns2, VETH_NAME);
+ SYS(fail, "ip -n %s link set %s.%d up", ns2, VETH_NAME, VLAN_ID);
+
+ /* At this point ping should fail because VLAN tags are only used by NS2 */
+ return !SYS_NOFAIL("ip netns exec %s ping -W 1 -c1 %s", ns2, NS1_IP_ADDR);
+
+fail:
+ return -1;
+}
+
+static void cleanup_network(const char *ns1, const char *ns2)
+{
+ SYS_NOFAIL("ip netns del %s", ns1);
+ SYS_NOFAIL("ip netns del %s", ns2);
+}
+
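+/* Attach the given XDP program on NS1's veth and the TC egress program that
+ * re-adds the VLAN tag, then verify that both namespaces can ping each other.
+ */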
+static void xdp_vlan(struct bpf_program *xdp, struct bpf_program *tc, u32 flags)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ char ns1[NS_MAX_SIZE] = NS1_NAME;
+ char ns2[NS_MAX_SIZE] = NS2_NAME;
+ struct nstoken *nstoken = NULL;
+ int interface;
+ int ret;
+
+ if (!ASSERT_OK(setup_network(ns1, ns2), "setup network"))
+ goto cleanup;
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open NS1"))
+ goto cleanup;
+
+ interface = if_nametoindex(VETH_NAME);
+ if (!ASSERT_NEQ(interface, 0, "get interface index"))
+ goto cleanup;
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(xdp), flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp_vlan_change"))
+ goto cleanup;
+
+ tc_hook.ifindex = interface;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto detach_xdp;
+
+ /* Now we'll use BPF programs to pop/push the VLAN tags */
+ tc_opts.prog_fd = bpf_program__fd(tc);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto detach_xdp;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ /* Now the namespaces can reach each-other, test with pings */
+ SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns1, NS2_IP_ADDR);
+ SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns2, NS1_IP_ADDR);
+
+detach_tc:
+ bpf_tc_detach(&tc_hook, &tc_opts);
+detach_xdp:
+ bpf_xdp_detach(interface, flags, NULL);
+cleanup:
+ close_netns(nstoken);
+ cleanup_network(ns1, ns2);
+}
+
+/* First test: remove the VLAN tag by setting VLAN ID 0, using "xdp_vlan_change";
+ * on egress, use TC to add back VLAN tag 4011
+ */
+void test_xdp_vlan_change(void)
+{
+ struct test_xdp_vlan *skel;
+
+ skel = test_xdp_vlan__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load"))
+ return;
+
+ if (test__start_subtest("0"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, 0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push,
+ XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push,
+ XDP_FLAGS_SKB_MODE);
+
+ test_xdp_vlan__destroy(skel);
+}
+
+/* Second test: the XDP prog fully removes the VLAN header
+ *
+ * Catches a kernel bug in generic-XDP that didn't allow us to
+ * remove the VLAN header, because skb->protocol still contained the
+ * VLAN ETH_P_8021Q indication, which caused our changes to be overwritten.
+ */
+void test_xdp_vlan_remove(void)
+{
+ struct test_xdp_vlan *skel;
+
+ skel = test_xdp_vlan__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load"))
+ return;
+
+ if (test__start_subtest("0"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, 0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push,
+ XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push,
+ XDP_FLAGS_SKB_MODE);
+
+ test_xdp_vlan__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c
index 40dd57fca5cc..a52feff98112 100644
--- a/tools/testing/selftests/bpf/progs/arena_atomics.c
+++ b/tools/testing/selftests/bpf/progs/arena_atomics.c
@@ -6,6 +6,8 @@
#include <stdbool.h>
#include <stdatomic.h>
#include "bpf_arena_common.h"
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_ARENA);
@@ -19,9 +21,17 @@ struct {
} arena SEC(".maps");
#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
-bool skip_tests __attribute((__section__(".data"))) = false;
+bool skip_all_tests __attribute((__section__(".data"))) = false;
#else
-bool skip_tests = true;
+bool skip_all_tests = true;
+#endif
+
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_lacq_srel_tests = true;
#endif
__u32 pid = 0;
@@ -274,4 +284,111 @@ int uaf(const void *ctx)
return 0;
}
+#if __clang_major__ >= 18
+__u8 __arena_global load_acquire8_value = 0x12;
+__u16 __arena_global load_acquire16_value = 0x1234;
+__u32 __arena_global load_acquire32_value = 0x12345678;
+__u64 __arena_global load_acquire64_value = 0x1234567890abcdef;
+
+__u8 __arena_global load_acquire8_result = 0;
+__u16 __arena_global load_acquire16_result = 0;
+__u32 __arena_global load_acquire32_result = 0;
+__u64 __arena_global load_acquire64_result = 0;
+#else
+/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around
+ * this issue by defining the below variables as 64-bit.
+ */
+__u64 __arena_global load_acquire8_value;
+__u64 __arena_global load_acquire16_value;
+__u64 __arena_global load_acquire32_value;
+__u64 __arena_global load_acquire64_value;
+
+__u64 __arena_global load_acquire8_result;
+__u64 __arena_global load_acquire16_result;
+__u64 __arena_global load_acquire32_result;
+__u64 __arena_global load_acquire64_result;
+#endif
+
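+/* Load-acquire each arena *_value variable (via a raw-encoded BPF_LOAD_ACQ
+ * instruction) and store the result into the matching *_result variable for
+ * user space to verify.
+ */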
+SEC("raw_tp/sys_enter")
+int load_acquire(const void *ctx)
+{
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+
+#define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \
+ { asm volatile ( \
+ "r1 = %[" #SRC "] ll;" \
+ "r1 = addr_space_cast(r1, 0x0, 0x1);" \
+ ".8byte %[load_acquire_insn];" \
+ "r3 = %[" #DST "] ll;" \
+ "r3 = addr_space_cast(r3, 0x0, 0x1);" \
+ "*(" #SIZE " *)(r3 + 0) = r2;" \
+ : \
+ : __imm_addr(SRC), \
+ __imm_insn(load_acquire_insn, \
+ BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_LOAD_ACQ, \
+ BPF_REG_2, BPF_REG_1, 0)), \
+ __imm_addr(DST) \
+ : __clobber_all); } \
+
+ LOAD_ACQUIRE_ARENA(B, u8, load_acquire8_value, load_acquire8_result)
+ LOAD_ACQUIRE_ARENA(H, u16, load_acquire16_value,
+ load_acquire16_result)
+ LOAD_ACQUIRE_ARENA(W, u32, load_acquire32_value,
+ load_acquire32_result)
+ LOAD_ACQUIRE_ARENA(DW, u64, load_acquire64_value,
+ load_acquire64_result)
+#undef LOAD_ACQUIRE_ARENA
+
+#endif
+ return 0;
+}
+
+#if __clang_major__ >= 18
+__u8 __arena_global store_release8_result = 0;
+__u16 __arena_global store_release16_result = 0;
+__u32 __arena_global store_release32_result = 0;
+__u64 __arena_global store_release64_result = 0;
+#else
+/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around
+ * this issue by defining the below variables as 64-bit.
+ */
+__u64 __arena_global store_release8_result;
+__u64 __arena_global store_release16_result;
+__u64 __arena_global store_release32_result;
+__u64 __arena_global store_release64_result;
+#endif
+
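+/* Store-release an immediate of each size into the corresponding arena
+ * *_result variable via a raw-encoded BPF_STORE_REL instruction.
+ */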
+SEC("raw_tp/sys_enter")
+int store_release(const void *ctx)
+{
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+
+#define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \
+ { asm volatile ( \
+ "r1 = " VAL ";" \
+ "r2 = %[" #DST "] ll;" \
+ "r2 = addr_space_cast(r2, 0x0, 0x1);" \
+ ".8byte %[store_release_insn];" \
+ : \
+ : __imm_addr(DST), \
+ __imm_insn(store_release_insn, \
+ BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_STORE_REL, \
+ BPF_REG_2, BPF_REG_1, 0)) \
+ : __clobber_all); } \
+
+ STORE_RELEASE_ARENA(B, store_release8_result, "0x12")
+ STORE_RELEASE_ARENA(H, store_release16_result, "0x1234")
+ STORE_RELEASE_ARENA(W, store_release32_result, "0x12345678")
+ STORE_RELEASE_ARENA(DW, store_release64_result,
+ "0x1234567890abcdef ll")
+#undef STORE_RELEASE_ARENA
+
+#endif
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
new file mode 100644
index 000000000000..c4500c37f85e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_arena_spin_lock.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+int cs_count;
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+arena_spinlock_t __arena lock;
+int test_skip = 1;
+#else
+int test_skip = 2;
+#endif
+
+int counter;
+int limit;
+
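+/* Take the arena spinlock, bump the shared counter until it reaches 'limit',
+ * spin for cs_count iterations to stretch the critical section, then unlock.
+ */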
+SEC("tc")
+int prog(void *ctx)
+{
+ int ret = -2;
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ unsigned long flags;
+
+ if ((ret = arena_spin_lock_irqsave(&lock, flags)))
+ return ret;
+ if (counter != limit)
+ counter++;
+ bpf_repeat(cs_count);
+ ret = 0;
+ arena_spin_unlock_irqrestore(&lock, flags);
+#endif
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
index bc10c4e4b4fa..966ee5a7b066 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
@@ -9,6 +9,13 @@ char _license[] SEC("license") = "GPL";
uint32_t tid = 0;
int num_unknown_tid = 0;
int num_known_tid = 0;
+void *user_ptr = 0;
+void *user_ptr_long = 0;
+uint32_t pid = 0;
+
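+/* Buffers for bpf_copy_from_user_task_str() results, sized around the
+ * long (~5000-byte) user string so that copies cross a page boundary.
+ */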
+static char big_str1[5000];
+static char big_str2[5005];
+static char big_str3[4996];
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
@@ -35,7 +42,9 @@ int dump_task(struct bpf_iter__task *ctx)
}
int num_expected_failure_copy_from_user_task = 0;
+int num_expected_failure_copy_from_user_task_str = 0;
int num_success_copy_from_user_task = 0;
+int num_success_copy_from_user_task_str = 0;
SEC("iter.s/task")
int dump_task_sleepable(struct bpf_iter__task *ctx)
@@ -44,6 +53,9 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
struct task_struct *task = ctx->task;
static const char info[] = " === END ===";
struct pt_regs *regs;
+ char task_str1[10] = "aaaaaaaaaa";
+ char task_str2[10], task_str3[10];
+ char task_str4[20] = "aaaaaaaaaaaaaaaaaaaa";
void *ptr;
uint32_t user_data = 0;
int ret;
@@ -78,8 +90,106 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
+
++num_success_copy_from_user_task;
+ /* Read an invalid pointer and ensure we get an error */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), ptr, task, 0);
+ if (ret >= 0 || task_str1[9] != 'a' || task_str1[0] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Read an invalid pointer and ensure we get error with pad zeros flag */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1),
+ ptr, task, BPF_F_PAD_ZEROS);
+ if (ret >= 0 || task_str1[9] != '\0' || task_str1[0] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_expected_failure_copy_from_user_task_str;
+
+ /* Same length as the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str2, 10, user_ptr, task, 0);
+ /* only need to do the task pid check once */
+ if (bpf_strncmp(task_str2, 10, "test_data\0") != 0 || ret != 10 || task->tgid != pid) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Shorter length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str3, 2, user_ptr, task, 0);
+ if (bpf_strncmp(task_str3, 2, "t\0") != 0 || ret != 2) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, 0);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10
+ || task_str4[sizeof(task_str4) - 1] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string with pad zeros flag */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10
+ || task_str4[sizeof(task_str4) - 1] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string past a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr, task, 0);
+ if (bpf_strncmp(big_str1, 10, "test_data\0") != 0 || ret != 10) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* String that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr_long, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(big_str1, 4, "baba") != 0 || ret != 5000
+ || bpf_strncmp(big_str1 + 4996, 4, "bab\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ for (int i = 0; i < 4999; ++i) {
+ if (i % 2 == 0) {
+ if (big_str1[i] != 'b') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ } else {
+ if (big_str1[i] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ }
+ }
+
+ /* Longer length than the string that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str2, 5005, user_ptr_long, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(big_str2, 4, "baba") != 0 || ret != 5000
+ || bpf_strncmp(big_str2 + 4996, 5, "bab\0\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Shorter length than the string that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str3, 4996, user_ptr_long, task, 0);
+ if (bpf_strncmp(big_str3, 4, "baba") != 0 || ret != 4996
+ || bpf_strncmp(big_str3 + 4992, 4, "bab\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_success_copy_from_user_task_str;
+
if (ctx->meta->seq_num == 0)
BPF_SEQ_PRINTF(seq, " tgid gid data\n");
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index f45f4352feeb..13a2e22f5465 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -135,6 +135,8 @@
#define __arch_arm64 __arch("ARM64")
#define __arch_riscv64 __arch("RISCV64")
#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
+#define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited")))
+#define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited")))
/* Define common capabilities tested using __caps_unpriv */
#define CAP_NET_ADMIN 12
@@ -172,6 +174,9 @@
#elif defined(__TARGET_ARCH_riscv)
#define SYSCALL_WRAPPER 1
#define SYS_PREFIX "__riscv_"
+#elif defined(__TARGET_ARCH_powerpc)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX ""
#else
#define SYSCALL_WRAPPER 0
#define SYS_PREFIX "__se_"
@@ -208,4 +213,21 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+#define CAN_USE_GOTOL
+#endif
+
+#if __clang_major__ >= 18
+#define CAN_USE_BPF_ST
+#endif
+
+#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+#define CAN_USE_LOAD_ACQ_STORE_REL
+#endif
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index eb6ed1b7b2ef..659694162739 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -15,7 +15,11 @@
#define SO_KEEPALIVE 9
#define SO_PRIORITY 12
#define SO_REUSEPORT 15
+#if defined(__TARGET_ARCH_powerpc)
+#define SO_RCVLOWAT 16
+#else
#define SO_RCVLOWAT 18
+#endif
#define SO_BINDTODEVICE 25
#define SO_MARK 36
#define SO_MAX_PACING_RATE 47
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c
new file mode 100644
index 000000000000..21a560427b10
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_bad_signed_arr_elem_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_preorder.c b/tools/testing/selftests/bpf/progs/cgroup_preorder.c
new file mode 100644
index 000000000000..4ef6202baa0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_preorder.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned int idx;
+__u8 result[4];
+
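+/* Each program below records its identifier in result[] in invocation order,
+ * letting the user-space test verify the order in which the parent and child
+ * cgroup programs run.
+ */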
+SEC("cgroup/getsockopt")
+int child(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 1;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int child_2(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 2;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int parent(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 3;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int parent_2(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 4;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c
deleted file mode 100644
index 43cada48b28a..000000000000
--- a/tools/testing/selftests/bpf/progs/changes_pkt_data.c
+++ /dev/null
@@ -1,39 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-__noinline
-long changes_pkt_data(struct __sk_buff *sk)
-{
- return bpf_skb_pull_data(sk, 0);
-}
-
-__noinline __weak
-long does_not_change_pkt_data(struct __sk_buff *sk)
-{
- return 0;
-}
-
-SEC("?tc")
-int main_with_subprogs(struct __sk_buff *sk)
-{
- changes_pkt_data(sk);
- does_not_change_pkt_data(sk);
- return 0;
-}
-
-SEC("?tc")
-int main_changes(struct __sk_buff *sk)
-{
- bpf_skb_pull_data(sk, 0);
- return 0;
-}
-
-SEC("?tc")
-int main_does_not_change(struct __sk_buff *sk)
-{
- return 0;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c
new file mode 100644
index 000000000000..f3d79aecbf93
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_arena_common.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} test_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1);
+} arena SEC(".maps");
+
+SEC("socket")
+__log_level(2)
+__msg(" 0: .......... (b7) r0 = 42")
+__msg(" 1: 0......... (bf) r1 = r0")
+__msg(" 2: .1........ (bf) r2 = r1")
+__msg(" 3: ..2....... (bf) r3 = r2")
+__msg(" 4: ...3...... (bf) r4 = r3")
+__msg(" 5: ....4..... (bf) r5 = r4")
+__msg(" 6: .....5.... (bf) r6 = r5")
+__msg(" 7: ......6... (bf) r7 = r6")
+__msg(" 8: .......7.. (bf) r8 = r7")
+__msg(" 9: ........8. (bf) r9 = r8")
+__msg("10: .........9 (bf) r0 = r9")
+__msg("11: 0......... (95) exit")
+__naked void assign_chain(void)
+{
+ asm volatile (
+ "r0 = 42;"
+ "r1 = r0;"
+ "r2 = r1;"
+ "r3 = r2;"
+ "r4 = r3;"
+ "r5 = r4;"
+ "r6 = r5;"
+ "r7 = r6;"
+ "r8 = r7;"
+ "r9 = r8;"
+ "r0 = r9;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 7")
+__msg("1: .1........ (07) r1 += 7")
+__msg("2: .......... (b7) r2 = 7")
+__msg("3: ..2....... (b7) r3 = 42")
+__msg("4: ..23...... (0f) r2 += r3")
+__msg("5: .......... (b7) r0 = 0")
+__msg("6: 0......... (95) exit")
+__naked void arithmetics(void)
+{
+ asm volatile (
+ "r1 = 7;"
+ "r1 += 7;"
+ "r2 = 7;"
+ "r3 = 42;"
+ "r2 += r3;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#ifdef CAN_USE_BPF_ST
+SEC("socket")
+__log_level(2)
+__msg(" 1: .1........ (07) r1 += -8")
+__msg(" 2: .1........ (7a) *(u64 *)(r1 +0) = 7")
+__msg(" 3: .1........ (b7) r2 = 42")
+__msg(" 4: .12....... (7b) *(u64 *)(r1 +0) = r2")
+__msg(" 5: .12....... (7b) *(u64 *)(r1 +0) = r2")
+__msg(" 6: .......... (b7) r0 = 0")
+__naked void store(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "*(u64 *)(r1 +0) = 7;"
+ "r2 = 42;"
+ "*(u64 *)(r1 +0) = r2;"
+ "*(u64 *)(r1 +0) = r2;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+#endif
+
+SEC("socket")
+__log_level(2)
+__msg("1: ....4..... (07) r4 += -8")
+__msg("2: ....4..... (79) r5 = *(u64 *)(r4 +0)")
+__msg("3: ....45.... (07) r4 += -8")
+__naked void load(void)
+{
+ asm volatile (
+ "r4 = r10;"
+ "r4 += -8;"
+ "r5 = *(u64 *)(r4 +0);"
+ "r4 += -8;"
+ "r0 = r5;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .1........ (61) r2 = *(u32 *)(r1 +0)")
+__msg("1: ..2....... (d4) r2 = le64 r2")
+__msg("2: ..2....... (bf) r0 = r2")
+__naked void endian(void)
+{
+ asm volatile (
+ "r2 = *(u32 *)(r1 +0);"
+ "r2 = le64 r2;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg(" 8: 0......... (b7) r1 = 1")
+__msg(" 9: 01........ (db) r1 = atomic64_fetch_add((u64 *)(r0 +0), r1)")
+__msg("10: 01........ (c3) lock *(u32 *)(r0 +0) += r1")
+__msg("11: 01........ (db) r1 = atomic64_xchg((u64 *)(r0 +0), r1)")
+__msg("12: 01........ (bf) r2 = r0")
+__msg("13: .12....... (bf) r0 = r1")
+__msg("14: 012....... (db) r0 = atomic64_cmpxchg((u64 *)(r2 +0), r0, r1)")
+__naked void atomic(void)
+{
+ asm volatile (
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = 0;"
+ "*(u64 *)(r2 +0) = r1;"
+ "r1 = %[test_map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto 1f;"
+ "r1 = 1;"
+ "r1 = atomic_fetch_add((u64 *)(r0 +0), r1);"
+ ".8byte %[add_nofetch];" /* same as "lock *(u32 *)(r0 +0) += r1;" */
+ "r1 = xchg_64(r0 + 0, r1);"
+ "r2 = r0;"
+ "r0 = r1;"
+ "r0 = cmpxchg_64(r2 + 0, r0, r1);"
+ "1: exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(test_map),
+ __imm_insn(add_nofetch, BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__log_level(2)
+__msg("2: .12....... (db) store_release((u64 *)(r2 -8), r1)")
+__msg("3: .......... (bf) r3 = r10")
+__msg("4: ...3...... (db) r4 = load_acquire((u64 *)(r3 -8))")
+__naked void atomic_load_acq_store_rel(void)
+{
+ asm volatile (
+ "r1 = 42;"
+ "r2 = r10;"
+ ".8byte %[store_release_insn];" /* store_release((u64 *)(r2 - 8), r1); */
+ "r3 = r10;"
+ ".8byte %[load_acquire_insn];" /* r4 = load_acquire((u64 *)(r3 + 0)); */
+ "r0 = r4;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_4, BPF_REG_3, -8))
+ : __clobber_all);
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__log_level(2)
+__msg("4: .12....7.. (85) call bpf_trace_printk#6")
+__msg("5: 0......7.. (0f) r0 += r7")
+__naked void regular_call(void)
+{
+ asm volatile (
+ "r7 = 1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 1;"
+ "call %[bpf_trace_printk];"
+ "r0 += r7;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("2: 012....... (25) if r1 > 0x7 goto pc+1")
+__msg("3: ..2....... (bf) r0 = r2")
+__naked void if1(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "if r1 > 0x7 goto +1;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("3: 0123...... (2d) if r1 > r3 goto pc+1")
+__msg("4: ..2....... (bf) r0 = r2")
+__naked void if2(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "r3 = 7;"
+ "if r1 > r3 goto +1;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 0")
+__msg("1: .1........ (b7) r2 = 7")
+__msg("2: .12....... (25) if r1 > 0x7 goto pc+4")
+__msg("3: .12....... (07) r1 += 1")
+__msg("4: .12....... (27) r2 *= 2")
+__msg("5: .12....... (05) goto pc+0")
+__msg("6: .12....... (05) goto pc-5")
+__msg("7: .......... (b7) r0 = 0")
+__msg("8: 0......... (95) exit")
+__naked void loop(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "r2 = 7;"
+ "if r1 > 0x7 goto +4;"
+ "r1 += 1;"
+ "r2 *= 2;"
+ "goto +0;"
+ "goto -5;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+#ifdef CAN_USE_GOTOL
+SEC("socket")
+__log_level(2)
+__msg("2: .123...... (25) if r1 > 0x7 goto pc+2")
+__msg("3: ..2....... (bf) r0 = r2")
+__msg("4: 0......... (06) gotol pc+1")
+__msg("5: ...3...... (bf) r0 = r3")
+__msg("6: 0......... (95) exit")
+__naked void gotol(void)
+{
+ asm volatile (
+ "r2 = 42;"
+ "r3 = 24;"
+ "if r1 > 0x7 goto +2;"
+ "r0 = r2;"
+ "gotol +1;"
+ "r0 = r3;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+#endif
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 1")
+__msg("1: .1........ (e5) may_goto pc+1")
+__msg("2: .......... (05) goto pc-3")
+__msg("3: .1........ (bf) r0 = r1")
+__msg("4: 0......... (95) exit")
+__naked void may_goto(void)
+{
+ asm volatile (
+ "1: r1 = 1;"
+ ".8byte %[may_goto];"
+ "goto 1b;"
+ "r0 = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("1: 0......... (18) r2 = 0x7")
+__msg("3: 0.2....... (0f) r0 += r2")
+__naked void ldimm64(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r2 = 0x7 ll;"
+ "r0 += r2;"
+ "exit;"
+ :
+ :: __clobber_all);
+}
+
+/* No rules specific for LD_ABS/LD_IND, default behaviour kicks in */
+SEC("socket")
+__log_level(2)
+__msg("2: 0123456789 (30) r0 = *(u8 *)skb[42]")
+__msg("3: 012.456789 (0f) r7 += r0")
+__msg("4: 012.456789 (b7) r3 = 42")
+__msg("5: 0123456789 (50) r0 = *(u8 *)skb[r3 + 0]")
+__msg("6: 0......7.. (0f) r7 += r0")
+__naked void ldabs(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r7 = 0;"
+ "r0 = *(u8 *)skb[42];"
+ "r7 += r0;"
+ "r3 = 42;"
+ ".8byte %[ld_ind];" /* same as "r0 = *(u8 *)skb[r3];" */
+ "r7 += r0;"
+ "r0 = r7;"
+ "exit;"
+ :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_B, BPF_REG_3, 0))
+ : __clobber_all);
+}
+
+
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__log_level(2)
+__msg(" 6: .12345.... (85) call bpf_arena_alloc_pages")
+__msg(" 7: 0......... (bf) r1 = addr_space_cast(r0, 0, 1)")
+__msg(" 8: .1........ (b7) r2 = 42")
+__naked void addr_space_cast(void)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = 0;"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r1 = addr_space_cast(r0, 0, 1);"
+ "r2 = 42;"
+ "*(u64 *)(r1 +0) = r2;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena)
+ : __clobber_all);
+}
+#endif
+
+static __used __naked int aux1(void)
+{
+ asm volatile (
+ "r0 = r1;"
+ "r0 += r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: ....45.... (b7) r1 = 1")
+__msg("1: .1..45.... (b7) r2 = 2")
+__msg("2: .12.45.... (b7) r3 = 3")
+/* Conservative liveness for subprog parameters. */
+__msg("3: .12345.... (85) call pc+2")
+__msg("4: .......... (b7) r0 = 0")
+__msg("5: 0......... (95) exit")
+__msg("6: .12....... (bf) r0 = r1")
+__msg("7: 0.2....... (0f) r0 += r2")
+/* Conservative liveness for subprog return value. */
+__msg("8: 0......... (95) exit")
+__naked void subprog1(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "r2 = 2;"
+ "r3 = 3;"
+ "call aux1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* to retain debug info for BTF generation */
+void kfunc_root(void)
+{
+ bpf_arena_alloc_pages(0, 0, 0, 0, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c
index d3f4c5e4fb69..a3819a5d09c8 100644
--- a/tools/testing/selftests/bpf/progs/connect4_dropper.c
+++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c
@@ -13,12 +13,14 @@
#define VERDICT_REJECT 0
#define VERDICT_PROCEED 1
+int port;
+
SEC("cgroup/connect4")
int connect_v4_dropper(struct bpf_sock_addr *ctx)
{
if (ctx->type != SOCK_STREAM)
return VERDICT_PROCEED;
- if (ctx->user_port == bpf_htons(60120))
+ if (ctx->user_port == bpf_htons(port))
return VERDICT_REJECT;
return VERDICT_PROCEED;
}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index fd8e1b4c6762..5760ae015e09 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -347,6 +347,7 @@ struct core_reloc_nesting___err_too_deep {
*/
struct core_reloc_arrays_output {
int a2;
+ int a3;
char b123;
int c1c;
int d00d;
@@ -455,6 +456,15 @@ struct core_reloc_arrays___err_bad_zero_sz_arr {
struct core_reloc_arrays_substruct d[1][2];
};
+struct core_reloc_arrays___err_bad_signed_arr_elem_sz {
+ /* int -> short (signed!): not supported case */
+ short a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
/*
* PRIMITIVES
*/
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index 4ece7873ba60..86085b79f5ca 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -61,6 +61,7 @@ u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym __weak;
u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
const struct cpumask *src2) __ksym __weak;
u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak;
+int bpf_cpumask_populate(struct cpumask *cpumask, void *src, size_t src__sz) __ksym __weak;
void bpf_rcu_read_lock(void) __ksym __weak;
void bpf_rcu_read_unlock(void) __ksym __weak;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
index b40b52548ffb..8a2fd596c8a3 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -222,3 +222,41 @@ int BPF_PROG(test_invalid_nested_array, struct task_struct *task, u64 clone_flag
return 0;
}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("type=scalar expected=fp")
+int BPF_PROG(test_populate_invalid_destination, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *invalid = (struct bpf_cpumask *)0x123456;
+ u64 bits;
+ int ret;
+
+ ret = bpf_cpumask_populate((struct cpumask *)invalid, &bits, sizeof(bits));
+ if (!ret)
+ err = 2;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("leads to invalid memory access")
+int BPF_PROG(test_populate_invalid_source, struct task_struct *task, u64 clone_flags)
+{
+ void *garbage = (void *)0x123456;
+ struct bpf_cpumask *local;
+ int ret;
+
+ local = create_cpumask();
+ if (!local) {
+ err = 1;
+ return 0;
+ }
+
+ ret = bpf_cpumask_populate((struct cpumask *)local, garbage, 8);
+ if (!ret)
+ err = 2;
+
+ bpf_cpumask_release(local);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c
index 80ee469b0b60..0e04c31b91c0 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_success.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c
@@ -749,7 +749,6 @@ out:
}
SEC("tp_btf/task_newtask")
-__success
int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *mask1, *mask2;
@@ -770,3 +769,122 @@ free_masks_return:
bpf_cpumask_release(mask2);
return 0;
}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate_reject_small_mask, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local;
+ u8 toofewbits;
+ int ret;
+
+ if (!is_test_task())
+ return 0;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ /* The kfunc should prevent this operation */
+ ret = bpf_cpumask_populate((struct cpumask *)local, &toofewbits, sizeof(toofewbits));
+ if (ret != -EACCES)
+ err = 2;
+
+ bpf_cpumask_release(local);
+
+ return 0;
+}
+
+/* Mask is guaranteed to be large enough for bpf_cpumask_t. */
+#define CPUMASK_TEST_MASKLEN (sizeof(cpumask_t))
+
+/* Add an extra word for the test_populate_reject_unaligned test. */
+u64 bits[CPUMASK_TEST_MASKLEN / 8 + 1];
+extern bool CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS __kconfig __weak;
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate_reject_unaligned, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask;
+ char *src;
+ int ret;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Skip if unaligned accesses are fine for this arch. */
+ if (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ return 0;
+
+ mask = bpf_cpumask_create();
+ if (!mask) {
+ err = 1;
+ return 0;
+ }
+
+ /* Misalign the source array by a byte. */
+ src = &((char *)bits)[1];
+
+ ret = bpf_cpumask_populate((struct cpumask *)mask, src, CPUMASK_TEST_MASKLEN);
+ if (ret != -EINVAL)
+ err = 2;
+
+ bpf_cpumask_release(mask);
+
+ return 0;
+}
+
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask;
+ bool bit;
+ int ret;
+ int i;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Set only odd bits. */
+ __builtin_memset(bits, 0xaa, CPUMASK_TEST_MASKLEN);
+
+ mask = bpf_cpumask_create();
+ if (!mask) {
+ err = 1;
+ return 0;
+ }
+
+ /* Pass the entire bits array, the kfunc will only copy the valid bits. */
+ ret = bpf_cpumask_populate((struct cpumask *)mask, bits, CPUMASK_TEST_MASKLEN);
+ if (ret) {
+ err = 2;
+ goto out;
+ }
+
+ /*
+ * This check is here to appease the verifier. We cannot directly
+ * access NR_CPUS, the upper bound for nr_cpus, so we infer
+ * it from the size of cpumask_t.
+ */
+ if (nr_cpus < 0 || nr_cpus >= CPUMASK_TEST_MASKLEN * 8) {
+ err = 3;
+ goto out;
+ }
+
+ bpf_for(i, 0, nr_cpus) {
+ /* Odd-numbered bits should be set, even ones unset. */
+ bit = bpf_cpumask_test_cpu(i, (const struct cpumask *)mask);
+ if (bit == (i % 2 != 0))
+ continue;
+
+ err = 4;
+ break;
+ }
+
+out:
+ bpf_cpumask_release(mask);
+
+ return 0;
+}
+
+#undef CPUMASK_TEST_MASKLEN
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index bfcc85686cf0..e1fba28e4a86 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -1,20 +1,19 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Facebook */
+#include <vmlinux.h>
#include <string.h>
#include <stdbool.h>
-#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
-#include "bpf_kfuncs.h"
#include "errno.h"
char _license[] SEC("license") = "GPL";
int pid, err, val;
-struct sample {
+struct ringbuf_sample {
int pid;
int seq;
long value;
@@ -121,7 +120,7 @@ int test_dynptr_data(void *ctx)
static int ringbuf_callback(__u32 index, void *data)
{
- struct sample *sample;
+ struct ringbuf_sample *sample;
struct bpf_dynptr *ptr = (struct bpf_dynptr *)data;
@@ -138,7 +137,7 @@ SEC("?tp/syscalls/sys_enter_nanosleep")
int test_ringbuf(void *ctx)
{
struct bpf_dynptr ptr;
- struct sample *sample;
+ struct ringbuf_sample *sample;
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 0;
@@ -567,3 +566,117 @@ int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location)
return 1;
}
+
+static inline int bpf_memcmp(const char *a, const char *b, u32 size)
+{
+ int i;
+
+ bpf_for(i, 0, size) {
+ if (a[i] != b[i])
+ return a[i] < b[i] ? -1 : 1;
+ }
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_copy(void *ctx)
+{
+ char data[] = "hello there, world!!";
+ char buf[32] = {'\0'};
+ __u32 sz = sizeof(data);
+ struct bpf_dynptr src, dst;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &src);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &dst);
+
+ /* Test basic case of copying contiguous memory backed dynptrs */
+ err = bpf_dynptr_write(&src, 0, data, sz, 0);
+ err = err ?: bpf_dynptr_copy(&dst, 0, &src, 0, sz);
+ err = err ?: bpf_dynptr_read(buf, sz, &dst, 0, 0);
+ err = err ?: bpf_memcmp(data, buf, sz);
+
+ /* Test that offsets are handled correctly */
+ err = err ?: bpf_dynptr_copy(&dst, 3, &src, 5, sz - 5);
+ err = err ?: bpf_dynptr_read(buf, sz - 5, &dst, 3, 0);
+ err = err ?: bpf_memcmp(data + 5, buf, sz - 5);
+
+ bpf_ringbuf_discard_dynptr(&src, 0);
+ bpf_ringbuf_discard_dynptr(&dst, 0);
+ return 0;
+}
+
+SEC("xdp")
+int test_dynptr_copy_xdp(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr_buf, ptr_xdp;
+ char data[] = "qwertyuiopasdfghjkl";
+ char buf[32] = {'\0'};
+ __u32 len = sizeof(data);
+ int i, chunks = 200;
+
+ /* ptr_xdp is backed by non-contiguous memory */
+ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf);
+
+ /* Destination dynptr is backed by non-contiguous memory */
+ bpf_for(i, 0, chunks) {
+ err = bpf_dynptr_write(&ptr_buf, i * len, data, len, 0);
+ if (err)
+ goto out;
+ }
+
+ err = bpf_dynptr_copy(&ptr_xdp, 0, &ptr_buf, 0, len * chunks);
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_xdp, i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ /* Source dynptr is backed by non-contiguous memory */
+ __builtin_memset(buf, 0, sizeof(buf));
+ bpf_for(i, 0, chunks) {
+ err = bpf_dynptr_write(&ptr_buf, i * len, buf, len, 0);
+ if (err)
+ goto out;
+ }
+
+ err = bpf_dynptr_copy(&ptr_buf, 0, &ptr_xdp, 0, len * chunks);
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_buf, i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ /* Both source and destination dynptrs are backed by non-contiguous memory */
+ err = bpf_dynptr_copy(&ptr_xdp, 2, &ptr_xdp, len, len * (chunks - 1));
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks - 1) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_xdp, 2 + i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ if (bpf_dynptr_copy(&ptr_xdp, 2000, &ptr_xdp, 0, len * chunks) != -E2BIG)
+ err = 1;
+
+out:
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_noreturns.c b/tools/testing/selftests/bpf/progs/fexit_noreturns.c
new file mode 100644
index 000000000000..54654539f550
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_noreturns.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("fexit/do_exit")
+__failure __msg("Attaching fexit/fmod_ret to __noreturn functions is rejected.")
+int BPF_PROG(noreturns)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c
index b0b53d980964..298d48d7886d 100644
--- a/tools/testing/selftests/bpf/progs/irq.c
+++ b/tools/testing/selftests/bpf/progs/irq.c
@@ -222,7 +222,7 @@ int __noinline global_local_irq_balance(void)
}
SEC("?tc")
-__failure __msg("global function calls are not allowed with IRQs disabled")
+__success
int irq_global_subprog(struct __sk_buff *ctx)
{
unsigned long flags;
@@ -441,4 +441,73 @@ int irq_ooo_refs_array(struct __sk_buff *ctx)
return 0;
}
+int __noinline
+global_subprog(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+__success
+int irq_non_sleepable_global_subprog(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_subprog(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int irq_sleepable_helper_global_subprog(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_sleepable_helper_subprog(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int irq_sleepable_global_subprog_indirect(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_subprog_calling_sleepable_global(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 190822b2f08b..427b72954b87 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -7,6 +7,8 @@
#include "bpf_misc.h"
#include "bpf_compiler.h"
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
static volatile int zero = 0;
int my_pid;
@@ -1175,6 +1177,122 @@ __naked int loop_state_deps2(void)
}
SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps3(void)
+{
+ /* This is equivalent to a C program below.
+ *
+ * if (random() != 24) { // assume false branch is placed first
+ * i = iter_new(); // fp[-8]
+ * while (iter_next(i));
+ * iter_destroy(i);
+ * return;
+ * }
+ *
+ * for (i = 10; i > 0; i--); // increase dfs_depth for child states
+ *
+ * i = iter_new(); // fp[-8]
+ * b = -24; // r8
+ * for (;;) { // checkpoint (L)
+ * if (iter_next(i)) // checkpoint (N)
+ * break;
+ * if (random() == 77) { // assume false branch is placed first
+ * *(u64 *)(r10 + b) = 7; // this is not safe when b == -25
+ * iter_destroy(i);
+ * return;
+ * }
+ * if (random() == 42) { // assume false branch is placed first
+ * b = -25;
+ * }
+ * }
+ * iter_destroy(i);
+ *
+ * In case of a buggy verifier, the first loop might poison
+ * env->cur_state->loop_entry with a state having 0 branches
+ * and small dfs_depth. This would trigger NOT_EXACT states
+ * comparison for some states within second loop.
+ * Specifically, checkpoint (L) might be problematic if:
+ * - branch with '*(u64 *)(r10 + b) = 7' is not explored yet;
+ * - checkpoint (L) is first reached in state {b=-24};
+ * - traversal is pruned at checkpoint (N) setting checkpoint's (L)
+ * branch count to 0, thus making it eligible for use in pruning;
+ * - checkpoint (L) is next reached in state {b=-25},
+ * this would cause NOT_EXACT comparison with a state {b=-24}
+ * while 'b' is not marked precise yet.
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 24 goto 2f;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 5;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 != 0 goto 1b;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "2:"
+ /* loop to increase dfs_depth */
+ "r0 = 10;"
+ "3:"
+ "r0 -= 1;"
+ "if r0 != 0 goto 3b;"
+ /* end of loop */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r8 = -24;"
+ "main_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto main_loop_end_%=;"
+ /* first if */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 77 goto unsafe_write_%=;"
+ /* second if */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 42 goto poison_r8_%=;"
+ /* iterate */
+ "goto main_loop_%=;"
+ "main_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+
+ "unsafe_write_%=:"
+ "r0 = r10;"
+ "r0 += r8;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto main_loop_end_%=;"
+
+ "poison_r8_%=:"
+ "r8 = -25;"
+ "goto main_loop_%=;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
__success
__naked int triple_continue(void)
{
@@ -1512,4 +1630,25 @@ int iter_destroy_bad_arg(const void *ctx)
return 0;
}
+SEC("raw_tp")
+__success
+int clean_live_states(const void *ctx)
+{
+ char buf[1];
+ int i, j, k, l, m, n, o;
+
+ bpf_for(i, 0, 10)
+ bpf_for(j, 0, 10)
+ bpf_for(k, 0, 10)
+ bpf_for(l, 0, 10)
+ bpf_for(m, 0, 10)
+ bpf_for(n, 0, 10)
+ bpf_for(o, 0, 10) {
+ if (unlikely(bpf_get_prandom_u32()))
+ buf[0] = 42;
+ bpf_printk("%s", buf);
+ }
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
index c6edf8dbefeb..94040714af18 100644
--- a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
+++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
@@ -28,6 +28,7 @@ struct {
} sock_map SEC(".maps");
int tcx_init_netns_cookie, tcx_netns_cookie;
+int cgroup_skb_init_netns_cookie, cgroup_skb_netns_cookie;
SEC("sockops")
int get_netns_cookie_sockops(struct bpf_sock_ops *ctx)
@@ -91,4 +92,12 @@ int get_netns_cookie_tcx(struct __sk_buff *skb)
return TCX_PASS;
}
+SEC("cgroup_skb/ingress")
+int get_netns_cookie_cgroup_skb(struct __sk_buff *skb)
+{
+ cgroup_skb_init_netns_cookie = bpf_get_netns_cookie(NULL);
+ cgroup_skb_netns_cookie = bpf_get_netns_cookie(skb);
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c
index 6c5797bf0ead..7d04254e61f1 100644
--- a/tools/testing/selftests/bpf/progs/preempt_lock.c
+++ b/tools/testing/selftests/bpf/progs/preempt_lock.c
@@ -134,7 +134,7 @@ int __noinline preempt_global_subprog(void)
}
SEC("?tc")
-__failure __msg("global function calls are not allowed with preemption disabled")
+__success
int preempt_global_subprog_test(struct __sk_buff *ctx)
{
preempt_disable();
@@ -143,4 +143,70 @@ int preempt_global_subprog_test(struct __sk_buff *ctx)
return 0;
}
+int __noinline
+global_subprog(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_helper_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_sleepable_helper_subprog(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_sleepable_kfunc_subprog(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_subprog_indirect(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_subprog_calling_sleepable_global(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c
new file mode 100644
index 000000000000..1f1dd547e4ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/prepare.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+//#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int err;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array_map SEC(".maps");
+
+SEC("cgroup_skb/egress")
+int program(struct __sk_buff *skb)
+{
+ err = 0;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_freplace_prog.c b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c
new file mode 100644
index 000000000000..ccf1b04010ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("freplace/xdp_prog1")
+int new_xdp_prog2(struct xdp_md *xd)
+{
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c
index 3c7b2b618c8a..725e29595079 100644
--- a/tools/testing/selftests/bpf/progs/priv_prog.c
+++ b/tools/testing/selftests/bpf/progs/priv_prog.c
@@ -6,8 +6,8 @@
char _license[] SEC("license") = "GPL";
-SEC("kprobe")
-int kprobe_prog(void *ctx)
+SEC("xdp")
+int xdp_prog1(struct xdp_md *xdp)
{
- return 1;
+ return XDP_DROP;
}
diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c
new file mode 100644
index 000000000000..a5a8f08ac8fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+void __kfunc_btf_root(void)
+{
+ bpf_kfunc_st_ops_inc10(NULL);
+}
+
+static __noinline __used int subprog(struct st_ops_args *args)
+{
+ args->a += 1;
+ return args->a;
+}
+
+__success
+/* prologue */
+__xlated("0: r8 = r1")
+__xlated("1: r1 = 0")
+__xlated("2: call kernel-function")
+__xlated("3: if r0 != 0x0 goto pc+5")
+__xlated("4: r6 = *(u64 *)(r8 +0)")
+__xlated("5: r7 = *(u64 *)(r6 +0)")
+__xlated("6: r7 += 1000")
+__xlated("7: *(u64 *)(r6 +0) = r7")
+__xlated("8: goto pc+2")
+__xlated("9: r1 = r0")
+__xlated("10: call kernel-function")
+__xlated("11: r1 = r8")
+/* save __u64 *ctx to stack */
+__xlated("12: *(u64 *)(r10 -8) = r1")
+/* main prog */
+__xlated("13: r1 = *(u64 *)(r1 +0)")
+__xlated("14: r6 = r1")
+__xlated("15: call kernel-function")
+__xlated("16: r1 = r6")
+__xlated("17: call pc+")
+/* epilogue */
+__xlated("18: r1 = 0")
+__xlated("19: r6 = 0")
+__xlated("20: call kernel-function")
+__xlated("21: if r0 != 0x0 goto pc+6")
+__xlated("22: r1 = *(u64 *)(r10 -8)")
+__xlated("23: r1 = *(u64 *)(r1 +0)")
+__xlated("24: r6 = *(u64 *)(r1 +0)")
+__xlated("25: r6 += 10000")
+__xlated("26: *(u64 *)(r1 +0) = r6")
+__xlated("27: goto pc+2")
+__xlated("28: r1 = r0")
+__xlated("29: call kernel-function")
+__xlated("30: r0 = r6")
+__xlated("31: r0 *= 2")
+__xlated("32: exit")
+SEC("struct_ops/test_pro_epilogue")
+__naked int test_kfunc_pro_epilogue(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r1 +0);"
+ "r6 = r1;"
+ "call %[bpf_kfunc_st_ops_inc10];"
+ "r1 = r6;"
+ "call subprog;"
+ "exit;"
+ :
+ : __imm(bpf_kfunc_st_ops_inc10)
+ : __clobber_all);
+}
+
+SEC("syscall")
+__retval(22022) /* (PROLOGUE_A [1000] + KFUNC_INC10 + SUBPROG_A [1] + EPILOGUE_A [10000]) * 2 */
+int syscall_pro_epilogue(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_pro_epilogue(&args);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_st_ops pro_epilogue_with_kfunc = {
+ .test_pro_epilogue = (void *)test_kfunc_pro_epilogue,
+};
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index ab3a532b7dd6..43637ee2cdcd 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -242,7 +242,8 @@ out:
}
SEC("?lsm.s/bpf")
-int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size,
+ bool kernel)
{
struct bpf_key *bkey;
@@ -439,3 +440,61 @@ int rcu_read_lock_global_subprog_unlock(void *ctx)
ret += global_subprog_unlock(ret);
return 0;
}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_helper_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_sleepable_helper_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_kfunc_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_sleepable_kfunc_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_global_subprog_indirect(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_subprog_calling_sleepable_global(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c
index 39ebef430059..395591374d4f 100644
--- a/tools/testing/selftests/bpf/progs/read_vsyscall.c
+++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
@@ -8,14 +8,16 @@
int target_pid = 0;
void *user_ptr = 0;
-int read_ret[9];
+int read_ret[10];
char _license[] SEC("license") = "GPL";
/*
- * This is the only kfunc, the others are helpers
+ * These are the kfuncs, the others are helpers
*/
int bpf_copy_from_user_str(void *dst, u32, const void *, u64) __weak __ksym;
+int bpf_copy_from_user_task_str(void *dst, u32, const void *,
+ struct task_struct *, u64) __weak __ksym;
SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int do_probe_read(void *ctx)
@@ -47,6 +49,11 @@ int do_copy_from_user(void *ctx)
read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
bpf_get_current_task_btf(), 0);
read_ret[8] = bpf_copy_from_user_str((char *)buf, sizeof(buf), user_ptr, 0);
+ read_ret[9] = bpf_copy_from_user_task_str((char *)buf,
+ sizeof(buf),
+ user_ptr,
+ bpf_get_current_task_btf(),
+ 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c
new file mode 100644
index 000000000000..9adb5ba4cd4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/set_global_vars.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include <stdbool.h>
+
+char _license[] SEC("license") = "GPL";
+
+enum Enum { EA1 = 0, EA2 = 11 };
+enum Enumu64 {EB1 = 0llu, EB2 = 12llu };
+enum Enums64 { EC1 = 0ll, EC2 = 13ll };
+
+const volatile __s64 var_s64 = -1;
+const volatile __u64 var_u64 = 0;
+const volatile __s32 var_s32 = -1;
+const volatile __u32 var_u32 = 0;
+const volatile __s16 var_s16 = -1;
+const volatile __u16 var_u16 = 0;
+const volatile __s8 var_s8 = -1;
+const volatile __u8 var_u8 = 0;
+const volatile enum Enum var_ea = EA1;
+const volatile enum Enumu64 var_eb = EB1;
+const volatile enum Enums64 var_ec = EC1;
+const volatile bool var_b = false;
+
+char arr[4] = {0};
+
+SEC("socket")
+int test_set_globals(void *ctx)
+{
+ volatile __s8 a;
+
+ a = var_s64;
+ a = var_u64;
+ a = var_s32;
+ a = var_u32;
+ a = var_s16;
+ a = var_u16;
+ a = var_s8;
+ a = var_u8;
+ a = var_ea;
+ a = var_eb;
+ a = var_ec;
+ a = var_b;
+ return a;
+}
diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c
index 18373a7df76e..f47bf88f8d2a 100644
--- a/tools/testing/selftests/bpf/progs/strncmp_bench.c
+++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
@@ -35,7 +35,10 @@ static __always_inline int local_strncmp(const char *s1, unsigned int sz,
SEC("tp/syscalls/sys_enter_getpgid")
int strncmp_no_helper(void *ctx)
{
- if (local_strncmp(str, cmp_str_len + 1, target) < 0)
+ const char *target_str = target;
+
+ barrier_var(target_str);
+ if (local_strncmp(str, cmp_str_len + 1, target_str) < 0)
__sync_add_and_fetch(&hits, 1);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
new file mode 100644
index 000000000000..36386b3c23a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
@@ -0,0 +1,30 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This tests struct_ops BPF programs returning a referenced kptr. The verifier should
+ * allow a referenced kptr or a NULL pointer to be returned. A referenced kptr to the task
+ * here is acquired automatically as the task argument is tagged with "__ref".
+ */
+SEC("struct_ops/test_return_ref_kptr")
+struct task_struct *BPF_PROG(kptr_return, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ if (dummy % 2) {
+ bpf_task_release(task);
+ return NULL;
+ }
+ return task;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return,
+};
+
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c
new file mode 100644
index 000000000000..caeea158ef69
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c
@@ -0,0 +1,26 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This tests struct_ops BPF programs returning a referenced kptr. The verifier should
+ * reject programs returning a non-zero scalar value.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 has smin=1 smax=1 should have been in [0, 0]")
+struct task_struct *BPF_PROG(kptr_return_fail__invalid_scalar, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ bpf_task_release(task);
+ return (struct task_struct *)1;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__invalid_scalar,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c
new file mode 100644
index 000000000000..b8b4f05c3d7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c
@@ -0,0 +1,34 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This tests struct_ops BPF programs returning a referenced kptr. The verifier should
+ * reject programs returning a local kptr.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+struct task_struct *BPF_PROG(kptr_return_fail__local_kptr, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ struct task_struct *t;
+
+ bpf_task_release(task);
+
+ t = bpf_obj_new(typeof(*task));
+ if (!t)
+ return NULL;
+
+ return t;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__local_kptr,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c
new file mode 100644
index 000000000000..7ddeb28c2329
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c
@@ -0,0 +1,25 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This tests struct_ops BPF programs returning a referenced kptr. The verifier should
+ * reject programs returning a modified referenced kptr.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("dereference of modified trusted_ptr_ ptr R0 off={{[0-9]+}} disallowed")
+struct task_struct *BPF_PROG(kptr_return_fail__nonzero_offset, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ return (struct task_struct *)&task->jobctl;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__nonzero_offset,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
new file mode 100644
index 000000000000..6a2dd5367802
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
@@ -0,0 +1,30 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This tests struct_ops BPF programs returning a referenced kptr. The verifier should
+ * reject programs returning a referenced kptr of the wrong type.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+struct task_struct *BPF_PROG(kptr_return_fail__wrong_type, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ struct task_struct *ret;
+
+ ret = (struct task_struct *)bpf_cgroup_acquire(cgrp);
+ bpf_task_release(task);
+
+ return ret;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__wrong_type,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
new file mode 100644
index 000000000000..76dcb6089d7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
@@ -0,0 +1,31 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__attribute__((nomerge)) extern void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This is a test BPF program that uses struct_ops to access a referenced
+ * kptr argument. This is a test for the verifier to ensure that
+ * 1) it recognizes the task as a referenced object (i.e., ref_obj_id > 0), and
+ * 2) the same reference can be acquired from multiple paths as long as it
+ * has not been released.
+ */
+SEC("struct_ops/test_refcounted")
+int BPF_PROG(refcounted, int dummy, struct task_struct *task)
+{
+ if (dummy == 1)
+ bpf_task_release(task);
+ else
+ bpf_task_release(task);
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_refcounted = {
+ .test_refcounted = (void *)refcounted,
+};
+
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c
new file mode 100644
index 000000000000..ae074aa62852
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c
@@ -0,0 +1,39 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+extern void bpf_task_release(struct task_struct *p) __ksym;
+
+__noinline int subprog_release(__u64 *ctx __arg_ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+ int dummy = (int)ctx[0];
+
+ bpf_task_release(task);
+
+ return dummy + 1;
+}
+
+/* Test that the verifier rejects a program that contains a global
+ * subprogram with referenced kptr arguments
+ */
+SEC("struct_ops/test_refcounted")
+__failure __log_level(2)
+__msg("Validating subprog_release() func#1...")
+__msg("invalid bpf_context access off=8. Reference may already be released")
+int refcounted_fail__global_subprog(unsigned long long *ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+
+ bpf_task_release(task);
+
+ return subprog_release(ctx);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__global_subprog,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c
new file mode 100644
index 000000000000..e945b1a04294
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c
@@ -0,0 +1,22 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Test that the verifier rejects a program that acquires a referenced
+ * kptr through context without releasing the reference
+ */
+SEC("struct_ops/test_refcounted")
+__failure __msg("Unreleased reference id=1 alloc_insn=0")
+int BPF_PROG(refcounted_fail__ref_leak, int dummy,
+ struct task_struct *task)
+{
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__ref_leak,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c
new file mode 100644
index 000000000000..3b125025a1f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} prog_array SEC(".maps");
+
+/* Test that the verifier rejects a program with referenced kptr arguments
+ * that makes a tail call
+ */
+SEC("struct_ops/test_refcounted")
+__failure __msg("program with __ref argument cannot tail call")
+int refcounted_fail__tail_call(unsigned long long *ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+
+ bpf_task_release(task);
+ bpf_tail_call(ctx, &prog_array, 0);
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__tail_call,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/summarization.c b/tools/testing/selftests/bpf/progs/summarization.c
new file mode 100644
index 000000000000..f89effe82c9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/summarization.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__noinline
+long changes_pkt_data(struct __sk_buff *sk)
+{
+ return bpf_skb_pull_data(sk, 0);
+}
+
+__noinline __weak
+long does_not_change_pkt_data(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+SEC("?tc")
+int main_changes_with_subprogs(struct __sk_buff *sk)
+{
+ changes_pkt_data(sk);
+ does_not_change_pkt_data(sk);
+ return 0;
+}
+
+SEC("?tc")
+int main_changes(struct __sk_buff *sk)
+{
+ bpf_skb_pull_data(sk, 0);
+ return 0;
+}
+
+SEC("?tc")
+int main_does_not_change(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+__noinline
+long might_sleep(struct pt_regs *ctx __arg_ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+__noinline __weak
+long does_not_sleep(struct pt_regs *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?uprobe.s")
+int main_might_sleep_with_subprogs(struct pt_regs *ctx)
+{
+ might_sleep(ctx);
+ does_not_sleep(ctx);
+ return 0;
+}
+
+SEC("?uprobe.s")
+int main_might_sleep(struct pt_regs *ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+SEC("?uprobe.s")
+int main_does_not_sleep(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/summarization_freplace.c
index f9a622705f1b..935f00e0e9ea 100644
--- a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c
+++ b/tools/testing/selftests/bpf/progs/summarization_freplace.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
SEC("?freplace")
@@ -15,4 +15,19 @@ long does_not_change_pkt_data(struct __sk_buff *sk)
return 0;
}
+SEC("?freplace")
+long might_sleep(struct pt_regs *ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+SEC("?freplace")
+long does_not_sleep(struct pt_regs *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
index 44628865fe1d..4fee0fdc7607 100644
--- a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
+++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
@@ -51,13 +51,13 @@ out:
}
SEC("lsm/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
return bpf_link_create_verify(cmd);
}
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
return bpf_link_create_verify(cmd);
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
index 51b3f79df523..448403634eea 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
@@ -15,6 +15,7 @@ struct {
struct core_reloc_arrays_output {
int a2;
+ int a3;
char b123;
int c1c;
int d00d;
@@ -41,6 +42,7 @@ int test_core_arrays(void *ctx)
{
struct core_reloc_arrays *in = (void *)&data.in;
struct core_reloc_arrays_output *out = (void *)&data.out;
+ int *a;
if (CORE_READ(&out->a2, &in->a[2]))
return 1;
@@ -53,6 +55,9 @@ int test_core_arrays(void *ctx)
if (CORE_READ(&out->f01c, &in->f[0][1].c))
return 1;
+ a = __builtin_preserve_access_index(({ in->a; }));
+ out->a3 = a[0] + a[1] + a[2] + a[3];
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c
index 66e737720f7c..54305f4c9f2d 100644
--- a/tools/testing/selftests/bpf/progs/test_get_xattr.c
+++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c
@@ -6,6 +6,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -17,12 +18,23 @@ static const char expected_value[] = "hello";
char value1[32];
char value2[32];
+/* Matches caller of test_get_xattr() in prog_tests/fs_kfuncs.c */
+static const char xattr_names[][64] = {
+ /* The following work. */
+ "user.kfuncs",
+ "security.bpf.xxx",
+
+ /* The following do not work. */
+ "security.bpf",
+ "security.selinux"
+};
+
SEC("lsm.s/file_open")
int BPF_PROG(test_file_open, struct file *f)
{
struct bpf_dynptr value_ptr;
__u32 pid;
- int ret;
+ int ret, i;
pid = bpf_get_current_pid_tgid() >> 32;
if (pid != monitored_pid)
@@ -30,7 +42,11 @@ int BPF_PROG(test_file_open, struct file *f)
bpf_dynptr_from_mem(value1, sizeof(value1), 0, &value_ptr);
- ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr);
+ for (i = 0; i < ARRAY_SIZE(xattr_names); i++) {
+ ret = bpf_get_file_xattr(f, xattr_names[i], &value_ptr);
+ if (ret == sizeof(expected_value))
+ break;
+ }
if (ret != sizeof(expected_value))
return 0;
if (bpf_strncmp(value1, ret, expected_value))
@@ -44,7 +60,7 @@ int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
{
struct bpf_dynptr value_ptr;
__u32 pid;
- int ret;
+ int ret, i;
pid = bpf_get_current_pid_tgid() >> 32;
if (pid != monitored_pid)
@@ -52,7 +68,11 @@ int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
bpf_dynptr_from_mem(value2, sizeof(value2), 0, &value_ptr);
- ret = bpf_get_dentry_xattr(dentry, "user.kfuncs", &value_ptr);
+ for (i = 0; i < ARRAY_SIZE(xattr_names); i++) {
+ ret = bpf_get_dentry_xattr(dentry, xattr_names[i], &value_ptr);
+ if (ret == sizeof(expected_value))
+ break;
+ }
if (ret != sizeof(expected_value))
return 0;
if (bpf_strncmp(value2, ret, expected_value))
diff --git a/tools/testing/selftests/bpf/progs/test_kernel_flag.c b/tools/testing/selftests/bpf/progs/test_kernel_flag.c
new file mode 100644
index 000000000000..b45fab3be352
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_kernel_flag.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2025 Microsoft Corporation
+ *
+ * Author: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_tid;
+
+SEC("lsm.s/bpf")
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ __u32 tid;
+
+ tid = bpf_get_current_pid_tgid() & 0xFFFFFFFF;
+ if (!kernel || tid != monitored_tid)
+ return 0;
+ else
+ return -EINVAL;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index cd4d752bd089..061befb004c2 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -36,7 +36,7 @@ char _license[] SEC("license") = "GPL";
SEC("?lsm.s/bpf")
__failure __msg("cannot pass in dynptr at an offset=-8")
-int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
unsigned long val;
@@ -46,7 +46,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
SEC("?lsm.s/bpf")
__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
-int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
unsigned long val = 0;
@@ -55,7 +55,7 @@ int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
}
SEC("lsm.s/bpf")
-int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_key *trusted_keyring;
struct bpf_dynptr ptr;
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c
index c73776990ae3..cdbbb12f1491 100644
--- a/tools/testing/selftests/bpf/progs/test_lookup_key.c
+++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c
@@ -23,7 +23,7 @@ extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
extern void bpf_key_put(struct bpf_key *key) __ksym;
SEC("lsm.s/bpf")
-int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_key *bkey;
__u32 pid;
diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
index 2fdc44e76624..89b0cd5a3e06 100644
--- a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
+++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
@@ -7,7 +7,7 @@
char tp_name[128];
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
switch (cmd) {
case BPF_RAW_TRACEPOINT_OPEN:
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 5eb25c6ad75b..a5be3267dbb0 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
-#include <stdlib.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
diff --git a/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c
new file mode 100644
index 000000000000..6a612cf168d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+
+const char xattr_foo[] = "security.bpf.foo";
+const char xattr_bar[] = "security.bpf.bar";
+static const char xattr_selinux[] = "security.selinux";
+char value_bar[] = "world";
+char read_value[32];
+
+bool set_security_bpf_bar_success;
+bool remove_security_bpf_bar_success;
+bool set_security_selinux_fail;
+bool remove_security_selinux_fail;
+
+char name_buf[32];
+
+static inline bool name_match_foo(const char *name)
+{
+ bpf_probe_read_kernel(name_buf, sizeof(name_buf), name);
+
+ return !bpf_strncmp(name_buf, sizeof(xattr_foo), xattr_foo);
+}
+
+/* Test bpf_set_dentry_xattr and bpf_remove_dentry_xattr */
+SEC("lsm.s/inode_getxattr")
+int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* Only do the following for security.bpf.foo */
+ if (!name_match_foo(name))
+ return 0;
+
+ bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr);
+
+ /* read security.bpf.bar */
+ ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr);
+
+ if (ret < 0) {
+ /* If security.bpf.bar doesn't exist, set it */
+ bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr);
+
+ ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0);
+ if (!ret)
+ set_security_bpf_bar_success = true;
+ ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0);
+ if (ret)
+ set_security_selinux_fail = true;
+ } else {
+ /* If security.bpf.bar exists, remove it */
+ ret = bpf_remove_dentry_xattr(dentry, xattr_bar);
+ if (!ret)
+ remove_security_bpf_bar_success = true;
+
+ ret = bpf_remove_dentry_xattr(dentry, xattr_selinux);
+ if (ret)
+ remove_security_selinux_fail = true;
+ }
+
+ return 0;
+}
+
+bool locked_set_security_bpf_bar_success;
+bool locked_remove_security_bpf_bar_success;
+bool locked_set_security_selinux_fail;
+bool locked_remove_security_selinux_fail;
+
+/* Test bpf_set_dentry_xattr_locked and bpf_remove_dentry_xattr_locked.
+ * It is not necessary to differentiate between the _locked version and the
+ * not-_locked version in the BPF program. The verifier will fix them up
+ * properly.
+ */
+SEC("lsm.s/inode_setxattr")
+int BPF_PROG(test_inode_setxattr, struct mnt_idmap *idmap,
+ struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* Only do the following for security.bpf.foo */
+ if (!name_match_foo(name))
+ return 0;
+
+ bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr);
+
+ /* read security.bpf.bar */
+ ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr);
+
+ if (ret < 0) {
+ /* If security.bpf.bar doesn't exist, set it */
+ bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr);
+
+ ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0);
+ if (!ret)
+ locked_set_security_bpf_bar_success = true;
+ ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0);
+ if (ret)
+ locked_set_security_selinux_fail = true;
+ } else {
+ /* If security.bpf.bar exists, remove it */
+ ret = bpf_remove_dentry_xattr(dentry, xattr_bar);
+ if (!ret)
+ locked_remove_security_bpf_bar_success = true;
+
+ ret = bpf_remove_dentry_xattr(dentry, xattr_selinux);
+ if (ret)
+ locked_remove_security_selinux_fail = true;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
index 1c8b678e2e9a..f678ee6bd7ea 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
@@ -245,4 +245,73 @@ int lock_global_subprog_call2(struct __sk_buff *ctx)
return ret;
}
+int __noinline
+global_subprog_int(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog_int(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_helper_subprog(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_sleepable_helper_subprog(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_sleepable_kfunc_subprog(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_subprog_indirect(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_subprog_calling_sleepable_global(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
index 7e750309ce27..0b74b8bd22e8 100644
--- a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -49,7 +49,7 @@ out:
}
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct cgroup *cgrp = NULL;
struct task_struct *task;
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
index 505aab9a5234..096488f47fbc 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -11,6 +11,7 @@ int usdt0_called;
u64 usdt0_cookie;
int usdt0_arg_cnt;
int usdt0_arg_ret;
+int usdt0_arg_size;
SEC("usdt")
int usdt0(struct pt_regs *ctx)
@@ -26,6 +27,7 @@ int usdt0(struct pt_regs *ctx)
usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx);
/* should return -ENOENT for any arg_num */
usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp);
+ usdt0_arg_size = bpf_usdt_arg_size(ctx, bpf_get_prandom_u32());
return 0;
}
@@ -34,6 +36,7 @@ u64 usdt3_cookie;
int usdt3_arg_cnt;
int usdt3_arg_rets[3];
u64 usdt3_args[3];
+int usdt3_arg_sizes[3];
SEC("usdt//proc/self/exe:test:usdt3")
int usdt3(struct pt_regs *ctx)
@@ -50,12 +53,15 @@ int usdt3(struct pt_regs *ctx)
usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp);
usdt3_args[0] = (int)tmp;
+ usdt3_arg_sizes[0] = bpf_usdt_arg_size(ctx, 0);
usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp);
usdt3_args[1] = (long)tmp;
+ usdt3_arg_sizes[1] = bpf_usdt_arg_size(ctx, 1);
usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp);
usdt3_args[2] = (uintptr_t)tmp;
+ usdt3_arg_sizes[2] = bpf_usdt_arg_size(ctx, 2);
return 0;
}
@@ -64,12 +70,15 @@ int usdt12_called;
u64 usdt12_cookie;
int usdt12_arg_cnt;
u64 usdt12_args[12];
+int usdt12_arg_sizes[12];
SEC("usdt//proc/self/exe:test:usdt12")
int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
long a6, __u64 a7, uintptr_t a8, int a9, short a10,
short a11, signed char a12)
{
+ int i;
+
if (my_pid != (bpf_get_current_pid_tgid() >> 32))
return 0;
@@ -90,6 +99,11 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
usdt12_args[9] = a10;
usdt12_args[10] = a11;
usdt12_args[11] = a12;
+
+ bpf_for(i, 0, 12) {
+ usdt12_arg_sizes[i] = bpf_usdt_arg_size(ctx, i);
+ }
+
return 0;
}
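The usdt*_arg_size fields added above let the userspace half of the test
compare bpf_usdt_arg_size() against the size of each traced argument. A
sketch of how the no-argument case (usdt0) might be checked, assuming the
usual skeleton naming in prog_tests/usdt.c:

	/* both helpers are expected to report -ENOENT when there are no args */
	ASSERT_EQ(skel->bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
	ASSERT_EQ(skel->bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");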
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
index 12034a73ee2d..e96d09e11115 100644
--- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -37,7 +37,7 @@ struct {
char _license[] SEC("license") = "GPL";
SEC("lsm.s/bpf")
-int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_dynptr data_ptr, sig_ptr;
struct data *data_val;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index a7588302268d..a80cc5f2f4f2 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -102,8 +102,8 @@ bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
#define TESTVLAN 4011 /* 0xFAB */
// #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */
-SEC("xdp_drop_vlan_4011")
-int xdp_prognum0(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_drop_vlan_4011(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -144,8 +144,8 @@ Load prog with ip tool:
/* Changing VLAN to zero, have same practical effect as removing the VLAN. */
#define TO_VLAN 0
-SEC("xdp_vlan_change")
-int xdp_prognum1(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_change(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -178,8 +178,8 @@ int xdp_prognum1(struct xdp_md *ctx)
#endif
#define VLAN_HDR_SZ 4 /* bytes */
-SEC("xdp_vlan_remove_outer")
-int xdp_prognum2(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_remove_outer(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -224,8 +224,8 @@ void shift_mac_4bytes_32bit(void *data)
p[1] = p[0];
}
-SEC("xdp_vlan_remove_outer2")
-int xdp_prognum3(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_remove_outer2(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -254,8 +254,8 @@ int xdp_prognum3(struct xdp_md *ctx)
* The TC-clsact eBPF programs (currently) need to be attach via TC commands
*/
-SEC("tc_vlan_push")
-int _tc_progA(struct __sk_buff *ctx)
+SEC("tc")
+int tc_vlan_push(struct __sk_buff *ctx)
{
bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
index 5094c288cfd7..a9be6ae49454 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
@@ -620,23 +620,61 @@ __naked void helper_call_does_not_prevent_bpf_fastcall(void)
SEC("raw_tp")
__arch_x86_64
+__log_level(4) __msg("stack depth 24")
+/* may_goto counter at -24 */
+__xlated("0: *(u64 *)(r10 -24) =")
+/* may_goto timestamp at -16 */
+__xlated("1: *(u64 *)(r10 -16) =")
+__xlated("2: r1 = 1")
+__xlated("...")
+__xlated("4: r0 = &(void __percpu *)(r0)")
+__xlated("...")
+/* may_goto expansion starts */
+__xlated("6: r11 = *(u64 *)(r10 -24)")
+__xlated("7: if r11 == 0x0 goto pc+6")
+__xlated("8: r11 -= 1")
+__xlated("9: if r11 != 0x0 goto pc+2")
+__xlated("10: r11 = -24")
+__xlated("11: call unknown")
+__xlated("12: *(u64 *)(r10 -24) = r11")
+/* may_goto expansion ends */
+__xlated("13: *(u64 *)(r10 -8) = r1")
+__xlated("14: exit")
+__success
+__naked void may_goto_interaction_x86_64(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ ".8byte %[may_goto];"
+ /* just touch some stack at -8 */
+ "*(u64 *)(r10 - 8) = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_arm64
__log_level(4) __msg("stack depth 16")
/* may_goto counter at -16 */
__xlated("0: *(u64 *)(r10 -16) =")
__xlated("1: r1 = 1")
-__xlated("...")
-__xlated("3: r0 = &(void __percpu *)(r0)")
-__xlated("...")
+__xlated("2: call bpf_get_smp_processor_id")
/* may_goto expansion starts */
-__xlated("5: r11 = *(u64 *)(r10 -16)")
-__xlated("6: if r11 == 0x0 goto pc+3")
-__xlated("7: r11 -= 1")
-__xlated("8: *(u64 *)(r10 -16) = r11")
+__xlated("3: r11 = *(u64 *)(r10 -16)")
+__xlated("4: if r11 == 0x0 goto pc+3")
+__xlated("5: r11 -= 1")
+__xlated("6: *(u64 *)(r10 -16) = r11")
/* may_goto expansion ends */
-__xlated("9: *(u64 *)(r10 -8) = r1")
-__xlated("10: exit")
+__xlated("7: *(u64 *)(r10 -8) = r1")
+__xlated("8: exit")
__success
-__naked void may_goto_interaction(void)
+__naked void may_goto_interaction_arm64(void)
{
asm volatile (
"r1 = 1;"
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
index 05a329ee45ee..d5d8f24df394 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotol.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -4,11 +4,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
- defined(__TARGET_ARCH_loongarch)) && \
- __clang_major__ >= 18
+#ifdef CAN_USE_GOTOL
SEC("socket")
__description("gotol, small_imm")
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index e54bb5385bc1..75dd922e4e9f 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -407,11 +407,7 @@ l0_%=: call %[bpf_jiffies64]; \
: __clobber_all);
}
-#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
- defined(__TARGET_ARCH_loongarch)) && \
- __clang_major__ >= 18
+#ifdef CAN_USE_GOTOL
SEC("socket")
__success __retval(0)
__naked void gotol_and_may_goto(void)
diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
new file mode 100644
index 000000000000..77698d5a19e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__description("load-acquire, 8-bit")
+__success __success_unpriv __retval(0x12)
+__naked void load_acquire_8(void)
+{
+ asm volatile (
+ "w1 = 0x12;"
+ "*(u8 *)(r10 - 1) = w1;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 16-bit")
+__success __success_unpriv __retval(0x1234)
+__naked void load_acquire_16(void)
+{
+ asm volatile (
+ "w1 = 0x1234;"
+ "*(u16 *)(r10 - 2) = w1;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 32-bit")
+__success __success_unpriv __retval(0x12345678)
+__naked void load_acquire_32(void)
+{
+ asm volatile (
+ "w1 = 0x12345678;"
+ "*(u32 *)(r10 - 4) = w1;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 64-bit")
+__success __success_unpriv __retval(0x1234567890abcdef)
+__naked void load_acquire_64(void)
+{
+ asm volatile (
+ "r1 = 0x1234567890abcdef ll;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with uninitialized src_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void load_acquire_with_uninitialized_src_reg(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r2 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with non-pointer src_reg")
+__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'")
+__naked void load_acquire_with_non_pointer_src_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r1 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned load-acquire")
+__failure __failure_unpriv __msg("misaligned stack access off")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void load_acquire_misaligned(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 5));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -5))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire from ctx pointer")
+__failure __failure_unpriv __msg("BPF_ATOMIC loads from R1 ctx is not allowed")
+__naked void load_acquire_from_ctx_pointer(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r1 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("load-acquire from pkt pointer")
+__failure __msg("BPF_ATOMIC loads from R2 pkt is not allowed")
+__naked void load_acquire_from_pkt_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u32 *)(r1 + %[xdp_md_data]);"
+ "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);"
+ "r1 = r2;"
+ "r1 += 8;"
+ "if r1 >= r3 goto l0_%=;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0));
+"l0_%=: r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("load-acquire from flow_keys pointer")
+__failure __msg("BPF_ATOMIC loads from R2 flow_keys is not allowed")
+__naked void load_acquire_from_flow_keys_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0));
+ "exit;"
+ :
+ : __imm_const(__sk_buff_flow_keys,
+ offsetof(struct __sk_buff, flow_keys)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("load-acquire from sock pointer")
+__failure __msg("BPF_ATOMIC loads from R2 sock is not allowed")
+__naked void load_acquire_from_sock_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);"
+ // w0 = load_acquire((u8 *)(r2 + offsetof(struct bpf_sock, family)));
+ ".8byte %[load_acquire_insn];"
+ "exit;"
+ :
+ : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2,
+ offsetof(struct bpf_sock, family)))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with invalid register R15")
+__failure __failure_unpriv __msg("R15 is invalid")
+__naked void load_acquire_with_invalid_reg(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r15 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, 15 /* invalid reg */, 0))
+ : __clobber_all);
+}
+
+#else /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support load-acquire, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+char _license[] SEC("license") = "GPL";
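The tests above emit load-acquire via ".8byte %[load_acquire_insn]" because
older compilers cannot assemble the instruction directly; BPF_ATOMIC_OP()
from include/linux/filter.h builds the raw encoding. Paraphrased here for
reference (see the header for the authoritative definition):

	#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF)			\
		((struct bpf_insn) {					\
			.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC,	\
			.dst_reg = DST,					\
			.src_reg = SRC,					\
			.off   = OFF,					\
			.imm   = OP })

For BPF_LOAD_ACQ the dst_reg receives the loaded value and src_reg holds the
address, mirroring a plain load; BPF_STORE_REL uses the opposite roles, as
the store-release tests later in this patch show.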
diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
index e81097c96fe2..3966d827f288 100644
--- a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
@@ -69,8 +69,38 @@ __naked void may_goto_batch_1(void)
}
SEC("raw_tp")
-__description("may_goto batch with offsets 2/0")
+__description("may_goto batch with offsets 2/0 - x86_64")
__arch_x86_64
+__xlated("0: *(u64 *)(r10 -16) = 65535")
+__xlated("1: *(u64 *)(r10 -8) = 0")
+__xlated("2: r11 = *(u64 *)(r10 -16)")
+__xlated("3: if r11 == 0x0 goto pc+6")
+__xlated("4: r11 -= 1")
+__xlated("5: if r11 != 0x0 goto pc+2")
+__xlated("6: r11 = -16")
+__xlated("7: call unknown")
+__xlated("8: *(u64 *)(r10 -16) = r11")
+__xlated("9: r0 = 1")
+__xlated("10: r0 = 2")
+__xlated("11: exit")
+__success
+__naked void may_goto_batch_2_x86_64(void)
+{
+ asm volatile (
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto3];"
+ "r0 = 1;"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)),
+ __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__description("may_goto batch with offsets 2/0 - arm64")
+__arch_arm64
__xlated("0: *(u64 *)(r10 -8) = 8388608")
__xlated("1: r11 = *(u64 *)(r10 -8)")
__xlated("2: if r11 == 0x0 goto pc+3")
@@ -80,7 +110,7 @@ __xlated("5: r0 = 1")
__xlated("6: r0 = 2")
__xlated("7: exit")
__success
-__naked void may_goto_batch_2(void)
+__naked void may_goto_batch_2_arm64(void)
{
asm volatile (
".8byte %[may_goto1];"
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 6b564d4c0986..6662d4b39969 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2023 SUSE LLC */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
SEC("?raw_tp")
@@ -90,6 +91,54 @@ __naked int bpf_end_bswap(void)
::: __clobber_all);
}
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (db) r2 = load_acquire((u64 *)(r10 -8))")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_load_acquire(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" /* r2 = load_acquire((u64 *)(r10 - 8)); */
+ "r3 = r10;"
+ "r3 += r2;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r2 = r10")
+__msg("mark_precise: frame0: regs=r1 stack= before 2: (79) r1 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (db) store_release((u64 *)(r10 -8), r1)")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_store_release(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ ".8byte %[store_release_insn];" /* store_release((u64 *)(r10 - 8), r1); */
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = r10;"
+ "r2 += r1;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+#endif /* load-acquire, store-release */
#endif /* v4 instruction */
SEC("?raw_tp")
diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
index 417c61cd4b19..24aabc6083fd 100644
--- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
@@ -481,4 +481,56 @@ l1_%=: r0 = 42; \
: __clobber_all);
}
+SEC("socket")
+__description("PTR_TO_STACK stack size > 512")
+__failure __msg("invalid write to stack R1 off=-520 size=8")
+__naked void stack_check_size_gt_512(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -520; \
+ r0 = 42; \
+ *(u64*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+#ifdef __BPF_FEATURE_MAY_GOTO
+SEC("socket")
+__description("PTR_TO_STACK stack size 512 with may_goto with jit")
+__load_if_JITed()
+__success __retval(42)
+__naked void stack_check_size_512_with_may_goto_jit(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u32*)(r1 + 0) = r0; \
+ may_goto l0_%=; \
+ r2 = 100; \
+ l0_%=: \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK stack size 512 with may_goto without jit")
+__load_if_no_JITed()
+__failure __msg("stack size 520(extra 8) is too large")
+__naked void stack_check_size_512_with_may_goto(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u32*)(r1 + 0) = r0; \
+ may_goto l0_%=; \
+ r2 = 100; \
+ l0_%=: \
+ exit; \
+" ::: __clobber_all);
+}
+#endif
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c
new file mode 100644
index 000000000000..c0442d5bb049
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+
+SEC("socket")
+__description("store-release, 8-bit")
+__success __success_unpriv __retval(0x12)
+__naked void store_release_8(void)
+{
+ asm volatile (
+ "w1 = 0x12;"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1);
+ "w0 = *(u8 *)(r10 - 1);"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 16-bit")
+__success __success_unpriv __retval(0x1234)
+__naked void store_release_16(void)
+{
+ asm volatile (
+ "w1 = 0x1234;"
+ ".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1);
+ "w0 = *(u16 *)(r10 - 2);"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_H, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -2))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 32-bit")
+__success __success_unpriv __retval(0x12345678)
+__naked void store_release_32(void)
+{
+ asm volatile (
+ "w1 = 0x12345678;"
+ ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1);
+ "w0 = *(u32 *)(r10 - 4);"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -4))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 64-bit")
+__success __success_unpriv __retval(0x1234567890abcdef)
+__naked void store_release_64(void)
+{
+ asm volatile (
+ "r1 = 0x1234567890abcdef ll;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
+ "r0 = *(u64 *)(r10 - 8);"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with uninitialized src_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void store_release_with_uninitialized_src_reg(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r2);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with uninitialized dst_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void store_release_with_uninitialized_dst_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r2 - 8), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with non-pointer dst_reg")
+__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'")
+__naked void store_release_with_non_pointer_dst_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r1 + 0), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_1, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned store-release")
+__failure __failure_unpriv __msg("misaligned stack access off")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void store_release_misaligned(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 5), w0);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_0, -5))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release to ctx pointer")
+__failure __failure_unpriv __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__naked void store_release_to_ctx_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ // store_release((u8 *)(r1 + offsetof(struct __sk_buff, cb[0])), w0);
+ ".8byte %[store_release_insn];"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("store-release to pkt pointer")
+__failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed")
+__naked void store_release_to_pkt_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u32 *)(r1 + %[xdp_md_data]);"
+ "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);"
+ "r1 = r2;"
+ "r1 += 8;"
+ "if r1 >= r3 goto l0_%=;"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+"l0_%=: r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("store-release to flow_keys pointer")
+__failure __msg("BPF_ATOMIC stores into R2 flow_keys is not allowed")
+__naked void store_release_to_flow_keys_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+ "exit;"
+ :
+ : __imm_const(__sk_buff_flow_keys,
+ offsetof(struct __sk_buff, flow_keys)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("store-release to sock pointer")
+__failure __msg("R2 cannot write into sock")
+__naked void store_release_to_sock_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+ "exit;"
+ :
+ : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, leak pointer to stack")
+__success __success_unpriv __retval(0)
+__naked void store_release_leak_pointer_to_stack(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("store-release, leak pointer to map")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("R6 leaks addr into map")
+__naked void store_release_leak_pointer_to_map(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r1 = %[map_hash_8b] ll;"
+ "r2 = 0;"
+ "*(u64 *)(r10 - 8) = r2;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto l0_%=;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r0 + 0), r6);
+"l0_%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(map_hash_8b),
+ __imm(bpf_map_lookup_elem),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_0, BPF_REG_6, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with invalid register R15")
+__failure __failure_unpriv __msg("R15 is invalid")
+__naked void store_release_with_invalid_reg(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r15 + 0), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, 15 /* invalid reg */, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+#else
+
+SEC("socket")
+__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
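Semantically, the two raw encodings used in these new files model the C11
acquire/release atomics that a sufficiently new compiler can also generate
directly. A rough C-level sketch, assuming compiler support for BPF
load-acquire/store-release code generation:

	__u64 x = 0, v;

	__atomic_store_n(&x, 1, __ATOMIC_RELEASE);	/* store-release */
	v = __atomic_load_n(&x, __ATOMIC_ACQUIRE);	/* load-acquire  */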
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
index 682dda8dabbc..50c8958f94e5 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
@@ -1,7 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/if_ether.h>
+
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
@@ -28,4 +31,89 @@ int xdp_redirect_map_2(struct xdp_md *xdp)
return bpf_redirect_map(&tx_port, 2, 0);
}
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} rxcnt SEC(".maps");
+
+static int xdp_count(struct xdp_md *xdp, __u32 key)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u64 *count;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_DROP;
+
+ if (bpf_htons(eth->h_proto) == ETH_P_IP) {
+ /* We only count IPv4 packets */
+ count = bpf_map_lookup_elem(&rxcnt, &key);
+ if (count)
+ *count += 1;
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_count_0(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 0);
+}
+
+SEC("xdp")
+int xdp_count_1(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 1);
+}
+
+SEC("xdp")
+int xdp_count_2(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 2);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __be64);
+} rx_mac SEC(".maps");
+
+static int store_mac(struct xdp_md *xdp, __u32 id)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u32 key = id;
+ __be64 mac = 0;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_DROP;
+
+	/* Only store the IPv4 source MAC so IPv6 traffic does not overwrite it */
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ __builtin_memcpy(&mac, eth->h_source, ETH_ALEN);
+ bpf_map_update_elem(&rx_mac, &key, &mac, 0);
+		bpf_printk("%s - %llx", __func__, mac);
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int store_mac_1(struct xdp_md *xdp)
+{
+ return store_mac(xdp, 0);
+}
+
+SEC("xdp")
+int store_mac_2(struct xdp_md *xdp)
+{
+ return store_mac(xdp, 1);
+}
+
char _license[] SEC("license") = "GPL";
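The rxcnt and rx_mac maps added above give the redirect selftest a way to
verify which program (and therefore which device) actually saw the traffic.
A sketch of the userspace side in the prog_tests harness, assuming the
generated xdp_redirect_map skeleton is opened in 'skel':

	__u32 key = 1;
	__u64 pkts = 0;

	/* read the IPv4 counter bumped by xdp_count_1() */
	if (!bpf_map_lookup_elem(bpf_map__fd(skel->maps.rxcnt), &key, &pkts))
		printf("IPv4 packets seen by xdp_count_1: %llu\n",
		       (unsigned long long)pkts);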
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
index 97b26a30b59a..bc2945ed8a80 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -34,6 +34,14 @@ struct {
__uint(max_entries, 128);
} mac_map SEC(".maps");
+/* Map to store redirect flags for each protocol */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u16);
+ __type(value, __u64);
+ __uint(max_entries, 16);
+} redirect_flags SEC(".maps");
+
SEC("xdp")
int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
{
@@ -41,25 +49,34 @@ int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
void *data = (void *)(long)ctx->data;
int if_index = ctx->ingress_ifindex;
struct ethhdr *eth = data;
+ __u64 *flags_from_map;
__u16 h_proto;
__u64 nh_off;
+ __u64 flags;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return XDP_DROP;
- h_proto = eth->h_proto;
-
- /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */
- if (h_proto == bpf_htons(ETH_P_IP))
- return bpf_redirect_map(&map_all, 0,
- BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
- /* Using IPv6 for none flag testing */
- else if (h_proto == bpf_htons(ETH_P_IPV6))
- return bpf_redirect_map(&map_all, if_index, 0);
- /* All others for BPF_F_BROADCAST testing */
- else
- return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST);
+ h_proto = bpf_htons(eth->h_proto);
+
+ flags_from_map = bpf_map_lookup_elem(&redirect_flags, &h_proto);
+
+ /* Default flags for IPv4 : (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) */
+ if (h_proto == ETH_P_IP) {
+ flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS;
+ return bpf_redirect_map(&map_all, 0, flags);
+ }
+ /* Default flags for IPv6 : 0 */
+ if (h_proto == ETH_P_IPV6) {
+ flags = flags_from_map ? *flags_from_map : 0;
+ return bpf_redirect_map(&map_all, if_index, flags);
+ }
+	/* Default flags for others : BPF_F_BROADCAST */
+ else {
+ flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST;
+ return bpf_redirect_map(&map_all, 0, flags);
+ }
}
/* The following 2 progs are for 2nd devmap prog testing */
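With the redirect_flags map, the userspace test can now override the
per-protocol redirect flags at run time instead of relying on the hardcoded
defaults. Keys are host-order ethertypes, since the program converts
eth->h_proto with bpf_htons() before the lookup. A sketch, assuming the
generated skeleton is opened in 'skel':

	__u16 proto = ETH_P_IP;
	__u64 flags = BPF_F_BROADCAST;

	/* make IPv4 use plain broadcast instead of the default
	 * BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS
	 */
	bpf_map_update_elem(bpf_map__fd(skel->maps.redirect_flags),
			    &proto, &flags, BPF_ANY);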
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index fb4f4714eeb4..e65889ab4adf 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -72,9 +72,15 @@
#define BTF_TYPE_FLOAT_ENC(name, sz) \
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+#define BTF_DECL_ATTR_ENC(value, type, component_idx) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), type), (component_idx)
+
#define BTF_DECL_TAG_ENC(value, type, component_idx) \
BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)
+#define BTF_TYPE_ATTR_ENC(value, type) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 1, 0), type)
+
#define BTF_TYPE_TAG_ENC(value, type) \
BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)
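The only difference between the new *_ATTR_ENC macros and the existing
*_TAG_ENC ones is the kind_flag bit in BTF_INFO_ENC(), which marks the tag
as a compiler-emitted attribute rather than a plain tag. A raw-BTF usage
sketch, reusing the illustrative NAME_TBD placeholder these tests already
rely on:

	BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),	/* [1] int */
	BTF_VAR_ENC(NAME_TBD, 1, 0),				/* [2] var of type [1] */
	BTF_DECL_ATTR_ENC(NAME_TBD, 2, -1),			/* [3] attribute on var [2] */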
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index cc9dde507aba..3220f1d28697 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -1130,6 +1130,7 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
};
static const struct bpf_verifier_ops bpf_testmod_verifier_ops = {
+ .get_func_proto = bpf_base_func_proto,
.is_valid_access = bpf_testmod_ops_is_valid_access,
};
@@ -1176,10 +1177,25 @@ static int bpf_testmod_ops__test_maybe_null(int dummy,
return 0;
}
+static int bpf_testmod_ops__test_refcounted(int dummy,
+ struct task_struct *task__ref)
+{
+ return 0;
+}
+
+static struct task_struct *
+bpf_testmod_ops__test_return_ref_kptr(int dummy, struct task_struct *task__ref,
+ struct cgroup *cgrp)
+{
+ return NULL;
+}
+
static struct bpf_testmod_ops __bpf_testmod_ops = {
.test_1 = bpf_testmod_test_1,
.test_2 = bpf_testmod_test_2,
.test_maybe_null = bpf_testmod_ops__test_maybe_null,
+ .test_refcounted = bpf_testmod_ops__test_refcounted,
+ .test_return_ref_kptr = bpf_testmod_ops__test_return_ref_kptr,
};
struct bpf_struct_ops bpf_bpf_testmod_ops = {
@@ -1293,6 +1309,85 @@ static int bpf_test_mod_st_ops__test_pro_epilogue(struct st_ops_args *args)
return 0;
}
+static int bpf_cgroup_from_id_id;
+static int bpf_cgroup_release_id;
+
+static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ /* r8 = r1; // r8 will be "u64 *ctx".
+ * r1 = 0;
+ * r0 = bpf_cgroup_from_id(r1);
+ * if r0 != 0 goto pc+5;
+ * r6 = r8[0]; // r6 will be "struct st_ops *args".
+ * r7 = r6->a;
+ * r7 += 1000;
+ * r6->a = r7;
+ * goto pc+2;
+ * r1 = r0;
+ * bpf_cgroup_release(r1);
+ * r1 = r8;
+ */
+ *insn++ = BPF_MOV64_REG(BPF_REG_8, BPF_REG_1);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id);
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 5);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_8, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+	*insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8);
+ *insn++ = prog->insnsi[0];
+
+ return insn - insn_buf;
+}
+
+static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struct bpf_prog *prog,
+ s16 ctx_stack_off)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ /* r1 = 0;
+ * r6 = 0;
+ * r0 = bpf_cgroup_from_id(r1);
+ * if r0 != 0 goto pc+6;
+ * r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
+ * r1 = r1[0]; // r1 will be "struct st_ops *args"
+ * r6 = r1->a;
+ * r6 += 10000;
+ * r1->a = r6;
+ * goto pc+2
+ * r1 = r0;
+ * bpf_cgroup_release(r1);
+ * r0 = r6;
+ * r0 *= 2;
+ * BPF_EXIT;
+ */
+ *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_6, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id);
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 6);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 10000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+	*insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
+ *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6);
+ *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2);
+ *insn++ = BPF_EXIT_INSN();
+
+ return insn - insn_buf;
+}
+
+#define KFUNC_PRO_EPI_PREFIX "test_kfunc_"
static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog)
{
@@ -1302,6 +1397,9 @@ static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
return 0;
+ if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX)))
+ return st_ops_gen_prologue_with_kfunc(insn_buf, direct_write, prog);
+
/* r6 = r1[0]; // r6 will be "struct st_ops *args". r1 is "u64 *ctx".
* r7 = r6->a;
* r7 += 1000;
@@ -1325,6 +1423,9 @@ static int st_ops_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog
strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
return 0;
+ if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX)))
+ return st_ops_gen_epilogue_with_kfunc(insn_buf, prog, ctx_stack_off);
+
/* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
* r1 = r1[0]; // r1 will be "struct st_ops *args"
* r6 = r1->a;
@@ -1395,6 +1496,13 @@ static void st_ops_unreg(void *kdata, struct bpf_link *link)
static int st_ops_init(struct btf *btf)
{
+ struct btf *kfunc_btf;
+
+ bpf_cgroup_from_id_id = bpf_find_btf_id("bpf_cgroup_from_id", BTF_KIND_FUNC, &kfunc_btf);
+ bpf_cgroup_release_id = bpf_find_btf_id("bpf_cgroup_release", BTF_KIND_FUNC, &kfunc_btf);
+ if (bpf_cgroup_from_id_id < 0 || bpf_cgroup_release_id < 0)
+ return -EINVAL;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
index 356803d1c10e..c9fab51f16e2 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
@@ -6,6 +6,7 @@
#include <linux/types.h>
struct task_struct;
+struct cgroup;
struct bpf_testmod_test_read_ctx {
char *buf;
@@ -36,6 +37,11 @@ struct bpf_testmod_ops {
/* Used to test nullable arguments. */
int (*test_maybe_null)(int dummy, struct task_struct *task);
int (*unsupported_ops)(void);
+	/* Used to test referenced (__ref) arguments. */
+ int (*test_refcounted)(int dummy, struct task_struct *task);
+ /* Used to test returning referenced kptr. */
+ struct task_struct *(*test_return_ref_kptr)(int dummy, struct task_struct *task,
+ struct cgroup *cgrp);
/* The following fields are used to test shadow copies. */
char onebyte;
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 53b06647cf57..49f2fc61061f 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -37,6 +37,7 @@
#define TEST_TAG_JITED_PFX "comment:test_jited="
#define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv="
#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv="
+#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode="
/* Warning: duplicated in bpf_misc.h */
#define POINTER_VALUE 0xcafe4all
@@ -55,6 +56,11 @@ enum mode {
UNPRIV = 2
};
+enum load_mode {
+ JITED = 1 << 0,
+ NO_JITED = 1 << 1,
+};
+
struct expect_msg {
const char *substr; /* substring match */
regex_t regex;
@@ -87,6 +93,7 @@ struct test_spec {
int prog_flags;
int mode_mask;
int arch_mask;
+ int load_mask;
bool auxiliary;
bool valid;
};
@@ -406,6 +413,7 @@ static int parse_test_spec(struct test_loader *tester,
bool collect_jit = false;
int func_id, i, err = 0;
u32 arch_mask = 0;
+ u32 load_mask = 0;
struct btf *btf;
enum arch arch;
@@ -580,10 +588,22 @@ static int parse_test_spec(struct test_loader *tester,
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
+ } else if (str_has_pfx(s, TEST_TAG_LOAD_MODE_PFX)) {
+ val = s + sizeof(TEST_TAG_LOAD_MODE_PFX) - 1;
+ if (strcmp(val, "jited") == 0) {
+ load_mask = JITED;
+ } else if (strcmp(val, "no_jited") == 0) {
+ load_mask = NO_JITED;
+ } else {
+ PRINT_FAIL("bad load spec: '%s'", val);
+ err = -EINVAL;
+ goto cleanup;
+ }
}
}
spec->arch_mask = arch_mask ?: -1;
+ spec->load_mask = load_mask ?: (JITED | NO_JITED);
if (spec->mode_mask == 0)
spec->mode_mask = PRIV;
@@ -773,7 +793,7 @@ static int drop_capabilities(struct cap_state *caps)
err = cap_disable_effective(caps_to_drop, &caps->old_caps);
if (err) {
- PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(-err));
return err;
}
@@ -790,7 +810,7 @@ static int restore_capabilities(struct cap_state *caps)
err = cap_enable_effective(caps->old_caps, NULL);
if (err)
- PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(-err));
caps->initialized = false;
return err;
}
@@ -928,6 +948,7 @@ void run_subtest(struct test_loader *tester,
bool unpriv)
{
struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv;
+ int current_runtime = is_jit_enabled() ? JITED : NO_JITED;
struct bpf_program *tprog = NULL, *tprog_iter;
struct bpf_link *link, *links[32] = {};
struct test_spec *spec_iter;
@@ -946,6 +967,11 @@ void run_subtest(struct test_loader *tester,
return;
}
+ if ((current_runtime & spec->load_mask) == 0) {
+ test__skip();
+ return;
+ }
+
if (unpriv) {
if (!can_execute_unpriv(tester, spec)) {
test__skip();
@@ -959,7 +985,7 @@ void run_subtest(struct test_loader *tester,
if (subspec->caps) {
err = cap_enable_effective(subspec->caps, NULL);
if (err) {
- PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(-err));
goto subtest_cleanup;
}
}
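On the program side, the "comment:load_mode=" tag parsed above is what the
__load_if_JITed()/__load_if_no_JITed() annotations (used in
verifier_stack_ptr.c earlier in this patch) are expected to emit. A sketch
of the likely bpf_misc.h definitions, which are not shown in this diff:

	#define __load_if_JITed()	__attribute__((btf_decl_tag("comment:load_mode=jited")))
	#define __load_if_no_JITed()	__attribute__((btf_decl_tag("comment:load_mode=no_jited")))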
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
deleted file mode 100755
index 1e565f47aca9..000000000000
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ /dev/null
@@ -1,476 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Setup/topology:
-#
-# NS1 NS2 NS3
-# veth1 <---> veth2 veth3 <---> veth4 (the top route)
-# veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
-#
-# each vethN gets IPv[4|6]_N address
-#
-# IPv*_SRC = IPv*_1
-# IPv*_DST = IPv*_4
-#
-# all tests test pings from IPv*_SRC to IPv*_DST
-#
-# by default, routes are configured to allow packets to go
-# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route)
-#
-# a GRE device is installed in NS3 with IPv*_GRE, and
-# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8
-# (the bottom route)
-#
-# Tests:
-#
-# 1. routes NS2->IPv*_DST are brought down, so the only way a ping
-# from IP*_SRC to IP*_DST can work is via IPv*_GRE
-#
-# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1
-# that encaps the packets with an IP/GRE header to route to IPv*_GRE
-#
-# ping: SRC->[encap at veth1:egress]->GRE:decap->DST
-# ping replies go DST->SRC directly
-#
-# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2
-# that encaps the packets with an IP/GRE header to route to IPv*_GRE
-#
-# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
-# ping replies go DST->SRC directly
-
-BPF_FILE="test_lwt_ip_encap.bpf.o"
-if [[ $EUID -ne 0 ]]; then
- echo "This script must be run as root"
- echo "FAIL"
- exit 1
-fi
-
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-readonly NS3="ns3-$(mktemp -u XXXXXX)"
-
-readonly IPv4_1="172.16.1.100"
-readonly IPv4_2="172.16.2.100"
-readonly IPv4_3="172.16.3.100"
-readonly IPv4_4="172.16.4.100"
-readonly IPv4_5="172.16.5.100"
-readonly IPv4_6="172.16.6.100"
-readonly IPv4_7="172.16.7.100"
-readonly IPv4_8="172.16.8.100"
-readonly IPv4_GRE="172.16.16.100"
-
-readonly IPv4_SRC=$IPv4_1
-readonly IPv4_DST=$IPv4_4
-
-readonly IPv6_1="fb01::1"
-readonly IPv6_2="fb02::1"
-readonly IPv6_3="fb03::1"
-readonly IPv6_4="fb04::1"
-readonly IPv6_5="fb05::1"
-readonly IPv6_6="fb06::1"
-readonly IPv6_7="fb07::1"
-readonly IPv6_8="fb08::1"
-readonly IPv6_GRE="fb10::1"
-
-readonly IPv6_SRC=$IPv6_1
-readonly IPv6_DST=$IPv6_4
-
-TEST_STATUS=0
-TESTS_SUCCEEDED=0
-TESTS_FAILED=0
-
-TMPFILE=""
-
-process_test_results()
-{
- if [[ "${TEST_STATUS}" -eq 0 ]] ; then
- echo "PASS"
- TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1))
- else
- echo "FAIL"
- TESTS_FAILED=$((TESTS_FAILED+1))
- fi
-}
-
-print_test_summary_and_exit()
-{
- echo "passed tests: ${TESTS_SUCCEEDED}"
- echo "failed tests: ${TESTS_FAILED}"
- if [ "${TESTS_FAILED}" -eq "0" ] ; then
- exit 0
- else
- exit 1
- fi
-}
-
-setup()
-{
- set -e # exit on error
- TEST_STATUS=0
-
- # create devices and namespaces
- ip netns add "${NS1}"
- ip netns add "${NS2}"
- ip netns add "${NS3}"
-
- # rp_filter gets confused by what these tests are doing, so disable it
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
- # disable IPv6 DAD because it sometimes takes too long and fails tests
- ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0
- ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0
-
- ip link add veth1 type veth peer name veth2
- ip link add veth3 type veth peer name veth4
- ip link add veth5 type veth peer name veth6
- ip link add veth7 type veth peer name veth8
-
- ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip link set veth1 netns ${NS1}
- ip link set veth2 netns ${NS2}
- ip link set veth3 netns ${NS2}
- ip link set veth4 netns ${NS3}
- ip link set veth5 netns ${NS1}
- ip link set veth6 netns ${NS2}
- ip link set veth7 netns ${NS2}
- ip link set veth8 netns ${NS3}
-
- if [ ! -z "${VRF}" ] ; then
- ip -netns ${NS1} link add red type vrf table 1001
- ip -netns ${NS1} link set red up
- ip -netns ${NS1} route add table 1001 unreachable default metric 8192
- ip -netns ${NS1} -6 route add table 1001 unreachable default metric 8192
- ip -netns ${NS1} link set veth1 vrf red
- ip -netns ${NS1} link set veth5 vrf red
-
- ip -netns ${NS2} link add red type vrf table 1001
- ip -netns ${NS2} link set red up
- ip -netns ${NS2} route add table 1001 unreachable default metric 8192
- ip -netns ${NS2} -6 route add table 1001 unreachable default metric 8192
- ip -netns ${NS2} link set veth2 vrf red
- ip -netns ${NS2} link set veth3 vrf red
- ip -netns ${NS2} link set veth6 vrf red
- ip -netns ${NS2} link set veth7 vrf red
- fi
-
- # configure addesses: the top route (1-2-3-4)
- ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1
- ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2
- ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3
- ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4
- ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1
- ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2
- ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3
- ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4
-
- # configure addresses: the bottom route (5-6-7-8)
- ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5
- ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6
- ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7
- ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8
- ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5
- ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6
- ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7
- ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8
-
- ip -netns ${NS1} link set dev veth1 up
- ip -netns ${NS2} link set dev veth2 up
- ip -netns ${NS2} link set dev veth3 up
- ip -netns ${NS3} link set dev veth4 up
- ip -netns ${NS1} link set dev veth5 up
- ip -netns ${NS2} link set dev veth6 up
- ip -netns ${NS2} link set dev veth7 up
- ip -netns ${NS3} link set dev veth8 up
-
- # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default;
- # the bottom route to specific bottom addresses
-
- # NS1
- # top route
- ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 ${VRF}
- ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} ${VRF} # go top by default
- ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} ${VRF} # go top by default
- # bottom route
- ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 ${VRF}
- ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} ${VRF}
-
- # NS2
- # top route
- ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 ${VRF}
- ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 ${VRF}
- # bottom route
- ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 ${VRF}
- ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 ${VRF}
-
- # NS3
- # top route
- ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4
- ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3}
- ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3}
- ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4
- ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3}
- ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3}
- # bottom route
- ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8
- ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7}
- ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7}
- ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8
- ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7}
- ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7}
-
- # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route
- ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
- ip -netns ${NS3} link set gre_dev up
- ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev
- ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} ${VRF}
-
-
- # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
- ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255
- ip -netns ${NS3} link set gre6_dev up
- ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev
- ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
-
- TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
-
- sleep 1 # reduce flakiness
- set +e
-}
-
-cleanup()
-{
- if [ -f ${TMPFILE} ] ; then
- rm ${TMPFILE}
- fi
-
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
- ip netns del ${NS3} 2> /dev/null
-}
-
-trap cleanup EXIT
-
-remove_routes_to_gredev()
-{
- ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 ${VRF}
- ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 ${VRF}
- ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 ${VRF}
-}
-
-add_unreachable_routes_to_gredev()
-{
- ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 ${VRF}
- ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 ${VRF}
- ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
- ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
-}
-
-test_ping()
-{
- local readonly PROTO=$1
- local readonly EXPECTED=$2
- local RET=0
-
- if [ "${PROTO}" == "IPv4" ] ; then
- ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
- RET=$?
- elif [ "${PROTO}" == "IPv6" ] ; then
- ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
- RET=$?
- else
- echo " test_ping: unknown PROTO: ${PROTO}"
- TEST_STATUS=1
- fi
-
- if [ "0" != "${RET}" ]; then
- RET=1
- fi
-
- if [ "${EXPECTED}" != "${RET}" ] ; then
- echo " test_ping failed: expected: ${EXPECTED}; got ${RET}"
- TEST_STATUS=1
- fi
-}
-
-test_gso()
-{
- local readonly PROTO=$1
- local readonly PKT_SZ=5000
- local IP_DST=""
- : > ${TMPFILE} # trim the capture file
-
- # check that nc is present
- command -v nc >/dev/null 2>&1 || \
- { echo >&2 "nc is not available: skipping TSO tests"; return; }
-
- # listen on port 9000, capture TCP into $TMPFILE
- if [ "${PROTO}" == "IPv4" ] ; then
- IP_DST=${IPv4_DST}
- ip netns exec ${NS3} bash -c \
- "nc -4 -l -p 9000 > ${TMPFILE} &"
- elif [ "${PROTO}" == "IPv6" ] ; then
- IP_DST=${IPv6_DST}
- ip netns exec ${NS3} bash -c \
- "nc -6 -l -p 9000 > ${TMPFILE} &"
- RET=$?
- else
- echo " test_gso: unknown PROTO: ${PROTO}"
- TEST_STATUS=1
- fi
- sleep 1 # let nc start listening
-
- # send a packet larger than MTU
- ip netns exec ${NS1} bash -c \
- "dd if=/dev/zero bs=$PKT_SZ count=1 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
- sleep 2 # let the packet get delivered
-
- # verify we received all expected bytes
- SZ=$(stat -c %s ${TMPFILE})
- if [ "$SZ" != "$PKT_SZ" ] ; then
- echo " test_gso failed: ${PROTO}"
- TEST_STATUS=1
- fi
-}
-
-test_egress()
-{
- local readonly ENCAP=$1
- echo "starting egress ${ENCAP} encap test ${VRF}"
- setup
-
- # by default, pings work
- test_ping IPv4 0
- test_ping IPv6 0
-
- # remove NS2->DST routes, ping fails
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
- test_ping IPv4 1
- test_ping IPv6 1
-
- # install replacement routes (LWT/eBPF), pings succeed
- if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre dev veth1 ${VRF}
- elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF}
- else
- echo " unknown encap ${ENCAP}"
- TEST_STATUS=1
- fi
- test_ping IPv4 0
- test_ping IPv6 0
-
- # skip GSO tests with VRF: VRF routing needs properly assigned
- # source IP/device, which is easy to do with ping and hard with dd/nc.
- if [ -z "${VRF}" ] ; then
- test_gso IPv4
- test_gso IPv6
- fi
-
- # a negative test: remove routes to GRE devices: ping fails
- remove_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- # another negative test
- add_unreachable_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- cleanup
- process_test_results
-}
-
-test_ingress()
-{
- local readonly ENCAP=$1
- echo "starting ingress ${ENCAP} encap test ${VRF}"
- setup
-
- # need to wait a bit for IPv6 to autoconf, otherwise
- # ping6 sometimes fails with "unable to bind to address"
-
- # by default, pings work
- test_ping IPv4 0
- test_ping IPv6 0
-
- # remove NS2->DST routes, pings fail
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
- test_ping IPv4 1
- test_ping IPv6 1
-
- # install replacement routes (LWT/eBPF), pings succeed
- if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre dev veth2 ${VRF}
- elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF}
- else
- echo "FAIL: unknown encap ${ENCAP}"
- TEST_STATUS=1
- fi
- test_ping IPv4 0
- test_ping IPv6 0
-
- # a negative test: remove routes to GRE devices: ping fails
- remove_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- # another negative test
- add_unreachable_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- cleanup
- process_test_results
-}
-
-VRF=""
-test_egress IPv4
-test_egress IPv6
-test_ingress IPv4
-test_ingress IPv6
-
-VRF="vrf red"
-test_egress IPv4
-test_egress IPv6
-test_ingress IPv4
-test_ingress IPv6
-
-print_test_summary_and_exit
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
deleted file mode 100755
index 0efea2292d6a..000000000000
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/bin/bash
-# Connects 6 network namespaces through veths.
-# Each NS may have different IPv6 global scope addresses:
-# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
-# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6
-# fc42::1 fd00::4
-#
-# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in an
-# IPv6 header with a Segment Routing Header, with segments:
-# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
-#
-# 3 fd00::/16 IPv6 addresses are bound to seg6local End.BPF actions:
-# - fd00::1 : add a TLV, change the flags and apply an End.X action to fc42::1
-# - fd00::2 : remove the TLV, change the flags, add a tag
-# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
-#
-# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
-# Each End.BPF action will validate the operations applied on the SRH by the
-# previous BPF program in the chain, otherwise the packet is dropped.
-#
-# A UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
-# datagram can be read on NS6 when binding to fb00::6.
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-BPF_FILE="test_lwt_seg6local.bpf.o"
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-readonly NS3="ns3-$(mktemp -u XXXXXX)"
-readonly NS4="ns4-$(mktemp -u XXXXXX)"
-readonly NS5="ns5-$(mktemp -u XXXXXX)"
-readonly NS6="ns6-$(mktemp -u XXXXXX)"
-
-msg="skip all tests:"
-if [ $UID != 0 ]; then
- echo $msg please run this as root >&2
- exit $ksft_skip
-fi
-
-TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
-
-cleanup()
-{
- if [ "$?" = "0" ]; then
- echo "selftests: test_lwt_seg6local [PASS]";
- else
- echo "selftests: test_lwt_seg6local [FAILED]";
- fi
-
- set +e
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
- ip netns del ${NS3} 2> /dev/null
- ip netns del ${NS4} 2> /dev/null
- ip netns del ${NS5} 2> /dev/null
- ip netns del ${NS6} 2> /dev/null
- rm -f $TMP_FILE
-}
-
-set -e
-
-ip netns add ${NS1}
-ip netns add ${NS2}
-ip netns add ${NS3}
-ip netns add ${NS4}
-ip netns add ${NS5}
-ip netns add ${NS6}
-
-trap cleanup 0 2 3 6 9
-
-ip link add veth1 type veth peer name veth2
-ip link add veth3 type veth peer name veth4
-ip link add veth5 type veth peer name veth6
-ip link add veth7 type veth peer name veth8
-ip link add veth9 type veth peer name veth10
-
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-ip link set veth3 netns ${NS2}
-ip link set veth4 netns ${NS3}
-ip link set veth5 netns ${NS3}
-ip link set veth6 netns ${NS4}
-ip link set veth7 netns ${NS4}
-ip link set veth8 netns ${NS5}
-ip link set veth9 netns ${NS5}
-ip link set veth10 netns ${NS6}
-
-ip netns exec ${NS1} ip link set dev veth1 up
-ip netns exec ${NS2} ip link set dev veth2 up
-ip netns exec ${NS2} ip link set dev veth3 up
-ip netns exec ${NS3} ip link set dev veth4 up
-ip netns exec ${NS3} ip link set dev veth5 up
-ip netns exec ${NS4} ip link set dev veth6 up
-ip netns exec ${NS4} ip link set dev veth7 up
-ip netns exec ${NS5} ip link set dev veth8 up
-ip netns exec ${NS5} ip link set dev veth9 up
-ip netns exec ${NS6} ip link set dev veth10 up
-ip netns exec ${NS6} ip link set dev lo up
-
-# All link scope addresses and routes required between veths
-ip netns exec ${NS1} ip -6 addr add fb00::12/16 dev veth1 scope link
-ip netns exec ${NS1} ip -6 route add fb00::21 dev veth1 scope link
-ip netns exec ${NS2} ip -6 addr add fb00::21/16 dev veth2 scope link
-ip netns exec ${NS2} ip -6 addr add fb00::34/16 dev veth3 scope link
-ip netns exec ${NS2} ip -6 route add fb00::43 dev veth3 scope link
-ip netns exec ${NS3} ip -6 route add fb00::65 dev veth5 scope link
-ip netns exec ${NS3} ip -6 addr add fb00::43/16 dev veth4 scope link
-ip netns exec ${NS3} ip -6 addr add fb00::56/16 dev veth5 scope link
-ip netns exec ${NS4} ip -6 addr add fb00::65/16 dev veth6 scope link
-ip netns exec ${NS4} ip -6 addr add fb00::78/16 dev veth7 scope link
-ip netns exec ${NS4} ip -6 route add fb00::87 dev veth7 scope link
-ip netns exec ${NS5} ip -6 addr add fb00::87/16 dev veth8 scope link
-ip netns exec ${NS5} ip -6 addr add fb00::910/16 dev veth9 scope link
-ip netns exec ${NS5} ip -6 route add fb00::109 dev veth9 scope link
-ip netns exec ${NS5} ip -6 route add fb00::109 table 117 dev veth9 scope link
-ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link
-
-ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo
-ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21
-
-ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj ${BPF_FILE} sec encap_srh dev veth2
-ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
-
-ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65
-ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec add_egr_x dev veth4
-
-ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec pop_egr dev veth6
-ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo
-ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87
-
-ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
-ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec inspect_t dev veth8
-
-ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo
-ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo
-
-ip netns exec ${NS1} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS2} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS3} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS4} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS5} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-
-ip netns exec ${NS6} sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
-ip netns exec ${NS6} sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
-ip netns exec ${NS6} sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
-
-ip netns exec ${NS6} nc -l -6 -u -d 7330 > $TMP_FILE &
-ip netns exec ${NS1} bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
-sleep 5 # wait enough time to ensure the UDP datagram arrived at the last segment
-kill -TERM $!
-
-if [[ $(< $TMP_FILE) != "foobar" ]]; then
- exit 1
-fi
-
-exit 0
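For reference, the End.BPF hooks that the removed script attaches (e.g. the End.X action applied at fd00::1 towards fc42::1) are ordinary lwt_seg6local BPF programs. A minimal, hypothetical sketch of such a program, assuming the bpf_lwt_seg6_action() helper and the SEG6_LOCAL_ACTION_END_X action (the real programs live in test_lwt_seg6local.bpf.c):

/* Hypothetical sketch only, not the actual test program. */
#include <linux/bpf.h>
#include <linux/seg6_local.h>
#include <linux/in6.h>
#include <bpf/bpf_helpers.h>

SEC("lwt_seg6local")
int end_x_to_fc42(struct __sk_buff *skb)
{
	/* next hop fc42::1, as configured for fd00::1 in the script above */
	struct in6_addr nh = { .s6_addr = { 0xfc, 0x42, [15] = 0x01 } };

	/* End.X: forward the decapsulated traffic to the given IPv6 next hop */
	if (bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, &nh, sizeof(nh)) < 0)
		return BPF_DROP;
	return BPF_REDIRECT;
}

char _license[] SEC("license") = "GPL";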
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 8b40e9496af1..986ce32b113a 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1396,9 +1396,10 @@ static void test_map_stress(void)
#define MAX_DELAY_US 50000
#define MIN_DELAY_RANGE_US 5000
-static bool retry_for_again_or_busy(int err)
+static bool can_retry(int err)
{
- return (err == EAGAIN || err == EBUSY);
+ return (err == EAGAIN || err == EBUSY ||
+ (err == ENOMEM && map_opts.map_flags == BPF_F_NO_PREALLOC));
}
int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
@@ -1451,12 +1452,12 @@ static void test_update_delete(unsigned int fn, void *data)
if (do_update) {
err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES,
- retry_for_again_or_busy);
+ can_retry);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES,
- retry_for_again_or_busy);
+ can_retry);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index c9e745d49493..309d9d4a8ace 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -88,7 +88,9 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt)
#endif
}
-static void stdio_restore_cleanup(void)
+static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void stdio_restore(void)
{
#ifdef __GLIBC__
if (verbose() && env.worker_id == -1) {
@@ -98,34 +100,33 @@ static void stdio_restore_cleanup(void)
fflush(stdout);
+ pthread_mutex_lock(&stdout_lock);
+
if (env.subtest_state) {
- fclose(env.subtest_state->stdout_saved);
+ if (env.subtest_state->stdout_saved)
+ fclose(env.subtest_state->stdout_saved);
env.subtest_state->stdout_saved = NULL;
stdout = env.test_state->stdout_saved;
stderr = env.test_state->stdout_saved;
} else {
- fclose(env.test_state->stdout_saved);
+ if (env.test_state->stdout_saved)
+ fclose(env.test_state->stdout_saved);
env.test_state->stdout_saved = NULL;
+ stdout = env.stdout_saved;
+ stderr = env.stderr_saved;
}
+
+ pthread_mutex_unlock(&stdout_lock);
#endif
}
-static void stdio_restore(void)
+static int traffic_monitor_print_fn(const char *format, va_list args)
{
-#ifdef __GLIBC__
- if (verbose() && env.worker_id == -1) {
- /* nothing to do, output to stdout by default */
- return;
- }
-
- if (stdout == env.stdout_saved)
- return;
-
- stdio_restore_cleanup();
+ pthread_mutex_lock(&stdout_lock);
+ vfprintf(stdout, format, args);
+ pthread_mutex_unlock(&stdout_lock);
- stdout = env.stdout_saved;
- stderr = env.stderr_saved;
-#endif
+ return 0;
}
/* Adapted from perf/util/string.c */
@@ -474,8 +475,6 @@ static void dump_test_log(const struct prog_test_def *test,
print_test_result(test, test_state);
}
-static void stdio_restore(void);
-
/* A bunch of tests set custom affinity per-thread and/or per-process. Reset
* it after each test/sub-test.
*/
@@ -490,13 +489,11 @@ static void reset_affinity(void)
err = sched_setaffinity(0, sizeof(cpuset), &cpuset);
if (err < 0) {
- stdio_restore();
fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
exit(EXIT_ERR_SETUP_INFRA);
}
err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
if (err < 0) {
- stdio_restore();
fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
exit(EXIT_ERR_SETUP_INFRA);
}
@@ -514,7 +511,6 @@ static void save_netns(void)
static void restore_netns(void)
{
if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
- stdio_restore();
perror("setns(CLONE_NEWNS)");
exit(EXIT_ERR_SETUP_INFRA);
}
@@ -541,7 +537,8 @@ void test__end_subtest(void)
test_result(subtest_state->error_cnt,
subtest_state->skipped));
- stdio_restore_cleanup();
+ stdio_restore();
+
env.subtest_state = NULL;
}
@@ -1270,8 +1267,10 @@ void crash_handler(int signum)
sz = backtrace(bt, ARRAY_SIZE(bt));
- if (env.stdout_saved)
- stdio_restore();
+ fflush(stdout);
+ stdout = env.stdout_saved;
+ stderr = env.stderr_saved;
+
if (env.test) {
env.test_state->error_cnt++;
dump_test_log(env.test, env.test_state, true, false, NULL);
@@ -1365,10 +1364,19 @@ static int recv_message(int sock, struct msg *msg)
return ret;
}
+static bool ns_is_needed(const char *test_name)
+{
+ if (strlen(test_name) < 3)
+ return false;
+
+ return !strncmp(test_name, "ns_", 3);
+}
+
static void run_one_test(int test_num)
{
struct prog_test_def *test = &prog_test_defs[test_num];
struct test_state *state = &test_states[test_num];
+ struct netns_obj *ns = NULL;
env.test = test;
env.test_state = state;
@@ -1376,10 +1384,13 @@ static void run_one_test(int test_num)
stdio_hijack(&state->log_buf, &state->log_cnt);
watchdog_start();
+ if (ns_is_needed(test->test_name))
+ ns = netns_new(test->test_name, true);
if (test->run_test)
test->run_test();
else if (test->run_serial_test)
test->run_serial_test();
+ netns_free(ns);
watchdog_stop();
/* ensure last sub-test is finalized properly */
@@ -1388,6 +1399,8 @@ static void run_one_test(int test_num)
state->tested = true;
+ stdio_restore();
+
if (verbose() && env.worker_id == -1)
print_test_result(test, state);
@@ -1396,7 +1409,6 @@ static void run_one_test(int test_num)
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
- stdio_restore();
free(stop_libbpf_log_capture());
dump_test_log(test, state, false, false, NULL);
@@ -1931,6 +1943,9 @@ int main(int argc, char **argv)
sigaction(SIGSEGV, &sigact, NULL);
+ env.stdout_saved = stdout;
+ env.stderr_saved = stderr;
+
env.secs_till_notify = 10;
env.secs_till_kill = 120;
err = argp_parse(&argp, argc, argv, 0, NULL, &env);
@@ -1947,6 +1962,8 @@ int main(int argc, char **argv)
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
+ traffic_monitor_set_print(traffic_monitor_print_fn);
+
srand(time(NULL));
env.jit_enabled = is_jit_enabled();
@@ -1957,9 +1974,6 @@ int main(int argc, char **argv)
return -1;
}
- env.stdout_saved = stdout;
- env.stderr_saved = stderr;
-
env.has_testmod = true;
if (!env.list_test_names) {
/* ensure previous instance of the module is unloaded */
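With the run_one_test() change above, any test whose name carries the "ns_" prefix is wrapped in a freshly created network namespace. A hypothetical example of a test relying on that convention (the test name and body are illustrative only):

/* Hypothetical test: runs inside its own netns because of the "ns_" prefix. */
#include <sys/socket.h>
#include <unistd.h>
#include "test_progs.h"

void test_ns_loopback_socket(void)
{
	int fd;

	/* any interface, route or socket state set up here stays confined to
	 * the per-test namespace created by run_one_test() and released by
	 * netns_free() once the test returns */
	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (!ASSERT_GE(fd, 0, "socket"))
		return;
	close(fd);
}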
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 404d0d4915d5..870694f2a359 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -427,6 +427,14 @@ void hexdump(const char *prefix, const void *buf, size_t len);
goto goto_label; \
})
+#define SYS_FAIL(goto_label, fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_NEQ(0, system(cmd), cmd)) \
+ goto goto_label; \
+ })
+
#define ALL_TO_DEV_NULL " >/dev/null 2>&1"
#define SYS_NOFAIL(fmt, ...) \
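SYS_FAIL is the negative counterpart of the existing SYS macro: the command is expected to fail, and the test jumps to the label otherwise. A small, hypothetical usage sketch (the device name is made up):

/* Hypothetical snippet using the new SYS_FAIL macro. */
static void check_duplicate_link(void)
{
	SYS(out, "ip link add %s type veth", "veth_demo");
	/* adding the same link a second time must fail */
	SYS_FAIL(out, "ip link add %s type veth", "veth_demo");
out:
	SYS_NOFAIL("ip link del veth_demo");
}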
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
deleted file mode 100755
index d9661b9988ba..000000000000
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ /dev/null
@@ -1,645 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# End-to-end eBPF tunnel test suite
-# The script tests BPF network tunnel implementation.
-#
-# Topology:
-# ---------
-# root namespace | at_ns0 namespace
-# |
-# ----------- | -----------
-# | tnl dev | | | tnl dev | (overlay network)
-# ----------- | -----------
-# metadata-mode | native-mode
-# with bpf |
-# |
-# ---------- | ----------
-# | veth1 | --------- | veth0 | (underlay network)
-# ---------- peer ----------
-#
-#
-# Device Configuration
-# --------------------
-# Root namespace with metadata-mode tunnel + BPF
-# Device names and addresses:
-# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
-# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
-#
-# Namespace at_ns0 with native tunnel
-# Device names and addresses:
-# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
-# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
-#
-#
-# End-to-end ping packet flow
-# ---------------------------
-# Most of the tests start with namespace creation and device configuration,
-# then ping the underlay and overlay networks. When doing 'ping 10.1.1.100'
-# from the root namespace, the following operations happen:
-# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
-# 2) Tnl device's egress BPF program is triggered and sets the tunnel metadata,
-#    with remote_ip=172.16.1.100 and others.
-# 3) The outer tunnel header is prepended and the packet is routed to veth1's egress.
-# 4) veth0's ingress queue receives the tunneled packet in namespace at_ns0.
-# 5) The tunnel protocol handler, ex: vxlan_rcv, decapsulates the packet.
-# 6) The packet is forwarded to the overlay tnl dev.
-
-BPF_FILE="test_tunnel_kern.bpf.o"
-BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel"
-PING_ARG="-c 3 -w 10 -q"
-ret=0
-GREEN='\033[0;92m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-config_device()
-{
- ip netns add at_ns0
- ip link add veth0 type veth peer name veth1
- ip link set veth0 netns at_ns0
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip link set dev veth1 up mtu 1500
- ip addr add dev veth1 172.16.1.200/24
-}
-
-add_gre_tunnel()
-{
- tun_key=
- if [ -n "$1" ]; then
- tun_key="key $1"
- fi
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq $tun_key \
- local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE $tun_key external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6gretap_tunnel()
-{
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
- local ::11 remote ::22
-
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip addr add dev $DEV fc80::200/24
- ip link set dev $DEV up
-}
-
-add_erspan_tunnel()
-{
- # at_ns0 namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 2 erspan_dir egress erspan_hwid 3
- fi
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6erspan_tunnel()
-{
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 2 erspan_dir egress erspan_hwid 7
- fi
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-add_geneve_tunnel()
-{
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- id 2 dstport 6081 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE dstport 6081 external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6geneve_tunnel()
-{
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE id 22 \
- remote ::22 # geneve has no local option
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-add_ipip_tunnel()
-{
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6tnl_tunnel()
-{
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- local ::11 remote ::22
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip addr add dev $DEV 1::22/96
- ip link set dev $DEV up
-}
-
-test_gre()
-{
- TYPE=gretap
- DEV_NS=gretap00
- DEV=gretap11
- ret=0
-
- check $TYPE
- config_device
- add_gre_tunnel 2
- attach_bpf $DEV gre_set_tunnel gre_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_gre_no_tunnel_key()
-{
- TYPE=gre
- DEV_NS=gre00
- DEV=gre11
- ret=0
-
- check $TYPE
- config_device
- add_gre_tunnel
- attach_bpf $DEV gre_set_tunnel_no_key gre_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6gre()
-{
- TYPE=ip6gre
- DEV_NS=ip6gre00
- DEV=ip6gre11
- ret=0
-
- check $TYPE
- config_device
- # reuse the ip6gretap function
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- ping $PING_ARG 10.1.1.100
- check_err $?
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 $PING_ARG fc80::200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6gretap()
-{
- TYPE=ip6gretap
- DEV_NS=ip6gretap00
- DEV=ip6gretap11
- ret=0
-
- check $TYPE
- config_device
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- ping $PING_ARG 10.1.1.100
- check_err $?
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 $PING_ARG fc80::200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_erspan()
-{
- TYPE=erspan
- DEV_NS=erspan00
- DEV=erspan11
- ret=0
-
- check $TYPE
- config_device
- add_erspan_tunnel $1
- attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6erspan()
-{
- TYPE=ip6erspan
- DEV_NS=ip6erspan00
- DEV=ip6erspan11
- ret=0
-
- check $TYPE
- config_device
- add_ip6erspan_tunnel $1
- attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
- ping6 $PING_ARG ::11
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_geneve()
-{
- TYPE=geneve
- DEV_NS=geneve00
- DEV=geneve11
- ret=0
-
- check $TYPE
- config_device
- add_geneve_tunnel
- attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6geneve()
-{
- TYPE=geneve
- DEV_NS=ip6geneve00
- DEV=ip6geneve11
- ret=0
-
- check $TYPE
- config_device
- add_ip6geneve_tunnel
- attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
-test_ipip()
-{
- TYPE=ipip
- DEV_NS=ipip00
- DEV=ipip11
- ret=0
-
- check $TYPE
- config_device
- add_ipip_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ipip6()
-{
- TYPE=ip6tnl
- DEV_NS=ipip6tnl00
- DEV=ipip6tnl11
- ret=0
-
- check $TYPE
- config_device
- add_ip6tnl_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip4 over ip6
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6ip6()
-{
- TYPE=ip6tnl
- DEV_NS=ip6ip6tnl00
- DEV=ip6ip6tnl11
- ret=0
-
- check $TYPE
- config_device
- add_ip6tnl_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip6 over ip6
- ping6 $PING_ARG 1::11
- check_err $?
- ip netns exec at_ns0 ping6 $PING_ARG 1::22
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
-attach_bpf()
-{
- DEV=$1
- SET=$2
- GET=$3
- mkdir -p ${BPF_PIN_TUNNEL_DIR}
- bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/
- tc qdisc add dev $DEV clsact
- tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET
- tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET
-}
-
-cleanup()
-{
- rm -rf ${BPF_PIN_TUNNEL_DIR}
-
- ip netns delete at_ns0 2> /dev/null
- ip link del veth1 2> /dev/null
- ip link del ipip11 2> /dev/null
- ip link del ipip6tnl11 2> /dev/null
- ip link del ip6ip6tnl11 2> /dev/null
- ip link del gretap11 2> /dev/null
- ip link del gre11 2> /dev/null
- ip link del ip6gre11 2> /dev/null
- ip link del ip6gretap11 2> /dev/null
- ip link del geneve11 2> /dev/null
- ip link del ip6geneve11 2> /dev/null
- ip link del erspan11 2> /dev/null
- ip link del ip6erspan11 2> /dev/null
-}
-
-cleanup_exit()
-{
- echo "CATCH SIGKILL or SIGINT, cleanup and exit"
- cleanup
- exit 0
-}
-
-check()
-{
- ip link help 2>&1 | grep -q "\s$1\s"
- if [ $? -ne 0 ];then
- echo "SKIP $1: not supported by iproute2"
- cleanup
- return 1
- fi
-}
-
-enable_debug()
-{
- echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
-}
-
-check_err()
-{
- if [ $ret -eq 0 ]; then
- ret=$1
- fi
-}
-
-bpf_tunnel_test()
-{
- local errors=0
-
- echo "Testing GRE tunnel..."
- test_gre
- errors=$(( $errors + $? ))
-
- echo "Testing GRE tunnel (without tunnel keys)..."
- test_gre_no_tunnel_key
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GRE tunnel..."
- test_ip6gre
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GRETAP tunnel..."
- test_ip6gretap
- errors=$(( $errors + $? ))
-
- echo "Testing ERSPAN tunnel..."
- test_erspan v2
- errors=$(( $errors + $? ))
-
- echo "Testing IP6ERSPAN tunnel..."
- test_ip6erspan v2
- errors=$(( $errors + $? ))
-
- echo "Testing GENEVE tunnel..."
- test_geneve
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GENEVE tunnel..."
- test_ip6geneve
- errors=$(( $errors + $? ))
-
- echo "Testing IPIP tunnel..."
- test_ipip
- errors=$(( $errors + $? ))
-
- echo "Testing IPIP6 tunnel..."
- test_ipip6
- errors=$(( $errors + $? ))
-
- echo "Testing IP6IP6 tunnel..."
- test_ip6ip6
- errors=$(( $errors + $? ))
-
- return $errors
-}
-
-trap cleanup 0 3 6
-trap cleanup_exit 2 9
-
-cleanup
-bpf_tunnel_test
-
-if [ $? -ne 0 ]; then
- echo -e "$(basename $0): ${RED}FAIL${NC}"
- exit 1
-fi
-echo -e "$(basename $0): ${GREEN}PASS${NC}"
-exit 0
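Step 2 of the packet-flow description in the removed script depends on the tc egress program filling in the tunnel metadata before the outer header is built. A minimal sketch of that kind of program, assuming the bpf_skb_set_tunnel_key() helper (the real programs are built from test_tunnel_kern.bpf.c, referenced as BPF_FILE above):

/* Minimal sketch, not the actual test program: set GRE tunnel metadata
 * on egress so the outer header targets 172.16.1.100 (veth0 in at_ns0). */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int gre_set_tunnel_sketch(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {};

	key.remote_ipv4 = 0xac100164;	/* 172.16.1.100 */
	key.tunnel_id = 2;		/* matches the "key 2" tunnel setups */
	key.tunnel_ttl = 64;

	if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX) < 0)
		return TC_ACT_SHOT;
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";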
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
deleted file mode 100755
index 4c3c3fdd2d73..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+++ /dev/null
@@ -1,214 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test topology:
-# - - - - - - - - - - - - - - - - - - -
-# | veth1 veth2 veth3 | ns0
-# - -| - - - - - - | - - - - - - | - -
-# --------- --------- ---------
-# | veth0 | | veth0 | | veth0 |
-# --------- --------- ---------
-# ns1 ns2 ns3
-#
-# Test modules:
-# XDP modes: generic, native, native + egress_prog
-#
-# Test cases:
-# ARP: Testing BPF_F_BROADCAST, the ingress interface should also receive
-# the redirects.
-# ns1 -> gw: ns1, ns2 and ns3 should receive the ARP request
-# IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress
-# interface should not receive the redirects.
-# ns1 -> gw: ns1 should not receive the redirects; ns2 and ns3 should.
-# IPv6: Testing no flags, all the packets should be redirected back
-# ping test: ns1 -> ns2 (block), echo requests will be redirected back
-# egress_prog:
-# all source MACs should be the egress interface's MAC
-
-# netns numbers
-NUM=3
-IFACES=""
-DRV_MODE="xdpgeneric xdpdrv xdpegress"
-PASS=0
-FAIL=0
-LOG_DIR=$(mktemp -d)
-declare -a NS
-NS[0]="ns0-$(mktemp -u XXXXXX)"
-NS[1]="ns1-$(mktemp -u XXXXXX)"
-NS[2]="ns2-$(mktemp -u XXXXXX)"
-NS[3]="ns3-$(mktemp -u XXXXXX)"
-
-test_pass()
-{
- echo "Pass: $@"
- PASS=$((PASS + 1))
-}
-
-test_fail()
-{
- echo "fail: $@"
- FAIL=$((FAIL + 1))
-}
-
-clean_up()
-{
- for i in $(seq 0 $NUM); do
- ip netns del ${NS[$i]} 2> /dev/null
- done
-}
-
-# Kselftest framework requirement - SKIP code is 4.
-check_env()
-{
- ip link set dev lo xdpgeneric off &>/dev/null
- if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
- exit 4
- fi
-
- which tcpdump &>/dev/null
- if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without tcpdump"
- exit 4
- fi
-}
-
-setup_ns()
-{
- local mode=$1
- IFACES=""
-
- if [ "$mode" = "xdpegress" ]; then
- mode="xdpdrv"
- fi
-
- ip netns add ${NS[0]}
- for i in $(seq $NUM); do
- ip netns add ${NS[$i]}
- ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]}
- ip -n ${NS[$i]} link set veth0 up
- ip -n ${NS[0]} link set veth$i up
-
- ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0
- ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0
- # Add a neigh entry for IPv4 ping test
- ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
- ip -n ${NS[$i]} link set veth0 $mode obj \
- xdp_dummy.bpf.o sec xdp &> /dev/null || \
- { test_fail "Unable to load dummy xdp" && exit 1; }
- IFACES="$IFACES veth$i"
- veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}')
- done
-}
-
-do_egress_tests()
-{
- local mode=$1
-
- # mac test
- ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
- ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
- sleep 0.5
- ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
- sleep 0.5
- pkill tcpdump
-
- # mac check
- grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-2_${mode}.log && \
- test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2"
- grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-3_${mode}.log && \
- test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3"
-}
-
-do_ping_tests()
-{
- local mode=$1
-
- # ping6 test: echo request should be redirect back to itself, not others
- ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
-
- ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
- ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
- ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
- sleep 0.5
- # ARP test
- ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254
- # IPv4 test
- ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
- # IPv6 test
- ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
- sleep 0.5
- pkill tcpdump
-
- # All netns should receive the redirected ARP requests
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-1" || \
- test_fail "$mode arp(F_BROADCAST) ns1-1"
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-2_${mode}.log) -eq 2 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-2" || \
- test_fail "$mode arp(F_BROADCAST) ns1-2"
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-3_${mode}.log) -eq 2 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-3" || \
- test_fail "$mode arp(F_BROADCAST) ns1-3"
-
- # ns1 should not receive the redirected echo request, the others should
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1"
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2"
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-3_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3"
-
- # ns1 should receive the echo request, ns2 should not
- [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv6 (no flags) ns1-1" || \
- test_fail "$mode IPv6 (no flags) ns1-1"
- [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 0 ] && \
- test_pass "$mode IPv6 (no flags) ns1-2" || \
- test_fail "$mode IPv6 (no flags) ns1-2"
-}
-
-do_tests()
-{
- local mode=$1
- local drv_p
-
- case ${mode} in
- xdpdrv) drv_p="-N";;
- xdpegress) drv_p="-X";;
- xdpgeneric) drv_p="-S";;
- esac
-
- ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
- xdp_pid=$!
- sleep 1
- if ! ps -p $xdp_pid > /dev/null; then
- test_fail "$mode xdp_redirect_multi start failed"
- return 1
- fi
-
- if [ "$mode" = "xdpegress" ]; then
- do_egress_tests $mode
- else
- do_ping_tests $mode
- fi
-
- kill $xdp_pid
-}
-
-check_env
-
-trap clean_up EXIT
-
-for mode in ${DRV_MODE}; do
- setup_ns $mode
- do_tests $mode
- clean_up
-done
-rm -rf ${LOG_DIR}
-
-echo "Summary: PASS $PASS, FAIL $FAIL"
-[ $FAIL -eq 0 ] && exit 0 || exit 1
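The broadcast behaviour exercised by the removed script comes from bpf_redirect_map() with the BPF_F_BROADCAST and BPF_F_EXCLUDE_INGRESS flags. A rough sketch of an XDP program using them, assuming a devmap named forward_map (the real programs ship separately with the selftests):

/* Rough sketch: broadcast an incoming frame to every device in the map,
 * optionally excluding the ingress interface, as tested by the removed script. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
	__uint(max_entries, 32);
	__type(key, int);
	__type(value, int);
} forward_map SEC(".maps");

SEC("xdp")
int xdp_broadcast_sketch(struct xdp_md *ctx)
{
	/* the key is ignored when BPF_F_BROADCAST is set */
	return bpf_redirect_map(&forward_map, 0,
				BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
}

char _license[] SEC("license") = "GPL";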
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
deleted file mode 100755
index fbcaa9f0120b..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan.sh
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-# Author: Jesper Dangaard Brouer <hawk@kernel.org>
-
-# Kselftest framework requirement - SKIP code is 4.
-readonly KSFT_SKIP=4
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-
-# Allow wrapper scripts to name test
-if [ -z "$TESTNAME" ]; then
- TESTNAME=xdp_vlan
-fi
-
-# Default XDP mode
-XDP_MODE=xdpgeneric
-
-usage() {
- echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME"
- echo ""
- echo "Usage: $0 [-vfh]"
- echo " -v | --verbose : Verbose"
- echo " --flush : Flush before starting (e.g. after --interactive)"
- echo " --interactive : Keep netns setup running after test-run"
- echo " --mode=XXX : Choose XDP mode (xdp | xdpgeneric | xdpdrv)"
- echo ""
-}
-
-valid_xdp_mode()
-{
- local mode=$1
-
- case "$mode" in
- xdpgeneric | xdpdrv | xdp)
- return 0
- ;;
- *)
- return 1
- esac
-}
-
-cleanup()
-{
- local status=$?
-
- if [ "$status" = "0" ]; then
- echo "selftests: $TESTNAME [PASS]";
- else
- echo "selftests: $TESTNAME [FAILED]";
- fi
-
- if [ -n "$INTERACTIVE" ]; then
- echo "Namespace setup still active; explore with:"
- echo " ip netns exec ${NS1} bash"
- echo " ip netns exec ${NS2} bash"
- exit $status
- fi
-
- set +e
- ip link del veth1 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-}
-
-# Using external program "getopt" to get --long-options
-OPTIONS=$(getopt -o hvfi: \
- --long verbose,flush,help,interactive,debug,mode: -- "$@")
-if (( $? != 0 )); then
- usage
- echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?"
- exit 2
-fi
-eval set -- "$OPTIONS"
-
-## --- Parse command line arguments / parameters ---
-while true; do
- case "$1" in
- -v | --verbose)
- export VERBOSE=yes
- shift
- ;;
- -i | --interactive | --debug )
- INTERACTIVE=yes
- shift
- ;;
- -f | --flush )
- cleanup
- shift
- ;;
- --mode )
- shift
- XDP_MODE=$1
- shift
- ;;
- -- )
- shift
- break
- ;;
- -h | --help )
- usage;
- echo "selftests: $TESTNAME [SKIP] usage help info requested"
- exit $KSFT_SKIP
- ;;
- * )
- shift
- break
- ;;
- esac
-done
-
-if [ "$EUID" -ne 0 ]; then
- echo "selftests: $TESTNAME [FAILED] need root privileges"
- exit 1
-fi
-
-valid_xdp_mode $XDP_MODE
-if [ $? -ne 0 ]; then
- echo "selftests: $TESTNAME [FAILED] unknown XDP mode ($XDP_MODE)"
- exit 1
-fi
-
-ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
-if [ $? -ne 0 ]; then
- echo "selftests: $TESTNAME [SKIP] need ip xdp support"
- exit $KSFT_SKIP
-fi
-
-# Interactive mode likely requires us to clean up the netns
-if [ -n "$INTERACTIVE" ]; then
- ip link del veth1 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-fi
-
-# Exit on failure
-set -e
-
-# Some shell-tools dependencies
-which ip > /dev/null
-which tc > /dev/null
-which ethtool > /dev/null
-
-# Make rest of shell verbose, showing comments as doc/info
-if [ -n "$VERBOSE" ]; then
- set -v
-fi
-
-# Create two namespaces
-ip netns add ${NS1}
-ip netns add ${NS2}
-
-# Run cleanup if failing or on kill
-trap cleanup 0 2 3 6 9
-
-# Create veth pair
-ip link add veth1 type veth peer name veth2
-
-# Move veth1 and veth2 into the respective namespaces
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-
-# NOTICE: XDP requires the VLAN header to be inside the packet payload
-# - Thus, disable the VLAN offloading driver features
-# - For veth, remember to also disable TX side VLAN-offload
-#
-# Disable rx-vlan-offload (mostly needed on ns1)
-ip netns exec ${NS1} ethtool -K veth1 rxvlan off
-ip netns exec ${NS2} ethtool -K veth2 rxvlan off
-#
-# Disable tx-vlan-offload (mostly needed on ns2)
-ip netns exec ${NS2} ethtool -K veth2 txvlan off
-ip netns exec ${NS1} ethtool -K veth1 txvlan off
-
-export IPADDR1=100.64.41.1
-export IPADDR2=100.64.41.2
-
-# In ns1/veth1 add IP-addr on plain net_device
-ip netns exec ${NS1} ip addr add ${IPADDR1}/24 dev veth1
-ip netns exec ${NS1} ip link set veth1 up
-
-# In ns2/veth2 create VLAN device
-export VLAN=4011
-export DEVNS2=veth2
-ip netns exec ${NS2} ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
-ip netns exec ${NS2} ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
-ip netns exec ${NS2} ip link set $DEVNS2 up
-ip netns exec ${NS2} ip link set $DEVNS2.$VLAN up
-
-# Bring up lo in netns (to avoid confusing people using --interactive)
-ip netns exec ${NS1} ip link set lo up
-ip netns exec ${NS2} ip link set lo up
-
-# At this point, the hosts cannot reach each other,
-# because ns2 is using VLAN tags on the packets.
-
-ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"'
-
-
-# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
-# ----------------------------------------------------------------------
-# In ns1: ingress use XDP to remove VLAN tags
-export DEVNS1=veth1
-export BPF_FILE=test_xdp_vlan.bpf.o
-
-# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
-export XDP_PROG=xdp_vlan_change
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG
-
-# In ns1: egress use TC to add back VLAN tag 4011
-# (del cmd)
-# tc qdisc del dev $DEVNS1 clsact 2> /dev/null
-#
-ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact
-ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \
- prio 1 handle 1 bpf da obj $BPF_FILE sec tc_vlan_push
-
-# Now the namespaces can reach each other; test with ping:
-ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
-
-# Second test: Replace the XDP prog with one that fully removes the VLAN header
-#
-# Catches a kernel bug in generic-XDP that didn't allow us to
-# remove a VLAN header, because skb->protocol still contained the VLAN
-# ETH_P_8021Q indication, and this caused our changes to be overwritten.
-#
-export XDP_PROG=xdp_vlan_remove_outer2
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG
-
-# Now the namespaces should still be able to reach each other; test with ping:
-ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
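The first test in the removed script loads an XDP program ("xdp_vlan_change") that rewrites the VLAN ID to 0. A rough sketch of that idea, with a locally defined vlan_hdr since the kernel-internal struct is not exported to BPF programs (not the actual test_xdp_vlan.c code):

/* Rough sketch of the idea behind xdp_vlan_change: clear the VLAN ID of a
 * tagged frame so the plain net_device in ns1 accepts it. */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

SEC("xdp")
int xdp_vlan_to_zero(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;
	struct vlan_hdr *vlh;

	if ((void *)(eth + 1) > data_end)
		return XDP_DROP;
	if (eth->h_proto != bpf_htons(ETH_P_8021Q))
		return XDP_PASS;

	vlh = (void *)(eth + 1);
	if ((void *)(vlh + 1) > data_end)
		return XDP_DROP;

	/* keep PCP/DEI bits, clear the 12-bit VLAN ID */
	vlh->h_vlan_TCI &= bpf_htons(0xF000);
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";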
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh
deleted file mode 100755
index c515326d6d59..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Exit on failure
-set -e
-
-# Wrapper script to test generic-XDP
-export TESTNAME=xdp_vlan_mode_generic
-./test_xdp_vlan.sh --mode=xdpgeneric
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh
deleted file mode 100755
index 5cf7ce1f16c1..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Exit on failure
-set -e
-
-# Wrapper script to test native-XDP
-export TESTNAME=xdp_vlan_mode_native
-./test_xdp_vlan.sh --mode=xdpdrv
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 06af5029885b..a18972ffdeb6 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -3,6 +3,7 @@
#define _GNU_SOURCE
#include <argp.h>
#include <libgen.h>
+#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <sched.h>
@@ -154,6 +155,16 @@ struct filter {
bool abs;
};
+struct var_preset {
+ char *name;
+ enum { INTEGRAL, ENUMERATOR } type;
+ union {
+ long long ivalue;
+ char *svalue;
+ };
+ bool applied;
+};
+
static struct env {
char **filenames;
int filename_cnt;
@@ -195,6 +206,8 @@ static struct env {
int progs_processed;
int progs_skipped;
int top_src_lines;
+ struct var_preset *presets;
+ int npresets;
} env;
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -246,16 +259,20 @@ static const struct argp_option opts[] = {
{ "test-reg-invariants", 'r', NULL, 0,
"Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
+ { "set-global-vars", 'G', "GLOBAL", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" },
{},
};
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
+static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr);
+static int append_var_preset_file(const char *filename);
+static int append_file(const char *path);
+static int append_file_from_file(const char *path);
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
- void *tmp;
int err;
switch (key) {
@@ -353,15 +370,26 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
+ case 'G': {
+ if (arg[0] == '@')
+ err = append_var_preset_file(arg + 1);
+ else
+ err = append_var_preset(&env.presets, &env.npresets, arg);
+ if (err) {
+ fprintf(stderr, "Failed to parse global variable presets: %s\n", arg);
+ return err;
+ }
+ break;
+ }
case ARGP_KEY_ARG:
- tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
- if (!tmp)
- return -ENOMEM;
- env.filenames = tmp;
- env.filenames[env.filename_cnt] = strdup(arg);
- if (!env.filenames[env.filename_cnt])
- return -ENOMEM;
- env.filename_cnt++;
+ if (arg[0] == '@')
+ err = append_file_from_file(arg + 1);
+ else
+ err = append_file(arg);
+ if (err) {
+ fprintf(stderr, "Failed to collect BPF object files: %d\n", err);
+ return err;
+ }
break;
default:
return ARGP_ERR_UNKNOWN;
@@ -632,7 +660,7 @@ static int append_filter_file(const char *path)
f = fopen(path, "r");
if (!f) {
err = -errno;
- fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
+ fprintf(stderr, "Failed to open filters in '%s': %s\n", path, strerror(-err));
return err;
}
@@ -662,6 +690,49 @@ static const struct stat_specs default_output_spec = {
},
};
+static int append_file(const char *path)
+{
+ void *tmp;
+
+ tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
+ if (!tmp)
+ return -ENOMEM;
+ env.filenames = tmp;
+ env.filenames[env.filename_cnt] = strdup(path);
+ if (!env.filenames[env.filename_cnt])
+ return -ENOMEM;
+ env.filename_cnt++;
+ return 0;
+}
+
+static int append_file_from_file(const char *path)
+{
+ char buf[1024];
+ int err = 0;
+ FILE *f;
+
+ f = fopen(path, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open object files list in '%s': %s\n",
+ path, strerror(errno));
+ return err;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ /* lines starting with # are comments, skip them */
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+ err = append_file(buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
static const struct stat_specs default_csv_output_spec = {
.spec_cnt = 14,
.ids = {
@@ -1163,13 +1234,13 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch
bpf_program__set_expected_attach_type(prog, attach_type);
if (!env.quiet) {
- printf("Using guessed program type '%s' for %s/%s...\n",
+ fprintf(stderr, "Using guessed program type '%s' for %s/%s...\n",
libbpf_bpf_prog_type_str(prog_type),
filename, prog_name);
}
} else {
if (!env.quiet) {
- printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
+ fprintf(stderr, "Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
ctx_name, filename, prog_name);
}
}
@@ -1292,6 +1363,261 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
return 0;
};
+static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr)
+{
+ void *tmp;
+ struct var_preset *cur;
+ char var[256], val[256], *val_end;
+ long long value;
+ int n;
+
+ tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets));
+ if (!tmp)
+ return -ENOMEM;
+ *presets = tmp;
+ cur = &(*presets)[*cnt];
+ memset(cur, 0, sizeof(*cur));
+ (*cnt)++;
+
+ if (sscanf(expr, "%s = %s %n", var, val, &n) != 2 || n != strlen(expr)) {
+ fprintf(stderr, "Failed to parse expression '%s'\n", expr);
+ return -EINVAL;
+ }
+
+ if (val[0] == '-' || isdigit(val[0])) {
+ /* must be a number */
+ errno = 0;
+ value = strtoll(val, &val_end, 0);
+ if (errno == ERANGE) {
+ errno = 0;
+ value = strtoull(val, &val_end, 0);
+ }
+ if (errno || *val_end != '\0') {
+ fprintf(stderr, "Failed to parse value '%s'\n", val);
+ return -EINVAL;
+ }
+ cur->ivalue = value;
+ cur->type = INTEGRAL;
+ } else {
+ /* if not a number, consider it enum value */
+ cur->svalue = strdup(val);
+ if (!cur->svalue)
+ return -ENOMEM;
+ cur->type = ENUMERATOR;
+ }
+
+ cur->name = strdup(var);
+ if (!cur->name)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int append_var_preset_file(const char *filename)
+{
+ char buf[1024];
+ FILE *f;
+ int err = 0;
+
+ f = fopen(filename, "rt");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open presets in '%s': %s\n", filename, strerror(-err));
+ return -EINVAL;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+
+ err = append_var_preset(&env.presets, &env.npresets, buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
+static bool is_signed_type(const struct btf_type *t)
+{
+ if (btf_is_int(t))
+ return btf_int_encoding(t) & BTF_INT_SIGNED;
+ if (btf_is_any_enum(t))
+ return btf_kflag(t);
+ return true;
+}
+
+static int enum_value_from_name(const struct btf *btf, const struct btf_type *t,
+ const char *evalue, long long *retval)
+{
+ if (btf_is_enum(t)) {
+ struct btf_enum *e = btf_enum(t);
+ int i, n = btf_vlen(t);
+
+ for (i = 0; i < n; ++i, ++e) {
+ const char *cur_name = btf__name_by_offset(btf, e->name_off);
+
+ if (strcmp(cur_name, evalue) == 0) {
+ *retval = e->val;
+ return 0;
+ }
+ }
+ } else if (btf_is_enum64(t)) {
+ struct btf_enum64 *e = btf_enum64(t);
+ int i, n = btf_vlen(t);
+
+ for (i = 0; i < n; ++i, ++e) {
+ const char *cur_name = btf__name_by_offset(btf, e->name_off);
+ __u64 value = btf_enum64_value(e);
+
+ if (strcmp(cur_name, evalue) == 0) {
+ *retval = value;
+ return 0;
+ }
+ }
+ }
+ return -EINVAL;
+}
+
+static bool is_preset_supported(const struct btf_type *t)
+{
+ return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t);
+}
+
+static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t,
+ struct bpf_map *map, struct btf_var_secinfo *sinfo,
+ struct var_preset *preset)
+{
+ const struct btf_type *base_type;
+ void *ptr;
+ long long value = preset->ivalue;
+ size_t size;
+
+ base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type));
+ if (!base_type) {
+ fprintf(stderr, "Failed to resolve type %d\n", t->type);
+ return -EINVAL;
+ }
+ if (!is_preset_supported(base_type)) {
+ fprintf(stderr, "Setting value for type %s is not supported\n",
+ btf__name_by_offset(btf, base_type->name_off));
+ return -EINVAL;
+ }
+
+ if (preset->type == ENUMERATOR) {
+ if (btf_is_any_enum(base_type)) {
+ if (enum_value_from_name(btf, base_type, preset->svalue, &value)) {
+ fprintf(stderr,
+ "Failed to find integer value for enum element %s\n",
+ preset->svalue);
+ return -EINVAL;
+ }
+ } else {
+ fprintf(stderr, "Value %s is not supported for type %s\n",
+ preset->svalue, btf__name_by_offset(btf, base_type->name_off));
+ return -EINVAL;
+ }
+ }
+
+ /* Check if value fits into the target variable size */
+ if (sinfo->size < sizeof(value)) {
+ bool is_signed = is_signed_type(base_type);
+ __u32 unsigned_bits = sinfo->size * 8 - (is_signed ? 1 : 0);
+ long long max_val = 1ll << unsigned_bits;
+
+ if (value >= max_val || value < -max_val) {
+ fprintf(stderr,
+ "Variable %s value %lld is out of range [%lld; %lld]\n",
+ btf__name_by_offset(btf, t->name_off), value,
+ is_signed ? -max_val : 0, max_val - 1);
+ return -EINVAL;
+ }
+ }
+
+ ptr = bpf_map__initial_value(map, &size);
+ if (!ptr || sinfo->offset + sinfo->size > size)
+ return -EINVAL;
+
+ if (__BYTE_ORDER == __LITTLE_ENDIAN) {
+ memcpy(ptr + sinfo->offset, &value, sinfo->size);
+ } else { /* __BYTE_ORDER == __BIG_ENDIAN */
+ __u8 src_offset = sizeof(value) - sinfo->size;
+
+ memcpy(ptr + sinfo->offset, (void *)&value + src_offset, sinfo->size);
+ }
+ return 0;
+}
+
+static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets)
+{
+ struct btf_var_secinfo *sinfo;
+ const char *sec_name;
+ const struct btf_type *t;
+ struct bpf_map *map;
+ struct btf *btf;
+ int i, j, k, n, cnt, err = 0;
+
+ if (npresets == 0)
+ return 0;
+
+ btf = bpf_object__btf(obj);
+ if (!btf)
+ return -EINVAL;
+
+ cnt = btf__type_cnt(btf);
+ for (i = 1; i != cnt; ++i) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ sinfo = btf_var_secinfos(t);
+ sec_name = btf__name_by_offset(btf, t->name_off);
+ map = bpf_object__find_map_by_name(obj, sec_name);
+ if (!map)
+ continue;
+
+ n = btf_vlen(t);
+ for (j = 0; j < n; ++j, ++sinfo) {
+ const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type);
+ const char *var_name;
+
+ if (!btf_is_var(var_type))
+ continue;
+
+ var_name = btf__name_by_offset(btf, var_type->name_off);
+
+ for (k = 0; k < npresets; ++k) {
+ if (strcmp(var_name, presets[k].name) != 0)
+ continue;
+
+ if (presets[k].applied) {
+ fprintf(stderr, "Variable %s is set more than once\n",
+ var_name);
+ return -EINVAL;
+ }
+
+ err = set_global_var(obj, btf, var_type, map, sinfo, presets + k);
+ if (err)
+ return err;
+
+ presets[k].applied = true;
+ break;
+ }
+ }
+ }
+ for (i = 0; i < npresets; ++i) {
+ if (!presets[i].applied) {
+ fprintf(stderr, "Global variable preset %s has not been applied\n",
+ presets[i].name);
+ }
+ presets[i].applied = false;
+ }
+ return err;
+}
+
static int process_obj(const char *filename)
{
const char *base_filename = basename(strdupa(filename));
@@ -1341,6 +1667,11 @@ static int process_obj(const char *filename)
if (prog_cnt == 1) {
prog = bpf_object__next_program(obj, NULL);
bpf_program__set_autoload(prog, true);
+ err = set_global_vars(obj, env.presets, env.npresets);
+ if (err) {
+ fprintf(stderr, "Failed to set global variables %d\n", err);
+ goto cleanup;
+ }
process_prog(filename, obj, prog);
goto cleanup;
}
@@ -1355,6 +1686,12 @@ static int process_obj(const char *filename)
goto cleanup;
}
+ err = set_global_vars(tobj, env.presets, env.npresets);
+ if (err) {
+ fprintf(stderr, "Failed to set global variables %d\n", err);
+ goto cleanup;
+ }
+
lprog = NULL;
bpf_object__for_each_program(tprog, tobj) {
const char *tprog_name = bpf_program__name(tprog);
@@ -2460,5 +2797,11 @@ int main(int argc, char **argv)
free(env.deny_filters[i].prog_glob);
}
free(env.deny_filters);
+ for (i = 0; i < env.npresets; ++i) {
+ free(env.presets[i].name);
+ if (env.presets[i].type == ENUMERATOR)
+ free(env.presets[i].svalue);
+ }
+ free(env.presets);
return -err;
}
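The new -G/--set-global-vars option looks up global variables by BTF name in the object's data sections and patches their initial values before load, so an invocation such as veristat -G "max_entries_hint = 4096" -G "mode = MODE_STRICT" prog.bpf.o (variable names made up for illustration) could drive tunables declared roughly like this:

/* Illustrative globals in a BPF object; names are hypothetical.
 * An INTEGRAL preset targets the integer, an ENUMERATOR preset the enum. */
enum run_mode { MODE_LAX, MODE_STRICT };

volatile long long max_entries_hint = 128;	/* integer, settable via -G */
volatile enum run_mode mode = MODE_LAX;		/* enum, settable by enumerator name */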
diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh
deleted file mode 100755
index ffcd3953f94c..000000000000
--- a/tools/testing/selftests/bpf/with_addr.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# add private ipv4 and ipv6 addresses to loopback
-
-readonly V6_INNER='100::a/128'
-readonly V4_INNER='192.168.0.1/32'
-
-if getopts ":s" opt; then
- readonly SIT_DEV_NAME='sixtofourtest0'
- readonly V6_SIT='2::/64'
- readonly V4_SIT='172.17.0.1/32'
- shift
-fi
-
-fail() {
- echo "error: $*" 1>&2
- exit 1
-}
-
-setup() {
- ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address'
- ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address'
-
- if [[ -n "${V6_SIT}" ]]; then
- ip link add "${SIT_DEV_NAME}" type sit remote any local any \
- || fail 'failed to add sit'
- ip link set dev "${SIT_DEV_NAME}" up \
- || fail 'failed to bring sit device up'
- ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \
- || fail 'failed to setup v6 SIT address'
- ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \
- || fail 'failed to setup v4 SIT address'
- fi
-
- sleep 2 # avoid race causing bind to fail
-}
-
-cleanup() {
- if [[ -n "${V6_SIT}" ]]; then
- ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}"
- ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}"
- ip link del "${SIT_DEV_NAME}"
- fi
-
- ip -4 addr del "${V4_INNER}" dev lo
- ip -6 addr del "${V6_INNER}" dev lo
-}
-
-trap cleanup EXIT
-
-setup
-"$@"
-exit "$?"
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh
deleted file mode 100755
index e24949ed3a20..000000000000
--- a/tools/testing/selftests/bpf/with_tunnels.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# setup tunnels for flow dissection test
-
-readonly SUFFIX="test_$(mktemp -u XXXX)"
-CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo"
-
-setup() {
- ip link add "ipip_${SUFFIX}" type ipip ${CONFIG}
- ip link add "gre_${SUFFIX}" type gre ${CONFIG}
- ip link add "sit_${SUFFIX}" type sit ${CONFIG}
-
- echo "tunnels before test:"
- ip tunnel show
-
- ip link set "ipip_${SUFFIX}" up
- ip link set "gre_${SUFFIX}" up
- ip link set "sit_${SUFFIX}" up
-}
-
-
-cleanup() {
- ip tunnel del "ipip_${SUFFIX}"
- ip tunnel del "gre_${SUFFIX}"
- ip tunnel del "sit_${SUFFIX}"
-
- echo "tunnels after test:"
- ip tunnel show
-}
-
-trap cleanup EXIT
-
-setup
-"$@"
-exit "$?"
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
deleted file mode 100644
index c1fc44c87c30..000000000000
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ /dev/null
@@ -1,226 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <assert.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <net/if.h>
-#include <unistd.h>
-#include <libgen.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include "bpf_util.h"
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#define MAX_IFACE_NUM 32
-#define MAX_INDEX_NUM 1024
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int ifaces[MAX_IFACE_NUM] = {};
-
-static void int_exit(int sig)
-{
- __u32 prog_id = 0;
- int i;
-
- for (i = 0; ifaces[i] > 0; i++) {
- if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) {
- printf("bpf_xdp_query_id failed\n");
- exit(1);
- }
- if (prog_id)
- bpf_xdp_detach(ifaces[i], xdp_flags, NULL);
- }
-
- exit(0);
-}
-
-static int get_mac_addr(unsigned int ifindex, void *mac_addr)
-{
- char ifname[IF_NAMESIZE];
- struct ifreq ifr;
- int fd, ret = -1;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0)
- return ret;
-
- if (!if_indextoname(ifindex, ifname))
- goto err_out;
-
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
- goto err_out;
-
- memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
- ret = 0;
-
-err_out:
- close(fd);
- return ret;
-}
-
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n"
- " -X load xdp program on egress\n",
- prog);
-}
-
-int main(int argc, char **argv)
-{
- int prog_fd, group_all, mac_map;
- struct bpf_program *ingress_prog, *egress_prog;
- int i, err, ret, opt, egress_prog_fd = 0;
- struct bpf_devmap_val devmap_val;
- bool attach_egress_prog = false;
- unsigned char mac_addr[6];
- char ifname[IF_NAMESIZE];
- struct bpf_object *obj;
- unsigned int ifindex;
- char filename[256];
-
- while ((opt = getopt(argc, argv, "SNFX")) != -1) {
- switch (opt) {
- case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
- break;
- case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
- break;
- case 'X':
- attach_egress_prog = true;
- break;
- default:
- usage(basename(argv[0]));
- return 1;
- }
- }
-
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
- xdp_flags |= XDP_FLAGS_DRV_MODE;
- } else if (attach_egress_prog) {
- printf("Load xdp program on egress with SKB mode not supported yet\n");
- goto err_out;
- }
-
- if (optind == argc) {
- printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
- goto err_out;
- }
-
- printf("Get interfaces:");
- for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
- ifaces[i] = if_nametoindex(argv[optind + i]);
- if (!ifaces[i])
- ifaces[i] = strtoul(argv[optind + i], NULL, 0);
- if (!if_indextoname(ifaces[i], ifname)) {
- perror("Invalid interface name or i");
- goto err_out;
- }
- if (ifaces[i] > MAX_INDEX_NUM) {
- printf(" interface index too large\n");
- goto err_out;
- }
- printf(" %d", ifaces[i]);
- }
- printf("\n");
-
- snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]);
- obj = bpf_object__open_file(filename, NULL);
- err = libbpf_get_error(obj);
- if (err)
- goto err_out;
- err = bpf_object__load(obj);
- if (err)
- goto err_out;
- prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL));
-
- if (attach_egress_prog)
- group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
- else
- group_all = bpf_object__find_map_fd_by_name(obj, "map_all");
- mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map");
-
- if (group_all < 0 || mac_map < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- goto err_out;
- }
-
- if (attach_egress_prog) {
- /* Find ingress/egress prog for 2nd xdp prog */
- ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog");
- egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
- if (!ingress_prog || !egress_prog) {
- printf("finding ingress/egress_prog in obj file failed\n");
- goto err_out;
- }
- prog_fd = bpf_program__fd(ingress_prog);
- egress_prog_fd = bpf_program__fd(egress_prog);
- if (prog_fd < 0 || egress_prog_fd < 0) {
- printf("find egress_prog fd failed\n");
- goto err_out;
- }
- }
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- /* Init forward multicast groups and exclude group */
- for (i = 0; ifaces[i] > 0; i++) {
- ifindex = ifaces[i];
-
- if (attach_egress_prog) {
- ret = get_mac_addr(ifindex, mac_addr);
- if (ret < 0) {
- printf("get interface %d mac failed\n", ifindex);
- goto err_out;
- }
- ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0);
- if (ret) {
- perror("bpf_update_elem mac_map failed\n");
- goto err_out;
- }
- }
-
- /* Add all the interfaces to group all */
- devmap_val.ifindex = ifindex;
- devmap_val.bpf_prog.fd = egress_prog_fd;
- ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0);
- if (ret) {
- perror("bpf_map_update_elem");
- goto err_out;
- }
-
- /* bind prog_fd to each interface */
- ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
- if (ret) {
- printf("Set xdp fd failed on %d\n", ifindex);
- goto err_out;
- }
- }
-
- /* sleep some time for testing */
- sleep(999);
-
- return 0;
-
-err_out:
- return 1;
-}