summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile16
-rw-r--r--tools/arch/arm64/include/asm/sysreg.h65
-rw-r--r--tools/arch/x86/include/asm/amd/ibs.h (renamed from tools/arch/x86/include/asm/amd-ibs.h)2
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h24
-rw-r--r--tools/arch/x86/include/asm/inat.h6
-rw-r--r--tools/arch/x86/kcpuid/cpuid.csv791
-rw-r--r--tools/arch/x86/kcpuid/kcpuid.c375
-rw-r--r--tools/arch/x86/lib/insn.c7
-rw-r--r--tools/arch/x86/lib/x86-opcode-map.txt56
-rw-r--r--tools/arch/x86/tools/gen-insn-attr-x86.awk7
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst10
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool4
-rw-r--r--tools/bpf/bpftool/cgroup.c14
-rw-r--r--tools/bpf/bpftool/link.c3
-rw-r--r--tools/bpf/bpftool/prog.c12
-rw-r--r--tools/include/nolibc/Makefile34
-rw-r--r--tools/include/nolibc/arch-aarch64.h1
-rw-r--r--tools/include/nolibc/arch-arm.h2
-rw-r--r--tools/include/nolibc/arch-i386.h2
-rw-r--r--tools/include/nolibc/arch-loongarch.h7
-rw-r--r--tools/include/nolibc/arch-m68k.h141
-rw-r--r--tools/include/nolibc/arch-powerpc.h2
-rw-r--r--tools/include/nolibc/arch-riscv.h1
-rw-r--r--tools/include/nolibc/arch-sparc.h191
-rw-r--r--tools/include/nolibc/arch-x86_64.h1
-rw-r--r--tools/include/nolibc/arch.h4
-rw-r--r--tools/include/nolibc/compiler.h9
-rw-r--r--tools/include/nolibc/crt.h5
-rw-r--r--tools/include/nolibc/ctype.h6
-rw-r--r--tools/include/nolibc/dirent.h10
-rw-r--r--tools/include/nolibc/elf.h15
-rw-r--r--tools/include/nolibc/errno.h6
-rw-r--r--tools/include/nolibc/fcntl.h69
-rw-r--r--tools/include/nolibc/getopt.h101
-rw-r--r--tools/include/nolibc/math.h31
-rw-r--r--tools/include/nolibc/nolibc.h21
-rw-r--r--tools/include/nolibc/poll.h55
-rw-r--r--tools/include/nolibc/sched.h50
-rw-r--r--tools/include/nolibc/signal.h6
-rw-r--r--tools/include/nolibc/std.h6
-rw-r--r--tools/include/nolibc/stddef.h24
-rw-r--r--tools/include/nolibc/stdint.h4
-rw-r--r--tools/include/nolibc/stdio.h167
-rw-r--r--tools/include/nolibc/stdlib.h54
-rw-r--r--tools/include/nolibc/string.h40
-rw-r--r--tools/include/nolibc/sys.h423
-rw-r--r--tools/include/nolibc/sys/auxv.h41
-rw-r--r--tools/include/nolibc/sys/ioctl.h29
-rw-r--r--tools/include/nolibc/sys/mman.h82
-rw-r--r--tools/include/nolibc/sys/mount.h37
-rw-r--r--tools/include/nolibc/sys/prctl.h36
-rw-r--r--tools/include/nolibc/sys/random.h34
-rw-r--r--tools/include/nolibc/sys/reboot.h34
-rw-r--r--tools/include/nolibc/sys/resource.h53
-rw-r--r--tools/include/nolibc/sys/stat.h94
-rw-r--r--tools/include/nolibc/sys/syscall.h19
-rw-r--r--tools/include/nolibc/sys/sysmacros.h20
-rw-r--r--tools/include/nolibc/sys/time.h49
-rw-r--r--tools/include/nolibc/sys/timerfd.h87
-rw-r--r--tools/include/nolibc/sys/types.h7
-rw-r--r--tools/include/nolibc/sys/utsname.h42
-rw-r--r--tools/include/nolibc/sys/wait.h116
-rw-r--r--tools/include/nolibc/time.h189
-rw-r--r--tools/include/nolibc/types.h32
-rw-r--r--tools/include/nolibc/unistd.h40
-rw-r--r--tools/include/uapi/asm-generic/socket.h2
-rw-r--r--tools/include/uapi/linux/bpf.h19
-rw-r--r--tools/include/uapi/linux/fanotify.h274
-rw-r--r--tools/include/uapi/linux/if_xdp.h6
-rw-r--r--tools/include/uapi/linux/mount.h235
-rw-r--r--tools/include/uapi/linux/netdev.h1
-rw-r--r--tools/include/uapi/linux/nsfs.h45
-rw-r--r--tools/include/uapi/linux/perf_event.h657
-rw-r--r--tools/include/uapi/linux/prctl.h45
-rw-r--r--tools/lib/bpf/bpf_core_read.h6
-rw-r--r--tools/lib/bpf/bpf_helpers.h8
-rw-r--r--tools/lib/bpf/btf.c226
-rw-r--r--tools/lib/bpf/libbpf.c87
-rw-r--r--tools/lib/bpf/libbpf.h11
-rw-r--r--tools/lib/bpf/libbpf.map4
-rw-r--r--tools/lib/bpf/libbpf_internal.h9
-rw-r--r--tools/lib/bpf/linker.c6
-rw-r--r--tools/lib/bpf/netlink.c20
-rw-r--r--tools/lib/bpf/nlattr.c15
-rw-r--r--tools/memory-model/Documentation/README7
-rw-r--r--tools/memory-model/Documentation/explanation.txt2
-rw-r--r--tools/memory-model/Documentation/locking.txt5
-rw-r--r--tools/memory-model/Documentation/ordering.txt22
-rw-r--r--tools/memory-model/Documentation/recipes.txt4
-rw-r--r--tools/memory-model/Documentation/references.txt3
-rw-r--r--tools/memory-model/Documentation/simple.txt4
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_array.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_array.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_array.j22
-rw-r--r--tools/net/ynl/Makefile.deps17
-rw-r--r--tools/net/ynl/generated/Makefile7
-rw-r--r--tools/net/ynl/lib/ynl-priv.h19
-rw-r--r--tools/net/ynl/lib/ynl.c160
-rw-r--r--tools/net/ynl/lib/ynl.h18
-rwxr-xr-xtools/net/ynl/pyynl/cli.py15
-rwxr-xr-xtools/net/ynl/pyynl/ethtool.py22
-rw-r--r--tools/net/ynl/pyynl/lib/__init__.py5
-rw-r--r--tools/net/ynl/pyynl/lib/ynl.py39
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py848
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_rst.py2
-rw-r--r--tools/net/ynl/samples/.gitignore6
-rw-r--r--tools/net/ynl/samples/devlink.c7
-rw-r--r--tools/net/ynl/samples/rt-addr.c80
-rw-r--r--tools/net/ynl/samples/rt-link.c184
-rw-r--r--tools/net/ynl/samples/rt-route.c80
-rw-r--r--tools/net/ynl/samples/tc.c80
-rw-r--r--tools/objtool/arch/x86/decode.c15
-rw-r--r--tools/objtool/elf.c38
-rw-r--r--tools/objtool/include/objtool/elf.h1
-rw-r--r--tools/perf/bench/Build1
-rw-r--r--tools/perf/bench/futex-hash.c7
-rw-r--r--tools/perf/bench/futex-lock-pi.c5
-rw-r--r--tools/perf/bench/futex-requeue.c6
-rw-r--r--tools/perf/bench/futex-wake-parallel.c9
-rw-r--r--tools/perf/bench/futex-wake.c4
-rw-r--r--tools/perf/bench/futex.c67
-rw-r--r--tools/perf/bench/futex.h5
-rwxr-xr-xtools/perf/check-headers.sh2
-rw-r--r--tools/perf/util/amd-sample-raw.c2
-rw-r--r--tools/power/acpi/common/cmfsize.c2
-rw-r--r--tools/power/acpi/common/getopt.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/oslinuxtbl.c4
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixdir.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixmap.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixxf.c2
-rw-r--r--tools/power/acpi/tools/acpidump/acpidump.h2
-rw-r--r--tools/power/acpi/tools/acpidump/apdump.c2
-rw-r--r--tools/power/acpi/tools/acpidump/apfiles.c4
-rw-r--r--tools/power/acpi/tools/acpidump/apmain.c2
-rw-r--r--tools/power/cpupower/Makefile13
-rw-r--r--tools/power/cpupower/README28
-rw-r--r--tools/power/cpupower/bindings/python/Makefile8
-rw-r--r--tools/power/cpupower/bindings/python/README13
-rw-r--r--tools/power/cpupower/cpupower-service.conf32
-rw-r--r--tools/power/cpupower/cpupower.service.in16
-rw-r--r--tools/power/cpupower/cpupower.sh26
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py3
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c15
-rw-r--r--tools/power/x86/intel-speed-select/isst-core-tpmi.c12
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c20
-rw-r--r--tools/power/x86/intel-speed-select/isst.h3
-rw-r--r--tools/sched_ext/Makefile23
-rw-r--r--tools/sched_ext/include/scx/common.bpf.h2
-rw-r--r--tools/sched_ext/scx_qmap.bpf.c4
-rw-r--r--tools/sched_ext/scx_show_state.py14
-rw-r--r--tools/testing/crypto/chacha20-s390/test-cipher.c10
-rw-r--r--tools/testing/kunit/configs/all_tests.config1
-rw-r--r--tools/testing/kunit/kunit_json.py10
-rw-r--r--tools/testing/kunit/kunit_kernel.py8
-rw-r--r--tools/testing/kunit/qemu_configs/powerpc.py1
-rw-r--r--tools/testing/kunit/qemu_configs/powerpc32.py17
-rw-r--r--tools/testing/kunit/qemu_configs/powerpcle.py14
-rw-r--r--tools/testing/kunit/qemu_configs/riscv32.py17
-rw-r--r--tools/testing/kunit/qemu_configs/sparc.py2
-rw-r--r--tools/testing/kunit/qemu_configs/sparc64.py16
-rw-r--r--tools/testing/selftests/Makefile4
-rw-r--r--tools/testing/selftests/arm64/Makefile2
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c14
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.c62
-rw-r--r--tools/testing/selftests/bpf/DENYLIST1
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch642
-rw-r--r--tools/testing/selftests/bpf/Makefile16
-rw-r--r--tools/testing/selftests/bpf/bench.c16
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_htab_mem.c3
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_sockmap.c598
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c42
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh2
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h5
-rw-r--r--tools/testing/selftests/bpf/config5
-rw-r--r--tools/testing/selftests/bpf/config.aarch641
-rw-r--r--tools/testing/selftests/bpf/config.s390x1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c84
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c231
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_split.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_sysfs.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c285
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c192
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fill_link_info.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rbtree.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c447
-rw-r--r--tools/testing/selftests/bpf/prog_tests/socket_helpers.h84
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c297
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c457
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_redir.c465
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_btf_ext.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c22
-rw-r--r--tools/testing/selftests/bpf/progs/bench_sockmap_prog.c65
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h (renamed from tools/testing/selftests/bpf/bpf_arena_spin_lock.h)15
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h5
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_common.h27
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c41
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c126
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c756
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h1
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c18
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c9
-rw-r--r--tools/testing/selftests/bpf/progs/dmabuf_iter.c101
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c230
-rw-r--r--tools/testing/selftests/bpf/progs/fd_htab_lookup.c25
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c2
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_peek.c113
-rw-r--r--tools/testing/selftests/bpf/progs/prepare.c1
-rw-r--r--tools/testing/selftests/bpf/progs/raw_tp_null.c2
-rw-r--r--tools/testing/selftests/bpf/progs/raw_tp_null_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_fail.c29
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_search.c206
-rw-r--r--tools/testing/selftests/bpf/progs/set_global_vars.c41
-rw-r--r--tools/testing/selftests/bpf/progs/setget_sockopt.c11
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_ext.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_module_attach.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_ktls.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_redir.c68
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_trap.c71
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c12
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_load_acquire.c48
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c58
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_store_release.c39
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_metadata.c13
-rw-r--r--tools/testing/selftests/bpf/progs/xsk_xdp_progs.c50
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c16
-rw-r--r--tools/testing/selftests/bpf/test_loader.c14
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c8
-rw-r--r--tools/testing/selftests/bpf/veristat.c101
-rw-r--r--tools/testing/selftests/bpf/xsk_xdp_common.h1
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c118
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h2
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c477
-rwxr-xr-xtools/testing/selftests/cpufreq/cpufreq.sh18
-rw-r--r--tools/testing/selftests/drivers/net/.gitignore2
-rw-r--r--tools/testing/selftests/drivers/net/Makefile6
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile3
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py45
-rw-r--r--tools/testing/selftests/drivers/net/hw/iou-zcrx.c27
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py140
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py1
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py222
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c382
-rw-r--r--tools/testing/selftests/drivers/net/hw/nic_link_layer.py113
-rw-r--r--tools/testing/selftests/drivers/net/hw/nic_performance.py137
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py5
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/xsk_reconfig.py60
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py2
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py20
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_id.py23
-rw-r--r--tools/testing/selftests/drivers/net/napi_id_helper.c83
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/peer.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py16
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py4
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile2
-rw-r--r--tools/testing/selftests/drivers/net/team/config1
-rwxr-xr-xtools/testing/selftests/drivers/net/team/propagation.sh80
-rw-r--r--tools/testing/selftests/filesystems/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/anon_inode_test.c69
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/Makefile9
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c38
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c557
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/Makefile6
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount.h36
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c86
-rw-r--r--tools/testing/selftests/filesystems/utils.c88
-rw-r--r--tools/testing/selftests/filesystems/utils.h3
-rw-r--r--tools/testing/selftests/filesystems/wrappers.h (renamed from tools/testing/selftests/filesystems/overlayfs/wrappers.h)46
-rw-r--r--tools/testing/selftests/ftrace/Makefile2
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore6
-rw-r--r--tools/testing/selftests/futex/functional/Makefile7
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa.c262
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa_mpol.c231
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c292
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh7
-rw-r--r--tools/testing/selftests/futex/include/futex2test.h70
-rw-r--r--tools/testing/selftests/gpio/Makefile2
-rw-r--r--tools/testing/selftests/gpio/config1
-rwxr-xr-xtools/testing/selftests/gpio/gpio-aggregator.sh727
-rw-r--r--tools/testing/selftests/kexec/Makefile7
-rw-r--r--tools/testing/selftests/kexec/test_kexec_jump.c72
-rwxr-xr-xtools/testing/selftests/kexec/test_kexec_jump.sh42
-rw-r--r--tools/testing/selftests/kselftest_harness.h170
-rw-r--r--tools/testing/selftests/kselftest_harness/.gitignore2
-rw-r--r--tools/testing/selftests/kselftest_harness/Makefile7
-rw-r--r--tools/testing/selftests/kselftest_harness/harness-selftest.c136
-rw-r--r--tools/testing/selftests/kselftest_harness/harness-selftest.expected64
-rwxr-xr-xtools/testing/selftests/kselftest_harness/harness-selftest.sh13
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm18
-rw-r--r--tools/testing/selftests/kvm/arm64/host_sve.c127
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c77
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h6
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h7
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/processor.h141
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/ucall.h20
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h23
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c3
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/exception.S59
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/processor.c346
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/ucall.c38
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/handlers.S139
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/ebreak_test.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c132
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c24
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c2
-rw-r--r--tools/testing/selftests/mount_setattr/Makefile2
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c61
-rw-r--r--tools/testing/selftests/nci/nci_dev.c2
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c80
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh49
-rwxr-xr-xtools/testing/selftests/net/busy_poll_test.sh2
-rw-r--r--tools/testing/selftests/net/can/.gitignore2
-rw-r--r--tools/testing/selftests/net/can/Makefile11
-rw-r--r--tools/testing/selftests/net/can/test_raw_filter.c405
-rwxr-xr-xtools/testing/selftests/net/can/test_raw_filter.sh45
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh3
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh123
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_igmp.sh80
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mld.sh81
-rw-r--r--tools/testing/selftests/net/forwarding/config1
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh2
-rwxr-xr-xtools/testing/selftests/net/ipv6_route_update_soft_lockup.sh1
-rw-r--r--tools/testing/selftests/net/lib.sh47
-rw-r--r--tools/testing/selftests/net/lib/.gitignore1
-rw-r--r--tools/testing/selftests/net/lib/Makefile1
-rw-r--r--tools/testing/selftests/net/lib/ksft.h56
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py24
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py4
-rw-r--r--tools/testing/selftests/net/lib/xdp_helper.c (renamed from tools/testing/selftests/drivers/net/xdp_helper.c)82
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh32
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c21
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_diag.c231
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c16
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh26
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh10
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c16
-rw-r--r--tools/testing/selftests/net/net_helper.sh25
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile2
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh3
-rwxr-xr-xtools/testing/selftests/net/netfilter/bridge_brouter.sh2
-rw-r--r--tools/testing/selftests/net/netfilter/config1
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh427
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_vrf.sh37
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh6
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh165
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh635
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_interface_stress.sh154
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat_zones.sh2
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh38
-rwxr-xr-xtools/testing/selftests/net/netfilter/rpath.sh18
-rw-r--r--tools/testing/selftests/net/ovpn/.gitignore2
-rw-r--r--tools/testing/selftests/net/ovpn/Makefile32
-rw-r--r--tools/testing/selftests/net/ovpn/common.sh108
-rw-r--r--tools/testing/selftests/net/ovpn/config10
-rw-r--r--tools/testing/selftests/net/ovpn/data64.key5
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c2383
-rw-r--r--tools/testing/selftests/net/ovpn/tcp_peers.txt5
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-chachapoly.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket.sh45
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-float.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test.sh117
-rw-r--r--tools/testing/selftests/net/ovpn/udp_peers.txt6
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh1
-rw-r--r--tools/testing/selftests/net/reuseport_addr_any.c36
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh5
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh5
-rwxr-xr-xtools/testing/selftests/net/srv6_end_flavors_test.sh4
-rwxr-xr-xtools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh77
-rwxr-xr-xtools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh83
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh74
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh83
-rwxr-xr-xtools/testing/selftests/net/test_bridge_neigh_suppress.sh125
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh2
-rw-r--r--tools/testing/selftests/nolibc/Makefile28
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test-linkage.c2
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c331
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh7
-rw-r--r--tools/testing/selftests/perf_events/watermark_signal.c2
-rw-r--r--tools/testing/selftests/pid_namespace/pid_max.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h22
-rw-r--r--tools/testing/selftests/pidfd/pidfd_bind_mount.c74
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c13
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/console-badness.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/srcu_lockdep.sh42
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh89
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot2
-rwxr-xr-xtools/testing/selftests/run_kselftest.sh9
-rw-r--r--tools/testing/selftests/sched_ext/Makefile3
-rw-r--r--tools/testing/selftests/sched_ext/allowed_cpus.bpf.c144
-rw-r--r--tools/testing/selftests/sched_ext/allowed_cpus.c84
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c74
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu.c88
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c43
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu_fails.c61
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c2
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c13
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh30
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json62
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json24
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh4
-rw-r--r--tools/testing/selftests/timens/clock_nanosleep.c4
-rw-r--r--tools/testing/selftests/timens/exec.c2
-rw-r--r--tools/testing/selftests/timens/futex.c2
-rw-r--r--tools/testing/selftests/timens/gettime_perf.c2
-rw-r--r--tools/testing/selftests/timens/procfs.c2
-rw-r--r--tools/testing/selftests/timens/timens.c2
-rw-r--r--tools/testing/selftests/timens/timer.c4
-rw-r--r--tools/testing/selftests/timens/timerfd.c6
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c2
-rw-r--r--tools/testing/selftests/ublk/Makefile11
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c5
-rw-r--r--tools/testing/selftests/ublk/file_backed.c17
-rw-r--r--tools/testing/selftests/ublk/kublk.c153
-rw-r--r--tools/testing/selftests/ublk/kublk.h22
-rw-r--r--tools/testing/selftests/ublk/null.c55
-rw-r--r--tools/testing/selftests/ublk/stripe.c26
-rwxr-xr-xtools/testing/selftests/ublk/test_common.sh39
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_04.sh2
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_05.sh2
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_06.sh2
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_08.sh32
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_09.sh28
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_10.sh30
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_11.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_02.sh10
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_03.sh7
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh7
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_05.sh9
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh29
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile3
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config1
-rw-r--r--tools/testing/selftests/x86/Makefile3
-rw-r--r--tools/testing/selftests/x86/apx.c10
-rw-r--r--tools/testing/selftests/x86/bugs/Makefile3
-rwxr-xr-xtools/testing/selftests/x86/bugs/common.py164
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_indirect_alignment.py150
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_permutations.py109
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_ret_alignment.py139
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_sysfs.py65
-rw-r--r--tools/testing/selftests/x86/lam.c9
-rw-r--r--tools/testing/selftests/x86/xstate.c3
-rw-r--r--tools/testing/selftests/x86/xstate.h2
-rw-r--r--tools/testing/vsock/timeout.c18
-rw-r--r--tools/testing/vsock/timeout.h1
-rw-r--r--tools/testing/vsock/util.c38
-rw-r--r--tools/testing/vsock/util.h2
-rw-r--r--tools/testing/vsock/vsock_test.c131
-rw-r--r--tools/tracing/rtla/README.txt7
-rw-r--r--tools/tracing/rtla/src/osnoise_hist.c5
-rw-r--r--tools/tracing/rtla/src/osnoise_top.c5
-rw-r--r--tools/tracing/rtla/src/timerlat_bpf.c1
-rw-r--r--tools/tracing/rtla/src/timerlat_hist.c5
-rw-r--r--tools/tracing/rtla/src/timerlat_top.c5
-rw-r--r--tools/tracing/rtla/src/utils.c2
-rw-r--r--tools/tracing/rtla/src/utils.h6
-rw-r--r--tools/tracing/rtla/tests/engine.sh7
-rw-r--r--tools/tracing/rtla/tests/hwnoise.t4
-rw-r--r--tools/tracing/rtla/tests/osnoise.t6
-rw-r--r--tools/tracing/rtla/tests/timerlat.t12
497 files changed, 24769 insertions, 4520 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 5e1254eb66de..c31cbbd12c45 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -41,6 +41,7 @@ help:
@echo ' mm - misc mm tools'
@echo ' wmi - WMI interface examples'
@echo ' x86_energy_perf_policy - Intel energy policy tool'
+ @echo ' ynl - ynl headers, library, and python tool'
@echo ''
@echo 'You can do:'
@echo ' $$ make -C tools/ <tool>_install'
@@ -118,11 +119,14 @@ freefall: FORCE
kvm_stat: FORCE
$(call descend,kvm/$@)
+ynl: FORCE
+ $(call descend,net/ynl)
+
all: acpi counter cpupower gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
virtio mm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
- debugging tracing thermal thermometer thermal-engine
+ debugging tracing thermal thermometer thermal-engine ynl
acpi_install:
$(call descend,power/$(@:_install=),install)
@@ -157,13 +161,16 @@ freefall_install:
kvm_stat_install:
$(call descend,kvm/$(@:_install=),install)
+ynl_install:
+ $(call descend,net/$(@:_install=),install)
+
install: acpi_install counter_install cpupower_install gpio_install \
hv_install firewire_install iio_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install mm_install bpf_install x86_energy_perf_policy_install \
tmon_install freefall_install objtool_install kvm_stat_install \
wmi_install debugging_install intel-speed-select_install \
- tracing_install thermometer_install thermal-engine_install
+ tracing_install thermometer_install thermal-engine_install ynl_install
acpi_clean:
$(call descend,power/acpi,clean)
@@ -214,12 +221,15 @@ freefall_clean:
build_clean:
$(call descend,build,clean)
+ynl_clean:
+ $(call descend,net/$(@:_clean=),clean)
+
clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
gpio_clean objtool_clean leds_clean wmi_clean firmware_clean debugging_clean \
intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean \
- sched_ext_clean
+ sched_ext_clean ynl_clean
.PHONY: FORCE
diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h
index b6c5ece4fdee..690b6ebd118f 100644
--- a/tools/arch/arm64/include/asm/sysreg.h
+++ b/tools/arch/arm64/include/asm/sysreg.h
@@ -117,6 +117,7 @@
#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
+/* Data cache zero operations */
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4)
#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6)
@@ -153,11 +154,13 @@
#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3)
#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5)
-/* Data cache zero operations */
#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1)
#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3)
#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4)
+#define SYS_DC_CIVAPS sys_insn(1, 0, 7, 15, 1)
+#define SYS_DC_CIGDVAPS sys_insn(1, 0, 7, 15, 5)
+
/*
* Automatically generated definitions for system registers, the
* manual encodings below are in the process of being converted to
@@ -475,6 +478,7 @@
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
+#define SYS_CNTVCT_EL0 sys_reg(3, 3, 14, 0, 2)
#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
@@ -482,23 +486,36 @@
#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2)
+#define SYS_CNTV_TVAL_EL0 sys_reg(3, 3, 14, 3, 0)
#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1)
#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2)
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
+#define SYS_AARCH32_CNTVCT sys_reg(0, 1, 0, 14, 0)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0)
+#define SYS_AARCH32_CNTVCTSS sys_reg(0, 9, 0, 14, 0)
#define __PMEV_op2(n) ((n) & 0x7)
#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3))
+#define SYS_PMEVCNTSVRn_EL1(n) sys_reg(2, 0, 14, __CNTR_CRm(n), __PMEV_op2(n))
#define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n))
#define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3))
#define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n))
#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
+#define SYS_SPMCGCRn_EL1(n) sys_reg(2, 0, 9, 13, ((n) & 1))
+
+#define __SPMEV_op2(n) ((n) & 0x7)
+#define __SPMEV_crm(p, n) ((((p) & 7) << 1) | (((n) >> 3) & 1))
+#define SYS_SPMEVCNTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b000, n), __SPMEV_op2(n))
+#define SYS_SPMEVFILT2Rn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b011, n), __SPMEV_op2(n))
+#define SYS_SPMEVFILTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b010, n), __SPMEV_op2(n))
+#define SYS_SPMEVTYPERn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b001, n), __SPMEV_op2(n))
+
#define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0)
#define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5)
@@ -518,7 +535,6 @@
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0)
-#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0)
@@ -604,28 +620,18 @@
/* VHE encodings for architectural EL0/1 system registers */
#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0)
-#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
-#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
-#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3)
-#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
-#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1)
-#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
-#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
-#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3)
#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0)
-#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0)
#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0)
#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
-#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1)
#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7)
#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
@@ -1028,8 +1034,11 @@
#define PIE_RX UL(0xa)
#define PIE_RW UL(0xc)
#define PIE_RWX UL(0xe)
+#define PIE_MASK UL(0xf)
-#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
+#define PIRx_ELx_BITS_PER_IDX 4
+#define PIRx_ELx_PERM_SHIFT(idx) ((idx) * PIRx_ELx_BITS_PER_IDX)
+#define PIRx_ELx_PERM_PREP(idx, perm) (((perm) & PIE_MASK) << PIRx_ELx_PERM_SHIFT(idx))
/*
* Permission Overlay Extension (POE) permission encodings.
@@ -1040,12 +1049,34 @@
#define POE_RX UL(0x3)
#define POE_W UL(0x4)
#define POE_RW UL(0x5)
-#define POE_XW UL(0x6)
-#define POE_RXW UL(0x7)
+#define POE_WX UL(0x6)
+#define POE_RWX UL(0x7)
#define POE_MASK UL(0xf)
-/* Initial value for Permission Overlay Extension for EL0 */
-#define POR_EL0_INIT POE_RXW
+#define POR_ELx_BITS_PER_IDX 4
+#define POR_ELx_PERM_SHIFT(idx) ((idx) * POR_ELx_BITS_PER_IDX)
+#define POR_ELx_PERM_GET(idx, reg) (((reg) >> POR_ELx_PERM_SHIFT(idx)) & POE_MASK)
+#define POR_ELx_PERM_PREP(idx, perm) (((perm) & POE_MASK) << POR_ELx_PERM_SHIFT(idx))
+
+/*
+ * Definitions for Guarded Control Stack
+ */
+
+#define GCS_CAP_ADDR_MASK GENMASK(63, 12)
+#define GCS_CAP_ADDR_SHIFT 12
+#define GCS_CAP_ADDR_WIDTH 52
+#define GCS_CAP_ADDR(x) FIELD_GET(GCS_CAP_ADDR_MASK, x)
+
+#define GCS_CAP_TOKEN_MASK GENMASK(11, 0)
+#define GCS_CAP_TOKEN_SHIFT 0
+#define GCS_CAP_TOKEN_WIDTH 12
+#define GCS_CAP_TOKEN(x) FIELD_GET(GCS_CAP_TOKEN_MASK, x)
+
+#define GCS_CAP_VALID_TOKEN 0x1
+#define GCS_CAP_IN_PROGRESS_TOKEN 0x5
+
+#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
+ GCS_CAP_VALID_TOKEN)
#define ARM64_FEATURE_FIELD_BITS 4
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd/ibs.h
index cb1740bc3da2..300b6e0765b2 100644
--- a/tools/arch/x86/include/asm/amd-ibs.h
+++ b/tools/arch/x86/include/asm/amd/ibs.h
@@ -4,7 +4,7 @@
* 55898 Rev 0.35 - Feb 5, 2021
*/
-#include "msr-index.h"
+#include "../msr-index.h"
/* IBS_OP_DATA2 DataSrc */
#define IBS_DATA_SRC_LOC_CACHE 2
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 6c2c152d8a67..bc81b9d1aeca 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -476,11 +476,11 @@
#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */
#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */
#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
-#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
-#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
-#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
-#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
-#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */
+#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */
+#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */
+#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */
+#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
/*
* BUG word(s)
@@ -519,7 +519,7 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
+/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */
@@ -527,10 +527,10 @@
#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */
/* BUG word 2 */
-#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */
-#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */
-#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
-#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
-#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
-#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
+#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */
+#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */
+#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */
+#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h
index 253690eb3c26..183aa662b165 100644
--- a/tools/arch/x86/include/asm/inat.h
+++ b/tools/arch/x86/include/asm/inat.h
@@ -82,6 +82,7 @@
#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8))
#define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9))
#define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10))
+#define INAT_INV64 (1 << (INAT_FLAG_OFFS + 11))
/* Attribute making macros for attribute tables */
#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
@@ -242,4 +243,9 @@ static inline int inat_evex_scalable(insn_attr_t attr)
{
return attr & INAT_EVEX_SCALABLE;
}
+
+static inline int inat_is_invalid64(insn_attr_t attr)
+{
+ return attr & INAT_INV64;
+}
#endif
diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv
index d751eb8585d0..8d925ce9750f 100644
--- a/tools/arch/x86/kcpuid/cpuid.csv
+++ b/tools/arch/x86/kcpuid/cpuid.csv
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: CC0-1.0
-# Generator: x86-cpuid-db v1.0
+# Generator: x86-cpuid-db v2.4
#
# Auto-generated file.
@@ -12,297 +12,298 @@
# Leaf 0H
# Maximum standard leaf number + CPU vendor string
- 0, 0, eax, 31:0, max_std_leaf , Highest cpuid standard leaf supported
- 0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3
- 0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11
- 0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7
+ 0x0, 0, eax, 31:0, max_std_leaf , Highest standard CPUID leaf supported
+ 0x0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3
+ 0x0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11
+ 0x0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7
# Leaf 1H
# CPU FMS (Family/Model/Stepping) + standard feature flags
- 1, 0, eax, 3:0, stepping , Stepping ID
- 1, 0, eax, 7:4, base_model , Base CPU model ID
- 1, 0, eax, 11:8, base_family_id , Base CPU family ID
- 1, 0, eax, 13:12, cpu_type , CPU type
- 1, 0, eax, 19:16, ext_model , Extended CPU model ID
- 1, 0, eax, 27:20, ext_family , Extended CPU family ID
- 1, 0, ebx, 7:0, brand_id , Brand index
- 1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size
- 1, 0, ebx, 23:16, n_logical_cpu , Logical CPU (HW threads) count
- 1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID
- 1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3)
- 1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support
- 1, 0, ecx, 2, dtes64 , 64-bit DS save area
- 1, 0, ecx, 3, monitor , MONITOR/MWAIT support
- 1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store
- 1, 0, ecx, 5, vmx , Virtual Machine Extensions
- 1, 0, ecx, 6, smx , Safer Mode Extensions
- 1, 0, ecx, 7, est , Enhanced Intel SpeedStep
- 1, 0, ecx, 8, tm2 , Thermal Monitor 2
- 1, 0, ecx, 9, ssse3 , Supplemental SSE3
- 1, 0, ecx, 10, cid , L1 Context ID
- 1, 0, ecx, 11, sdbg , Sillicon Debug
- 1, 0, ecx, 12, fma , FMA extensions using YMM state
- 1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support
- 1, 0, ecx, 14, xtpr , xTPR Update Control
- 1, 0, ecx, 15, pdcm , Perfmon and Debug Capability
- 1, 0, ecx, 17, pcid , Process-context identifiers
- 1, 0, ecx, 18, dca , Direct Cache Access
- 1, 0, ecx, 19, sse4_1 , SSE4.1
- 1, 0, ecx, 20, sse4_2 , SSE4.2
- 1, 0, ecx, 21, x2apic , X2APIC support
- 1, 0, ecx, 22, movbe , MOVBE instruction support
- 1, 0, ecx, 23, popcnt , POPCNT instruction support
- 1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation
- 1, 0, ecx, 25, aes , AES instructions
- 1, 0, ecx, 26, xsave , XSAVE (and related instructions) support
- 1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS
- 1, 0, ecx, 28, avx , AVX instructions support
- 1, 0, ecx, 29, f16c , Half-precision floating-point conversion support
- 1, 0, ecx, 30, rdrand , RDRAND instruction support
- 1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system
- 1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87)
- 1, 0, edx, 1, vme , Virtual-8086 Mode Extensions
- 1, 0, edx, 2, de , Debugging Extensions
- 1, 0, edx, 3, pse , Page Size Extension
- 1, 0, edx, 4, tsc , Time Stamp Counter
- 1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support)
- 1, 0, edx, 6, pae , Physical Address Extensions
- 1, 0, edx, 7, mce , Machine Check Exception
- 1, 0, edx, 8, cx8 , CMPXCHG8B instruction
- 1, 0, edx, 9, apic , APIC on-chip
- 1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs
- 1, 0, edx, 12, mtrr , Memory Type Range Registers
- 1, 0, edx, 13, pge , Page Global Extensions
- 1, 0, edx, 14, mca , Machine Check Architecture
- 1, 0, edx, 15, cmov , Conditional Move Instruction
- 1, 0, edx, 16, pat , Page Attribute Table
- 1, 0, edx, 17, pse36 , Page Size Extension (36-bit)
- 1, 0, edx, 18, pn , Processor Serial Number
- 1, 0, edx, 19, clflush , CLFLUSH instruction
- 1, 0, edx, 21, dts , Debug Store
- 1, 0, edx, 22, acpi , Thermal monitor and clock control
- 1, 0, edx, 23, mmx , MMX instructions
- 1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions
- 1, 0, edx, 25, sse , SSE instructions
- 1, 0, edx, 26, sse2 , SSE2 instructions
- 1, 0, edx, 27, ss , Self Snoop
- 1, 0, edx, 28, ht , Hyper-threading
- 1, 0, edx, 29, tm , Thermal Monitor
- 1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now resreved
- 1, 0, edx, 31, pbe , Pending Break Enable
+ 0x1, 0, eax, 3:0, stepping , Stepping ID
+ 0x1, 0, eax, 7:4, base_model , Base CPU model ID
+ 0x1, 0, eax, 11:8, base_family_id , Base CPU family ID
+ 0x1, 0, eax, 13:12, cpu_type , CPU type
+ 0x1, 0, eax, 19:16, ext_model , Extended CPU model ID
+ 0x1, 0, eax, 27:20, ext_family , Extended CPU family ID
+ 0x1, 0, ebx, 7:0, brand_id , Brand index
+ 0x1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size
+ 0x1, 0, ebx, 23:16, n_logical_cpu , Logical CPU count
+ 0x1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID
+ 0x1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3)
+ 0x1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support
+ 0x1, 0, ecx, 2, dtes64 , 64-bit DS save area
+ 0x1, 0, ecx, 3, monitor , MONITOR/MWAIT support
+ 0x1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store
+ 0x1, 0, ecx, 5, vmx , Virtual Machine Extensions
+ 0x1, 0, ecx, 6, smx , Safer Mode Extensions
+ 0x1, 0, ecx, 7, est , Enhanced Intel SpeedStep
+ 0x1, 0, ecx, 8, tm2 , Thermal Monitor 2
+ 0x1, 0, ecx, 9, ssse3 , Supplemental SSE3
+ 0x1, 0, ecx, 10, cid , L1 Context ID
+ 0x1, 0, ecx, 11, sdbg , Silicon Debug
+ 0x1, 0, ecx, 12, fma , FMA extensions using YMM state
+ 0x1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support
+ 0x1, 0, ecx, 14, xtpr , xTPR Update Control
+ 0x1, 0, ecx, 15, pdcm , Perfmon and Debug Capability
+ 0x1, 0, ecx, 17, pcid , Process-context identifiers
+ 0x1, 0, ecx, 18, dca , Direct Cache Access
+ 0x1, 0, ecx, 19, sse4_1 , SSE4.1
+ 0x1, 0, ecx, 20, sse4_2 , SSE4.2
+ 0x1, 0, ecx, 21, x2apic , X2APIC support
+ 0x1, 0, ecx, 22, movbe , MOVBE instruction support
+ 0x1, 0, ecx, 23, popcnt , POPCNT instruction support
+ 0x1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation
+ 0x1, 0, ecx, 25, aes , AES instructions
+ 0x1, 0, ecx, 26, xsave , XSAVE (and related instructions) support
+ 0x1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS
+ 0x1, 0, ecx, 28, avx , AVX instructions support
+ 0x1, 0, ecx, 29, f16c , Half-precision floating-point conversion support
+ 0x1, 0, ecx, 30, rdrand , RDRAND instruction support
+ 0x1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system
+ 0x1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87)
+ 0x1, 0, edx, 1, vme , Virtual-8086 Mode Extensions
+ 0x1, 0, edx, 2, de , Debugging Extensions
+ 0x1, 0, edx, 3, pse , Page Size Extension
+ 0x1, 0, edx, 4, tsc , Time Stamp Counter
+ 0x1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support)
+ 0x1, 0, edx, 6, pae , Physical Address Extensions
+ 0x1, 0, edx, 7, mce , Machine Check Exception
+ 0x1, 0, edx, 8, cx8 , CMPXCHG8B instruction
+ 0x1, 0, edx, 9, apic , APIC on-chip
+ 0x1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs
+ 0x1, 0, edx, 12, mtrr , Memory Type Range Registers
+ 0x1, 0, edx, 13, pge , Page Global Extensions
+ 0x1, 0, edx, 14, mca , Machine Check Architecture
+ 0x1, 0, edx, 15, cmov , Conditional Move Instruction
+ 0x1, 0, edx, 16, pat , Page Attribute Table
+ 0x1, 0, edx, 17, pse36 , Page Size Extension (36-bit)
+ 0x1, 0, edx, 18, pn , Processor Serial Number
+ 0x1, 0, edx, 19, clflush , CLFLUSH instruction
+ 0x1, 0, edx, 21, dts , Debug Store
+ 0x1, 0, edx, 22, acpi , Thermal monitor and clock control
+ 0x1, 0, edx, 23, mmx , MMX instructions
+ 0x1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions
+ 0x1, 0, edx, 25, sse , SSE instructions
+ 0x1, 0, edx, 26, sse2 , SSE2 instructions
+ 0x1, 0, edx, 27, ss , Self Snoop
+ 0x1, 0, edx, 28, ht , Hyper-threading
+ 0x1, 0, edx, 29, tm , Thermal Monitor
+ 0x1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now reserved
+ 0x1, 0, edx, 31, pbe , Pending Break Enable
# Leaf 2H
# Intel cache and TLB information one-byte descriptors
- 2, 0, eax, 7:0, iteration_count , Number of times this CPUD leaf must be queried
- 2, 0, eax, 15:8, desc1 , Descriptor #1
- 2, 0, eax, 23:16, desc2 , Descriptor #2
- 2, 0, eax, 30:24, desc3 , Descriptor #3
- 2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set
- 2, 0, ebx, 7:0, desc4 , Descriptor #4
- 2, 0, ebx, 15:8, desc5 , Descriptor #5
- 2, 0, ebx, 23:16, desc6 , Descriptor #6
- 2, 0, ebx, 30:24, desc7 , Descriptor #7
- 2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set
- 2, 0, ecx, 7:0, desc8 , Descriptor #8
- 2, 0, ecx, 15:8, desc9 , Descriptor #9
- 2, 0, ecx, 23:16, desc10 , Descriptor #10
- 2, 0, ecx, 30:24, desc11 , Descriptor #11
- 2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set
- 2, 0, edx, 7:0, desc12 , Descriptor #12
- 2, 0, edx, 15:8, desc13 , Descriptor #13
- 2, 0, edx, 23:16, desc14 , Descriptor #14
- 2, 0, edx, 30:24, desc15 , Descriptor #15
- 2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set
+ 0x2, 0, eax, 7:0, iteration_count , Number of times this leaf must be queried
+ 0x2, 0, eax, 15:8, desc1 , Descriptor #1
+ 0x2, 0, eax, 23:16, desc2 , Descriptor #2
+ 0x2, 0, eax, 30:24, desc3 , Descriptor #3
+ 0x2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set
+ 0x2, 0, ebx, 7:0, desc4 , Descriptor #4
+ 0x2, 0, ebx, 15:8, desc5 , Descriptor #5
+ 0x2, 0, ebx, 23:16, desc6 , Descriptor #6
+ 0x2, 0, ebx, 30:24, desc7 , Descriptor #7
+ 0x2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set
+ 0x2, 0, ecx, 7:0, desc8 , Descriptor #8
+ 0x2, 0, ecx, 15:8, desc9 , Descriptor #9
+ 0x2, 0, ecx, 23:16, desc10 , Descriptor #10
+ 0x2, 0, ecx, 30:24, desc11 , Descriptor #11
+ 0x2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set
+ 0x2, 0, edx, 7:0, desc12 , Descriptor #12
+ 0x2, 0, edx, 15:8, desc13 , Descriptor #13
+ 0x2, 0, edx, 23:16, desc14 , Descriptor #14
+ 0x2, 0, edx, 30:24, desc15 , Descriptor #15
+ 0x2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set
# Leaf 4H
# Intel deterministic cache parameters
- 4, 31:0, eax, 4:0, cache_type , Cache type field
- 4, 31:0, eax, 7:5, cache_level , Cache level (1-based)
- 4, 31:0, eax, 8, cache_self_init , Self-initialializing cache level
- 4, 31:0, eax, 9, fully_associative , Fully-associative cache
- 4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache
- 4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package
- 4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based)
- 4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based)
- 4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based)
- 4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based)
- 4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches
- 4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches
- 4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function)
+ 0x4, 31:0, eax, 4:0, cache_type , Cache type field
+ 0x4, 31:0, eax, 7:5, cache_level , Cache level (1-based)
+ 0x4, 31:0, eax, 8, cache_self_init , Self-initializing cache level
+ 0x4, 31:0, eax, 9, fully_associative , Fully-associative cache
+ 0x4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache
+ 0x4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package
+ 0x4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based)
+ 0x4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based)
+ 0x4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based)
+ 0x4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based)
+ 0x4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches
+ 0x4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches
+ 0x4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function)
# Leaf 5H
# MONITOR/MWAIT instructions enumeration
- 5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes
- 5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes
- 5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported
- 5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported
- 5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT
- 5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT
- 5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT
- 5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT
- 5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT
- 5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT
- 5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT
- 5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT
+ 0x5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes
+ 0x5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes
+ 0x5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported
+ 0x5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported
+ 0x5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT
+ 0x5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT
+ 0x5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT
+ 0x5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT
+ 0x5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT
+ 0x5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT
+ 0x5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT
+ 0x5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT
# Leaf 6H
# Thermal and Power Management enumeration
- 6, 0, eax, 0, dtherm , Digital temprature sensor
- 6, 0, eax, 1, turbo_boost , Intel Turbo Boost
- 6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state)
- 6, 0, eax, 4, pln , Power Limit Notification (PLN) event
- 6, 0, eax, 5, ecmd , Clock modulation duty cycle extension
- 6, 0, eax, 6, pts , Package thermal management
- 6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported
- 6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR)
- 6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported
- 6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference
- 6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request
- 6, 0, eax, 13, hdc_base_regs , HDC base registers are supported
- 6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0
- 6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change
- 6, 0, eax, 16, hwp_peci_override , HWP PECI override
- 6, 0, eax, 17, hwp_flexible , Flexible HWP
- 6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode
- 6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported
- 6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported
- 6, 0, eax, 23, thread_director , Intel thread director support
- 6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported
- 6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds
- 6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface)
- 6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support
- 6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director
- 6, 0, edx, 0, perfcap_reporting , Performance capability reporting
- 6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting
- 6, 0, edx, 11:8, feedback_sz , HW feedback interface struct size, in 4K pages
- 6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU index @ HW feedback struct, 0-based
+ 0x6, 0, eax, 0, dtherm , Digital temperature sensor
+ 0x6, 0, eax, 1, turbo_boost , Intel Turbo Boost
+ 0x6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state)
+ 0x6, 0, eax, 4, pln , Power Limit Notification (PLN) event
+ 0x6, 0, eax, 5, ecmd , Clock modulation duty cycle extension
+ 0x6, 0, eax, 6, pts , Package thermal management
+ 0x6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported
+ 0x6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR)
+ 0x6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported
+ 0x6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference
+ 0x6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request
+ 0x6, 0, eax, 13, hdc_base_regs , HDC base registers are supported
+ 0x6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0
+ 0x6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change
+ 0x6, 0, eax, 16, hwp_peci_override , HWP PECI override
+ 0x6, 0, eax, 17, hwp_flexible , Flexible HWP
+ 0x6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode
+ 0x6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported
+ 0x6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported
+ 0x6, 0, eax, 23, thread_director , Intel thread director support
+ 0x6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported
+ 0x6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds
+ 0x6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface)
+ 0x6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support
+ 0x6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director
+ 0x6, 0, edx, 0, perfcap_reporting , Performance capability reporting
+ 0x6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting
+ 0x6, 0, edx, 11:8, feedback_sz , Feedback interface structure size, in 4K pages
+ 0x6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU hardware feedback interface index
# Leaf 7H
# Extended CPU features enumeration
- 7, 0, eax, 31:0, leaf7_n_subleaves , Number of cpuid 0x7 subleaves
- 7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support
- 7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported
- 7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions)
- 7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1
- 7, 0, ebx, 4, hle , Hardware Lock Elision
- 7, 0, ebx, 5, avx2 , AVX2 instruction set
- 7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions
- 7, 0, ebx, 7, smep , Supervisor Mode Execution Protection
- 7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2
- 7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB
- 7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID)
- 7, 0, ebx, 11, rtm , Intel restricted transactional memory
- 7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring
- 7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero)
- 7, 0, ebx, 14, mpx , Intel memory protection extensions
- 7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcemeent
- 7, 0, ebx, 16, avx512f , AVX-512 foundation instructions
- 7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions
- 7, 0, ebx, 18, rdseed , RDSEED instruction
- 7, 0, ebx, 19, adx , ADCX/ADOX instructions
- 7, 0, ebx, 20, smap , Supervisor mode access prevention
- 7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add
- 7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction
- 7, 0, ebx, 24, clwb , CLWB instruction
- 7, 0, ebx, 25, intel_pt , Intel processor trace
- 7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions
- 7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instrs
- 7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instrs
- 7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions
- 7, 0, ebx, 30, avx512bw , AVX-512 BW (byte/word granular) instructions
- 7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions
- 7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only)
- 7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instrs
- 7, 0, ecx, 2, umip , User mode instruction protection
- 7, 0, ecx, 3, pku , Protection keys for user-space
- 7, 0, ecx, 4, ospke , OS protection keys enable
- 7, 0, ecx, 5, waitpkg , WAITPKG instructions
- 7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instrs group 2
- 7, 0, ecx, 7, cet_ss , CET shadow stack features
- 7, 0, ecx, 8, gfni , Galois field new instructions
- 7, 0, ecx, 9, vaes , Vector AES instrs
- 7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support
- 7, 0, ecx, 11, avx512_vnni , Vector neural network instructions
- 7, 0, ecx, 12, avx512_bitalg , AVX-512 bit count/shiffle
- 7, 0, ecx, 13, tme , Intel total memory encryption
- 7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DW/QW
- 7, 0, ecx, 16, la57 , 57-bit linear addreses (five-level paging)
- 7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode
- 7, 0, ecx, 22, rdpid , RDPID instruction
- 7, 0, ecx, 23, key_locker , Intel key locker support
- 7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection
- 7, 0, ecx, 25, cldemote , CLDEMOTE instruction
- 7, 0, ecx, 27, movdiri , MOVDIRI instruction
- 7, 0, ecx, 28, movdir64b , MOVDIR64B instruction
- 7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S})
- 7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration
- 7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages
- 7, 0, edx, 1, sgx_keys , Intel SGX attestation services
- 7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions
- 7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision
- 7, 0, edx, 4, fsrm , Fast short REP MOV
- 7, 0, edx, 5, uintr , CPU supports user interrupts
- 7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions
- 7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available
- 7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support
- 7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts
- 7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported
- 7, 0, edx, 14, serialize , SERIALIZE instruction
- 7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part'
- 7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking
- 7, 0, edx, 18, pconfig , PCONFIG instruction
- 7, 0, edx, 19, arch_lbr , Intel architectural LBRs
- 7, 0, edx, 20, ibt , CET indirect branch tracking
- 7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support
- 7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions
- 7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support
- 7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support
- 7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions)
- 7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors
- 7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR
- 7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR
- 7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR
- 7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable
- 7, 1, eax, 4, avx_vnni , AVX-VNNI instructions
- 7, 1, eax, 5, avx512_bf16 , AVX-512 bFloat16 instructions
- 7, 1, eax, 6, lass , Linear address space separation
- 7, 1, eax, 7, cmpccxadd , CMPccXADD instructions
- 7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: CPUID leaf 0x23 is supported
- 7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB
- 7, 1, eax, 11, fsrs , Fast short REP STOSB
- 7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB
- 7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions
- 7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS
- 7, 1, eax, 19, wrmsrns , WRMSRNS instr (WRMSR-non-serializing)
- 7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations
- 7, 1, eax, 22, hreset , History reset support
- 7, 1, eax, 23, avx_ifma , Integer fused multiply add
- 7, 1, eax, 26, lam , Linear address masking
- 7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions
- 7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs)
- 7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions
- 7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions
- 7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids)
- 7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions
- 7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use
- 7, 2, edx, 0, intel_psfd , Intel predictive store forward disable
- 7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S}
- 7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S}
- 7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U
- 7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S
- 7, 2, edx, 5, mcdt_no , MCDT mitigation not needed
- 7, 2, edx, 6, uclock_disable , UC-lock disable is supported
+ 0x7, 0, eax, 31:0, leaf7_n_subleaves , Number of leaf 0x7 subleaves
+ 0x7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support
+ 0x7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported
+ 0x7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions)
+ 0x7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1
+ 0x7, 0, ebx, 4, hle , Hardware Lock Elision
+ 0x7, 0, ebx, 5, avx2 , AVX2 instruction set
+ 0x7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions
+ 0x7, 0, ebx, 7, smep , Supervisor Mode Execution Protection
+ 0x7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2
+ 0x7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB
+ 0x7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID)
+ 0x7, 0, ebx, 11, rtm , Intel restricted transactional memory
+ 0x7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring
+ 0x7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero)
+ 0x7, 0, ebx, 14, mpx , Intel memory protection extensions
+ 0x7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcement
+ 0x7, 0, ebx, 16, avx512f , AVX-512 foundation instructions
+ 0x7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions
+ 0x7, 0, ebx, 18, rdseed , RDSEED instruction
+ 0x7, 0, ebx, 19, adx , ADCX/ADOX instructions
+ 0x7, 0, ebx, 20, smap , Supervisor mode access prevention
+ 0x7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add
+ 0x7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction
+ 0x7, 0, ebx, 24, clwb , CLWB instruction
+ 0x7, 0, ebx, 25, intel_pt , Intel processor trace
+ 0x7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions
+ 0x7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instructions
+ 0x7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instructions
+ 0x7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions
+ 0x7, 0, ebx, 30, avx512bw , AVX-512 byte/word instructions
+ 0x7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions
+ 0x7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only)
+ 0x7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instructions
+ 0x7, 0, ecx, 2, umip , User mode instruction protection
+ 0x7, 0, ecx, 3, pku , Protection keys for user-space
+ 0x7, 0, ecx, 4, ospke , OS protection keys enable
+ 0x7, 0, ecx, 5, waitpkg , WAITPKG instructions
+ 0x7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instructions group 2
+ 0x7, 0, ecx, 7, cet_ss , CET shadow stack features
+ 0x7, 0, ecx, 8, gfni , Galois field new instructions
+ 0x7, 0, ecx, 9, vaes , Vector AES instructions
+ 0x7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support
+ 0x7, 0, ecx, 11, avx512_vnni , Vector neural network instructions
+ 0x7, 0, ecx, 12, avx512_bitalg , AVX-512 bitwise algorithms
+ 0x7, 0, ecx, 13, tme , Intel total memory encryption
+ 0x7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DWORD/QWORD
+ 0x7, 0, ecx, 16, la57 , 57-bit linear addresses (five-level paging)
+ 0x7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode
+ 0x7, 0, ecx, 22, rdpid , RDPID instruction
+ 0x7, 0, ecx, 23, key_locker , Intel key locker support
+ 0x7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection
+ 0x7, 0, ecx, 25, cldemote , CLDEMOTE instruction
+ 0x7, 0, ecx, 27, movdiri , MOVDIRI instruction
+ 0x7, 0, ecx, 28, movdir64b , MOVDIR64B instruction
+ 0x7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S})
+ 0x7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration
+ 0x7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages
+ 0x7, 0, edx, 1, sgx_keys , Intel SGX attestation services
+ 0x7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions
+ 0x7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision
+ 0x7, 0, edx, 4, fsrm , Fast short REP MOV
+ 0x7, 0, edx, 5, uintr , CPU supports user interrupts
+ 0x7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions
+ 0x7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available
+ 0x7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support
+ 0x7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts
+ 0x7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported
+ 0x7, 0, edx, 14, serialize , SERIALIZE instruction
+ 0x7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part'
+ 0x7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking
+ 0x7, 0, edx, 18, pconfig , PCONFIG instruction
+ 0x7, 0, edx, 19, arch_lbr , Intel architectural LBRs
+ 0x7, 0, edx, 20, ibt , CET indirect branch tracking
+ 0x7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support
+ 0x7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions
+ 0x7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support
+ 0x7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support
+ 0x7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions)
+ 0x7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors
+ 0x7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR
+ 0x7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR
+ 0x7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR
+ 0x7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable
+ 0x7, 1, eax, 4, avx_vnni , AVX-VNNI instructions
+ 0x7, 1, eax, 5, avx512_bf16 , AVX-512 bfloat16 instructions
+ 0x7, 1, eax, 6, lass , Linear address space separation
+ 0x7, 1, eax, 7, cmpccxadd , CMPccXADD instructions
+ 0x7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: leaf 0x23 is supported
+ 0x7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB
+ 0x7, 1, eax, 11, fsrs , Fast short REP STOSB
+ 0x7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB
+ 0x7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions
+ 0x7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS
+ 0x7, 1, eax, 19, wrmsrns , WRMSRNS instruction (WRMSR-non-serializing)
+ 0x7, 1, eax, 20, nmi_src , NMI-source reporting with FRED event data
+ 0x7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations
+ 0x7, 1, eax, 22, hreset , History reset support
+ 0x7, 1, eax, 23, avx_ifma , Integer fused multiply add
+ 0x7, 1, eax, 26, lam , Linear address masking
+ 0x7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions
+ 0x7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs)
+ 0x7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions
+ 0x7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions
+ 0x7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids)
+ 0x7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions
+ 0x7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use
+ 0x7, 2, edx, 0, intel_psfd , Intel predictive store forward disable
+ 0x7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S}
+ 0x7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S}
+ 0x7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U
+ 0x7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S
+ 0x7, 2, edx, 5, mcdt_no , MCDT mitigation not needed
+ 0x7, 2, edx, 6, uclock_disable , UC-lock disable is supported
# Leaf 9H
# Intel DCA (Direct Cache Access) enumeration
- 9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS
+ 0x9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS
# Leaf AH
# Intel PMU (Performance Monitoring Unit) enumeration
@@ -310,7 +311,7 @@
0xa, 0, eax, 7:0, pmu_version , Performance monitoring unit version ID
0xa, 0, eax, 15:8, pmu_n_gcounters , Number of general PMU counters per logical CPU
0xa, 0, eax, 23:16, pmu_gcounters_nbits , Bitwidth of PMU general counters
- 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of cpuid leaf 0xa EBX bit vector
+ 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of leaf 0xa EBX bit vector
0xa, 0, ebx, 0, no_core_cycle_evt , Core cycle event not available
0xa, 0, ebx, 1, no_insn_retired_evt , Instruction retired event not available
0xa, 0, ebx, 2, no_refcycle_evt , Reference cycles event not available
@@ -339,18 +340,18 @@
0xd, 0, eax, 0, xcr0_x87 , XCR0.X87 (bit 0) supported
0xd, 0, eax, 1, xcr0_sse , XCR0.SEE (bit 1) supported
0xd, 0, eax, 2, xcr0_avx , XCR0.AVX (bit 2) supported
- 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 regs)
- 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs)
- 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 regs)
- 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs)
- 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs)
- 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU reg)
- 0xd, 0, eax, 11, xcr0_cet_u , AMD XCR0.CET_U (bit 11) supported (CET supervisor state)
- 0xd, 0, eax, 12, xcr0_cet_s , AMD XCR0.CET_S (bit 12) support (CET user state)
+ 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 registers)
+ 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers)
+ 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 registers)
+ 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers)
+ 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers)
+ 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU registers)
+ 0xd, 0, eax, 11, xcr0_cet_u , XCR0.CET_U (bit 11) supported (CET user state)
+ 0xd, 0, eax, 12, xcr0_cet_s , XCR0.CET_S (bit 12) supported (CET supervisor state)
0xd, 0, eax, 17, xcr0_tileconfig , XCR0.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG)
0xd, 0, eax, 18, xcr0_tiledata , XCR0.TILEDATA (bit 18) supported (AMX can manage TILEDATA)
- 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTR area byte size, for XCR0 enabled features
- 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTR area max byte size, all CPU features
+ 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTOR area byte size, for XCR0 enabled features
+ 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTOR area max byte size, all CPU features
0xd, 0, edx, 30, xcr0_lwp , AMD XCR0.LWP (bit 62) supported (Light-weight Profiling)
0xd, 1, eax, 0, xsaveopt , XSAVEOPT instruction
0xd, 1, eax, 1, xsavec , XSAVEC instruction
@@ -369,7 +370,7 @@
0xd, 63:2, eax, 31:0, xsave_sz , Size of save area for subleaf-N feature, in bytes
0xd, 63:2, ebx, 31:0, xsave_offset , Offset of save area for subleaf-N feature, in bytes
0xd, 63:2, ecx, 0, is_xss_bit , Subleaf N describes an XSS bit, otherwise XCR0 bit
- 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature xsave area is 64-byte aligned
+ 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature XSAVE area is 64-byte aligned
# Leaf FH
# Intel RDT / AMD PQoS resource monitoring
@@ -426,17 +427,17 @@
0x12, 1, ecx, 0, xfrm_x87 , Enclave XFRM.X87 (bit 0) supported
0x12, 1, ecx, 1, xfrm_sse , Enclave XFRM.SEE (bit 1) supported
0x12, 1, ecx, 2, xfrm_avx , Enclave XFRM.AVX (bit 2) supported
- 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 regs)
- 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs)
- 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 regs)
- 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs)
- 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs)
- 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU reg)
+ 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 registers)
+ 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers)
+ 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 registers)
+ 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers)
+ 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers)
+ 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU registers)
0x12, 1, ecx, 17, xfrm_tileconfig , Enclave XFRM.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG)
0x12, 1, ecx, 18, xfrm_tiledata , Enclave XFRM.TILEDATA (bit 18) supported (AMX can manage TILEDATA)
0x12, 31:2, eax, 3:0, subleaf_type , Subleaf type (dictates output layout)
- 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base addr, bits[12:31]
- 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base addr, bits[32:51]
+ 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base address, bits[12:31]
+ 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base address, bits[32:51]
0x12, 31:2, ecx, 3:0, epc_sec_type , EPC section type / property encoding
0x12, 31:2, ecx, 31:12, epc_sec_size_0 , EPC section size, bits[12:31]
0x12, 31:2, edx, 19:0, epc_sec_size_1 , EPC section size, bits[32:51]
@@ -444,7 +445,7 @@
# Leaf 14H
# Intel Processor Trace enumeration
- 0x14, 0, eax, 31:0, pt_max_subleaf , Max cpuid 0x14 subleaf
+ 0x14, 0, eax, 31:0, pt_max_subleaf , Maximum leaf 0x14 subleaf
0x14, 0, ebx, 0, cr3_filtering , IA32_RTIT_CR3_MATCH is accessible
0x14, 0, ebx, 1, psb_cyc , Configurable PSB and cycle-accurate mode
0x14, 0, ebx, 2, ip_filtering , IP/TraceStop filtering; Warm-reset PT MSRs preservation
@@ -472,7 +473,7 @@
0x15, 0, ecx, 31:0, cpu_crystal_hz , Core crystal clock nominal frequency, in Hz
# Leaf 16H
-# Intel processor fequency enumeration
+# Intel processor frequency enumeration
0x16, 0, eax, 15:0, cpu_base_mhz , Processor base frequency, in MHz
0x16, 0, ebx, 15:0, cpu_max_mhz , Processor max frequency, in MHz
@@ -481,9 +482,9 @@
# Leaf 17H
# Intel SoC vendor attributes enumeration
- 0x17, 0, eax, 31:0, soc_max_subleaf , Max cpuid leaf 0x17 subleaf
+ 0x17, 0, eax, 31:0, soc_max_subleaf , Maximum leaf 0x17 subleaf
0x17, 0, ebx, 15:0, soc_vendor_id , SoC vendor ID
- 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumaeratoion scheme (not Intel)
+ 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumeration scheme (not Intel)
0x17, 0, ecx, 31:0, soc_proj_id , SoC project ID, assigned by vendor
0x17, 0, edx, 31:0, soc_stepping_id , Soc project stepping ID, assigned by vendor
0x17, 3:1, eax, 31:0, vendor_brand_a , Vendor Brand ID string, bytes subleaf_nr * (0 -> 3)
@@ -494,18 +495,18 @@
# Leaf 18H
# Intel determenestic address translation (TLB) parameters
- 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Max cpuid 0x18 subleaf
+ 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Maximum leaf 0x18 subleaf
0x18, 31:0, ebx, 0, tlb_4k_page , TLB 4KB-page entries supported
0x18, 31:0, ebx, 1, tlb_2m_page , TLB 2MB-page entries supported
0x18, 31:0, ebx, 2, tlb_4m_page , TLB 4MB-page entries supported
0x18, 31:0, ebx, 3, tlb_1g_page , TLB 1GB-page entries supported
- 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this struct
+ 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this structure
0x18, 31:0, ebx, 31:16, n_way_associative , Ways of associativity
0x18, 31:0, ecx, 31:0, n_sets , Number of sets
0x18, 31:0, edx, 4:0, tlb_type , Translation cache type (TLB type)
0x18, 31:0, edx, 7:5, tlb_cache_level , Translation cache level (1-based)
0x18, 31:0, edx, 8, is_fully_associative , Fully-associative structure
- 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max num of addressible IDs for logical CPUs sharing this TLB - 1
+ 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max number of addressable IDs for logical CPUs sharing this TLB - 1
# Leaf 19H
# Intel Key Locker enumeration
@@ -568,7 +569,7 @@
# Intel AMX, TMUL (Tile-matrix MULtiply) accelerator unit enumeration
0x1e, 0, ebx, 7:0, tmul_maxk , TMUL unit maximum height, K (rows or columns)
- 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maxiumum SIMD dimension, N (column bytes)
+ 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maximum SIMD dimension, N (column bytes)
# Leaf 1FH
# Intel extended topology enumeration v2
@@ -623,9 +624,9 @@
0x40000000, 0, edx, 31:0, hypervisor_id_2 , Hypervisor ID string bytes 8 - 11
# Leaf 80000000H
-# Maximum extended leaf number + CPU vendor string (AMD)
+# Maximum extended leaf number + AMD/Transmeta CPU vendor string
-0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended cpuid leaf supported
+0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended CPUID leaf supported
0x80000000, 0, ebx, 31:0, cpu_vendorid_0 , Vendor ID string bytes 0 - 3
0x80000000, 0, ecx, 31:0, cpu_vendorid_2 , Vendor ID string bytes 8 - 11
0x80000000, 0, edx, 31:0, cpu_vendorid_1 , Vendor ID string bytes 4 - 7
@@ -636,6 +637,7 @@
0x80000001, 0, eax, 3:0, e_stepping_id , Stepping ID
0x80000001, 0, eax, 7:4, e_base_model , Base processor model
0x80000001, 0, eax, 11:8, e_base_family , Base processor family
+0x80000001, 0, eax, 13:12, e_base_type , Base processor type (Transmeta)
0x80000001, 0, eax, 19:16, e_ext_model , Extended processor model
0x80000001, 0, eax, 27:20, e_ext_family , Extended processor family
0x80000001, 0, ebx, 15:0, brand_id , Brand ID
@@ -659,7 +661,7 @@
0x80000001, 0, ecx, 17, tce , Translation cache extension
0x80000001, 0, ecx, 19, nodeid_msr , NodeId MSR (0xc001100c)
0x80000001, 0, ecx, 21, tbm , Trailing bit manipulations
-0x80000001, 0, ecx, 22, topoext , Topology Extensions (cpuid leaf 0x8000001d)
+0x80000001, 0, ecx, 22, topoext , Topology Extensions (leaf 0x8000001d)
0x80000001, 0, ecx, 23, perfctr_core , Core performance counter extensions
0x80000001, 0, ecx, 24, perfctr_nb , NB/DF performance counter extensions
0x80000001, 0, ecx, 26, bpext , Data access breakpoint extension
@@ -687,6 +689,7 @@
0x80000001, 0, edx, 19, mp , Out-of-spec AMD Multiprocessing bit
0x80000001, 0, edx, 20, nx , No-execute page protection
0x80000001, 0, edx, 22, mmxext , AMD MMX extensions
+0x80000001, 0, edx, 23, e_mmx , MMX instructions
0x80000001, 0, edx, 24, e_fxsr , FXSAVE and FXRSTOR instructions
0x80000001, 0, edx, 25, fxsr_opt , FXSAVE and FXRSTOR optimizations
0x80000001, 0, edx, 26, pdpe1gb , 1-GB large page support
@@ -720,11 +723,11 @@
0x80000004, 0, edx, 31:0, cpu_brandid_11 , CPU brand ID string, bytes 44 - 47
# Leaf 80000005H
-# AMD L1 cache and L1 TLB enumeration
+# AMD/Transmeta L1 cache and L1 TLB enumeration
-0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entires, 2M and 4M pages
+0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entries, 2M and 4M pages
0x80000005, 0, eax, 15:8, l1_itlb_2m_4m_assoc , L1 ITLB associativity, 2M and 4M pages
-0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entires, 2M and 4M pages
+0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entries, 2M and 4M pages
0x80000005, 0, eax, 31:24, l1_dtlb_2m_4m_assoc , L1 DTLB associativity, 2M and 4M pages
0x80000005, 0, ebx, 7:0, l1_itlb_4k_nentries , L1 ITLB #entries, 4K pages
0x80000005, 0, ebx, 15:8, l1_itlb_4k_assoc , L1 ITLB associativity, 4K pages
@@ -763,11 +766,11 @@
# CPU power management (mostly AMD) and AMD RAS enumeration
0x80000007, 0, ebx, 0, overflow_recov , MCA overflow conditions not fatal
-0x80000007, 0, ebx, 1, succor , Software containment of UnCORRectable errors
+0x80000007, 0, ebx, 1, succor , Software containment of uncorrectable errors
0x80000007, 0, ebx, 2, hw_assert , Hardware assert MSRs
0x80000007, 0, ebx, 3, smca , Scalable MCA (MCAX MSRs)
0x80000007, 0, ecx, 31:0, cpu_pwr_sample_ratio , CPU power sample time ratio
-0x80000007, 0, edx, 0, digital_temp , Digital temprature sensor
+0x80000007, 0, edx, 0, digital_temp , Digital temperature sensor
0x80000007, 0, edx, 1, powernow_freq_id , PowerNOW! frequency scaling
0x80000007, 0, edx, 2, powernow_volt_id , PowerNOW! voltage scaling
0x80000007, 0, edx, 3, thermal_trip , THERMTRIP (Thermal Trip)
@@ -810,7 +813,7 @@
0x80000008, 0, ebx, 23, amd_ppin , Protected Processor Inventory Number
0x80000008, 0, ebx, 24, amd_ssbd , Speculative Store Bypass Disable
0x80000008, 0, ebx, 25, virt_ssbd , virtualized SSBD (Speculative Store Bypass Disable)
-0x80000008, 0, ebx, 26, amd_ssb_no , SSBD not needed (fixed in HW)
+0x80000008, 0, ebx, 26, amd_ssb_no , SSBD is not needed (fixed in hardware)
0x80000008, 0, ebx, 27, cppc , Collaborative Processor Performance Control
0x80000008, 0, ebx, 28, amd_psfd , Predictive Store Forward Disable
0x80000008, 0, ebx, 29, btc_no , CPU not affected by Branch Type Confusion
@@ -838,7 +841,7 @@
0x8000000a, 0, edx, 10, pausefilter , Pause intercept filter
0x8000000a, 0, edx, 12, pfthreshold , Pause filter threshold
0x8000000a, 0, edx, 13, avic , Advanced virtual interrupt controller
-0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virt)
+0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virtualization)
0x8000000a, 0, edx, 16, vgif , Virtualize the Global Interrupt Flag
0x8000000a, 0, edx, 17, gmet , Guest mode execution trap
0x8000000a, 0, edx, 18, x2avic , Virtual x2APIC
@@ -850,7 +853,7 @@
0x8000000a, 0, edx, 25, vnmi , NMI virtualization
0x8000000a, 0, edx, 26, ibs_virt , IBS Virtualization
0x8000000a, 0, edx, 27, ext_lvt_off_chg , Extended LVT offset fault change
-0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME addr check
+0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME address check
# Leaf 80000019H
# AMD TLB 1G-pages enumeration
@@ -891,20 +894,20 @@
# AMD LWP (Lightweight Profiling)
0x8000001c, 0, eax, 0, os_lwp_avail , LWP is available to application programs (supported by OS)
-0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction (EventId=1) is supported by OS
-0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event (EventId=2) is supported by OS
-0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event (EventId=3) is supported by OS
-0x8000001c, 0, eax, 4, os_lwp_dme , DCache Miss Event (EventId=4) is supported by OS
-0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is supported by OS
-0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is supported by OS
+0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction is supported by OS
+0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event is supported by OS
+0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event is supported by OS
+0x8000001c, 0, eax, 4, os_lwp_dme , Dcache Miss Event is supported by OS
+0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event is supported by OS
+0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event is supported by OS
0x8000001c, 0, eax, 29, os_lwp_cont , LWP sampling in continuous mode is supported by OS
0x8000001c, 0, eax, 30, os_lwp_ptsc , Performance Time Stamp Counter in event records is supported by OS
0x8000001c, 0, eax, 31, os_lwp_int , Interrupt on threshold overflow is supported by OS
0x8000001c, 0, ebx, 7:0, lwp_lwpcb_sz , LWP Control Block size, in quadwords
0x8000001c, 0, ebx, 15:8, lwp_event_sz , LWP event record size, in bytes
-0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventId value (EventID 255 not included)
+0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventID value (EventID 255 not included)
0x8000001c, 0, ebx, 31:24, lwp_event_offset , LWP events area offset in the LWP Control Block
-0x8000001c, 0, ecx, 4:0, lwp_latency_max , Num of bits in cache latency counters (10 to 31)
+0x8000001c, 0, ecx, 4:0, lwp_latency_max , Number of bits in cache latency counters (10 to 31)
0x8000001c, 0, ecx, 5, lwp_data_adddr , Cache miss events report the data address of the reference
0x8000001c, 0, ecx, 8:6, lwp_latency_rnd , Amount by which cache latency is rounded
0x8000001c, 0, ecx, 15:9, lwp_version , LWP implementation version
@@ -913,16 +916,16 @@
0x8000001c, 0, ecx, 29, lwp_ip_filtering , IP filtering (IPI, IPF, BaseIP, and LimitIP @ LWPCP) supported
0x8000001c, 0, ecx, 30, lwp_cache_levels , Cache-related events can be filtered by cache level
0x8000001c, 0, ecx, 31, lwp_cache_latency , Cache-related events can be filtered by latency
-0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in Hardware
-0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction (EventId=1) is available in HW
-0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event (EventId=2) is available in HW
-0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event (EventId=3) is available in HW
-0x8000001c, 0, edx, 4, hw_lwp_dme , DCache Miss Event (EventId=4) is available in HW
-0x8000001c, 0, edx, 5, hw_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is available in HW
-0x8000001c, 0, edx, 6, hw_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is available in HW
-0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in HW
-0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in HW
-0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in HW
+0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in hardware
+0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction is available in hardware
+0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event is available in hardware
+0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event is available in hardware
+0x8000001c, 0, edx, 4, hw_lwp_dme , Dcache Miss Event is available in hardware
+0x8000001c, 0, edx, 5, hw_lwp_cnh , Clocks Not Halted event is available in hardware
+0x8000001c, 0, edx, 6, hw_lwp_rnh , Reference clocks Not Halted event is available in hardware
+0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in hardware
+0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in hardware
+0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in hardware
# Leaf 8000001DH
# AMD deterministic cache parameters
@@ -958,10 +961,10 @@
0x8000001f, 0, eax, 4, sev_nested_paging , SEV secure nested paging supported
0x8000001f, 0, eax, 5, vm_permission_levels , VMPL supported
0x8000001f, 0, eax, 6, rpmquery , RPMQUERY instruction supported
-0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadwo stack supported
+0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadow stack supported
0x8000001f, 0, eax, 8, secure_tsc , Secure TSC supported
0x8000001f, 0, eax, 9, v_tsc_aux , Hardware virtualizes TSC_AUX
-0x8000001f, 0, eax, 10, sme_coherent , HW enforces cache coherency across encryption domains
+0x8000001f, 0, eax, 10, sme_coherent , Cache coherency is enforced across encryption domains
0x8000001f, 0, eax, 11, req_64bit_hypervisor , SEV guest mandates 64-bit hypervisor
0x8000001f, 0, eax, 12, restricted_injection , Restricted Injection supported
0x8000001f, 0, eax, 13, alternate_injection , Alternate Injection supported
@@ -973,13 +976,13 @@
0x8000001f, 0, eax, 19, virt_ibs , IBS state virtualization is supported for SEV-ES guests
0x8000001f, 0, eax, 24, vmsa_reg_protection , VMSA register protection is supported
0x8000001f, 0, eax, 25, smt_protection , SMT protection is supported
-0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000h) is supported
+0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000) is supported
0x8000001f, 0, eax, 29, nested_virt_snp_msr , VIRT_RMPUPDATE/VIRT_PSMASH MSRs are supported
0x8000001f, 0, ebx, 5:0, pte_cbit_pos , PTE bit number used to enable memory encryption
0x8000001f, 0, ebx, 11:6, phys_addr_reduction_nbits, Reduction of phys address space when encryption is enabled, in bits
0x8000001f, 0, ebx, 15:12, vmpl_count , Number of VM permission levels (VMPL) supported
0x8000001f, 0, ecx, 31:0, enc_guests_max , Max supported number of simultaneous encrypted guests
-0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Mininum ASID for SEV-enabled SEV-ES-disabled guest
+0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Minimum ASID for SEV-enabled SEV-ES-disabled guest
# Leaf 80000020H
# AMD Platform QoS extended feature IDs
@@ -988,6 +991,8 @@
0x80000020, 0, ebx, 2, smba , Slow Memory Bandwidth Allocation support
0x80000020, 0, ebx, 3, bmec , Bandwidth Monitoring Event Configuration support
0x80000020, 0, ebx, 4, l3rr , L3 Range Reservation support
+0x80000020, 0, ebx, 5, abmc , Assignable Bandwidth Monitoring Counters
+0x80000020, 0, ebx, 6, sdciae , Smart Data Cache Injection (SDCI) Allocation Enforcement
0x80000020, 1, eax, 31:0, mba_limit_len , MBA enforcement limit size
0x80000020, 1, edx, 31:0, mba_cos_max , MBA max Class of Service number (zero-based)
0x80000020, 2, eax, 31:0, smba_limit_len , SMBA enforcement limit size
@@ -1007,17 +1012,26 @@
0x80000021, 0, eax, 0, no_nested_data_bp , No nested data breakpoints
0x80000021, 0, eax, 1, fsgs_non_serializing , WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing
0x80000021, 0, eax, 2, lfence_rdtsc , LFENCE always serializing / synchronizes RDTSC
-0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock is supported
+0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock
0x80000021, 0, eax, 6, null_sel_clr_base , Null selector clears base
-0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore Enable bit supported
-0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS enable bit supported
-0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not present
-0x80000021, 0, eax, 10, fsrs_supported , Fast Short Rep Stosb (FSRS) is supported
-0x80000021, 0, eax, 11, fsrc_supported , Fast Short Repe Cmpsb (FSRC) is supported
-0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is supported
+0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore
+0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS
+0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not available
+0x80000021, 0, eax, 10, fsrs , Fast Short Rep STOSB
+0x80000021, 0, eax, 11, fsrc , Fast Short Rep CMPSB
+0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is available
+0x80000021, 0, eax, 16, opcode_reclaim , Reserves opcode space
0x80000021, 0, eax, 17, user_cpuid_disable , #GP when executing CPUID at CPL > 0 is supported
-0x80000021, 0, eax, 18, epsf_supported , Enhanced Predictive Store Forwarding (EPSF) is supported
-0x80000021, 0, ebx, 11:0, microcode_patch_size , Size of microcode patch, in 16-byte units
+0x80000021, 0, eax, 18, epsf , Enhanced Predictive Store Forwarding
+0x80000021, 0, eax, 22, wl_feedback , Workload-based heuristic feedback to OS
+0x80000021, 0, eax, 24, eraps , Enhanced Return Address Predictor Security
+0x80000021, 0, eax, 27, sbpb , Selective Branch Predictor Barrier
+0x80000021, 0, eax, 28, ibpb_brtype , Branch predictions flushed from CPU branch predictor
+0x80000021, 0, eax, 29, srso_no , CPU is not subject to the SRSO vulnerability
+0x80000021, 0, eax, 30, srso_uk_no , CPU is not vulnerable to SRSO at user-kernel boundary
+0x80000021, 0, eax, 31, srso_msr_fix , Software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO
+0x80000021, 0, ebx, 15:0, microcode_patch_size , Size of microcode patch, in 16-byte units
+0x80000021, 0, ebx, 23:16, rap_size , Return Address Predictor size
# Leaf 80000022H
# AMD Performance Monitoring v2 enumeration
@@ -1025,7 +1039,7 @@
0x80000022, 0, eax, 0, perfmon_v2 , Performance monitoring v2 supported
0x80000022, 0, eax, 1, lbr_v2 , Last Branch Record v2 extensions (LBR Stack)
0x80000022, 0, eax, 2, lbr_pmc_freeze , Freezing core performance counters / LBR Stack supported
-0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core perfomance counters
+0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core performance counters
0x80000022, 0, ebx, 9:4, lbr_v2_stack_size , Number of available LBR stack entries
0x80000022, 0, ebx, 15:10, n_pmc_northbridge , Number of available northbridge (data fabric) performance counters
0x80000022, 0, ebx, 21:16, n_pmc_umc , Number of available UMC performance counters
@@ -1035,7 +1049,7 @@
# AMD Secure Multi-key Encryption enumeration
0x80000023, 0, eax, 0, mem_hmk_mode , MEM-HMK encryption mode is supported
-0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total num of available encryption keys
+0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total number of available encryption keys
# Leaf 80000026H
# AMD extended topology enumeration v2
@@ -1051,3 +1065,108 @@
0x80000026, 3:0, ecx, 7:0, domain_level , This domain level (subleaf ID)
0x80000026, 3:0, ecx, 15:8, domain_type , This domain type
0x80000026, 3:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU
+
+# Leaf 80860000H
+# Maximum Transmeta leaf number + CPU vendor ID string
+
+0x80860000, 0, eax, 31:0, max_tra_leaf , Maximum supported Transmeta leaf number
+0x80860000, 0, ebx, 31:0, cpu_vendorid_0 , Transmeta Vendor ID string bytes 0 - 3
+0x80860000, 0, ecx, 31:0, cpu_vendorid_2 , Transmeta Vendor ID string bytes 8 - 11
+0x80860000, 0, edx, 31:0, cpu_vendorid_1 , Transmeta Vendor ID string bytes 4 - 7
+
+# Leaf 80860001H
+# Transmeta extended CPU information
+
+0x80860001, 0, eax, 3:0, stepping , Stepping ID
+0x80860001, 0, eax, 7:4, base_model , Base CPU model ID
+0x80860001, 0, eax, 11:8, base_family_id , Base CPU family ID
+0x80860001, 0, eax, 13:12, cpu_type , CPU type
+0x80860001, 0, ebx, 7:0, cpu_rev_mask_minor , CPU revision ID, mask minor
+0x80860001, 0, ebx, 15:8, cpu_rev_mask_major , CPU revision ID, mask major
+0x80860001, 0, ebx, 23:16, cpu_rev_minor , CPU revision ID, minor
+0x80860001, 0, ebx, 31:24, cpu_rev_major , CPU revision ID, major
+0x80860001, 0, ecx, 31:0, cpu_base_mhz , CPU nominal frequency, in MHz
+0x80860001, 0, edx, 0, recovery , Recovery CMS is active (after bad flush)
+0x80860001, 0, edx, 1, longrun , LongRun power management capabilities
+0x80860001, 0, edx, 3, lrti , LongRun Table Interface
+
+# Leaf 80860002H
+# Transmeta Code Morphing Software (CMS) enumeration
+
+0x80860002, 0, eax, 31:0, cpu_rev_id , CPU revision ID
+0x80860002, 0, ebx, 7:0, cms_rev_mask_2 , CMS revision ID, mask component 2
+0x80860002, 0, ebx, 15:8, cms_rev_mask_1 , CMS revision ID, mask component 1
+0x80860002, 0, ebx, 23:16, cms_rev_minor , CMS revision ID, minor
+0x80860002, 0, ebx, 31:24, cms_rev_major , CMS revision ID, major
+0x80860002, 0, ecx, 31:0, cms_rev_mask_3 , CMS revision ID, mask component 3
+
+# Leaf 80860003H
+# Transmeta CPU information string, bytes 0 - 15
+
+0x80860003, 0, eax, 31:0, cpu_info_0 , CPU info string bytes 0 - 3
+0x80860003, 0, ebx, 31:0, cpu_info_1 , CPU info string bytes 4 - 7
+0x80860003, 0, ecx, 31:0, cpu_info_2 , CPU info string bytes 8 - 11
+0x80860003, 0, edx, 31:0, cpu_info_3 , CPU info string bytes 12 - 15
+
+# Leaf 80860004H
+# Transmeta CPU information string, bytes 16 - 31
+
+0x80860004, 0, eax, 31:0, cpu_info_4 , CPU info string bytes 16 - 19
+0x80860004, 0, ebx, 31:0, cpu_info_5 , CPU info string bytes 20 - 23
+0x80860004, 0, ecx, 31:0, cpu_info_6 , CPU info string bytes 24 - 27
+0x80860004, 0, edx, 31:0, cpu_info_7 , CPU info string bytes 28 - 31
+
+# Leaf 80860005H
+# Transmeta CPU information string, bytes 32 - 47
+
+0x80860005, 0, eax, 31:0, cpu_info_8 , CPU info string bytes 32 - 35
+0x80860005, 0, ebx, 31:0, cpu_info_9 , CPU info string bytes 36 - 39
+0x80860005, 0, ecx, 31:0, cpu_info_10 , CPU info string bytes 40 - 43
+0x80860005, 0, edx, 31:0, cpu_info_11 , CPU info string bytes 44 - 47
+
+# Leaf 80860006H
+# Transmeta CPU information string, bytes 48 - 63
+
+0x80860006, 0, eax, 31:0, cpu_info_12 , CPU info string bytes 48 - 51
+0x80860006, 0, ebx, 31:0, cpu_info_13 , CPU info string bytes 52 - 55
+0x80860006, 0, ecx, 31:0, cpu_info_14 , CPU info string bytes 56 - 59
+0x80860006, 0, edx, 31:0, cpu_info_15 , CPU info string bytes 60 - 63
+
+# Leaf 80860007H
+# Transmeta live CPU information
+
+0x80860007, 0, eax, 31:0, cpu_cur_mhz , Current CPU frequency, in MHz
+0x80860007, 0, ebx, 31:0, cpu_cur_voltage , Current CPU voltage, in millivolts
+0x80860007, 0, ecx, 31:0, cpu_cur_perf_pctg , Current CPU performance percentage, 0 - 100
+0x80860007, 0, edx, 31:0, cpu_cur_gate_delay , Current CPU gate delay, in femtoseconds
+
+# Leaf C0000000H
+# Maximum Centaur/Zhaoxin leaf number
+
+0xc0000000, 0, eax, 31:0, max_cntr_leaf , Maximum Centaur/Zhaoxin leaf number
+
+# Leaf C0000001H
+# Centaur/Zhaoxin extended CPU features
+
+0xc0000001, 0, edx, 0, ccs_sm2 , CCS SM2 instructions
+0xc0000001, 0, edx, 1, ccs_sm2_en , CCS SM2 enabled
+0xc0000001, 0, edx, 2, xstore , Random Number Generator
+0xc0000001, 0, edx, 3, xstore_en , RNG enabled
+0xc0000001, 0, edx, 4, ccs_sm3_sm4 , CCS SM3 and SM4 instructions
+0xc0000001, 0, edx, 5, ccs_sm3_sm4_en , CCS SM3/SM4 enabled
+0xc0000001, 0, edx, 6, ace , Advanced Cryptography Engine
+0xc0000001, 0, edx, 7, ace_en , ACE enabled
+0xc0000001, 0, edx, 8, ace2 , Advanced Cryptography Engine v2
+0xc0000001, 0, edx, 9, ace2_en , ACE v2 enabled
+0xc0000001, 0, edx, 10, phe , PadLock Hash Engine
+0xc0000001, 0, edx, 11, phe_en , PHE enabled
+0xc0000001, 0, edx, 12, pmm , PadLock Montgomery Multiplier
+0xc0000001, 0, edx, 13, pmm_en , PMM enabled
+0xc0000001, 0, edx, 16, parallax , Parallax auto adjust processor voltage
+0xc0000001, 0, edx, 17, parallax_en , Parallax enabled
+0xc0000001, 0, edx, 20, tm3 , Thermal Monitor v3
+0xc0000001, 0, edx, 21, tm3_en , TM v3 enabled
+0xc0000001, 0, edx, 25, phe2 , PadLock Hash Engine v2 (SHA384/SHA512)
+0xc0000001, 0, edx, 26, phe2_en , PHE v2 enabled
+0xc0000001, 0, edx, 27, rsa , RSA instructions (XMODEXP/MONTMUL2)
+0xc0000001, 0, edx, 28, rsa_en , RSA instructions enabled
diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c
index 1b25c0a95d3f..7dc6b9235d02 100644
--- a/tools/arch/x86/kcpuid/kcpuid.c
+++ b/tools/arch/x86/kcpuid/kcpuid.c
@@ -1,14 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
-#include <stdio.h>
+#include <cpuid.h>
+#include <err.h>
+#include <getopt.h>
#include <stdbool.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <getopt.h>
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define min(a, b) (((a) < (b)) ? (a) : (b))
+#define __noreturn __attribute__((__noreturn__))
typedef unsigned int u32;
typedef unsigned long long u64;
@@ -49,7 +52,7 @@ static const char * const reg_names[] = {
struct subleaf {
u32 index;
u32 sub;
- u32 eax, ebx, ecx, edx;
+ u32 output[NR_REGS];
struct reg_desc info[NR_REGS];
};
@@ -63,21 +66,64 @@ struct cpuid_func {
int nr;
};
+enum range_index {
+ RANGE_STD = 0, /* Standard */
+ RANGE_EXT = 0x80000000, /* Extended */
+ RANGE_TSM = 0x80860000, /* Transmeta */
+ RANGE_CTR = 0xc0000000, /* Centaur/Zhaoxin */
+};
+
+#define CPUID_INDEX_MASK 0xffff0000
+#define CPUID_FUNCTION_MASK (~CPUID_INDEX_MASK)
+
struct cpuid_range {
/* array of main leafs */
struct cpuid_func *funcs;
/* number of valid leafs */
int nr;
- bool is_ext;
+ enum range_index index;
};
-/*
- * basic: basic functions range: [0... ]
- * ext: extended functions range: [0x80000000... ]
- */
-struct cpuid_range *leafs_basic, *leafs_ext;
+static struct cpuid_range ranges[] = {
+ { .index = RANGE_STD, },
+ { .index = RANGE_EXT, },
+ { .index = RANGE_TSM, },
+ { .index = RANGE_CTR, },
+};
+
+static char *range_to_str(struct cpuid_range *range)
+{
+ switch (range->index) {
+ case RANGE_STD: return "Standard";
+ case RANGE_EXT: return "Extended";
+ case RANGE_TSM: return "Transmeta";
+ case RANGE_CTR: return "Centaur";
+ default: return NULL;
+ }
+}
+
+#define __for_each_cpuid_range(range, __condition) \
+ for (unsigned int i = 0; \
+ i < ARRAY_SIZE(ranges) && ((range) = &ranges[i]) && (__condition); \
+ i++)
+
+#define for_each_valid_cpuid_range(range) __for_each_cpuid_range(range, (range)->nr != 0)
+#define for_each_cpuid_range(range) __for_each_cpuid_range(range, true)
+
+struct cpuid_range *index_to_cpuid_range(u32 index)
+{
+ u32 func_idx = index & CPUID_FUNCTION_MASK;
+ u32 range_idx = index & CPUID_INDEX_MASK;
+ struct cpuid_range *range;
+
+ for_each_valid_cpuid_range(range) {
+ if (range->index == range_idx && (u32)range->nr > func_idx)
+ return range;
+ }
+
+ return NULL;
+}
-static bool is_amd;
static bool show_details;
static bool show_raw;
static bool show_flags_only = true;
@@ -85,16 +131,16 @@ static u32 user_index = 0xFFFFFFFF;
static u32 user_sub = 0xFFFFFFFF;
static int flines;
-static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
+/*
+ * Force using <cpuid.h> __cpuid_count() instead of __cpuid(). The
+ * latter leaves ECX uninitialized, which can break CPUID queries.
+ */
+
+#define cpuid(leaf, a, b, c, d) \
+ __cpuid_count(leaf, 0, a, b, c, d)
+
+#define cpuid_count(leaf, subleaf, a, b, c, d) \
+ __cpuid_count(leaf, subleaf, a, b, c, d)
static inline bool has_subleafs(u32 f)
{
@@ -117,11 +163,11 @@ static void leaf_print_raw(struct subleaf *leaf)
if (leaf->sub == 0)
printf("0x%08x: subleafs:\n", leaf->index);
- printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n",
- leaf->sub, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx);
+ printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->sub,
+ leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]);
} else {
- printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n",
- leaf->index, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx);
+ printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->index,
+ leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]);
}
}
@@ -140,19 +186,19 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf,
* Cut off vendor-prefix from CPUID function as we're using it as an
* index into ->funcs.
*/
- func = &range->funcs[f & 0xffff];
+ func = &range->funcs[f & CPUID_FUNCTION_MASK];
if (!func->leafs) {
func->leafs = malloc(sizeof(struct subleaf));
if (!func->leafs)
- perror("malloc func leaf");
+ err(EXIT_FAILURE, NULL);
func->nr = 1;
} else {
s = func->nr;
func->leafs = realloc(func->leafs, (s + 1) * sizeof(*leaf));
if (!func->leafs)
- perror("realloc f->leafs");
+ err(EXIT_FAILURE, NULL);
func->nr++;
}
@@ -161,84 +207,73 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf,
leaf->index = f;
leaf->sub = subleaf;
- leaf->eax = a;
- leaf->ebx = b;
- leaf->ecx = c;
- leaf->edx = d;
+ leaf->output[R_EAX] = a;
+ leaf->output[R_EBX] = b;
+ leaf->output[R_ECX] = c;
+ leaf->output[R_EDX] = d;
return false;
}
static void raw_dump_range(struct cpuid_range *range)
{
- u32 f;
- int i;
-
- printf("%s Leafs :\n", range->is_ext ? "Extended" : "Basic");
+ printf("%s Leafs :\n", range_to_str(range));
printf("================\n");
- for (f = 0; (int)f < range->nr; f++) {
+ for (u32 f = 0; (int)f < range->nr; f++) {
struct cpuid_func *func = &range->funcs[f];
- u32 index = f;
-
- if (range->is_ext)
- index += 0x80000000;
/* Skip leaf without valid items */
if (!func->nr)
continue;
/* First item is the main leaf, followed by all subleafs */
- for (i = 0; i < func->nr; i++)
+ for (int i = 0; i < func->nr; i++)
leaf_print_raw(&func->leafs[i]);
}
}
#define MAX_SUBLEAF_NUM 64
-struct cpuid_range *setup_cpuid_range(u32 input_eax)
+#define MAX_RANGE_INDEX_OFFSET 0xff
+void setup_cpuid_range(struct cpuid_range *range)
{
- u32 max_func, idx_func, subleaf, max_subleaf;
- u32 eax, ebx, ecx, edx, f = input_eax;
- struct cpuid_range *range;
- bool allzero;
-
- eax = input_eax;
- ebx = ecx = edx = 0;
+ u32 max_func, range_funcs_sz;
+ u32 eax, ebx, ecx, edx;
- cpuid(&eax, &ebx, &ecx, &edx);
- max_func = eax;
- idx_func = (max_func & 0xffff) + 1;
+ cpuid(range->index, max_func, ebx, ecx, edx);
- range = malloc(sizeof(struct cpuid_range));
- if (!range)
- perror("malloc range");
+ /*
+ * If the CPUID range's maximum function value is garbage, then it
+ * is not recognized by this CPU. Set the range's number of valid
+ * leaves to zero so that for_each_valid_cpu_range() can ignore it.
+ */
+ if (max_func < range->index || max_func > (range->index + MAX_RANGE_INDEX_OFFSET)) {
+ range->nr = 0;
+ return;
+ }
- if (input_eax & 0x80000000)
- range->is_ext = true;
- else
- range->is_ext = false;
+ range->nr = (max_func & CPUID_FUNCTION_MASK) + 1;
+ range_funcs_sz = range->nr * sizeof(struct cpuid_func);
- range->funcs = malloc(sizeof(struct cpuid_func) * idx_func);
+ range->funcs = malloc(range_funcs_sz);
if (!range->funcs)
- perror("malloc range->funcs");
+ err(EXIT_FAILURE, NULL);
- range->nr = idx_func;
- memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func);
+ memset(range->funcs, 0, range_funcs_sz);
- for (; f <= max_func; f++) {
- eax = f;
- subleaf = ecx = 0;
+ for (u32 f = range->index; f <= max_func; f++) {
+ u32 max_subleaf = MAX_SUBLEAF_NUM;
+ bool allzero;
- cpuid(&eax, &ebx, &ecx, &edx);
- allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx);
+ cpuid(f, eax, ebx, ecx, edx);
+
+ allzero = cpuid_store(range, f, 0, eax, ebx, ecx, edx);
if (allzero)
continue;
if (!has_subleafs(f))
continue;
- max_subleaf = MAX_SUBLEAF_NUM;
-
/*
* Some can provide the exact number of subleafs,
* others have to be tried (0xf)
@@ -256,20 +291,15 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
if (f == 0x80000026)
max_subleaf = 5;
- for (subleaf = 1; subleaf < max_subleaf; subleaf++) {
- eax = f;
- ecx = subleaf;
+ for (u32 subleaf = 1; subleaf < max_subleaf; subleaf++) {
+ cpuid_count(f, subleaf, eax, ebx, ecx, edx);
- cpuid(&eax, &ebx, &ecx, &edx);
- allzero = cpuid_store(range, f, subleaf,
- eax, ebx, ecx, edx);
+ allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx);
if (allzero)
continue;
}
}
-
- return range;
}
/*
@@ -280,15 +310,13 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
* 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs
* 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3)
*/
-static int parse_line(char *line)
+static void parse_line(char *line)
{
char *str;
- int i;
struct cpuid_range *range;
struct cpuid_func *func;
struct subleaf *leaf;
u32 index;
- u32 sub;
char buffer[512];
char *buf;
/*
@@ -310,12 +338,12 @@ static int parse_line(char *line)
/* Skip comments and NULL line */
if (line[0] == '#' || line[0] == '\n')
- return 0;
+ return;
strncpy(buffer, line, 511);
buffer[511] = 0;
str = buffer;
- for (i = 0; i < 5; i++) {
+ for (int i = 0; i < 5; i++) {
tokens[i] = strtok(str, ",");
if (!tokens[i])
goto err_exit;
@@ -328,21 +356,19 @@ static int parse_line(char *line)
/* index/main-leaf */
index = strtoull(tokens[0], NULL, 0);
- if (index & 0x80000000)
- range = leafs_ext;
- else
- range = leafs_basic;
-
- index &= 0x7FFFFFFF;
- /* Skip line parsing for non-existing indexes */
- if ((int)index >= range->nr)
- return -1;
+ /*
+ * Skip line parsing if the index is not covered by known-valid
+ * CPUID ranges on this CPU.
+ */
+ range = index_to_cpuid_range(index);
+ if (!range)
+ return;
+ /* Skip line parsing if the index CPUID output is all zero */
+ index &= CPUID_FUNCTION_MASK;
func = &range->funcs[index];
-
- /* Return if the index has no valid item on this platform */
if (!func->nr)
- return 0;
+ return;
/* subleaf */
buf = tokens[1];
@@ -355,11 +381,11 @@ static int parse_line(char *line)
subleaf_start = strtoul(start, NULL, 0);
subleaf_end = min(subleaf_end, (u32)(func->nr - 1));
if (subleaf_start > subleaf_end)
- return 0;
+ return;
} else {
subleaf_start = subleaf_end;
if (subleaf_start > (u32)(func->nr - 1))
- return 0;
+ return;
}
/* register */
@@ -382,7 +408,7 @@ static int parse_line(char *line)
bit_end = strtoul(end, NULL, 0);
bit_start = (start) ? strtoul(start, NULL, 0) : bit_end;
- for (sub = subleaf_start; sub <= subleaf_end; sub++) {
+ for (u32 sub = subleaf_start; sub <= subleaf_end; sub++) {
leaf = &func->leafs[sub];
reg = &leaf->info[reg_index];
bdesc = &reg->descs[reg->nr++];
@@ -392,12 +418,11 @@ static int parse_line(char *line)
strcpy(bdesc->simp, strtok(tokens[4], " \t"));
strcpy(bdesc->detail, tokens[5]);
}
- return 0;
+ return;
err_exit:
- printf("Warning: wrong line format:\n");
- printf("\tline[%d]: %s\n", flines, line);
- return -1;
+ warnx("Wrong line format:\n"
+ "\tline[%d]: %s", flines, line);
}
/* Parse csv file, and construct the array of all leafs and subleafs */
@@ -418,10 +443,8 @@ static void parse_text(void)
file = fopen("./cpuid.csv", "r");
}
- if (!file) {
- printf("Fail to open '%s'\n", filename);
- return;
- }
+ if (!file)
+ err(EXIT_FAILURE, "%s", filename);
while (1) {
ret = getline(&line, &len, file);
@@ -436,21 +459,13 @@ static void parse_text(void)
fclose(file);
}
-
-/* Decode every eax/ebx/ecx/edx */
-static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg)
+static void show_reg(const struct reg_desc *rdesc, u32 value)
{
- struct bits_desc *bdesc;
- int start, end, i;
+ const struct bits_desc *bdesc;
+ int start, end;
u32 mask;
- if (!rdesc->nr) {
- if (show_details)
- printf("\t %s: 0x%08x\n", reg_names[reg], value);
- return;
- }
-
- for (i = 0; i < rdesc->nr; i++) {
+ for (int i = 0; i < rdesc->nr; i++) {
bdesc = &rdesc->descs[i];
start = bdesc->start;
@@ -480,23 +495,21 @@ static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg)
}
}
-static void show_leaf(struct subleaf *leaf)
+static void show_reg_header(bool has_entries, u32 leaf, u32 subleaf, const char *reg_name)
{
- if (!leaf)
- return;
+ if (show_details && has_entries)
+ printf("CPUID_0x%x_%s[0x%x]:\n", leaf, reg_name, subleaf);
+}
- if (show_raw) {
+static void show_leaf(struct subleaf *leaf)
+{
+ if (show_raw)
leaf_print_raw(leaf);
- } else {
- if (show_details)
- printf("CPUID_0x%x_ECX[0x%x]:\n",
- leaf->index, leaf->sub);
- }
- decode_bits(leaf->eax, &leaf->info[R_EAX], R_EAX);
- decode_bits(leaf->ebx, &leaf->info[R_EBX], R_EBX);
- decode_bits(leaf->ecx, &leaf->info[R_ECX], R_ECX);
- decode_bits(leaf->edx, &leaf->info[R_EDX], R_EDX);
+ for (int i = R_EAX; i < NR_REGS; i++) {
+ show_reg_header((leaf->info[i].nr > 0), leaf->index, leaf->sub, reg_names[i]);
+ show_reg(&leaf->info[i], leaf->output[i]);
+ }
if (!show_raw && show_details)
printf("\n");
@@ -504,46 +517,37 @@ static void show_leaf(struct subleaf *leaf)
static void show_func(struct cpuid_func *func)
{
- int i;
-
- if (!func)
- return;
-
- for (i = 0; i < func->nr; i++)
+ for (int i = 0; i < func->nr; i++)
show_leaf(&func->leafs[i]);
}
static void show_range(struct cpuid_range *range)
{
- int i;
-
- for (i = 0; i < range->nr; i++)
+ for (int i = 0; i < range->nr; i++)
show_func(&range->funcs[i]);
}
static inline struct cpuid_func *index_to_func(u32 index)
{
+ u32 func_idx = index & CPUID_FUNCTION_MASK;
struct cpuid_range *range;
- u32 func_idx;
-
- range = (index & 0x80000000) ? leafs_ext : leafs_basic;
- func_idx = index & 0xffff;
- if ((func_idx + 1) > (u32)range->nr) {
- printf("ERR: invalid input index (0x%x)\n", index);
+ range = index_to_cpuid_range(index);
+ if (!range)
return NULL;
- }
+
return &range->funcs[func_idx];
}
static void show_info(void)
{
+ struct cpuid_range *range;
struct cpuid_func *func;
if (show_raw) {
/* Show all of the raw output of 'cpuid' instr */
- raw_dump_range(leafs_basic);
- raw_dump_range(leafs_ext);
+ for_each_valid_cpuid_range(range)
+ raw_dump_range(range);
return;
}
@@ -551,18 +555,19 @@ static void show_info(void)
/* Only show specific leaf/subleaf info */
func = index_to_func(user_index);
if (!func)
- return;
+ errx(EXIT_FAILURE, "Invalid input leaf (0x%x)", user_index);
/* Dump the raw data also */
show_raw = true;
if (user_sub != 0xFFFFFFFF) {
- if (user_sub + 1 <= (u32)func->nr) {
- show_leaf(&func->leafs[user_sub]);
- return;
+ if (user_sub + 1 > (u32)func->nr) {
+ errx(EXIT_FAILURE, "Leaf 0x%x has no valid subleaf = 0x%x",
+ user_index, user_sub);
}
- printf("ERR: invalid input subleaf (0x%x)\n", user_sub);
+ show_leaf(&func->leafs[user_sub]);
+ return;
}
show_func(func);
@@ -570,38 +575,21 @@ static void show_info(void)
}
printf("CPU features:\n=============\n\n");
- show_range(leafs_basic);
- show_range(leafs_ext);
+ for_each_valid_cpuid_range(range)
+ show_range(range);
}
-static void setup_platform_cpuid(void)
+static void __noreturn usage(int exit_code)
{
- u32 eax, ebx, ecx, edx;
-
- /* Check vendor */
- eax = ebx = ecx = edx = 0;
- cpuid(&eax, &ebx, &ecx, &edx);
-
- /* "htuA" */
- if (ebx == 0x68747541)
- is_amd = true;
-
- /* Setup leafs for the basic and extended range */
- leafs_basic = setup_cpuid_range(0x0);
- leafs_ext = setup_cpuid_range(0x80000000);
-}
-
-static void usage(void)
-{
- printf("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n"
- "\t-a|--all Show both bit flags and complex bit fields info\n"
- "\t-b|--bitflags Show boolean flags only\n"
- "\t-d|--detail Show details of the flag/fields (default)\n"
- "\t-f|--flags Specify the cpuid csv file\n"
- "\t-h|--help Show usage info\n"
- "\t-l|--leaf=index Specify the leaf you want to check\n"
- "\t-r|--raw Show raw cpuid data\n"
- "\t-s|--subleaf=sub Specify the subleaf you want to check\n"
+ errx(exit_code, "kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n"
+ "\t-a|--all Show both bit flags and complex bit fields info\n"
+ "\t-b|--bitflags Show boolean flags only\n"
+ "\t-d|--detail Show details of the flag/fields (default)\n"
+ "\t-f|--flags Specify the CPUID CSV file\n"
+ "\t-h|--help Show usage info\n"
+ "\t-l|--leaf=index Specify the leaf you want to check\n"
+ "\t-r|--raw Show raw CPUID data\n"
+ "\t-s|--subleaf=sub Specify the subleaf you want to check"
);
}
@@ -617,7 +605,7 @@ static struct option opts[] = {
{ NULL, 0, NULL, 0 }
};
-static int parse_options(int argc, char *argv[])
+static void parse_options(int argc, char *argv[])
{
int c;
@@ -637,9 +625,7 @@ static int parse_options(int argc, char *argv[])
user_csv = optarg;
break;
case 'h':
- usage();
- exit(1);
- break;
+ usage(EXIT_SUCCESS);
case 'l':
/* main leaf */
user_index = strtoul(optarg, NULL, 0);
@@ -652,11 +638,8 @@ static int parse_options(int argc, char *argv[])
user_sub = strtoul(optarg, NULL, 0);
break;
default:
- printf("%s: Invalid option '%c'\n", argv[0], optopt);
- return -1;
- }
-
- return 0;
+ usage(EXIT_FAILURE);
+ }
}
/*
@@ -669,11 +652,13 @@ static int parse_options(int argc, char *argv[])
*/
int main(int argc, char *argv[])
{
- if (parse_options(argc, argv))
- return -1;
+ struct cpuid_range *range;
+
+ parse_options(argc, argv);
/* Setup the cpuid leafs of current platform */
- setup_platform_cpuid();
+ for_each_cpuid_range(range)
+ setup_cpuid_range(range);
/* Read and parse the 'cpuid.csv' */
parse_text();
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index e91d4c4e1c16..bce69c6bfa69 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -324,6 +324,11 @@ int insn_get_opcode(struct insn *insn)
}
insn->attr = inat_get_opcode_attribute(op);
+ if (insn->x86_64 && inat_is_invalid64(insn->attr)) {
+ /* This instruction is invalid, like UD2. Stop decoding. */
+ insn->attr &= INAT_INV64;
+ }
+
while (inat_is_escape(insn->attr)) {
/* Get escaped opcode */
op = get_next(insn_byte_t, insn);
@@ -337,6 +342,7 @@ int insn_get_opcode(struct insn *insn)
insn->attr = 0;
return -EINVAL;
}
+
end:
opcode->got = 1;
return 0;
@@ -658,7 +664,6 @@ int insn_get_immediate(struct insn *insn)
}
if (!inat_has_immediate(insn->attr))
- /* no immediates */
goto done;
switch (inat_immediate_size(insn->attr)) {
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index f5dd84eb55dc..262f7ca1fb95 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -35,7 +35,7 @@
# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
#
-# REX2 Prefix
+# REX2 Prefix Superscripts
# - (!REX2): REX2 is not allowed
# - (REX2): REX2 variant e.g. JMPABS
@@ -147,7 +147,7 @@ AVXcode:
# 0x60 - 0x6f
60: PUSHA/PUSHAD (i64)
61: POPA/POPAD (i64)
-62: BOUND Gv,Ma (i64) | EVEX (Prefix)
+62: BOUND Gv,Ma (i64) | EVEX (Prefix),(o64)
63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
64: SEG=FS (Prefix)
65: SEG=GS (Prefix)
@@ -253,8 +253,8 @@ c0: Grp2 Eb,Ib (1A)
c1: Grp2 Ev,Ib (1A)
c2: RETN Iw (f64)
c3: RETN
-c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
-c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix),(o64)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix),(o64)
c6: Grp11A Eb,Ib (1A)
c7: Grp11B Ev,Iz (1A)
c8: ENTER Iw,Ib
@@ -286,10 +286,10 @@ df: ESC
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
-e0: LOOPNE/LOOPNZ Jb (f64) (!REX2)
-e1: LOOPE/LOOPZ Jb (f64) (!REX2)
-e2: LOOP Jb (f64) (!REX2)
-e3: JrCXZ Jb (f64) (!REX2)
+e0: LOOPNE/LOOPNZ Jb (f64),(!REX2)
+e1: LOOPE/LOOPZ Jb (f64),(!REX2)
+e2: LOOP Jb (f64),(!REX2)
+e3: JrCXZ Jb (f64),(!REX2)
e4: IN AL,Ib (!REX2)
e5: IN eAX,Ib (!REX2)
e6: OUT Ib,AL (!REX2)
@@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2)
# in "near" jumps and calls is 16-bit. For CALL,
# push of return address is 16-bit wide, RSP is decremented by 2
# but is not truncated to 16 bits, unlike RIP.
-e8: CALL Jz (f64) (!REX2)
-e9: JMP-near Jz (f64) (!REX2)
-ea: JMP-far Ap (i64) (!REX2)
-eb: JMP-short Jb (f64) (!REX2)
+e8: CALL Jz (f64),(!REX2)
+e9: JMP-near Jz (f64),(!REX2)
+ea: JMP-far Ap (i64),(!REX2)
+eb: JMP-short Jb (f64),(!REX2)
ec: IN AL,DX (!REX2)
ed: IN eAX,DX (!REX2)
ee: OUT DX,AL (!REX2)
@@ -478,22 +478,22 @@ AVXcode: 1
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
# 0x0f 0x80-0x8f
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
-80: JO Jz (f64) (!REX2)
-81: JNO Jz (f64) (!REX2)
-82: JB/JC/JNAE Jz (f64) (!REX2)
-83: JAE/JNB/JNC Jz (f64) (!REX2)
-84: JE/JZ Jz (f64) (!REX2)
-85: JNE/JNZ Jz (f64) (!REX2)
-86: JBE/JNA Jz (f64) (!REX2)
-87: JA/JNBE Jz (f64) (!REX2)
-88: JS Jz (f64) (!REX2)
-89: JNS Jz (f64) (!REX2)
-8a: JP/JPE Jz (f64) (!REX2)
-8b: JNP/JPO Jz (f64) (!REX2)
-8c: JL/JNGE Jz (f64) (!REX2)
-8d: JNL/JGE Jz (f64) (!REX2)
-8e: JLE/JNG Jz (f64) (!REX2)
-8f: JNLE/JG Jz (f64) (!REX2)
+80: JO Jz (f64),(!REX2)
+81: JNO Jz (f64),(!REX2)
+82: JB/JC/JNAE Jz (f64),(!REX2)
+83: JAE/JNB/JNC Jz (f64),(!REX2)
+84: JE/JZ Jz (f64),(!REX2)
+85: JNE/JNZ Jz (f64),(!REX2)
+86: JBE/JNA Jz (f64),(!REX2)
+87: JA/JNBE Jz (f64),(!REX2)
+88: JS Jz (f64),(!REX2)
+89: JNS Jz (f64),(!REX2)
+8a: JP/JPE Jz (f64),(!REX2)
+8b: JNP/JPO Jz (f64),(!REX2)
+8c: JL/JNGE Jz (f64),(!REX2)
+8d: JNL/JGE Jz (f64),(!REX2)
+8e: JLE/JNG Jz (f64),(!REX2)
+8f: JNLE/JG Jz (f64),(!REX2)
# 0x0f 0x90-0x9f
90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk
index 5770c8097f32..2c19d7fc8a85 100644
--- a/tools/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk
@@ -64,6 +64,8 @@ BEGIN {
modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
+ invalid64_expr = "\\(i64\\)"
+ only64_expr = "\\(o64\\)"
rex_expr = "^((REX(\\.[XRWB]+)+)|(REX$))"
rex2_expr = "\\(REX2\\)"
no_rex2_expr = "\\(!REX2\\)"
@@ -319,6 +321,11 @@ function convert_operands(count,opnd, i,j,imm,mod)
if (match(ext, force64_expr))
flags = add_flags(flags, "INAT_FORCE64")
+ # check invalid in 64-bit (and no only64)
+ if (match(ext, invalid64_expr) &&
+ !match($0, only64_expr))
+ flags = add_flags(flags, "INAT_INV64")
+
# check REX2 not allowed
if (match(ext, no_rex2_expr))
flags = add_flags(flags, "INAT_NO_REX2")
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index d6304e01afe0..da3152c16228 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -31,7 +31,7 @@ PROG COMMANDS
| **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }]
| **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }]
| **bpftool** **prog pin** *PROG* *FILE*
-| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**]
+| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] [**kernel_btf** *BTF_FILE*]
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog tracelog**
@@ -127,7 +127,7 @@ bpftool prog pin *PROG* *FILE*
Note: *FILE* must be located in *bpffs* mount. It must not contain a dot
character ('.'), which is reserved for future extensions of *bpffs*.
-bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | name *NAME* } *MAP*] [{ offload_dev | xdpmeta_dev } *NAME*] [pinmaps *MAP_DIR*] [autoattach]
+bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | name *NAME* } *MAP*] [{ offload_dev | xdpmeta_dev } *NAME*] [pinmaps *MAP_DIR*] [autoattach] [kernel_btf *BTF_FILE*]
Load bpf program(s) from binary *OBJ* and pin as *PATH*. **bpftool prog
load** pins only the first program from the *OBJ* as *PATH*. **bpftool prog
loadall** pins all programs from the *OBJ* under *PATH* directory. **type**
@@ -153,6 +153,12 @@ bpftool prog { load | loadall } *OBJ* *PATH* [type *TYPE*] [map { idx *IDX* | na
program does not support autoattach, bpftool falls back to regular pinning
for that program instead.
+ The **kernel_btf** option allows specifying an external BTF file to replace
+ the system's own vmlinux BTF file for CO-RE relocations. Note that any
+ other feature relying on BTF (such as fentry/fexit programs, struct_ops)
+ requires the BTF file for the actual kernel running on the host, often
+ exposed at /sys/kernel/btf/vmlinux.
+
Note: *PATH* must be located in *bpffs* mount. It must not contain a dot
character ('.'), which is reserved for future extensions of *bpffs*.
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 1ce409a6cbd9..27512feb5c70 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -505,13 +505,13 @@ _bpftool()
_bpftool_get_map_names
return 0
;;
- pinned|pinmaps)
+ pinned|pinmaps|kernel_btf)
_filedir
return 0
;;
*)
COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
- _bpftool_once_attr 'type pinmaps autoattach'
+ _bpftool_once_attr 'type pinmaps autoattach kernel_btf'
_bpftool_one_of_list 'offload_dev xdpmeta_dev'
return 0
;;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 93b139bfb988..944ebe21a216 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -221,7 +221,7 @@ static int cgroup_has_attached_progs(int cgroup_fd)
for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) {
int count = count_attached_bpf_progs(cgroup_fd, cgroup_attach_types[i]);
- if (count < 0)
+ if (count < 0 && errno != EINVAL)
return -1;
if (count > 0) {
@@ -318,11 +318,11 @@ static int show_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
static int do_show(int argc, char **argv)
{
- enum bpf_attach_type type;
int has_attached_progs;
const char *path;
int cgroup_fd;
int ret = -1;
+ unsigned int i;
query_flags = 0;
@@ -370,14 +370,14 @@ static int do_show(int argc, char **argv)
"AttachFlags", "Name");
btf_vmlinux = libbpf_find_kernel_btf();
- for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) {
/*
* Not all attach types may be supported, so it's expected,
* that some requests will fail.
* If we were able to get the show for at least one
* attach type, let's return 0.
*/
- if (show_bpf_progs(cgroup_fd, type, 0) == 0)
+ if (show_bpf_progs(cgroup_fd, cgroup_attach_types[i], 0) == 0)
ret = 0;
}
@@ -400,9 +400,9 @@ exit:
static int do_show_tree_fn(const char *fpath, const struct stat *sb,
int typeflag, struct FTW *ftw)
{
- enum bpf_attach_type type;
int has_attached_progs;
int cgroup_fd;
+ unsigned int i;
if (typeflag != FTW_D)
return 0;
@@ -434,8 +434,8 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
}
btf_vmlinux = libbpf_find_kernel_btf();
- for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
- show_bpf_progs(cgroup_fd, type, ftw->level);
+ for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++)
+ show_bpf_progs(cgroup_fd, cgroup_attach_types[i], ftw->level);
if (errno == EINVAL)
/* Last attach type does not support query.
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 52fd2c9fac56..3535afc80a49 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -380,6 +380,7 @@ show_perf_event_uprobe_json(struct bpf_link_info *info, json_writer_t *wtr)
u64_to_ptr(info->perf_event.uprobe.file_name));
jsonw_uint_field(wtr, "offset", info->perf_event.uprobe.offset);
jsonw_uint_field(wtr, "cookie", info->perf_event.uprobe.cookie);
+ jsonw_uint_field(wtr, "ref_ctr_offset", info->perf_event.uprobe.ref_ctr_offset);
}
static void
@@ -823,6 +824,8 @@ static void show_perf_event_uprobe_plain(struct bpf_link_info *info)
printf("%s+%#x ", buf, info->perf_event.uprobe.offset);
if (info->perf_event.uprobe.cookie)
printf("cookie %llu ", info->perf_event.uprobe.cookie);
+ if (info->perf_event.uprobe.ref_ctr_offset)
+ printf("ref_ctr_offset 0x%llx ", info->perf_event.uprobe.ref_ctr_offset);
}
static void show_perf_event_tracepoint_plain(struct bpf_link_info *info)
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f010295350be..96eea8a67225 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1681,8 +1681,17 @@ offload_dev:
} else if (is_prefix(*argv, "autoattach")) {
auto_attach = true;
NEXT_ARG();
+ } else if (is_prefix(*argv, "kernel_btf")) {
+ NEXT_ARG();
+
+ if (!REQ_ARGS(1))
+ goto err_free_reuse_maps;
+
+ open_opts.btf_custom_path = GET_ARG();
} else {
- p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?",
+ p_err("expected no more arguments, "
+ "'type', 'map', 'offload_dev', 'xdpmeta_dev', 'pinmaps', "
+ "'autoattach', or 'kernel_btf', got: '%s'?",
*argv);
goto err_free_reuse_maps;
}
@@ -2474,6 +2483,7 @@ static int do_help(int argc, char **argv)
" [map { idx IDX | name NAME } MAP]\\\n"
" [pinmaps MAP_DIR]\n"
" [autoattach]\n"
+ " [kernel_btf BTF_FILE]\n"
" %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n"
" %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n"
" %1$s %2$s run PROG \\\n"
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index f9702877ac21..c335ce0bd195 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -30,18 +30,41 @@ all_files := \
crt.h \
ctype.h \
dirent.h \
+ elf.h \
errno.h \
+ fcntl.h \
+ getopt.h \
limits.h \
+ math.h \
nolibc.h \
+ poll.h \
+ sched.h \
signal.h \
stackprotector.h \
std.h \
stdarg.h \
stdbool.h \
+ stddef.h \
stdint.h \
stdlib.h \
string.h \
sys.h \
+ sys/auxv.h \
+ sys/ioctl.h \
+ sys/mman.h \
+ sys/mount.h \
+ sys/prctl.h \
+ sys/random.h \
+ sys/reboot.h \
+ sys/resource.h \
+ sys/stat.h \
+ sys/syscall.h \
+ sys/sysmacros.h \
+ sys/time.h \
+ sys/timerfd.h \
+ sys/types.h \
+ sys/utsname.h \
+ sys/wait.h \
time.h \
types.h \
unistd.h \
@@ -72,7 +95,7 @@ help:
headers:
$(Q)mkdir -p $(OUTPUT)sysroot
$(Q)mkdir -p $(OUTPUT)sysroot/include
- $(Q)cp $(all_files) $(OUTPUT)sysroot/include/
+ $(Q)cp --parents $(all_files) $(OUTPUT)sysroot/include/
$(Q)if [ "$(ARCH)" = "x86" ]; then \
sed -e \
's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \
@@ -91,5 +114,14 @@ headers_standalone: headers
$(Q)$(MAKE) -C $(srctree) headers
$(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot
+# GCC uses "s390", clang "systemz"
+CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS))
+
+headers_check: headers_standalone
+ for header in $(filter-out crt.h std.h,$(all_files)); do \
+ $(CC) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \
+ -I$(or $(objtree),$(srctree))/usr/include -include $$header -include $$header || exit 1; \
+ done
+
clean:
$(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot"
diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h
index 06fdef7b291a..937a348da42e 100644
--- a/tools/include/nolibc/arch-aarch64.h
+++ b/tools/include/nolibc/arch-aarch64.h
@@ -146,7 +146,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
{
__asm__ volatile (
"mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */
- "and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */
"bl _start_c\n" /* transfer to c runtime */
);
__nolibc_entrypoint_epilogue();
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index 6180ff99ab43..1f66e7e5a444 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -189,8 +189,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
{
__asm__ volatile (
"mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */
- "and ip, r0, #-8\n" /* sp must be 8-byte aligned in the callee */
- "mov sp, ip\n"
"bl _start_c\n" /* transfer to c runtime */
);
__nolibc_entrypoint_epilogue();
diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h
index ff5afc35bbd8..7c9b38e96418 100644
--- a/tools/include/nolibc/arch-i386.h
+++ b/tools/include/nolibc/arch-i386.h
@@ -167,8 +167,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
__asm__ volatile (
"xor %ebp, %ebp\n" /* zero the stack frame */
"mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */
- "add $12, %esp\n" /* avoid over-estimating after the 'and' & 'sub' below */
- "and $-16, %esp\n" /* the %esp must be 16-byte aligned on 'call' */
"sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */
"push %eax\n" /* push arg1 on stack to support plain stack modes too */
"call _start_c\n" /* transfer to c runtime */
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index fb519545959e..5511705303ea 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -142,18 +142,11 @@
_arg1; \
})
-#if __loongarch_grlen == 32
-#define LONG_BSTRINS "bstrins.w"
-#else /* __loongarch_grlen == 64 */
-#define LONG_BSTRINS "bstrins.d"
-#endif
-
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
"move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */
- LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */
"bl _start_c\n" /* transfer to c runtime */
);
__nolibc_entrypoint_epilogue();
diff --git a/tools/include/nolibc/arch-m68k.h b/tools/include/nolibc/arch-m68k.h
new file mode 100644
index 000000000000..6dac1845f298
--- /dev/null
+++ b/tools/include/nolibc/arch-m68k.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * m68k specific definitions for NOLIBC
+ * Copyright (C) 2025 Daniel Palmer<daniel@thingy.jp>
+ *
+ * Roughly based on one or more of the other arch files.
+ *
+ */
+
+#ifndef _NOLIBC_ARCH_M68K_H
+#define _NOLIBC_ARCH_M68K_H
+
+#include "compiler.h"
+#include "crt.h"
+
+#define _NOLIBC_SYSCALL_CLOBBERLIST "memory"
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_num) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_arg1) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_arg1), "r"(_arg2) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r"(_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ register long _arg4 __asm__ ("d4") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r" (_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ register long _arg4 __asm__ ("d4") = (long)(arg4); \
+ register long _arg5 __asm__ ("d5") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r" (_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("d0") = (num); \
+ register long _arg1 __asm__ ("d1") = (long)(arg1); \
+ register long _arg2 __asm__ ("d2") = (long)(arg2); \
+ register long _arg3 __asm__ ("d3") = (long)(arg3); \
+ register long _arg4 __asm__ ("d4") = (long)(arg4); \
+ register long _arg5 __asm__ ("d5") = (long)(arg5); \
+ register long _arg6 __asm__ ("a0") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ "trap #0\n" \
+ : "+r" (_num) \
+ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
+ "r"(_arg6) \
+ : _NOLIBC_SYSCALL_CLOBBERLIST \
+ ); \
+ _num; \
+})
+
+void _start(void);
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ "movel %sp, %sp@-\n"
+ "jsr _start_c\n"
+ );
+ __nolibc_entrypoint_epilogue();
+}
+
+#endif /* _NOLIBC_ARCH_M68K_H */
diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h
index ee2fdb8d601d..204564bbcd32 100644
--- a/tools/include/nolibc/arch-powerpc.h
+++ b/tools/include/nolibc/arch-powerpc.h
@@ -201,7 +201,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
__asm__ volatile (
"mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */
- "clrrdi 1, 1, 4\n" /* align the stack to 16 bytes */
"li 0, 0\n" /* zero the frame pointer */
"stdu 1, -32(1)\n" /* the initial stack frame */
"bl _start_c\n" /* transfer to c runtime */
@@ -209,7 +208,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
#else
__asm__ volatile (
"mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */
- "clrrwi 1, 1, 4\n" /* align the stack to 16 bytes */
"li 0, 0\n" /* zero the frame pointer */
"stwu 1, -16(1)\n" /* the initial stack frame */
"bl _start_c\n" /* transfer to c runtime */
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index 8827bf936212..885383a86c38 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -148,7 +148,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
"lla gp, __global_pointer$\n"
".option pop\n"
"mv a0, sp\n" /* save stack pointer to a0, as arg1 of _start_c */
- "andi sp, a0, -16\n" /* sp must be 16-byte aligned */
"call _start_c\n" /* transfer to c runtime */
);
__nolibc_entrypoint_epilogue();
diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h
new file mode 100644
index 000000000000..1435172f3dfe
--- /dev/null
+++ b/tools/include/nolibc/arch-sparc.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * SPARC (32bit and 64bit) specific definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_ARCH_SPARC_H
+#define _NOLIBC_ARCH_SPARC_H
+
+#include <linux/unistd.h>
+
+#include "compiler.h"
+#include "crt.h"
+
+/*
+ * Syscalls for SPARC:
+ * - registers are native word size
+ * - syscall number is passed in g1
+ * - arguments are in o0-o5
+ * - the system call is performed by calling a trap instruction
+ * - syscall return value is in o0
+ * - syscall error flag is in the carry bit of the processor status register
+ */
+
+#ifdef __arch64__
+
+#define _NOLIBC_SYSCALL "t 0x6d\n" \
+ "bcs,a %%xcc, 1f\n" \
+ "sub %%g0, %%o0, %%o0\n" \
+ "1:\n"
+
+#else
+
+#define _NOLIBC_SYSCALL "t 0x10\n" \
+ "bcs,a 1f\n" \
+ "sub %%g0, %%o0, %%o0\n" \
+ "1:\n"
+
+#endif /* __arch64__ */
+
+#define my_syscall0(num) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0"); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall1(num, arg1) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall2(num, arg1, arg2) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ register long _arg4 __asm__ ("o3") = (long)(arg4); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ register long _arg4 __asm__ ("o3") = (long)(arg4); \
+ register long _arg5 __asm__ ("o4") = (long)(arg5); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \
+({ \
+ register long _num __asm__ ("g1") = (num); \
+ register long _arg1 __asm__ ("o0") = (long)(arg1); \
+ register long _arg2 __asm__ ("o1") = (long)(arg2); \
+ register long _arg3 __asm__ ("o2") = (long)(arg3); \
+ register long _arg4 __asm__ ("o3") = (long)(arg4); \
+ register long _arg5 __asm__ ("o4") = (long)(arg5); \
+ register long _arg6 __asm__ ("o5") = (long)(arg6); \
+ \
+ __asm__ volatile ( \
+ _NOLIBC_SYSCALL \
+ : "+r"(_arg1) \
+ : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+ "r"(_num) \
+ : "memory", "cc" \
+ ); \
+ _arg1; \
+})
+
+/* startup code */
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+ __asm__ volatile (
+ /*
+ * Save argc pointer to o0, as arg1 of _start_c.
+ * Account for the window save area, which is 16 registers wide.
+ */
+#ifdef __arch64__
+ "add %sp, 128 + 2047, %o0\n" /* on sparc64 / v9 the stack is offset by 2047 */
+#else
+ "add %sp, 64, %o0\n"
+#endif
+ "b,a _start_c\n" /* transfer to c runtime */
+ );
+ __nolibc_entrypoint_epilogue();
+}
+
+static pid_t getpid(void);
+
+static __attribute__((unused))
+pid_t sys_fork(void)
+{
+ pid_t parent, ret;
+
+ parent = getpid();
+ ret = my_syscall0(__NR_fork);
+
+ /* The syscall returns the parent pid in the child instead of 0 */
+ if (ret == parent)
+ return 0;
+ else
+ return ret;
+}
+#define sys_fork sys_fork
+
+#endif /* _NOLIBC_ARCH_SPARC_H */
diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h
index 1e40620a2b33..67305e24dbef 100644
--- a/tools/include/nolibc/arch-x86_64.h
+++ b/tools/include/nolibc/arch-x86_64.h
@@ -166,7 +166,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
__asm__ volatile (
"xor %ebp, %ebp\n" /* zero the stack frame */
"mov %rsp, %rdi\n" /* save stack pointer to %rdi, as arg1 of _start_c */
- "and $-16, %rsp\n" /* %rsp must be 16-byte aligned before call */
"call _start_c\n" /* transfer to c runtime */
"hlt\n" /* ensure it does not return */
);
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index 8a2c143c0fba..d20b2304aac2 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -33,6 +33,10 @@
#include "arch-s390.h"
#elif defined(__loongarch__)
#include "arch-loongarch.h"
+#elif defined(__sparc__)
+#include "arch-sparc.h"
+#elif defined(__m68k__)
+#include "arch-m68k.h"
#else
#error Unsupported Architecture
#endif
diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h
index fa1f547e7f13..369cfb5a0e78 100644
--- a/tools/include/nolibc/compiler.h
+++ b/tools/include/nolibc/compiler.h
@@ -12,6 +12,15 @@
# define __nolibc_has_attribute(attr) 0
#endif
+#if defined(__has_feature)
+# define __nolibc_has_feature(feature) __has_feature(feature)
+#else
+# define __nolibc_has_feature(feature) 0
+#endif
+
+#define __nolibc_aligned(alignment) __attribute__((aligned(alignment)))
+#define __nolibc_aligned_as(type) __nolibc_aligned(__alignof__(type))
+
#if __nolibc_has_attribute(naked)
# define __nolibc_entrypoint __attribute__((naked))
# define __nolibc_entrypoint_epilogue()
diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index c4b10103bbec..961cfe777c35 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -7,6 +7,8 @@
#ifndef _NOLIBC_CRT_H
#define _NOLIBC_CRT_H
+#include "compiler.h"
+
char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
@@ -25,6 +27,9 @@ extern void (*const __fini_array_end[])(void) __attribute__((weak));
void _start_c(long *sp);
__attribute__((weak,used))
+#if __nolibc_has_feature(undefined_behavior_sanitizer)
+ __attribute__((no_sanitize("function")))
+#endif
void _start_c(long *sp)
{
long argc;
diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h
index 6f90706d0644..470fdf34394a 100644
--- a/tools/include/nolibc/ctype.h
+++ b/tools/include/nolibc/ctype.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_CTYPE_H
#define _NOLIBC_CTYPE_H
@@ -96,7 +99,4 @@ int ispunct(int c)
return isgraph(c) && !isalnum(c);
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_CTYPE_H */
diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h
index c5c30d0dd680..758b95c48e7a 100644
--- a/tools/include/nolibc/dirent.h
+++ b/tools/include/nolibc/dirent.h
@@ -4,11 +4,16 @@
* Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_DIRENT_H
#define _NOLIBC_DIRENT_H
+#include "compiler.h"
#include "stdint.h"
#include "types.h"
+#include "fcntl.h"
#include <linux/limits.h>
@@ -58,7 +63,7 @@ int closedir(DIR *dirp)
static __attribute__((unused))
int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
{
- char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1];
+ char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1] __nolibc_aligned_as(struct linux_dirent64);
struct linux_dirent64 *ldir = (void *)buf;
intptr_t i = (intptr_t)dirp;
int fd, ret;
@@ -92,7 +97,4 @@ int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
return 0;
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_DIRENT_H */
diff --git a/tools/include/nolibc/elf.h b/tools/include/nolibc/elf.h
new file mode 100644
index 000000000000..3e2c5228bf3d
--- /dev/null
+++ b/tools/include/nolibc/elf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Shim elf.h header for NOLIBC.
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_SYS_ELF_H
+#define _NOLIBC_SYS_ELF_H
+
+#include <linux/elf.h>
+
+#endif /* _NOLIBC_SYS_ELF_H */
diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h
index 1d8d8033e8ff..08a33c40ec0c 100644
--- a/tools/include/nolibc/errno.h
+++ b/tools/include/nolibc/errno.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_ERRNO_H
#define _NOLIBC_ERRNO_H
@@ -22,7 +25,4 @@ int errno __attribute__((weak));
*/
#define MAX_ERRNO 4095
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_ERRNO_H */
diff --git a/tools/include/nolibc/fcntl.h b/tools/include/nolibc/fcntl.h
new file mode 100644
index 000000000000..bff2e542f20f
--- /dev/null
+++ b/tools/include/nolibc/fcntl.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * fcntl definition for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_FCNTL_H
+#define _NOLIBC_FCNTL_H
+
+#include "arch.h"
+#include "types.h"
+#include "sys.h"
+
+/*
+ * int openat(int dirfd, const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_openat(int dirfd, const char *path, int flags, mode_t mode)
+{
+ return my_syscall4(__NR_openat, dirfd, path, flags, mode);
+}
+
+static __attribute__((unused))
+int openat(int dirfd, const char *path, int flags, ...)
+{
+ mode_t mode = 0;
+
+ if (flags & O_CREAT) {
+ va_list args;
+
+ va_start(args, flags);
+ mode = va_arg(args, mode_t);
+ va_end(args);
+ }
+
+ return __sysret(sys_openat(dirfd, path, flags, mode));
+}
+
+/*
+ * int open(const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_open(const char *path, int flags, mode_t mode)
+{
+ return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
+}
+
+static __attribute__((unused))
+int open(const char *path, int flags, ...)
+{
+ mode_t mode = 0;
+
+ if (flags & O_CREAT) {
+ va_list args;
+
+ va_start(args, flags);
+ mode = va_arg(args, mode_t);
+ va_end(args);
+ }
+
+ return __sysret(sys_open(path, flags, mode));
+}
+
+#endif /* _NOLIBC_FCNTL_H */
diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h
new file mode 100644
index 000000000000..217abb95264b
--- /dev/null
+++ b/tools/include/nolibc/getopt.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * getopt function definitions for NOLIBC, adapted from musl libc
+ * Copyright (C) 2005-2020 Rich Felker, et al.
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_GETOPT_H
+#define _NOLIBC_GETOPT_H
+
+struct FILE;
+static struct FILE *const stderr;
+static int fprintf(struct FILE *stream, const char *fmt, ...);
+
+__attribute__((weak,unused,section(".data.nolibc_getopt")))
+char *optarg;
+
+__attribute__((weak,unused,section(".data.nolibc_getopt")))
+int optind = 1, opterr = 1, optopt;
+
+static __attribute__((unused))
+int getopt(int argc, char * const argv[], const char *optstring)
+{
+ static int __optpos;
+ int i;
+ char c, d;
+ char *optchar;
+
+ if (!optind) {
+ __optpos = 0;
+ optind = 1;
+ }
+
+ if (optind >= argc || !argv[optind])
+ return -1;
+
+ if (argv[optind][0] != '-') {
+ if (optstring[0] == '-') {
+ optarg = argv[optind++];
+ return 1;
+ }
+ return -1;
+ }
+
+ if (!argv[optind][1])
+ return -1;
+
+ if (argv[optind][1] == '-' && !argv[optind][2])
+ return optind++, -1;
+
+ if (!__optpos)
+ __optpos++;
+ c = argv[optind][__optpos];
+ optchar = argv[optind] + __optpos;
+ __optpos++;
+
+ if (!argv[optind][__optpos]) {
+ optind++;
+ __optpos = 0;
+ }
+
+ if (optstring[0] == '-' || optstring[0] == '+')
+ optstring++;
+
+ i = 0;
+ d = 0;
+ do {
+ d = optstring[i++];
+ } while (d && d != c);
+
+ if (d != c || c == ':') {
+ optopt = c;
+ if (optstring[0] != ':' && opterr)
+ fprintf(stderr, "%s: unrecognized option: %c\n", argv[0], *optchar);
+ return '?';
+ }
+ if (optstring[i] == ':') {
+ optarg = 0;
+ if (optstring[i + 1] != ':' || __optpos) {
+ optarg = argv[optind++];
+ if (__optpos)
+ optarg += __optpos;
+ __optpos = 0;
+ }
+ if (optind > argc) {
+ optopt = c;
+ if (optstring[0] == ':')
+ return ':';
+ if (opterr)
+ fprintf(stderr, "%s: option requires argument: %c\n",
+ argv[0], *optchar);
+ return '?';
+ }
+ }
+ return c;
+}
+
+#endif /* _NOLIBC_GETOPT_H */
diff --git a/tools/include/nolibc/math.h b/tools/include/nolibc/math.h
new file mode 100644
index 000000000000..9df823ddd412
--- /dev/null
+++ b/tools/include/nolibc/math.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * math definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_SYS_MATH_H
+#define _NOLIBC_SYS_MATH_H
+
+static __inline__
+double fabs(double x)
+{
+ return x >= 0 ? x : -x;
+}
+
+static __inline__
+float fabsf(float x)
+{
+ return x >= 0 ? x : -x;
+}
+
+static __inline__
+long double fabsl(long double x)
+{
+ return x >= 0 ? x : -x;
+}
+
+#endif /* _NOLIBC_SYS_MATH_H */
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 70872401aca8..c199ade200c2 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -96,7 +96,24 @@
#include "arch.h"
#include "types.h"
#include "sys.h"
+#include "sys/auxv.h"
+#include "sys/ioctl.h"
+#include "sys/mman.h"
+#include "sys/mount.h"
+#include "sys/prctl.h"
+#include "sys/random.h"
+#include "sys/reboot.h"
+#include "sys/resource.h"
+#include "sys/stat.h"
+#include "sys/syscall.h"
+#include "sys/sysmacros.h"
+#include "sys/time.h"
+#include "sys/timerfd.h"
+#include "sys/utsname.h"
+#include "sys/wait.h"
#include "ctype.h"
+#include "elf.h"
+#include "sched.h"
#include "signal.h"
#include "unistd.h"
#include "stdio.h"
@@ -105,6 +122,10 @@
#include "time.h"
#include "stackprotector.h"
#include "dirent.h"
+#include "fcntl.h"
+#include "getopt.h"
+#include "poll.h"
+#include "math.h"
/* Used by programs to avoid std includes */
#define NOLIBC
diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h
new file mode 100644
index 000000000000..1765acb17ea0
--- /dev/null
+++ b/tools/include/nolibc/poll.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * poll definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_POLL_H
+#define _NOLIBC_POLL_H
+
+#include "arch.h"
+#include "sys.h"
+
+#include <linux/poll.h>
+#include <linux/time.h>
+
+/*
+ * int poll(struct pollfd *fds, int nfds, int timeout);
+ */
+
+static __attribute__((unused))
+int sys_poll(struct pollfd *fds, int nfds, int timeout)
+{
+#if defined(__NR_ppoll)
+ struct timespec t;
+
+ if (timeout >= 0) {
+ t.tv_sec = timeout / 1000;
+ t.tv_nsec = (timeout % 1000) * 1000000;
+ }
+ return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0);
+#elif defined(__NR_ppoll_time64)
+ struct __kernel_timespec t;
+
+ if (timeout >= 0) {
+ t.tv_sec = timeout / 1000;
+ t.tv_nsec = (timeout % 1000) * 1000000;
+ }
+ return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0);
+#elif defined(__NR_poll)
+ return my_syscall3(__NR_poll, fds, nfds, timeout);
+#else
+ return __nolibc_enosys(__func__, fds, nfds, timeout);
+#endif
+}
+
+static __attribute__((unused))
+int poll(struct pollfd *fds, int nfds, int timeout)
+{
+ return __sysret(sys_poll(fds, nfds, timeout));
+}
+
+#endif /* _NOLIBC_POLL_H */
diff --git a/tools/include/nolibc/sched.h b/tools/include/nolibc/sched.h
new file mode 100644
index 000000000000..32221562c166
--- /dev/null
+++ b/tools/include/nolibc/sched.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * sched function definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_SCHED_H
+#define _NOLIBC_SCHED_H
+
+#include "sys.h"
+
+#include <linux/sched.h>
+
+/*
+ * int setns(int fd, int nstype);
+ */
+
+static __attribute__((unused))
+int sys_setns(int fd, int nstype)
+{
+ return my_syscall2(__NR_setns, fd, nstype);
+}
+
+static __attribute__((unused))
+int setns(int fd, int nstype)
+{
+ return __sysret(sys_setns(fd, nstype));
+}
+
+
+/*
+ * int unshare(int flags);
+ */
+
+static __attribute__((unused))
+int sys_unshare(int flags)
+{
+ return my_syscall1(__NR_unshare, flags);
+}
+
+static __attribute__((unused))
+int unshare(int flags)
+{
+ return __sysret(sys_unshare(flags));
+}
+
+#endif /* _NOLIBC_SCHED_H */
diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h
index cdcc5904c51e..ac13e53ac31d 100644
--- a/tools/include/nolibc/signal.h
+++ b/tools/include/nolibc/signal.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_SIGNAL_H
#define _NOLIBC_SIGNAL_H
@@ -20,7 +23,4 @@ int raise(int signal)
return sys_kill(sys_getpid(), signal);
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_SIGNAL_H */
diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
index 933bc0be7e1c..adda7333d12e 100644
--- a/tools/include/nolibc/std.h
+++ b/tools/include/nolibc/std.h
@@ -13,12 +13,8 @@
* syscall-specific stuff, as this file is expected to be included very early.
*/
-/* note: may already be defined */
-#ifndef NULL
-#define NULL ((void *)0)
-#endif
-
#include "stdint.h"
+#include "stddef.h"
/* those are commonly provided by sys/types.h */
typedef unsigned int dev_t;
diff --git a/tools/include/nolibc/stddef.h b/tools/include/nolibc/stddef.h
new file mode 100644
index 000000000000..ecbd13eab1f5
--- /dev/null
+++ b/tools/include/nolibc/stddef.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Stddef definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#ifndef _NOLIBC_STDDEF_H
+#define _NOLIBC_STDDEF_H
+
+#include "stdint.h"
+
+/* note: may already be defined */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+#ifndef offsetof
+#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
+#endif
+
+#endif /* _NOLIBC_STDDEF_H */
diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h
index cd79ddd6170e..b052ad6303c3 100644
--- a/tools/include/nolibc/stdint.h
+++ b/tools/include/nolibc/stdint.h
@@ -39,8 +39,8 @@ typedef size_t uint_fast32_t;
typedef int64_t int_fast64_t;
typedef uint64_t uint_fast64_t;
-typedef int64_t intmax_t;
-typedef uint64_t uintmax_t;
+typedef __INTMAX_TYPE__ intmax_t;
+typedef __UINTMAX_TYPE__ uintmax_t;
/* limits of integral types */
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index a403351dbf60..c470d334ef3f 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -4,12 +4,16 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_STDIO_H
#define _NOLIBC_STDIO_H
#include "std.h"
#include "arch.h"
#include "errno.h"
+#include "fcntl.h"
#include "types.h"
#include "sys.h"
#include "stdarg.h"
@@ -17,6 +21,8 @@
#include "string.h"
#include "compiler.h"
+static const char *strerror(int errnum);
+
#ifndef EOF
#define EOF (-1)
#endif
@@ -50,6 +56,32 @@ FILE *fdopen(int fd, const char *mode __attribute__((unused)))
return (FILE*)(intptr_t)~fd;
}
+static __attribute__((unused))
+FILE *fopen(const char *pathname, const char *mode)
+{
+ int flags, fd;
+
+ switch (*mode) {
+ case 'r':
+ flags = O_RDONLY;
+ break;
+ case 'w':
+ flags = O_WRONLY | O_CREAT | O_TRUNC;
+ break;
+ case 'a':
+ flags = O_WRONLY | O_CREAT | O_APPEND;
+ break;
+ default:
+ SET_ERRNO(EINVAL); return NULL;
+ }
+
+ if (mode[1] == '+')
+ flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR;
+
+ fd = open(pathname, flags, 0666);
+ return fdopen(fd, mode);
+}
+
/* provides the fd of stream. */
static __attribute__((unused))
int fileno(FILE *stream)
@@ -208,28 +240,40 @@ char *fgets(char *s, int size, FILE *stream)
}
-/* minimal vfprintf(). It supports the following formats:
+/* minimal printf(). It supports the following formats:
* - %[l*]{d,u,c,x,p}
* - %s
* - unknown modifiers are ignored.
*/
-static __attribute__((unused, format(printf, 2, 0)))
-int vfprintf(FILE *stream, const char *fmt, va_list args)
+typedef int (*__nolibc_printf_cb)(intptr_t state, const char *buf, size_t size);
+
+static __attribute__((unused, format(printf, 4, 0)))
+int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char *fmt, va_list args)
{
char escape, lpref, c;
unsigned long long v;
- unsigned int written;
- size_t len, ofs;
+ unsigned int written, width;
+ size_t len, ofs, w;
char tmpbuf[21];
const char *outstr;
written = ofs = escape = lpref = 0;
while (1) {
c = fmt[ofs++];
+ width = 0;
if (escape) {
/* we're in an escape sequence, ofs == 1 */
escape = 0;
+
+ /* width */
+ while (c >= '0' && c <= '9') {
+ width *= 10;
+ width += c - '0';
+
+ c = fmt[ofs++];
+ }
+
if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') {
char *out = tmpbuf;
@@ -277,6 +321,11 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
if (!outstr)
outstr="(null)";
}
+#ifndef NOLIBC_IGNORE_ERRNO
+ else if (c == 'm') {
+ outstr = strerror(errno);
+ }
+#endif /* NOLIBC_IGNORE_ERRNO */
else if (c == '%') {
/* queue it verbatim */
continue;
@@ -286,6 +335,8 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
if (c == 'l') {
/* long format prefix, maintain the escape */
lpref++;
+ } else if (c == 'j') {
+ lpref = 2;
}
escape = 1;
goto do_escape;
@@ -302,8 +353,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
outstr = fmt;
len = ofs - 1;
flush_str:
- if (_fwrite(outstr, len, stream) != 0)
- break;
+ if (n) {
+ w = len < n ? len : n;
+ n -= w;
+ while (width-- > w) {
+ if (cb(state, " ", 1) != 0)
+ break;
+ written += 1;
+ }
+ if (cb(state, outstr, w) != 0)
+ break;
+ }
written += len;
do_escape:
@@ -319,6 +379,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args)
return written;
}
+static int __nolibc_fprintf_cb(intptr_t state, const char *buf, size_t size)
+{
+ return _fwrite(buf, size, (FILE *)state);
+}
+
+static __attribute__((unused, format(printf, 2, 0)))
+int vfprintf(FILE *stream, const char *fmt, va_list args)
+{
+ return __nolibc_printf(__nolibc_fprintf_cb, (intptr_t)stream, SIZE_MAX, fmt, args);
+}
+
static __attribute__((unused, format(printf, 1, 0)))
int vprintf(const char *fmt, va_list args)
{
@@ -349,6 +420,85 @@ int printf(const char *fmt, ...)
return ret;
}
+static __attribute__((unused, format(printf, 2, 0)))
+int vdprintf(int fd, const char *fmt, va_list args)
+{
+ FILE *stream;
+
+ stream = fdopen(fd, NULL);
+ if (!stream)
+ return -1;
+ /* Technically 'stream' is leaked, but as it's only a wrapper around 'fd' that is fine */
+ return vfprintf(stream, fmt, args);
+}
+
+static __attribute__((unused, format(printf, 2, 3)))
+int dprintf(int fd, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vdprintf(fd, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
+static int __nolibc_sprintf_cb(intptr_t _state, const char *buf, size_t size)
+{
+ char **state = (char **)_state;
+
+ memcpy(*state, buf, size);
+ *state += size;
+ return 0;
+}
+
+static __attribute__((unused, format(printf, 3, 0)))
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ char *state = buf;
+ int ret;
+
+ ret = __nolibc_printf(__nolibc_sprintf_cb, (intptr_t)&state, size, fmt, args);
+ if (ret < 0)
+ return ret;
+ buf[(size_t)ret < size ? (size_t)ret : size - 1] = '\0';
+ return ret;
+}
+
+static __attribute__((unused, format(printf, 3, 4)))
+int snprintf(char *buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
+static __attribute__((unused, format(printf, 2, 0)))
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ return vsnprintf(buf, SIZE_MAX, fmt, args);
+}
+
+static __attribute__((unused, format(printf, 2, 3)))
+int sprintf(char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vsprintf(buf, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
static __attribute__((unused))
int vsscanf(const char *str, const char *format, va_list args)
{
@@ -485,7 +635,4 @@ const char *strerror(int errno)
return buf;
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_STDIO_H */
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 86ad378ab1ea..5fd99a480f82 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_STDLIB_H
#define _NOLIBC_STDLIB_H
@@ -29,6 +32,24 @@ static __attribute__((unused)) char itoa_buffer[21];
* As much as possible, please keep functions alphabetically sorted.
*/
+static __inline__
+int abs(int j)
+{
+ return j >= 0 ? j : -j;
+}
+
+static __inline__
+long labs(long j)
+{
+ return j >= 0 ? j : -j;
+}
+
+static __inline__
+long long llabs(long long j)
+{
+ return j >= 0 ? j : -j;
+}
+
/* must be exported, as it's used by libgcc for various divide functions */
void abort(void);
__attribute__((weak,unused,noreturn,section(".text.nolibc_abort")))
@@ -103,32 +124,6 @@ char *getenv(const char *name)
}
static __attribute__((unused))
-unsigned long getauxval(unsigned long type)
-{
- const unsigned long *auxv = _auxv;
- unsigned long ret;
-
- if (!auxv)
- return 0;
-
- while (1) {
- if (!auxv[0] && !auxv[1]) {
- ret = 0;
- break;
- }
-
- if (auxv[0] == type) {
- ret = auxv[1];
- break;
- }
-
- auxv += 2;
- }
-
- return ret;
-}
-
-static __attribute__((unused))
void *malloc(size_t len)
{
struct nolibc_heap *heap;
@@ -275,7 +270,7 @@ int itoa_r(long in, char *buffer)
int len = 0;
if (in < 0) {
- in = -in;
+ in = -(unsigned long)in;
*(ptr++) = '-';
len++;
}
@@ -411,7 +406,7 @@ int i64toa_r(int64_t in, char *buffer)
int len = 0;
if (in < 0) {
- in = -in;
+ in = -(uint64_t)in;
*(ptr++) = '-';
len++;
}
@@ -548,7 +543,4 @@ uintmax_t strtoumax(const char *nptr, char **endptr, int base)
return __strtox(nptr, endptr, base, 0, UINTMAX_MAX);
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_STDLIB_H */
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index ba84ab700e30..163a17e7dd38 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_STRING_H
#define _NOLIBC_STRING_H
@@ -289,7 +292,40 @@ char *strrchr(const char *s, int c)
return (char *)ret;
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
+static __attribute__((unused))
+char *strstr(const char *haystack, const char *needle)
+{
+ size_t len_haystack, len_needle;
+
+ len_needle = strlen(needle);
+ if (!len_needle)
+ return NULL;
+
+ len_haystack = strlen(haystack);
+ while (len_haystack >= len_needle) {
+ if (!memcmp(haystack, needle, len_needle))
+ return (char *)haystack;
+ haystack++;
+ len_haystack--;
+ }
+
+ return NULL;
+}
+
+static __attribute__((unused))
+int tolower(int c)
+{
+ if (c >= 'A' && c <= 'Z')
+ return c - 'A' + 'a';
+ return c;
+}
+
+static __attribute__((unused))
+int toupper(int c)
+{
+ if (c >= 'a' && c <= 'z')
+ return c - 'a' + 'A';
+ return c;
+}
#endif /* _NOLIBC_STRING_H */
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 08c1c074bec8..9556c69a6ae1 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_SYS_H
#define _NOLIBC_SYS_H
@@ -20,11 +23,7 @@
#include <linux/auxvec.h>
#include <linux/fcntl.h> /* for O_* and AT_* */
#include <linux/stat.h> /* for statx() */
-#include <linux/prctl.h>
-#include <linux/resource.h>
-#include <linux/utsname.h>
-#include "arch.h"
#include "errno.h"
#include "stdarg.h"
#include "types.h"
@@ -301,11 +300,17 @@ void sys_exit(int status)
}
static __attribute__((noreturn,unused))
-void exit(int status)
+void _exit(int status)
{
sys_exit(status);
}
+static __attribute__((noreturn,unused))
+void exit(int status)
+{
+ _exit(status);
+}
+
/*
* pid_t fork(void);
@@ -489,27 +494,6 @@ int getpagesize(void)
/*
- * int gettimeofday(struct timeval *tv, struct timezone *tz);
- */
-
-static __attribute__((unused))
-int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
-#ifdef __NR_gettimeofday
- return my_syscall2(__NR_gettimeofday, tv, tz);
-#else
- return __nolibc_enosys(__func__, tv, tz);
-#endif
-}
-
-static __attribute__((unused))
-int gettimeofday(struct timeval *tv, struct timezone *tz)
-{
- return __sysret(sys_gettimeofday(tv, tz));
-}
-
-
-/*
* uid_t getuid(void);
*/
@@ -531,18 +515,6 @@ uid_t getuid(void)
/*
- * int ioctl(int fd, unsigned long cmd, ... arg);
- */
-
-static __attribute__((unused))
-long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
- return my_syscall3(__NR_ioctl, fd, cmd, arg);
-}
-
-#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg)))
-
-/*
* int kill(pid_t pid, int signal);
*/
@@ -697,125 +669,6 @@ int mknod(const char *path, mode_t mode, dev_t dev)
return __sysret(sys_mknod(path, mode, dev));
}
-#ifndef sys_mmap
-static __attribute__((unused))
-void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
- off_t offset)
-{
- int n;
-
-#if defined(__NR_mmap2)
- n = __NR_mmap2;
- offset >>= 12;
-#else
- n = __NR_mmap;
-#endif
-
- return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset);
-}
-#endif
-
-/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret()
- * which returns -1 upon error and still satisfy user land that checks for
- * MAP_FAILED.
- */
-
-static __attribute__((unused))
-void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
-{
- void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
-
- if ((unsigned long)ret >= -4095UL) {
- SET_ERRNO(-(long)ret);
- ret = MAP_FAILED;
- }
- return ret;
-}
-
-static __attribute__((unused))
-int sys_munmap(void *addr, size_t length)
-{
- return my_syscall2(__NR_munmap, addr, length);
-}
-
-static __attribute__((unused))
-int munmap(void *addr, size_t length)
-{
- return __sysret(sys_munmap(addr, length));
-}
-
-/*
- * int mount(const char *source, const char *target,
- * const char *fstype, unsigned long flags,
- * const void *data);
- */
-static __attribute__((unused))
-int sys_mount(const char *src, const char *tgt, const char *fst,
- unsigned long flags, const void *data)
-{
- return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
-}
-
-static __attribute__((unused))
-int mount(const char *src, const char *tgt,
- const char *fst, unsigned long flags,
- const void *data)
-{
- return __sysret(sys_mount(src, tgt, fst, flags, data));
-}
-
-/*
- * int openat(int dirfd, const char *path, int flags[, mode_t mode]);
- */
-
-static __attribute__((unused))
-int sys_openat(int dirfd, const char *path, int flags, mode_t mode)
-{
- return my_syscall4(__NR_openat, dirfd, path, flags, mode);
-}
-
-static __attribute__((unused))
-int openat(int dirfd, const char *path, int flags, ...)
-{
- mode_t mode = 0;
-
- if (flags & O_CREAT) {
- va_list args;
-
- va_start(args, flags);
- mode = va_arg(args, mode_t);
- va_end(args);
- }
-
- return __sysret(sys_openat(dirfd, path, flags, mode));
-}
-
-/*
- * int open(const char *path, int flags[, mode_t mode]);
- */
-
-static __attribute__((unused))
-int sys_open(const char *path, int flags, mode_t mode)
-{
- return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
-}
-
-static __attribute__((unused))
-int open(const char *path, int flags, ...)
-{
- mode_t mode = 0;
-
- if (flags & O_CREAT) {
- va_list args;
-
- va_start(args, flags);
- mode = va_arg(args, mode_t);
- va_end(args);
- }
-
- return __sysret(sys_open(path, flags, mode));
-}
-
/*
* int pipe2(int pipefd[2], int flags);
@@ -842,26 +695,6 @@ int pipe(int pipefd[2])
/*
- * int prctl(int option, unsigned long arg2, unsigned long arg3,
- * unsigned long arg4, unsigned long arg5);
- */
-
-static __attribute__((unused))
-int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
- unsigned long arg4, unsigned long arg5)
-{
- return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5);
-}
-
-static __attribute__((unused))
-int prctl(int option, unsigned long arg2, unsigned long arg3,
- unsigned long arg4, unsigned long arg5)
-{
- return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5));
-}
-
-
-/*
* int pivot_root(const char *new, const char *old);
*/
@@ -879,35 +712,6 @@ int pivot_root(const char *new, const char *old)
/*
- * int poll(struct pollfd *fds, int nfds, int timeout);
- */
-
-static __attribute__((unused))
-int sys_poll(struct pollfd *fds, int nfds, int timeout)
-{
-#if defined(__NR_ppoll)
- struct timespec t;
-
- if (timeout >= 0) {
- t.tv_sec = timeout / 1000;
- t.tv_nsec = (timeout % 1000) * 1000000;
- }
- return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0);
-#elif defined(__NR_poll)
- return my_syscall3(__NR_poll, fds, nfds, timeout);
-#else
- return __nolibc_enosys(__func__, fds, nfds, timeout);
-#endif
-}
-
-static __attribute__((unused))
-int poll(struct pollfd *fds, int nfds, int timeout)
-{
- return __sysret(sys_poll(fds, nfds, timeout));
-}
-
-
-/*
* ssize_t read(int fd, void *buf, size_t count);
*/
@@ -925,61 +729,6 @@ ssize_t read(int fd, void *buf, size_t count)
/*
- * int reboot(int cmd);
- * <cmd> is among LINUX_REBOOT_CMD_*
- */
-
-static __attribute__((unused))
-ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
-{
- return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
-}
-
-static __attribute__((unused))
-int reboot(int cmd)
-{
- return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0));
-}
-
-
-/*
- * int getrlimit(int resource, struct rlimit *rlim);
- * int setrlimit(int resource, const struct rlimit *rlim);
- */
-
-static __attribute__((unused))
-int sys_prlimit64(pid_t pid, int resource,
- const struct rlimit64 *new_limit, struct rlimit64 *old_limit)
-{
- return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit);
-}
-
-static __attribute__((unused))
-int getrlimit(int resource, struct rlimit *rlim)
-{
- struct rlimit64 rlim64;
- int ret;
-
- ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64));
- rlim->rlim_cur = rlim64.rlim_cur;
- rlim->rlim_max = rlim64.rlim_max;
-
- return ret;
-}
-
-static __attribute__((unused))
-int setrlimit(int resource, const struct rlimit *rlim)
-{
- struct rlimit64 rlim64 = {
- .rlim_cur = rlim->rlim_cur,
- .rlim_max = rlim->rlim_max,
- };
-
- return __sysret(sys_prlimit64(0, resource, &rlim64, NULL));
-}
-
-
-/*
* int sched_yield(void);
*/
@@ -1023,6 +772,14 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva
t.tv_nsec = timeout->tv_usec * 1000;
}
return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#elif defined(__NR_pselect6_time64)
+ struct __kernel_timespec t;
+
+ if (timeout) {
+ t.tv_sec = timeout->tv_sec;
+ t.tv_nsec = timeout->tv_usec * 1000;
+ }
+ return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
#else
return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout);
#endif
@@ -1051,77 +808,31 @@ int setpgid(pid_t pid, pid_t pgid)
return __sysret(sys_setpgid(pid, pgid));
}
-
/*
- * pid_t setsid(void);
+ * pid_t setpgrp(void)
*/
static __attribute__((unused))
-pid_t sys_setsid(void)
+pid_t setpgrp(void)
{
- return my_syscall0(__NR_setsid);
+ return setpgid(0, 0);
}
-static __attribute__((unused))
-pid_t setsid(void)
-{
- return __sysret(sys_setsid());
-}
/*
- * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf);
- * int stat(const char *path, struct stat *buf);
+ * pid_t setsid(void);
*/
static __attribute__((unused))
-int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
-{
-#ifdef __NR_statx
- return my_syscall5(__NR_statx, fd, path, flags, mask, buf);
-#else
- return __nolibc_enosys(__func__, fd, path, flags, mask, buf);
-#endif
-}
-
-static __attribute__((unused))
-int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
+pid_t sys_setsid(void)
{
- return __sysret(sys_statx(fd, path, flags, mask, buf));
+ return my_syscall0(__NR_setsid);
}
-
static __attribute__((unused))
-int stat(const char *path, struct stat *buf)
+pid_t setsid(void)
{
- struct statx statx;
- long ret;
-
- ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx));
- if (ret == -1)
- return ret;
-
- buf->st_dev = ((statx.stx_dev_minor & 0xff)
- | (statx.stx_dev_major << 8)
- | ((statx.stx_dev_minor & ~0xff) << 12));
- buf->st_ino = statx.stx_ino;
- buf->st_mode = statx.stx_mode;
- buf->st_nlink = statx.stx_nlink;
- buf->st_uid = statx.stx_uid;
- buf->st_gid = statx.stx_gid;
- buf->st_rdev = ((statx.stx_rdev_minor & 0xff)
- | (statx.stx_rdev_major << 8)
- | ((statx.stx_rdev_minor & ~0xff) << 12));
- buf->st_size = statx.stx_size;
- buf->st_blksize = statx.stx_blksize;
- buf->st_blocks = statx.stx_blocks;
- buf->st_atim.tv_sec = statx.stx_atime.tv_sec;
- buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec;
- buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec;
- buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec;
- buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec;
- buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec;
-
- return 0;
+ return __sysret(sys_setsid());
}
@@ -1183,32 +894,6 @@ int umount2(const char *path, int flags)
/*
- * int uname(struct utsname *buf);
- */
-
-struct utsname {
- char sysname[65];
- char nodename[65];
- char release[65];
- char version[65];
- char machine[65];
- char domainname[65];
-};
-
-static __attribute__((unused))
-int sys_uname(struct utsname *buf)
-{
- return my_syscall1(__NR_uname, buf);
-}
-
-static __attribute__((unused))
-int uname(struct utsname *buf)
-{
- return __sysret(sys_uname(buf));
-}
-
-
-/*
* int unlink(const char *path);
*/
@@ -1232,59 +917,6 @@ int unlink(const char *path)
/*
- * pid_t wait(int *status);
- * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage);
- * pid_t waitpid(pid_t pid, int *status, int options);
- */
-
-static __attribute__((unused))
-pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
-#ifdef __NR_wait4
- return my_syscall4(__NR_wait4, pid, status, options, rusage);
-#else
- return __nolibc_enosys(__func__, pid, status, options, rusage);
-#endif
-}
-
-static __attribute__((unused))
-pid_t wait(int *status)
-{
- return __sysret(sys_wait4(-1, status, 0, NULL));
-}
-
-static __attribute__((unused))
-pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
- return __sysret(sys_wait4(pid, status, options, rusage));
-}
-
-
-static __attribute__((unused))
-pid_t waitpid(pid_t pid, int *status, int options)
-{
- return __sysret(sys_wait4(pid, status, options, NULL));
-}
-
-
-/*
- * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options);
- */
-
-static __attribute__((unused))
-int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage)
-{
- return my_syscall5(__NR_waitid, which, pid, infop, options, rusage);
-}
-
-static __attribute__((unused))
-int waitid(int which, pid_t pid, siginfo_t *infop, int options)
-{
- return __sysret(sys_waitid(which, pid, infop, options, NULL));
-}
-
-
-/*
* ssize_t write(int fd, const void *buf, size_t count);
*/
@@ -1317,7 +949,4 @@ int memfd_create(const char *name, unsigned int flags)
return __sysret(sys_memfd_create(name, flags));
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_SYS_H */
diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h
new file mode 100644
index 000000000000..c52463d6c18d
--- /dev/null
+++ b/tools/include/nolibc/sys/auxv.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * auxv definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_AUXV_H
+#define _NOLIBC_SYS_AUXV_H
+
+#include "../crt.h"
+
+static __attribute__((unused))
+unsigned long getauxval(unsigned long type)
+{
+ const unsigned long *auxv = _auxv;
+ unsigned long ret;
+
+ if (!auxv)
+ return 0;
+
+ while (1) {
+ if (!auxv[0] && !auxv[1]) {
+ ret = 0;
+ break;
+ }
+
+ if (auxv[0] == type) {
+ ret = auxv[1];
+ break;
+ }
+
+ auxv += 2;
+ }
+
+ return ret;
+}
+
+#endif /* _NOLIBC_SYS_AUXV_H */
diff --git a/tools/include/nolibc/sys/ioctl.h b/tools/include/nolibc/sys/ioctl.h
new file mode 100644
index 000000000000..fc880687e02a
--- /dev/null
+++ b/tools/include/nolibc/sys/ioctl.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Ioctl definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_IOCTL_H
+#define _NOLIBC_SYS_IOCTL_H
+
+#include "../sys.h"
+
+#include <linux/ioctl.h>
+
+/*
+ * int ioctl(int fd, unsigned long cmd, ... arg);
+ */
+
+static __attribute__((unused))
+long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+ return my_syscall3(__NR_ioctl, fd, cmd, arg);
+}
+
+#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg)))
+
+#endif /* _NOLIBC_SYS_IOCTL_H */
diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h
new file mode 100644
index 000000000000..5228751b458c
--- /dev/null
+++ b/tools/include/nolibc/sys/mman.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * mm definition for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_MMAN_H
+#define _NOLIBC_SYS_MMAN_H
+
+#include "../arch.h"
+#include "../sys.h"
+
+#ifndef sys_mmap
+static __attribute__((unused))
+void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
+ off_t offset)
+{
+ int n;
+
+#if defined(__NR_mmap2)
+ n = __NR_mmap2;
+ offset >>= 12;
+#else
+ n = __NR_mmap;
+#endif
+
+ return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset);
+}
+#endif
+
+/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret()
+ * which returns -1 upon error and still satisfy user land that checks for
+ * MAP_FAILED.
+ */
+
+static __attribute__((unused))
+void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
+{
+ void *ret = sys_mmap(addr, length, prot, flags, fd, offset);
+
+ if ((unsigned long)ret >= -4095UL) {
+ SET_ERRNO(-(long)ret);
+ ret = MAP_FAILED;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+void *sys_mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address)
+{
+ return (void *)my_syscall5(__NR_mremap, old_address, old_size,
+ new_size, flags, new_address);
+}
+
+static __attribute__((unused))
+void *mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address)
+{
+ void *ret = sys_mremap(old_address, old_size, new_size, flags, new_address);
+
+ if ((unsigned long)ret >= -4095UL) {
+ SET_ERRNO(-(long)ret);
+ ret = MAP_FAILED;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
+int sys_munmap(void *addr, size_t length)
+{
+ return my_syscall2(__NR_munmap, addr, length);
+}
+
+static __attribute__((unused))
+int munmap(void *addr, size_t length)
+{
+ return __sysret(sys_munmap(addr, length));
+}
+
+#endif /* _NOLIBC_SYS_MMAN_H */
diff --git a/tools/include/nolibc/sys/mount.h b/tools/include/nolibc/sys/mount.h
new file mode 100644
index 000000000000..e39ec02ea24c
--- /dev/null
+++ b/tools/include/nolibc/sys/mount.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Mount definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_MOUNT_H
+#define _NOLIBC_SYS_MOUNT_H
+
+#include "../sys.h"
+
+#include <linux/mount.h>
+
+/*
+ * int mount(const char *source, const char *target,
+ * const char *fstype, unsigned long flags,
+ * const void *data);
+ */
+static __attribute__((unused))
+int sys_mount(const char *src, const char *tgt, const char *fst,
+ unsigned long flags, const void *data)
+{
+ return my_syscall5(__NR_mount, src, tgt, fst, flags, data);
+}
+
+static __attribute__((unused))
+int mount(const char *src, const char *tgt,
+ const char *fst, unsigned long flags,
+ const void *data)
+{
+ return __sysret(sys_mount(src, tgt, fst, flags, data));
+}
+
+#endif /* _NOLIBC_SYS_MOUNT_H */
diff --git a/tools/include/nolibc/sys/prctl.h b/tools/include/nolibc/sys/prctl.h
new file mode 100644
index 000000000000..0205907b6ac8
--- /dev/null
+++ b/tools/include/nolibc/sys/prctl.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Prctl definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_PRCTL_H
+#define _NOLIBC_SYS_PRCTL_H
+
+#include "../sys.h"
+
+#include <linux/prctl.h>
+
+/*
+ * int prctl(int option, unsigned long arg2, unsigned long arg3,
+ * unsigned long arg4, unsigned long arg5);
+ */
+
+static __attribute__((unused))
+int sys_prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
+{
+ return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5);
+}
+
+static __attribute__((unused))
+int prctl(int option, unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
+{
+ return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5));
+}
+
+#endif /* _NOLIBC_SYS_PRCTL_H */
diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h
new file mode 100644
index 000000000000..8d9749f1c845
--- /dev/null
+++ b/tools/include/nolibc/sys/random.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * random definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_RANDOM_H
+#define _NOLIBC_SYS_RANDOM_H
+
+#include "../arch.h"
+#include "../sys.h"
+
+#include <linux/random.h>
+
+/*
+ * ssize_t getrandom(void *buf, size_t buflen, unsigned int flags);
+ */
+
+static __attribute__((unused))
+ssize_t sys_getrandom(void *buf, size_t buflen, unsigned int flags)
+{
+ return my_syscall3(__NR_getrandom, buf, buflen, flags);
+}
+
+static __attribute__((unused))
+ssize_t getrandom(void *buf, size_t buflen, unsigned int flags)
+{
+ return __sysret(sys_getrandom(buf, buflen, flags));
+}
+
+#endif /* _NOLIBC_SYS_RANDOM_H */
diff --git a/tools/include/nolibc/sys/reboot.h b/tools/include/nolibc/sys/reboot.h
new file mode 100644
index 000000000000..4a1e435be669
--- /dev/null
+++ b/tools/include/nolibc/sys/reboot.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Reboot definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_REBOOT_H
+#define _NOLIBC_SYS_REBOOT_H
+
+#include "../sys.h"
+
+#include <linux/reboot.h>
+
+/*
+ * int reboot(int cmd);
+ * <cmd> is among LINUX_REBOOT_CMD_*
+ */
+
+static __attribute__((unused))
+ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
+{
+ return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg);
+}
+
+static __attribute__((unused))
+int reboot(int cmd)
+{
+ return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0));
+}
+
+#endif /* _NOLIBC_SYS_REBOOT_H */
diff --git a/tools/include/nolibc/sys/resource.h b/tools/include/nolibc/sys/resource.h
new file mode 100644
index 000000000000..b990f914dc56
--- /dev/null
+++ b/tools/include/nolibc/sys/resource.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Resource definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_RESOURCE_H
+#define _NOLIBC_SYS_RESOURCE_H
+
+#include "../sys.h"
+
+#include <linux/resource.h>
+
+/*
+ * int getrlimit(int resource, struct rlimit *rlim);
+ * int setrlimit(int resource, const struct rlimit *rlim);
+ */
+
+static __attribute__((unused))
+int sys_prlimit64(pid_t pid, int resource,
+ const struct rlimit64 *new_limit, struct rlimit64 *old_limit)
+{
+ return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit);
+}
+
+static __attribute__((unused))
+int getrlimit(int resource, struct rlimit *rlim)
+{
+ struct rlimit64 rlim64;
+ int ret;
+
+ ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64));
+ rlim->rlim_cur = rlim64.rlim_cur;
+ rlim->rlim_max = rlim64.rlim_max;
+
+ return ret;
+}
+
+static __attribute__((unused))
+int setrlimit(int resource, const struct rlimit *rlim)
+{
+ struct rlimit64 rlim64 = {
+ .rlim_cur = rlim->rlim_cur,
+ .rlim_max = rlim->rlim_max,
+ };
+
+ return __sysret(sys_prlimit64(0, resource, &rlim64, NULL));
+}
+
+#endif /* _NOLIBC_SYS_RESOURCE_H */
diff --git a/tools/include/nolibc/sys/stat.h b/tools/include/nolibc/sys/stat.h
new file mode 100644
index 000000000000..8b4d80e3ea03
--- /dev/null
+++ b/tools/include/nolibc/sys/stat.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * stat definition for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_STAT_H
+#define _NOLIBC_SYS_STAT_H
+
+#include "../arch.h"
+#include "../types.h"
+#include "../sys.h"
+
+/*
+ * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf);
+ * int stat(const char *path, struct stat *buf);
+ * int fstatat(int fd, const char *path, struct stat *buf, int flag);
+ * int fstat(int fildes, struct stat *buf);
+ * int lstat(const char *path, struct stat *buf);
+ */
+
+static __attribute__((unused))
+int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
+{
+#ifdef __NR_statx
+ return my_syscall5(__NR_statx, fd, path, flags, mask, buf);
+#else
+ return __nolibc_enosys(__func__, fd, path, flags, mask, buf);
+#endif
+}
+
+static __attribute__((unused))
+int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf)
+{
+ return __sysret(sys_statx(fd, path, flags, mask, buf));
+}
+
+
+static __attribute__((unused))
+int fstatat(int fd, const char *path, struct stat *buf, int flag)
+{
+ struct statx statx;
+ long ret;
+
+ ret = __sysret(sys_statx(fd, path, flag | AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx));
+ if (ret == -1)
+ return ret;
+
+ buf->st_dev = ((statx.stx_dev_minor & 0xff)
+ | (statx.stx_dev_major << 8)
+ | ((statx.stx_dev_minor & ~0xff) << 12));
+ buf->st_ino = statx.stx_ino;
+ buf->st_mode = statx.stx_mode;
+ buf->st_nlink = statx.stx_nlink;
+ buf->st_uid = statx.stx_uid;
+ buf->st_gid = statx.stx_gid;
+ buf->st_rdev = ((statx.stx_rdev_minor & 0xff)
+ | (statx.stx_rdev_major << 8)
+ | ((statx.stx_rdev_minor & ~0xff) << 12));
+ buf->st_size = statx.stx_size;
+ buf->st_blksize = statx.stx_blksize;
+ buf->st_blocks = statx.stx_blocks;
+ buf->st_atim.tv_sec = statx.stx_atime.tv_sec;
+ buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec;
+ buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec;
+ buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec;
+ buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec;
+ buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec;
+
+ return 0;
+}
+
+static __attribute__((unused))
+int stat(const char *path, struct stat *buf)
+{
+ return fstatat(AT_FDCWD, path, buf, 0);
+}
+
+static __attribute__((unused))
+int fstat(int fildes, struct stat *buf)
+{
+ return fstatat(fildes, "", buf, AT_EMPTY_PATH);
+}
+
+static __attribute__((unused))
+int lstat(const char *path, struct stat *buf)
+{
+ return fstatat(AT_FDCWD, path, buf, AT_SYMLINK_NOFOLLOW);
+}
+
+#endif /* _NOLIBC_SYS_STAT_H */
diff --git a/tools/include/nolibc/sys/syscall.h b/tools/include/nolibc/sys/syscall.h
new file mode 100644
index 000000000000..4bf97f1386a0
--- /dev/null
+++ b/tools/include/nolibc/sys/syscall.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * syscall() definition for NOLIBC
+ * Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_SYSCALL_H
+#define _NOLIBC_SYS_SYSCALL_H
+
+#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N
+#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0)
+#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__))
+#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__)
+#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__)
+
+#endif /* _NOLIBC_SYS_SYSCALL_H */
diff --git a/tools/include/nolibc/sys/sysmacros.h b/tools/include/nolibc/sys/sysmacros.h
new file mode 100644
index 000000000000..37c33f030f02
--- /dev/null
+++ b/tools/include/nolibc/sys/sysmacros.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Sysmacro definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_SYSMACROS_H
+#define _NOLIBC_SYS_SYSMACROS_H
+
+#include "../std.h"
+
+/* WARNING, it only deals with the 4096 first majors and 256 first minors */
+#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff)))
+#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff))
+#define minor(dev) ((unsigned int)((dev) & 0xff))
+
+#endif /* _NOLIBC_SYS_SYSMACROS_H */
diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h
new file mode 100644
index 000000000000..33782a19aae9
--- /dev/null
+++ b/tools/include/nolibc/sys/time.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * time definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_TIME_H
+#define _NOLIBC_SYS_TIME_H
+
+#include "../arch.h"
+#include "../sys.h"
+
+static int sys_clock_gettime(clockid_t clockid, struct timespec *tp);
+
+/*
+ * int gettimeofday(struct timeval *tv, struct timezone *tz);
+ */
+
+static __attribute__((unused))
+int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+#ifdef __NR_gettimeofday
+ return my_syscall2(__NR_gettimeofday, tv, tz);
+#else
+ (void) tz; /* Non-NULL tz is undefined behaviour */
+
+ struct timespec tp;
+ int ret;
+
+ ret = sys_clock_gettime(CLOCK_REALTIME, &tp);
+ if (!ret && tv) {
+ tv->tv_sec = tp.tv_sec;
+ tv->tv_usec = tp.tv_nsec / 1000;
+ }
+
+ return ret;
+#endif
+}
+
+static __attribute__((unused))
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ return __sysret(sys_gettimeofday(tv, tz));
+}
+
+#endif /* _NOLIBC_SYS_TIME_H */
diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h
new file mode 100644
index 000000000000..4375d546ba58
--- /dev/null
+++ b/tools/include/nolibc/sys/timerfd.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * timerfd definitions for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_TIMERFD_H
+#define _NOLIBC_SYS_TIMERFD_H
+
+#include "../sys.h"
+#include "../time.h"
+
+#include <linux/timerfd.h>
+
+
+static __attribute__((unused))
+int sys_timerfd_create(int clockid, int flags)
+{
+ return my_syscall2(__NR_timerfd_create, clockid, flags);
+}
+
+static __attribute__((unused))
+int timerfd_create(int clockid, int flags)
+{
+ return __sysret(sys_timerfd_create(clockid, flags));
+}
+
+
+static __attribute__((unused))
+int sys_timerfd_gettime(int fd, struct itimerspec *curr_value)
+{
+#if defined(__NR_timerfd_gettime)
+ return my_syscall2(__NR_timerfd_gettime, fd, curr_value);
+#elif defined(__NR_timerfd_gettime64)
+ struct __kernel_itimerspec kcurr_value;
+ int ret;
+
+ ret = my_syscall2(__NR_timerfd_gettime64, fd, &kcurr_value);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value);
+ return ret;
+#else
+ return __nolibc_enosys(__func__, fd, curr_value);
+#endif
+}
+
+static __attribute__((unused))
+int timerfd_gettime(int fd, struct itimerspec *curr_value)
+{
+ return __sysret(sys_timerfd_gettime(fd, curr_value));
+}
+
+
+static __attribute__((unused))
+int sys_timerfd_settime(int fd, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+#if defined(__NR_timerfd_settime)
+ return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value);
+#elif defined(__NR_timerfd_settime64)
+ struct __kernel_itimerspec knew_value, kold_value;
+ int ret;
+
+ __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value);
+ __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval);
+ ret = my_syscall4(__NR_timerfd_settime64, fd, flags, &knew_value, &kold_value);
+ if (old_value) {
+ __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value);
+ }
+ return ret;
+#else
+ return __nolibc_enosys(__func__, fd, flags, new_value, old_value);
+#endif
+}
+
+static __attribute__((unused))
+int timerfd_settime(int fd, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+ return __sysret(sys_timerfd_settime(fd, flags, new_value, old_value));
+}
+
+#endif /* _NOLIBC_SYS_TIMERFD_H */
diff --git a/tools/include/nolibc/sys/types.h b/tools/include/nolibc/sys/types.h
new file mode 100644
index 000000000000..8a264a13275c
--- /dev/null
+++ b/tools/include/nolibc/sys/types.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * sys/types.h shim for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+#include "../types.h"
diff --git a/tools/include/nolibc/sys/utsname.h b/tools/include/nolibc/sys/utsname.h
new file mode 100644
index 000000000000..01023e1bb439
--- /dev/null
+++ b/tools/include/nolibc/sys/utsname.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Utsname definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_UTSNAME_H
+#define _NOLIBC_SYS_UTSNAME_H
+
+#include "../sys.h"
+
+#include <linux/utsname.h>
+
+/*
+ * int uname(struct utsname *buf);
+ */
+
+struct utsname {
+ char sysname[65];
+ char nodename[65];
+ char release[65];
+ char version[65];
+ char machine[65];
+ char domainname[65];
+};
+
+static __attribute__((unused))
+int sys_uname(struct utsname *buf)
+{
+ return my_syscall1(__NR_uname, buf);
+}
+
+static __attribute__((unused))
+int uname(struct utsname *buf)
+{
+ return __sysret(sys_uname(buf));
+}
+
+#endif /* _NOLIBC_SYS_UTSNAME_H */
diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h
new file mode 100644
index 000000000000..4d44e3da0ba8
--- /dev/null
+++ b/tools/include/nolibc/sys/wait.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * wait definitions for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_WAIT_H
+#define _NOLIBC_SYS_WAIT_H
+
+#include "../arch.h"
+#include "../std.h"
+#include "../types.h"
+
+/*
+ * pid_t wait(int *status);
+ * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage);
+ * pid_t waitpid(pid_t pid, int *status, int options);
+ * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options);
+ */
+
+static __attribute__((unused))
+pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+#ifdef __NR_wait4
+ return my_syscall4(__NR_wait4, pid, status, options, rusage);
+#else
+ return __nolibc_enosys(__func__, pid, status, options, rusage);
+#endif
+}
+
+static __attribute__((unused))
+pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
+{
+ return __sysret(sys_wait4(pid, status, options, rusage));
+}
+
+static __attribute__((unused))
+int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage)
+{
+ return my_syscall5(__NR_waitid, which, pid, infop, options, rusage);
+}
+
+static __attribute__((unused))
+int waitid(int which, pid_t pid, siginfo_t *infop, int options)
+{
+ return __sysret(sys_waitid(which, pid, infop, options, NULL));
+}
+
+
+static __attribute__((unused))
+pid_t waitpid(pid_t pid, int *status, int options)
+{
+ int idtype, ret;
+ siginfo_t info;
+ pid_t id;
+
+ if (pid == INT_MIN) {
+ SET_ERRNO(ESRCH);
+ return -1;
+ } else if (pid < -1) {
+ idtype = P_PGID;
+ id = -pid;
+ } else if (pid == -1) {
+ idtype = P_ALL;
+ id = 0;
+ } else if (pid == 0) {
+ idtype = P_PGID;
+ id = 0;
+ } else {
+ idtype = P_PID;
+ id = pid;
+ }
+
+ options |= WEXITED;
+
+ ret = waitid(idtype, id, &info, options);
+ if (ret)
+ return ret;
+
+ switch (info.si_code) {
+ case 0:
+ *status = 0;
+ break;
+ case CLD_EXITED:
+ *status = (info.si_status & 0xff) << 8;
+ break;
+ case CLD_KILLED:
+ *status = info.si_status & 0x7f;
+ break;
+ case CLD_DUMPED:
+ *status = (info.si_status & 0x7f) | 0x80;
+ break;
+ case CLD_STOPPED:
+ case CLD_TRAPPED:
+ *status = (info.si_status << 8) + 0x7f;
+ break;
+ case CLD_CONTINUED:
+ *status = 0xffff;
+ break;
+ default:
+ return -1;
+ }
+
+ return info.si_pid;
+}
+
+static __attribute__((unused))
+pid_t wait(int *status)
+{
+ return waitpid(-1, status, 0);
+}
+
+#endif /* _NOLIBC_SYS_WAIT_H */
diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h
index 84655361b9ad..fc387940d51f 100644
--- a/tools/include/nolibc/time.h
+++ b/tools/include/nolibc/time.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_TIME_H
#define _NOLIBC_TIME_H
@@ -12,6 +15,106 @@
#include "types.h"
#include "sys.h"
+#include <linux/signal.h>
+#include <linux/time.h>
+
+static __inline__
+void __nolibc_timespec_user_to_kernel(const struct timespec *ts, struct __kernel_timespec *kts)
+{
+ kts->tv_sec = ts->tv_sec;
+ kts->tv_nsec = ts->tv_nsec;
+}
+
+static __inline__
+void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struct timespec *ts)
+{
+ ts->tv_sec = kts->tv_sec;
+ ts->tv_nsec = kts->tv_nsec;
+}
+
+/*
+ * int clock_getres(clockid_t clockid, struct timespec *res);
+ * int clock_gettime(clockid_t clockid, struct timespec *tp);
+ * int clock_settime(clockid_t clockid, const struct timespec *tp);
+ */
+
+static __attribute__((unused))
+int sys_clock_getres(clockid_t clockid, struct timespec *res)
+{
+#if defined(__NR_clock_getres)
+ return my_syscall2(__NR_clock_getres, clockid, res);
+#elif defined(__NR_clock_getres_time64)
+ struct __kernel_timespec kres;
+ int ret;
+
+ ret = my_syscall2(__NR_clock_getres_time64, clockid, &kres);
+ if (res)
+ __nolibc_timespec_kernel_to_user(&kres, res);
+ return ret;
+#else
+ return __nolibc_enosys(__func__, clockid, res);
+#endif
+}
+
+static __attribute__((unused))
+int clock_getres(clockid_t clockid, struct timespec *res)
+{
+ return __sysret(sys_clock_getres(clockid, res));
+}
+
+static __attribute__((unused))
+int sys_clock_gettime(clockid_t clockid, struct timespec *tp)
+{
+#if defined(__NR_clock_gettime)
+ return my_syscall2(__NR_clock_gettime, clockid, tp);
+#elif defined(__NR_clock_gettime64)
+ struct __kernel_timespec ktp;
+ int ret;
+
+ ret = my_syscall2(__NR_clock_gettime64, clockid, &ktp);
+ if (tp)
+ __nolibc_timespec_kernel_to_user(&ktp, tp);
+ return ret;
+#else
+ return __nolibc_enosys(__func__, clockid, tp);
+#endif
+}
+
+static __attribute__((unused))
+int clock_gettime(clockid_t clockid, struct timespec *tp)
+{
+ return __sysret(sys_clock_gettime(clockid, tp));
+}
+
+static __attribute__((unused))
+int sys_clock_settime(clockid_t clockid, struct timespec *tp)
+{
+#if defined(__NR_clock_settime)
+ return my_syscall2(__NR_clock_settime, clockid, tp);
+#elif defined(__NR_clock_settime64)
+ struct __kernel_timespec ktp;
+
+ __nolibc_timespec_user_to_kernel(tp, &ktp);
+ return my_syscall2(__NR_clock_settime64, clockid, &ktp);
+#else
+ return __nolibc_enosys(__func__, clockid, tp);
+#endif
+}
+
+static __attribute__((unused))
+int clock_settime(clockid_t clockid, struct timespec *tp)
+{
+ return __sysret(sys_clock_settime(clockid, tp));
+}
+
+
+static __inline__
+double difftime(time_t time1, time_t time2)
+{
+ return time1 - time2;
+}
+
+
static __attribute__((unused))
time_t time(time_t *tptr)
{
@@ -25,7 +128,89 @@ time_t time(time_t *tptr)
return tv.tv_sec;
}
-/* make sure to include all global symbols */
-#include "nolibc.h"
+
+/*
+ * int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid);
+ * int timer_gettime(timer_t timerid, struct itimerspec *curr_value);
+ * int timer_settime(timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value);
+ */
+
+static __attribute__((unused))
+int sys_timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid)
+{
+ return my_syscall3(__NR_timer_create, clockid, evp, timerid);
+}
+
+static __attribute__((unused))
+int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid)
+{
+ return __sysret(sys_timer_create(clockid, evp, timerid));
+}
+
+static __attribute__((unused))
+int sys_timer_delete(timer_t timerid)
+{
+ return my_syscall1(__NR_timer_delete, timerid);
+}
+
+static __attribute__((unused))
+int timer_delete(timer_t timerid)
+{
+ return __sysret(sys_timer_delete(timerid));
+}
+
+static __attribute__((unused))
+int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value)
+{
+#if defined(__NR_timer_gettime)
+ return my_syscall2(__NR_timer_gettime, timerid, curr_value);
+#elif defined(__NR_timer_gettime64)
+ struct __kernel_itimerspec kcurr_value;
+ int ret;
+
+ ret = my_syscall2(__NR_timer_gettime64, timerid, &kcurr_value);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value);
+ return ret;
+#else
+ return __nolibc_enosys(__func__, timerid, curr_value);
+#endif
+}
+
+static __attribute__((unused))
+int timer_gettime(timer_t timerid, struct itimerspec *curr_value)
+{
+ return __sysret(sys_timer_gettime(timerid, curr_value));
+}
+
+static __attribute__((unused))
+int sys_timer_settime(timer_t timerid, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+#if defined(__NR_timer_settime)
+ return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value);
+#elif defined(__NR_timer_settime64)
+ struct __kernel_itimerspec knew_value, kold_value;
+ int ret;
+
+ __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value);
+ __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval);
+ ret = my_syscall4(__NR_timer_settime64, timerid, flags, &knew_value, &kold_value);
+ if (old_value) {
+ __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval);
+ __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value);
+ }
+ return ret;
+#else
+ return __nolibc_enosys(__func__, timerid, flags, new_value, old_value);
+#endif
+}
+
+static __attribute__((unused))
+int timer_settime(timer_t timerid, int flags,
+ const struct itimerspec *new_value, struct itimerspec *old_value)
+{
+ return __sysret(sys_timer_settime(timerid, flags, new_value, old_value));
+}
#endif /* _NOLIBC_TIME_H */
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index b26a5d0c417c..30904be544ed 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -4,16 +4,17 @@
* Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_TYPES_H
#define _NOLIBC_TYPES_H
#include "std.h"
#include <linux/mman.h>
-#include <linux/reboot.h> /* for LINUX_REBOOT_* */
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/wait.h>
-#include <linux/resource.h>
/* Only the generic macros and types may be defined here. The arch-specific
@@ -156,20 +157,6 @@ typedef struct {
__set->fds[__idx] = 0; \
} while (0)
-/* for poll() */
-#define POLLIN 0x0001
-#define POLLPRI 0x0002
-#define POLLOUT 0x0004
-#define POLLERR 0x0008
-#define POLLHUP 0x0010
-#define POLLNVAL 0x0020
-
-struct pollfd {
- int fd;
- short int events;
- short int revents;
-};
-
/* for getdents64() */
struct linux_dirent64 {
uint64_t d_ino;
@@ -198,14 +185,8 @@ struct stat {
union { time_t st_ctime; struct timespec st_ctim; }; /* time of last status change */
};
-/* WARNING, it only deals with the 4096 first majors and 256 first minors */
-#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff)))
-#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff))
-#define minor(dev) ((unsigned int)(((dev) & 0xff))
-
-#ifndef offsetof
-#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
-#endif
+typedef __kernel_clockid_t clockid_t;
+typedef int timer_t;
#ifndef container_of
#define container_of(PTR, TYPE, FIELD) ({ \
@@ -214,7 +195,4 @@ struct stat {
})
#endif
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_TYPES_H */
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
index e38f3660c051..25bfc7732ec7 100644
--- a/tools/include/nolibc/unistd.h
+++ b/tools/include/nolibc/unistd.h
@@ -4,6 +4,9 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
#ifndef _NOLIBC_UNISTD_H
#define _NOLIBC_UNISTD_H
@@ -17,6 +20,34 @@
#define STDOUT_FILENO 1
#define STDERR_FILENO 2
+#define F_OK 0
+#define X_OK 1
+#define W_OK 2
+#define R_OK 4
+
+/*
+ * int access(const char *path, int amode);
+ * int faccessat(int fd, const char *path, int amode, int flag);
+ */
+
+static __attribute__((unused))
+int sys_faccessat(int fd, const char *path, int amode, int flag)
+{
+ return my_syscall4(__NR_faccessat, fd, path, amode, flag);
+}
+
+static __attribute__((unused))
+int faccessat(int fd, const char *path, int amode, int flag)
+{
+ return __sysret(sys_faccessat(fd, path, amode, flag));
+}
+
+static __attribute__((unused))
+int access(const char *path, int amode)
+{
+ return faccessat(AT_FDCWD, path, amode, 0);
+}
+
static __attribute__((unused))
int msleep(unsigned int msecs)
@@ -56,13 +87,4 @@ int tcsetpgrp(int fd, pid_t pid)
return ioctl(fd, TIOCSPGRP, &pid);
}
-#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N
-#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0)
-#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__))
-#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__)
-#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__)
-
-/* make sure to include all global symbols */
-#include "nolibc.h"
-
#endif /* _NOLIBC_UNISTD_H */
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index aa5016ff3d91..f333a0ac4ee4 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -145,6 +145,8 @@
#define SO_RCVPRIORITY 82
+#define SO_PASSRIGHTS 83
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index fd404729b115..85180e4aaa5a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1506,7 +1506,7 @@ union bpf_attr {
__s32 map_token_fd;
};
- struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
__u32 map_fd;
__aligned_u64 key;
union {
@@ -1995,11 +1995,15 @@ union bpf_attr {
* long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
* Store *len* bytes from address *from* into the packet
- * associated to *skb*, at *offset*. *flags* are a combination of
- * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
- * checksum for the packet after storing the bytes) and
- * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
- * **->swhash** and *skb*\ **->l4hash** to 0).
+ * associated to *skb*, at *offset*. The *flags* are a combination
+ * of the following values:
+ *
+ * **BPF_F_RECOMPUTE_CSUM**
+ * Automatically update *skb*\ **->csum** after storing the
+ * bytes.
+ * **BPF_F_INVALIDATE_HASH**
+ * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\
+ * **->l4hash** to 0.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -2051,7 +2055,7 @@ union bpf_attr {
* untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
* for updates resulting in a null checksum the value is set to
* **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
- * the checksum is to be computed against a pseudo-header.
+ * that the modified header field is part of the pseudo-header.
*
* This helper works in combination with **bpf_csum_diff**\ (),
* which does not update the checksum in-place, but offers more
@@ -6723,6 +6727,7 @@ struct bpf_link_info {
__u32 name_len;
__u32 offset; /* offset from file_name */
__u64 cookie;
+ __u64 ref_ctr_offset;
} uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */
struct {
__aligned_u64 func_name; /* in/out */
diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h
new file mode 100644
index 000000000000..e710967c7c26
--- /dev/null
+++ b/tools/include/uapi/linux/fanotify.h
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FANOTIFY_H
+#define _UAPI_LINUX_FANOTIFY_H
+
+#include <linux/types.h>
+
+/* the following events that user-space can register for */
+#define FAN_ACCESS 0x00000001 /* File was accessed */
+#define FAN_MODIFY 0x00000002 /* File was modified */
+#define FAN_ATTRIB 0x00000004 /* Metadata changed */
+#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */
+#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */
+#define FAN_OPEN 0x00000020 /* File was opened */
+#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */
+#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */
+#define FAN_CREATE 0x00000100 /* Subfile was created */
+#define FAN_DELETE 0x00000200 /* Subfile was deleted */
+#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */
+#define FAN_MOVE_SELF 0x00000800 /* Self was moved */
+#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */
+
+#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */
+#define FAN_FS_ERROR 0x00008000 /* Filesystem error */
+
+#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */
+#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */
+#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */
+/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */
+
+#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */
+#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
+#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
+
+#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
+
+#define FAN_RENAME 0x10000000 /* File was renamed */
+
+#define FAN_ONDIR 0x40000000 /* Event occurred against dir */
+
+/* helper events */
+#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */
+#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */
+
+/* flags used for fanotify_init() */
+#define FAN_CLOEXEC 0x00000001
+#define FAN_NONBLOCK 0x00000002
+
+/* These are NOT bitwise flags. Both bits are used together. */
+#define FAN_CLASS_NOTIF 0x00000000
+#define FAN_CLASS_CONTENT 0x00000004
+#define FAN_CLASS_PRE_CONTENT 0x00000008
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \
+ FAN_CLASS_PRE_CONTENT)
+
+#define FAN_UNLIMITED_QUEUE 0x00000010
+#define FAN_UNLIMITED_MARKS 0x00000020
+#define FAN_ENABLE_AUDIT 0x00000040
+
+/* Flags to determine fanotify event format */
+#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */
+#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */
+#define FAN_REPORT_FID 0x00000200 /* Report unique file id */
+#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */
+#define FAN_REPORT_NAME 0x00000800 /* Report events with name */
+#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */
+#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */
+#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
+
+/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */
+#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME)
+/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */
+#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \
+ FAN_REPORT_FID | FAN_REPORT_TARGET_FID)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \
+ FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\
+ FAN_UNLIMITED_MARKS)
+
+/* flags used for fanotify_modify_mark() */
+#define FAN_MARK_ADD 0x00000001
+#define FAN_MARK_REMOVE 0x00000002
+#define FAN_MARK_DONT_FOLLOW 0x00000004
+#define FAN_MARK_ONLYDIR 0x00000008
+/* FAN_MARK_MOUNT is 0x00000010 */
+#define FAN_MARK_IGNORED_MASK 0x00000020
+#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040
+#define FAN_MARK_FLUSH 0x00000080
+/* FAN_MARK_FILESYSTEM is 0x00000100 */
+#define FAN_MARK_EVICTABLE 0x00000200
+/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */
+#define FAN_MARK_IGNORE 0x00000400
+
+/* These are NOT bitwise flags. Both bits can be used togther. */
+#define FAN_MARK_INODE 0x00000000
+#define FAN_MARK_MOUNT 0x00000010
+#define FAN_MARK_FILESYSTEM 0x00000100
+#define FAN_MARK_MNTNS 0x00000110
+
+/*
+ * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY
+ * for non-inode mark types.
+ */
+#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\
+ FAN_MARK_REMOVE |\
+ FAN_MARK_DONT_FOLLOW |\
+ FAN_MARK_ONLYDIR |\
+ FAN_MARK_MOUNT |\
+ FAN_MARK_IGNORED_MASK |\
+ FAN_MARK_IGNORED_SURV_MODIFY |\
+ FAN_MARK_FLUSH)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_EVENTS (FAN_ACCESS |\
+ FAN_MODIFY |\
+ FAN_CLOSE |\
+ FAN_OPEN)
+
+/*
+ * All events which require a permission response from userspace
+ */
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\
+ FAN_ACCESS_PERM)
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
+#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\
+ FAN_ALL_PERM_EVENTS |\
+ FAN_Q_OVERFLOW)
+
+#define FANOTIFY_METADATA_VERSION 3
+
+struct fanotify_event_metadata {
+ __u32 event_len;
+ __u8 vers;
+ __u8 reserved;
+ __u16 metadata_len;
+ __aligned_u64 mask;
+ __s32 fd;
+ __s32 pid;
+};
+
+#define FAN_EVENT_INFO_TYPE_FID 1
+#define FAN_EVENT_INFO_TYPE_DFID_NAME 2
+#define FAN_EVENT_INFO_TYPE_DFID 3
+#define FAN_EVENT_INFO_TYPE_PIDFD 4
+#define FAN_EVENT_INFO_TYPE_ERROR 5
+#define FAN_EVENT_INFO_TYPE_RANGE 6
+#define FAN_EVENT_INFO_TYPE_MNT 7
+
+/* Special info types for FAN_RENAME */
+#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10
+/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */
+#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12
+/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */
+
+/* Variable length info record following event metadata */
+struct fanotify_event_info_header {
+ __u8 info_type;
+ __u8 pad;
+ __u16 len;
+};
+
+/*
+ * Unique file identifier info record.
+ * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID,
+ * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME.
+ * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated
+ * name immediately after the file handle.
+ */
+struct fanotify_event_info_fid {
+ struct fanotify_event_info_header hdr;
+ __kernel_fsid_t fsid;
+ /*
+ * Following is an opaque struct file_handle that can be passed as
+ * an argument to open_by_handle_at(2).
+ */
+ unsigned char handle[];
+};
+
+/*
+ * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD.
+ * It holds a pidfd for the pid that was responsible for generating an event.
+ */
+struct fanotify_event_info_pidfd {
+ struct fanotify_event_info_header hdr;
+ __s32 pidfd;
+};
+
+struct fanotify_event_info_error {
+ struct fanotify_event_info_header hdr;
+ __s32 error;
+ __u32 error_count;
+};
+
+struct fanotify_event_info_range {
+ struct fanotify_event_info_header hdr;
+ __u32 pad;
+ __u64 offset;
+ __u64 count;
+};
+
+struct fanotify_event_info_mnt {
+ struct fanotify_event_info_header hdr;
+ __u64 mnt_id;
+};
+
+/*
+ * User space may need to record additional information about its decision.
+ * The extra information type records what kind of information is included.
+ * The default is none. We also define an extra information buffer whose
+ * size is determined by the extra information type.
+ *
+ * If the information type is Audit Rule, then the information following
+ * is the rule number that triggered the user space decision that
+ * requires auditing.
+ */
+
+#define FAN_RESPONSE_INFO_NONE 0
+#define FAN_RESPONSE_INFO_AUDIT_RULE 1
+
+struct fanotify_response {
+ __s32 fd;
+ __u32 response;
+};
+
+struct fanotify_response_info_header {
+ __u8 type;
+ __u8 pad;
+ __u16 len;
+};
+
+struct fanotify_response_info_audit_rule {
+ struct fanotify_response_info_header hdr;
+ __u32 rule_number;
+ __u32 subj_trust;
+ __u32 obj_trust;
+};
+
+/* Legit userspace responses to a _PERM event */
+#define FAN_ALLOW 0x01
+#define FAN_DENY 0x02
+/* errno other than EPERM can specified in upper byte of deny response */
+#define FAN_ERRNO_BITS 8
+#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS)
+#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1)
+#define FAN_DENY_ERRNO(err) \
+ (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT))
+
+#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */
+#define FAN_INFO 0x20 /* Bitmask to indicate additional information */
+
+/* No fd set in event */
+#define FAN_NOFD -1
+#define FAN_NOPIDFD FAN_NOFD
+#define FAN_EPIDFD -2
+
+/* Helper functions to deal with fanotify_event_metadata buffers */
+#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
+
+#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \
+ (struct fanotify_event_metadata*)(((char *)(meta)) + \
+ (meta)->event_len))
+
+#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \
+ (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \
+ (long)(meta)->event_len <= (long)(len))
+
+#endif /* _UAPI_LINUX_FANOTIFY_H */
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 42869770776e..44f2bb93e7e6 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -7,8 +7,8 @@
* Magnus Karlsson <magnus.karlsson@intel.com>
*/
-#ifndef _LINUX_IF_XDP_H
-#define _LINUX_IF_XDP_H
+#ifndef _UAPI_LINUX_IF_XDP_H
+#define _UAPI_LINUX_IF_XDP_H
#include <linux/types.h>
@@ -180,4 +180,4 @@ struct xdp_desc {
/* TX packet carries valid metadata. */
#define XDP_TX_METADATA (1 << 1)
-#endif /* _LINUX_IF_XDP_H */
+#endif /* _UAPI_LINUX_IF_XDP_H */
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
new file mode 100644
index 000000000000..7fa67c2031a5
--- /dev/null
+++ b/tools/include/uapi/linux/mount.h
@@ -0,0 +1,235 @@
+#ifndef _UAPI_LINUX_MOUNT_H
+#define _UAPI_LINUX_MOUNT_H
+
+#include <linux/types.h>
+
+/*
+ * These are the fs-independent mount-flags: up to 32 flags are supported
+ *
+ * Usage of these is restricted within the kernel to core mount(2) code and
+ * callers of sys_mount() only. Filesystems should be using the SB_*
+ * equivalent instead.
+ */
+#define MS_RDONLY 1 /* Mount read-only */
+#define MS_NOSUID 2 /* Ignore suid and sgid bits */
+#define MS_NODEV 4 /* Disallow access to device special files */
+#define MS_NOEXEC 8 /* Disallow program execution */
+#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
+#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
+#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
+#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
+#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */
+#define MS_NOATIME 1024 /* Do not update access times. */
+#define MS_NODIRATIME 2048 /* Do not update directory access times */
+#define MS_BIND 4096
+#define MS_MOVE 8192
+#define MS_REC 16384
+#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence.
+ MS_VERBOSE is deprecated. */
+#define MS_SILENT 32768
+#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
+#define MS_UNBINDABLE (1<<17) /* change to unbindable */
+#define MS_PRIVATE (1<<18) /* change to private */
+#define MS_SLAVE (1<<19) /* change to slave */
+#define MS_SHARED (1<<20) /* change to shared */
+#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
+#define MS_I_VERSION (1<<23) /* Update inode I_version field */
+#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
+
+/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT (1<<26)
+#define MS_NOREMOTELOCK (1<<27)
+#define MS_NOSEC (1<<28)
+#define MS_BORN (1<<29)
+#define MS_ACTIVE (1<<30)
+#define MS_NOUSER (1<<31)
+
+/*
+ * Superblock flags that can be altered by MS_REMOUNT
+ */
+#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+ MS_LAZYTIME)
+
+/*
+ * Old magic mount flag and mask
+ */
+#define MS_MGC_VAL 0xC0ED0000
+#define MS_MGC_MSK 0xffff0000
+
+/*
+ * open_tree() flags.
+ */
+#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */
+#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */
+
+/*
+ * move_mount() flags.
+ */
+#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */
+#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
+#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
+#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
+#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
+#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */
+#define MOVE_MOUNT__MASK 0x00000377
+
+/*
+ * fsopen() flags.
+ */
+#define FSOPEN_CLOEXEC 0x00000001
+
+/*
+ * fspick() flags.
+ */
+#define FSPICK_CLOEXEC 0x00000001
+#define FSPICK_SYMLINK_NOFOLLOW 0x00000002
+#define FSPICK_NO_AUTOMOUNT 0x00000004
+#define FSPICK_EMPTY_PATH 0x00000008
+
+/*
+ * The type of fsconfig() call made.
+ */
+enum fsconfig_command {
+ FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */
+ FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */
+ FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */
+ FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
+ FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
+ FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
+ FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */
+ FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
+ FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */
+};
+
+/*
+ * fsmount() flags.
+ */
+#define FSMOUNT_CLOEXEC 0x00000001
+
+/*
+ * Mount attributes.
+ */
+#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */
+#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */
+#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */
+#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */
+#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */
+#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */
+#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */
+#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
+#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
+#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */
+
+/*
+ * mount_setattr()
+ */
+struct mount_attr {
+ __u64 attr_set;
+ __u64 attr_clr;
+ __u64 propagation;
+ __u64 userns_fd;
+};
+
+/* List of all mount_attr versions. */
+#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
+
+
+/*
+ * Structure for getting mount/superblock/filesystem info with statmount(2).
+ *
+ * The interface is similar to statx(2): individual fields or groups can be
+ * selected with the @mask argument of statmount(). Kernel will set the @mask
+ * field according to the supported fields.
+ *
+ * If string fields are selected, then the caller needs to pass a buffer that
+ * has space after the fixed part of the structure. Nul terminated strings are
+ * copied there and offsets relative to @str are stored in the relevant fields.
+ * If the buffer is too small, then EOVERFLOW is returned. The actually used
+ * size is returned in @size.
+ */
+struct statmount {
+ __u32 size; /* Total size, including strings */
+ __u32 mnt_opts; /* [str] Options (comma separated, escaped) */
+ __u64 mask; /* What results were written */
+ __u32 sb_dev_major; /* Device ID */
+ __u32 sb_dev_minor;
+ __u64 sb_magic; /* ..._SUPER_MAGIC */
+ __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
+ __u32 fs_type; /* [str] Filesystem type */
+ __u64 mnt_id; /* Unique ID of mount */
+ __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */
+ __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */
+ __u32 mnt_parent_id_old;
+ __u64 mnt_attr; /* MOUNT_ATTR_... */
+ __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
+ __u64 mnt_peer_group; /* ID of shared peer group */
+ __u64 mnt_master; /* Mount receives propagation from this ID */
+ __u64 propagate_from; /* Propagation from in current namespace */
+ __u32 mnt_root; /* [str] Root of mount relative to root of fs */
+ __u32 mnt_point; /* [str] Mountpoint relative to current root */
+ __u64 mnt_ns_id; /* ID of the mount namespace */
+ __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */
+ __u32 sb_source; /* [str] Source string of the mount */
+ __u32 opt_num; /* Number of fs options */
+ __u32 opt_array; /* [str] Array of nul terminated fs options */
+ __u32 opt_sec_num; /* Number of security options */
+ __u32 opt_sec_array; /* [str] Array of nul terminated security options */
+ __u64 supported_mask; /* Mask flags that this kernel supports */
+ __u32 mnt_uidmap_num; /* Number of uid mappings */
+ __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */
+ __u32 mnt_gidmap_num; /* Number of gid mappings */
+ __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */
+ __u64 __spare2[43];
+ char str[]; /* Variable size part containing strings */
+};
+
+/*
+ * Structure for passing mount ID and miscellaneous parameters to statmount(2)
+ * and listmount(2).
+ *
+ * For statmount(2) @param represents the request mask.
+ * For listmount(2) @param represents the last listed mount id (or zero).
+ */
+struct mnt_id_req {
+ __u32 size;
+ __u32 spare;
+ __u64 mnt_id;
+ __u64 param;
+ __u64 mnt_ns_id;
+};
+
+/* List of all mnt_id_req versions. */
+#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
+#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
+
+/*
+ * @mask bits for statmount(2)
+ */
+#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */
+#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */
+#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */
+#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
+#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
+#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
+#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
+#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
+#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */
+#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */
+#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */
+#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */
+#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */
+#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */
+#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */
+
+/*
+ * Special @mnt_id values that can be passed to listmount
+ */
+#define LSMT_ROOT 0xffffffffffffffff /* root mount */
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
+
+#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 7600bf62dbdf..7eb9571786b8 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -219,6 +219,7 @@ enum {
NETDEV_CMD_QSTATS_GET,
NETDEV_CMD_BIND_RX,
NETDEV_CMD_NAPI_SET,
+ NETDEV_CMD_BIND_TX,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h
new file mode 100644
index 000000000000..34127653fd00
--- /dev/null
+++ b/tools/include/uapi/linux/nsfs.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_NSFS_H
+#define __LINUX_NSFS_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define NSIO 0xb7
+
+/* Returns a file descriptor that refers to an owning user namespace */
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+/* Returns a file descriptor that refers to a parent namespace */
+#define NS_GET_PARENT _IO(NSIO, 0x2)
+/* Returns the type of namespace (CLONE_NEW* value) referred to by
+ file descriptor */
+#define NS_GET_NSTYPE _IO(NSIO, 0x3)
+/* Get owner UID (in the caller's user namespace) for a user namespace */
+#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
+/* Get the id for a mount namespace */
+#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64)
+/* Translate pid from target pid namespace into the caller's pid namespace. */
+#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
+/* Return thread-group leader id of pid in the callers pid namespace. */
+#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int)
+/* Translate pid from caller's pid namespace into a target pid namespace. */
+#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int)
+/* Return thread-group leader id of pid in the target pid namespace. */
+#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int)
+
+struct mnt_ns_info {
+ __u32 size;
+ __u32 nr_mounts;
+ __u64 mnt_ns_id;
+};
+
+#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
+
+/* Get information about namespace. */
+#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info)
+/* Get next namespace. */
+#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info)
+/* Get previous namespace. */
+#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
+
+#endif /* __LINUX_NSFS_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 5fc753c23734..78a362b80027 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -39,18 +39,21 @@ enum perf_type_id {
/*
* attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE
+ *
* PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA
* AA: hardware event ID
* EEEEEEEE: PMU type ID
+ *
* PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB
* BB: hardware cache ID
* CC: hardware cache op ID
* DD: hardware cache op result ID
* EEEEEEEE: PMU type ID
- * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
+ *
+ * If the PMU type ID is 0, PERF_TYPE_RAW will be applied.
*/
-#define PERF_PMU_TYPE_SHIFT 32
-#define PERF_HW_EVENT_MASK 0xffffffff
+#define PERF_PMU_TYPE_SHIFT 32
+#define PERF_HW_EVENT_MASK 0xffffffff
/*
* Generalized performance event event_id types, used by the
@@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id {
/*
* Special "software" events provided by the kernel, even if the hardware
* does not support performance events. These events measure various
- * physical and sw events of the kernel (and allow the profiling of them as
+ * physical and SW events of the kernel (and allow the profiling of them as
* well):
*/
enum perf_sw_ids {
@@ -167,8 +170,9 @@ enum perf_event_sample_format {
};
#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
+
/*
- * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
+ * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set.
*
* If the user does not pass priv level information via branch_sample_type,
* the kernel uses the event's priv level. Branch and event priv levels do
@@ -178,20 +182,20 @@ enum perf_event_sample_format {
* of branches and therefore it supersedes all the other types.
*/
enum perf_branch_sample_type_shift {
- PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
- PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
- PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
-
- PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
- PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
- PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
- PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
- PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
- PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
- PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
+ PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
+ PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
+ PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
+
+ PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
+ PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
+ PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
+ PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
+ PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
+ PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
+ PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */
- PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */
+ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */
PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */
PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */
@@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift {
};
enum perf_branch_sample_type {
- PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
- PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
- PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
+ PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
+ PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
+ PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
- PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
- PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
- PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
- PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
- PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
- PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
- PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
- PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
- PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
- PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
- PERF_SAMPLE_BRANCH_TYPE_SAVE =
- 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
+ PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
- PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT,
- PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
+ PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT,
- PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
+ PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
/*
- * Common flow change classification
+ * Common control flow change classifications:
*/
enum {
- PERF_BR_UNKNOWN = 0, /* unknown */
- PERF_BR_COND = 1, /* conditional */
- PERF_BR_UNCOND = 2, /* unconditional */
- PERF_BR_IND = 3, /* indirect */
- PERF_BR_CALL = 4, /* function call */
- PERF_BR_IND_CALL = 5, /* indirect function call */
- PERF_BR_RET = 6, /* function return */
- PERF_BR_SYSCALL = 7, /* syscall */
- PERF_BR_SYSRET = 8, /* syscall return */
- PERF_BR_COND_CALL = 9, /* conditional function call */
- PERF_BR_COND_RET = 10, /* conditional function return */
- PERF_BR_ERET = 11, /* exception return */
- PERF_BR_IRQ = 12, /* irq */
- PERF_BR_SERROR = 13, /* system error */
- PERF_BR_NO_TX = 14, /* not in transaction */
- PERF_BR_EXTEND_ABI = 15, /* extend ABI */
+ PERF_BR_UNKNOWN = 0, /* Unknown */
+ PERF_BR_COND = 1, /* Conditional */
+ PERF_BR_UNCOND = 2, /* Unconditional */
+ PERF_BR_IND = 3, /* Indirect */
+ PERF_BR_CALL = 4, /* Function call */
+ PERF_BR_IND_CALL = 5, /* Indirect function call */
+ PERF_BR_RET = 6, /* Function return */
+ PERF_BR_SYSCALL = 7, /* Syscall */
+ PERF_BR_SYSRET = 8, /* Syscall return */
+ PERF_BR_COND_CALL = 9, /* Conditional function call */
+ PERF_BR_COND_RET = 10, /* Conditional function return */
+ PERF_BR_ERET = 11, /* Exception return */
+ PERF_BR_IRQ = 12, /* IRQ */
+ PERF_BR_SERROR = 13, /* System error */
+ PERF_BR_NO_TX = 14, /* Not in transaction */
+ PERF_BR_EXTEND_ABI = 15, /* Extend ABI */
PERF_BR_MAX,
};
/*
- * Common branch speculation outcome classification
+ * Common branch speculation outcome classifications:
*/
enum {
- PERF_BR_SPEC_NA = 0, /* Not available */
- PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
- PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
- PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
+ PERF_BR_SPEC_NA = 0, /* Not available */
+ PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */
+ PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */
+ PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */
PERF_BR_SPEC_MAX,
};
enum {
- PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
- PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
- PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
- PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
- PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
- PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
- PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
- PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
+ PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */
+ PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */
+ PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */
+ PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */
+ PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */
+ PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */
+ PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */
+ PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */
PERF_BR_NEW_MAX,
};
enum {
- PERF_BR_PRIV_UNKNOWN = 0,
- PERF_BR_PRIV_USER = 1,
- PERF_BR_PRIV_KERNEL = 2,
- PERF_BR_PRIV_HV = 3,
+ PERF_BR_PRIV_UNKNOWN = 0,
+ PERF_BR_PRIV_USER = 1,
+ PERF_BR_PRIV_KERNEL = 2,
+ PERF_BR_PRIV_HV = 3,
};
-#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
-#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
-#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
-#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
-#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
+#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1
+#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2
+#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3
+#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4
+#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5
#define PERF_SAMPLE_BRANCH_PLM_ALL \
(PERF_SAMPLE_BRANCH_USER|\
@@ -310,9 +313,9 @@ enum {
* Values to determine ABI of the registers dump.
*/
enum perf_sample_regs_abi {
- PERF_SAMPLE_REGS_ABI_NONE = 0,
- PERF_SAMPLE_REGS_ABI_32 = 1,
- PERF_SAMPLE_REGS_ABI_64 = 2,
+ PERF_SAMPLE_REGS_ABI_NONE = 0,
+ PERF_SAMPLE_REGS_ABI_32 = 1,
+ PERF_SAMPLE_REGS_ABI_64 = 2,
};
/*
@@ -320,21 +323,21 @@ enum perf_sample_regs_abi {
* abort events. Multiple bits can be set.
*/
enum {
- PERF_TXN_ELISION = (1 << 0), /* From elision */
- PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
- PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
- PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */
- PERF_TXN_RETRY = (1 << 4), /* Retry possible */
- PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
- PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
- PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
+ PERF_TXN_ELISION = (1 << 0), /* From elision */
+ PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
+ PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
+ PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */
+ PERF_TXN_RETRY = (1 << 4), /* Retry possible */
+ PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
+ PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
+ PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
- PERF_TXN_MAX = (1 << 8), /* non-ABI */
+ PERF_TXN_MAX = (1 << 8), /* non-ABI */
- /* bits 32..63 are reserved for the abort code */
+ /* Bits 32..63 are reserved for the abort code */
- PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
- PERF_TXN_ABORT_SHIFT = 32,
+ PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
+ PERF_TXN_ABORT_SHIFT = 32,
};
/*
@@ -369,24 +372,22 @@ enum perf_event_read_format {
PERF_FORMAT_MAX = 1U << 5, /* non-ABI */
};
-#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
-#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
-#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
-#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
- /* add: sample_stack_user */
-#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
-#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
-#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
-#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */
-#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */
+#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */
+#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */
+#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */
+ /* Add: sample_stack_user */
+#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */
+#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */
+#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */
+#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */
+#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */
/*
- * Hardware event_id to monitor via a performance monitoring event:
- *
- * @sample_max_stack: Max number of frame pointers in a callchain,
- * should be < /proc/sys/kernel/perf_event_max_stack
- * Max number of entries of branch stack
- * should be < hardware limit
+ * 'struct perf_event_attr' contains various attributes that define
+ * a performance event - most of them hardware related configuration
+ * details, but also a lot of behavioral switches and values implemented
+ * by the kernel.
*/
struct perf_event_attr {
@@ -396,7 +397,7 @@ struct perf_event_attr {
__u32 type;
/*
- * Size of the attr structure, for fwd/bwd compat.
+ * Size of the attr structure, for forward/backwards compatibility.
*/
__u32 size;
@@ -451,21 +452,21 @@ struct perf_event_attr {
comm_exec : 1, /* flag comm events that are due to an exec */
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
- write_backward : 1, /* Write ring buffer from end to beginning */
+ write_backward : 1, /* write ring buffer from end to beginning */
namespaces : 1, /* include namespaces data */
ksymbol : 1, /* include ksymbol events */
- bpf_event : 1, /* include bpf events */
+ bpf_event : 1, /* include BPF events */
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
text_poke : 1, /* include text poke events */
- build_id : 1, /* use build id in mmap2 events */
+ build_id : 1, /* use build ID in mmap2 events */
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
__reserved_1 : 26;
union {
- __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_events; /* wake up every n events */
__u32 wakeup_watermark; /* bytes before wakeup */
};
@@ -474,13 +475,13 @@ struct perf_event_attr {
__u64 bp_addr;
__u64 kprobe_func; /* for perf_kprobe */
__u64 uprobe_path; /* for perf_uprobe */
- __u64 config1; /* extension of config */
+ __u64 config1; /* extension of config */
};
union {
__u64 bp_len;
- __u64 kprobe_addr; /* when kprobe_func == NULL */
+ __u64 kprobe_addr; /* when kprobe_func == NULL */
__u64 probe_offset; /* for perf_[k,u]probe */
- __u64 config2; /* extension of config1 */
+ __u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum perf_branch_sample_type */
@@ -510,7 +511,16 @@ struct perf_event_attr {
* Wakeup watermark for AUX area
*/
__u32 aux_watermark;
+
+ /*
+ * Max number of frame pointers in a callchain, should be
+ * lower than /proc/sys/kernel/perf_event_max_stack.
+ *
+ * Max number of entries of branch stack should be lower
+ * than the hardware limit.
+ */
__u16 sample_max_stack;
+
__u16 __reserved_2;
__u32 aux_sample_size;
@@ -537,7 +547,7 @@ struct perf_event_attr {
/*
* Structure used by below PERF_EVENT_IOC_QUERY_BPF command
- * to query bpf programs attached to the same perf tracepoint
+ * to query BPF programs attached to the same perf tracepoint
* as the given perf event.
*/
struct perf_event_query_bpf {
@@ -559,21 +569,21 @@ struct perf_event_query_bpf {
/*
* Ioctls that can be done on a perf event fd:
*/
-#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
-#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
-#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
-#define PERF_EVENT_IOC_RESET _IO ('$', 3)
-#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
-#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
-#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
-#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
-#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
-#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
+#define PERF_EVENT_IOC_RESET _IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64)
+#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *)
+#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *)
+#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32)
#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
-#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *)
+#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *)
enum perf_event_ioc_flags {
- PERF_IOC_FLAG_GROUP = 1U << 0,
+ PERF_IOC_FLAG_GROUP = 1U << 0,
};
/*
@@ -584,7 +594,7 @@ struct perf_event_mmap_page {
__u32 compat_version; /* lowest version this is compat with */
/*
- * Bits needed to read the hw events in user-space.
+ * Bits needed to read the HW events in user-space.
*
* u32 seq, time_mult, time_shift, index, width;
* u64 count, enabled, running;
@@ -622,7 +632,7 @@ struct perf_event_mmap_page {
__u32 index; /* hardware event identifier */
__s64 offset; /* add to hardware event value */
__u64 time_enabled; /* time event active */
- __u64 time_running; /* time event on cpu */
+ __u64 time_running; /* time event on CPU */
union {
__u64 capabilities;
struct {
@@ -650,7 +660,7 @@ struct perf_event_mmap_page {
/*
* If cap_usr_time the below fields can be used to compute the time
- * delta since time_enabled (in ns) using rdtsc or similar.
+ * delta since time_enabled (in ns) using RDTSC or similar.
*
* u64 quot, rem;
* u64 delta;
@@ -723,7 +733,7 @@ struct perf_event_mmap_page {
* after reading this value.
*
* When the mapping is PROT_WRITE the @data_tail value should be
- * written by userspace to reflect the last read data, after issueing
+ * written by user-space to reflect the last read data, after issuing
* an smp_mb() to separate the data read from the ->data_tail store.
* In this case the kernel will not over-write unread data.
*
@@ -739,7 +749,7 @@ struct perf_event_mmap_page {
/*
* AUX area is defined by aux_{offset,size} fields that should be set
- * by the userspace, so that
+ * by the user-space, so that
*
* aux_offset >= data_offset + data_size
*
@@ -813,7 +823,7 @@ struct perf_event_mmap_page {
* Indicates that thread was preempted in TASK_RUNNING state.
*
* PERF_RECORD_MISC_MMAP_BUILD_ID:
- * Indicates that mmap2 event carries build id data.
+ * Indicates that mmap2 event carries build ID data.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
@@ -824,26 +834,26 @@ struct perf_event_mmap_page {
#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
struct perf_event_header {
- __u32 type;
- __u16 misc;
- __u16 size;
+ __u32 type;
+ __u16 misc;
+ __u16 size;
};
struct perf_ns_link_info {
- __u64 dev;
- __u64 ino;
+ __u64 dev;
+ __u64 ino;
};
enum {
- NET_NS_INDEX = 0,
- UTS_NS_INDEX = 1,
- IPC_NS_INDEX = 2,
- PID_NS_INDEX = 3,
- USER_NS_INDEX = 4,
- MNT_NS_INDEX = 5,
- CGROUP_NS_INDEX = 6,
-
- NR_NAMESPACES, /* number of available namespaces */
+ NET_NS_INDEX = 0,
+ UTS_NS_INDEX = 1,
+ IPC_NS_INDEX = 2,
+ PID_NS_INDEX = 3,
+ USER_NS_INDEX = 4,
+ MNT_NS_INDEX = 5,
+ CGROUP_NS_INDEX = 6,
+
+ NR_NAMESPACES, /* number of available namespaces */
};
enum perf_event_type {
@@ -859,11 +869,11 @@ enum perf_event_type {
* optional fields being ignored.
*
* struct sample_id {
- * { u32 pid, tid; } && PERF_SAMPLE_TID
- * { u64 time; } && PERF_SAMPLE_TIME
- * { u64 id; } && PERF_SAMPLE_ID
- * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
- * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 id; } && PERF_SAMPLE_IDENTIFIER
* } && perf_event_attr::sample_id_all
*
@@ -874,7 +884,7 @@ enum perf_event_type {
/*
* The MMAP events record the PROT_EXEC mappings so that we can
- * correlate userspace IPs to code. They have the following structure:
+ * correlate user-space IPs to code. They have the following structure:
*
* struct {
* struct perf_event_header header;
@@ -884,7 +894,7 @@ enum perf_event_type {
* u64 len;
* u64 pgoff;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP = 1,
@@ -894,7 +904,7 @@ enum perf_event_type {
* struct perf_event_header header;
* u64 id;
* u64 lost;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_LOST = 2,
@@ -905,7 +915,7 @@ enum perf_event_type {
*
* u32 pid, tid;
* char comm[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_COMM = 3,
@@ -916,7 +926,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_EXIT = 4,
@@ -927,7 +937,7 @@ enum perf_event_type {
* u64 time;
* u64 id;
* u64 stream_id;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_THROTTLE = 5,
@@ -939,7 +949,7 @@ enum perf_event_type {
* u32 pid, ppid;
* u32 tid, ptid;
* u64 time;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_FORK = 7,
@@ -950,7 +960,7 @@ enum perf_event_type {
* u32 pid, tid;
*
* struct read_format values;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_READ = 8,
@@ -1005,12 +1015,12 @@ enum perf_event_type {
* { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS
* } && PERF_SAMPLE_BRANCH_STACK
*
- * { u64 abi; # enum perf_sample_regs_abi
- * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
*
- * { u64 size;
- * char data[size];
- * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
+ * { u64 size;
+ * char data[size];
+ * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
*
* { union perf_sample_weight
* {
@@ -1035,10 +1045,11 @@ enum perf_event_type {
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
- * { u64 size;
- * char data[size]; } && PERF_SAMPLE_AUX
+ * { u64 cgroup;} && PERF_SAMPLE_CGROUP
* { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
* { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE
+ * { u64 size;
+ * char data[size]; } && PERF_SAMPLE_AUX
* };
*/
PERF_RECORD_SAMPLE = 9,
@@ -1070,7 +1081,7 @@ enum perf_event_type {
* };
* u32 prot, flags;
* char filename[];
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_MMAP2 = 10,
@@ -1079,12 +1090,12 @@ enum perf_event_type {
* Records that new data landed in the AUX buffer part.
*
* struct {
- * struct perf_event_header header;
+ * struct perf_event_header header;
*
- * u64 aux_offset;
- * u64 aux_size;
+ * u64 aux_offset;
+ * u64 aux_size;
* u64 flags;
- * struct sample_id sample_id;
+ * struct sample_id sample_id;
* };
*/
PERF_RECORD_AUX = 11,
@@ -1167,7 +1178,7 @@ enum perf_event_type {
PERF_RECORD_KSYMBOL = 17,
/*
- * Record bpf events:
+ * Record BPF events:
* enum perf_bpf_event_type {
* PERF_BPF_EVENT_UNKNOWN = 0,
* PERF_BPF_EVENT_PROG_LOAD = 1,
@@ -1245,181 +1256,181 @@ enum perf_record_ksymbol_type {
#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0)
enum perf_bpf_event_type {
- PERF_BPF_EVENT_UNKNOWN = 0,
- PERF_BPF_EVENT_PROG_LOAD = 1,
- PERF_BPF_EVENT_PROG_UNLOAD = 2,
- PERF_BPF_EVENT_MAX, /* non-ABI */
+ PERF_BPF_EVENT_UNKNOWN = 0,
+ PERF_BPF_EVENT_PROG_LOAD = 1,
+ PERF_BPF_EVENT_PROG_UNLOAD = 2,
+ PERF_BPF_EVENT_MAX, /* non-ABI */
};
-#define PERF_MAX_STACK_DEPTH 127
-#define PERF_MAX_CONTEXTS_PER_STACK 8
+#define PERF_MAX_STACK_DEPTH 127
+#define PERF_MAX_CONTEXTS_PER_STACK 8
enum perf_callchain_context {
- PERF_CONTEXT_HV = (__u64)-32,
- PERF_CONTEXT_KERNEL = (__u64)-128,
- PERF_CONTEXT_USER = (__u64)-512,
+ PERF_CONTEXT_HV = (__u64)-32,
+ PERF_CONTEXT_KERNEL = (__u64)-128,
+ PERF_CONTEXT_USER = (__u64)-512,
- PERF_CONTEXT_GUEST = (__u64)-2048,
- PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
- PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+ PERF_CONTEXT_GUEST = (__u64)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+ PERF_CONTEXT_GUEST_USER = (__u64)-2560,
- PERF_CONTEXT_MAX = (__u64)-4095,
+ PERF_CONTEXT_MAX = (__u64)-4095,
};
/**
* PERF_RECORD_AUX::flags bits
*/
-#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
-#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
-#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
-#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */
+#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */
+#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */
#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */
/* CoreSight PMU AUX buffer formats */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
-#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */
-#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
-#define PERF_FLAG_FD_OUTPUT (1UL << 1)
-#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
-#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
+#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
+#define PERF_FLAG_FD_OUTPUT (1UL << 1)
+#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */
+#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
#if defined(__LITTLE_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_op:5, /* type of opcode */
- mem_lvl:14, /* memory hierarchy level */
- mem_snoop:5, /* snoop mode */
- mem_lock:2, /* lock instr */
- mem_dtlb:7, /* tlb access */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_remote:1, /* remote */
- mem_snoopx:2, /* snoop mode, ext */
- mem_blk:3, /* access blocked */
- mem_hops:3, /* hop level */
- mem_rsvd:18;
+ __u64 mem_op : 5, /* Type of opcode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lock : 2, /* Lock instr */
+ mem_dtlb : 7, /* TLB access */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_remote : 1, /* Remote */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_blk : 3, /* Access blocked */
+ mem_hops : 3, /* Hop level */
+ mem_rsvd : 18;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd:18,
- mem_hops:3, /* hop level */
- mem_blk:3, /* access blocked */
- mem_snoopx:2, /* snoop mode, ext */
- mem_remote:1, /* remote */
- mem_lvl_num:4, /* memory hierarchy level number */
- mem_dtlb:7, /* tlb access */
- mem_lock:2, /* lock instr */
- mem_snoop:5, /* snoop mode */
- mem_lvl:14, /* memory hierarchy level */
- mem_op:5; /* type of opcode */
+ __u64 mem_rsvd : 18,
+ mem_hops : 3, /* Hop level */
+ mem_blk : 3, /* Access blocked */
+ mem_snoopx : 2, /* Snoop mode, ext */
+ mem_remote : 1, /* Remote */
+ mem_lvl_num : 4, /* Memory hierarchy level number */
+ mem_dtlb : 7, /* TLB access */
+ mem_lock : 2, /* Lock instr */
+ mem_snoop : 5, /* Snoop mode */
+ mem_lvl : 14, /* Memory hierarchy level */
+ mem_op : 5; /* Type of opcode */
};
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
-/* type of opcode (load/store/prefetch,code) */
-#define PERF_MEM_OP_NA 0x01 /* not available */
-#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
-#define PERF_MEM_OP_STORE 0x04 /* store instruction */
-#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
-#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
-#define PERF_MEM_OP_SHIFT 0
+/* Type of memory opcode: */
+#define PERF_MEM_OP_NA 0x0001 /* Not available */
+#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */
+#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */
+#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */
+#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */
+#define PERF_MEM_OP_SHIFT 0
/*
- * PERF_MEM_LVL_* namespace being depricated to some extent in the
+ * The PERF_MEM_LVL_* namespace is being deprecated to some extent in
* favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields.
- * Supporting this namespace inorder to not break defined ABIs.
+ * We support this namespace in order to not break defined ABIs.
*
- * memory hierarchy (memory level, hit or miss)
+ * Memory hierarchy (memory level, hit or miss)
*/
-#define PERF_MEM_LVL_NA 0x01 /* not available */
-#define PERF_MEM_LVL_HIT 0x02 /* hit level */
-#define PERF_MEM_LVL_MISS 0x04 /* miss level */
-#define PERF_MEM_LVL_L1 0x08 /* L1 */
-#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
-#define PERF_MEM_LVL_L2 0x20 /* L2 */
-#define PERF_MEM_LVL_L3 0x40 /* L3 */
-#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
-#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
-#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
-#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
-#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
-#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
-#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
-#define PERF_MEM_LVL_SHIFT 5
-
-#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */
-#define PERF_MEM_REMOTE_SHIFT 37
-
-#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */
-#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
-#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
-#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
-#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */
-#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */
-/* 0x7 available */
-#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
-#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
-#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
-#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */
-#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
-#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
-#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
-
-#define PERF_MEM_LVLNUM_SHIFT 33
-
-/* snoop mode */
-#define PERF_MEM_SNOOP_NA 0x01 /* not available */
-#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
-#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
-#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
-#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
-#define PERF_MEM_SNOOP_SHIFT 19
-
-#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
-#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */
-#define PERF_MEM_SNOOPX_SHIFT 38
-
-/* locked instruction */
-#define PERF_MEM_LOCK_NA 0x01 /* not available */
-#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
-#define PERF_MEM_LOCK_SHIFT 24
+#define PERF_MEM_LVL_NA 0x0001 /* Not available */
+#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */
+#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */
+#define PERF_MEM_LVL_L1 0x0008 /* L1 */
+#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2 0x0020 /* L2 */
+#define PERF_MEM_LVL_L3 0x0040 /* L3 */
+#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT 5
+
+#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */
+#define PERF_MEM_REMOTE_SHIFT 37
+
+#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */
+#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */
+#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */
+#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */
+#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */
+/* 0x007 available */
+#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */
+#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */
+#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */
+#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */
+#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */
+#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */
+#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */
+
+#define PERF_MEM_LVLNUM_SHIFT 33
+
+/* Snoop mode */
+#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */
+#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */
+#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */
+#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */
+#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT 19
+
+#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */
+#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */
+#define PERF_MEM_SNOOPX_SHIFT 38
+
+/* Locked instruction */
+#define PERF_MEM_LOCK_NA 0x0001 /* Not available */
+#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */
+#define PERF_MEM_LOCK_SHIFT 24
/* TLB access */
-#define PERF_MEM_TLB_NA 0x01 /* not available */
-#define PERF_MEM_TLB_HIT 0x02 /* hit level */
-#define PERF_MEM_TLB_MISS 0x04 /* miss level */
-#define PERF_MEM_TLB_L1 0x08 /* L1 */
-#define PERF_MEM_TLB_L2 0x10 /* L2 */
-#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
-#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
-#define PERF_MEM_TLB_SHIFT 26
+#define PERF_MEM_TLB_NA 0x0001 /* Not available */
+#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */
+#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */
+#define PERF_MEM_TLB_L1 0x0008 /* L1 */
+#define PERF_MEM_TLB_L2 0x0010 /* L2 */
+#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/
+#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT 26
/* Access blocked */
-#define PERF_MEM_BLK_NA 0x01 /* not available */
-#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */
-#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
-#define PERF_MEM_BLK_SHIFT 40
-
-/* hop level */
-#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
-#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
-#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
-#define PERF_MEM_HOPS_3 0x04 /* remote board */
+#define PERF_MEM_BLK_NA 0x0001 /* Not available */
+#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */
+#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */
+#define PERF_MEM_BLK_SHIFT 40
+
+/* Hop level */
+#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */
+#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */
+#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */
+#define PERF_MEM_HOPS_3 0x0004 /* Remote board */
/* 5-7 available */
-#define PERF_MEM_HOPS_SHIFT 43
+#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
/*
- * single taken branch record layout:
+ * Layout of single taken branch records:
*
* from: source instruction (may not always be a branch insn)
* to: branch target
@@ -1438,37 +1449,37 @@ union perf_mem_data_src {
struct perf_branch_entry {
__u64 from;
__u64 to;
- __u64 mispred:1, /* target mispredicted */
- predicted:1,/* target predicted */
- in_tx:1, /* in transaction */
- abort:1, /* transaction abort */
- cycles:16, /* cycle count to last branch */
- type:4, /* branch type */
- spec:2, /* branch speculation info */
- new_type:4, /* additional branch type */
- priv:3, /* privilege level */
- reserved:31;
+ __u64 mispred : 1, /* target mispredicted */
+ predicted : 1, /* target predicted */
+ in_tx : 1, /* in transaction */
+ abort : 1, /* transaction abort */
+ cycles : 16, /* cycle count to last branch */
+ type : 4, /* branch type */
+ spec : 2, /* branch speculation info */
+ new_type : 4, /* additional branch type */
+ priv : 3, /* privilege level */
+ reserved : 31;
};
/* Size of used info bits in struct perf_branch_entry */
#define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33
union perf_sample_weight {
- __u64 full;
+ __u64 full;
#if defined(__LITTLE_ENDIAN_BITFIELD)
struct {
- __u32 var1_dw;
- __u16 var2_w;
- __u16 var3_w;
+ __u32 var1_dw;
+ __u16 var2_w;
+ __u16 var3_w;
};
#elif defined(__BIG_ENDIAN_BITFIELD)
struct {
- __u16 var3_w;
- __u16 var2_w;
- __u32 var1_dw;
+ __u16 var3_w;
+ __u16 var2_w;
+ __u32 var1_dw;
};
#else
-#error "Unknown endianness"
+# error "Unknown endianness"
#endif
};
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 35791791a879..43dec6eed559 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -230,7 +230,7 @@ struct prctl_mm_map {
# define PR_PAC_APDBKEY (1UL << 3)
# define PR_PAC_APGAKEY (1UL << 4)
-/* Tagged user address controls for arm64 */
+/* Tagged user address controls for arm64 and RISC-V */
#define PR_SET_TAGGED_ADDR_CTRL 55
#define PR_GET_TAGGED_ADDR_CTRL 56
# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
@@ -244,6 +244,9 @@ struct prctl_mm_map {
# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
/* Unused; kept only for source compatibility */
# define PR_MTE_TCF_SHIFT 1
+/* RISC-V pointer masking tag length */
+# define PR_PMLEN_SHIFT 24
+# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT)
/* Control reclaim behavior when allocating memory */
#define PR_SET_IO_FLUSHER 57
@@ -328,4 +331,44 @@ struct prctl_mm_map {
# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */
# define PR_PPC_DEXCR_CTRL_MASK 0x1f
+/*
+ * Get the current shadow stack configuration for the current thread,
+ * this will be the value configured via PR_SET_SHADOW_STACK_STATUS.
+ */
+#define PR_GET_SHADOW_STACK_STATUS 74
+
+/*
+ * Set the current shadow stack configuration. Enabling the shadow
+ * stack will cause a shadow stack to be allocated for the thread.
+ */
+#define PR_SET_SHADOW_STACK_STATUS 75
+# define PR_SHADOW_STACK_ENABLE (1UL << 0)
+# define PR_SHADOW_STACK_WRITE (1UL << 1)
+# define PR_SHADOW_STACK_PUSH (1UL << 2)
+
+/*
+ * Prevent further changes to the specified shadow stack
+ * configuration. All bits may be locked via this call, including
+ * undefined bits.
+ */
+#define PR_LOCK_SHADOW_STACK_STATUS 76
+
+/*
+ * Controls the mode of timer_create() for CRIU restore operations.
+ * Enabling this allows CRIU to restore timers with explicit IDs.
+ *
+ * Don't use for normal operations as the result might be undefined.
+ */
+#define PR_TIMER_CREATE_RESTORE_IDS 77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+
+/* FUTEX hash management */
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define FH_FLAG_IMMUTABLE (1ULL << 0)
+# define PR_FUTEX_HASH_GET_SLOTS 2
+# define PR_FUTEX_HASH_GET_IMMUTABLE 3
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index c0e13cdf9660..b997c68bd945 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -388,7 +388,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;
#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j
#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__)
+#if defined(__clang__) && (__clang_major__ >= 19)
+#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__))
+#elif defined(__GNUC__) && (__GNUC__ >= 14)
+#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__))
+#else
#define ___type(...) typeof(___arrow(__VA_ARGS__))
+#endif
#define ___read(read_fn, dst, src_type, src, accessor) \
read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor)
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 686824b8b413..a50773d4616e 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -15,6 +15,14 @@
#define __array(name, val) typeof(val) *name[]
#define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name
+#ifndef likely
+#define likely(x) (__builtin_expect(!!(x), 1))
+#endif
+
+#ifndef unlikely
+#define unlikely(x) (__builtin_expect(!!(x), 0))
+#endif
+
/*
* Helper macro to place programs, maps, license in
* different sections in elf_bpf file. Section names
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 38bc6b14b066..f1d495dc66bb 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -12,6 +12,7 @@
#include <sys/utsname.h>
#include <sys/param.h>
#include <sys/stat.h>
+#include <sys/mman.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/btf.h>
@@ -120,6 +121,9 @@ struct btf {
/* whether base_btf should be freed in btf_free for this instance */
bool owns_base;
+ /* whether raw_data is a (read-only) mmap */
+ bool raw_data_is_mmap;
+
/* BTF object FD, if loaded into kernel */
int fd;
@@ -951,6 +955,17 @@ static bool btf_is_modifiable(const struct btf *btf)
return (void *)btf->hdr != btf->raw_data;
}
+static void btf_free_raw_data(struct btf *btf)
+{
+ if (btf->raw_data_is_mmap) {
+ munmap(btf->raw_data, btf->raw_size);
+ btf->raw_data_is_mmap = false;
+ } else {
+ free(btf->raw_data);
+ }
+ btf->raw_data = NULL;
+}
+
void btf__free(struct btf *btf)
{
if (IS_ERR_OR_NULL(btf))
@@ -970,7 +985,7 @@ void btf__free(struct btf *btf)
free(btf->types_data);
strset__free(btf->strs_set);
}
- free(btf->raw_data);
+ btf_free_raw_data(btf);
free(btf->raw_data_swapped);
free(btf->type_offs);
if (btf->owns_base)
@@ -996,7 +1011,7 @@ static struct btf *btf_new_empty(struct btf *base_btf)
if (base_btf) {
btf->base_btf = base_btf;
btf->start_id = btf__type_cnt(base_btf);
- btf->start_str_off = base_btf->hdr->str_len;
+ btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
btf->swapped_endian = base_btf->swapped_endian;
}
@@ -1030,7 +1045,7 @@ struct btf *btf__new_empty_split(struct btf *base_btf)
return libbpf_ptr(btf_new_empty(base_btf));
}
-static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
+static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap)
{
struct btf *btf;
int err;
@@ -1050,12 +1065,18 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
btf->start_str_off = base_btf->hdr->str_len;
}
- btf->raw_data = malloc(size);
- if (!btf->raw_data) {
- err = -ENOMEM;
- goto done;
+ if (is_mmap) {
+ btf->raw_data = (void *)data;
+ btf->raw_data_is_mmap = true;
+ } else {
+ btf->raw_data = malloc(size);
+ if (!btf->raw_data) {
+ err = -ENOMEM;
+ goto done;
+ }
+ memcpy(btf->raw_data, data, size);
}
- memcpy(btf->raw_data, data, size);
+
btf->raw_size = size;
btf->hdr = btf->raw_data;
@@ -1083,12 +1104,12 @@ done:
struct btf *btf__new(const void *data, __u32 size)
{
- return libbpf_ptr(btf_new(data, size, NULL));
+ return libbpf_ptr(btf_new(data, size, NULL, false));
}
struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
{
- return libbpf_ptr(btf_new(data, size, base_btf));
+ return libbpf_ptr(btf_new(data, size, base_btf, false));
}
struct btf_elf_secs {
@@ -1148,6 +1169,12 @@ static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs
else
continue;
+ if (sh.sh_type != SHT_PROGBITS) {
+ pr_warn("unexpected section type (%d) of section(%d, %s) from %s\n",
+ sh.sh_type, idx, name, path);
+ goto err;
+ }
+
data = elf_getdata(scn, 0);
if (!data) {
pr_warn("failed to get section(%d, %s) data from %s\n",
@@ -1203,7 +1230,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
if (secs.btf_base_data) {
dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size,
- NULL);
+ NULL, false);
if (IS_ERR(dist_base_btf)) {
err = PTR_ERR(dist_base_btf);
dist_base_btf = NULL;
@@ -1212,7 +1239,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
}
btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size,
- dist_base_btf ?: base_btf);
+ dist_base_btf ?: base_btf, false);
if (IS_ERR(btf)) {
err = PTR_ERR(btf);
goto done;
@@ -1329,7 +1356,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
}
/* finally parse BTF data */
- btf = btf_new(data, sz, base_btf);
+ btf = btf_new(data, sz, base_btf, false);
err_out:
free(data);
@@ -1348,6 +1375,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
return libbpf_ptr(btf_parse_raw(path, base_btf));
}
+static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf)
+{
+ struct stat st;
+ void *data;
+ struct btf *btf;
+ int fd, err;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return libbpf_err_ptr(-errno);
+
+ if (fstat(fd, &st) < 0) {
+ err = -errno;
+ close(fd);
+ return libbpf_err_ptr(err);
+ }
+
+ data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ err = -errno;
+ close(fd);
+
+ if (data == MAP_FAILED)
+ return libbpf_err_ptr(err);
+
+ btf = btf_new(data, st.st_size, base_btf, true);
+ if (IS_ERR(btf))
+ munmap(data, st.st_size);
+
+ return btf;
+}
+
static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
{
struct btf *btf;
@@ -1612,7 +1670,7 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
goto exit_free;
}
- btf = btf_new(ptr, btf_info.btf_size, base_btf);
+ btf = btf_new(ptr, btf_info.btf_size, base_btf, false);
exit_free:
free(ptr);
@@ -1652,10 +1710,8 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
static void btf_invalidate_raw_data(struct btf *btf)
{
- if (btf->raw_data) {
- free(btf->raw_data);
- btf->raw_data = NULL;
- }
+ if (btf->raw_data)
+ btf_free_raw_data(btf);
if (btf->raw_data_swapped) {
free(btf->raw_data_swapped);
btf->raw_data_swapped = NULL;
@@ -4350,46 +4406,109 @@ static inline __u16 btf_fwd_kind(struct btf_type *t)
return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
}
-/* Check if given two types are identical ARRAY definitions */
-static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
+static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2, int depth)
{
struct btf_type *t1, *t2;
+ int k1, k2;
+recur:
+ if (depth <= 0)
+ return false;
t1 = btf_type_by_id(d->btf, id1);
t2 = btf_type_by_id(d->btf, id2);
- if (!btf_is_array(t1) || !btf_is_array(t2))
+
+ k1 = btf_kind(t1);
+ k2 = btf_kind(t2);
+ if (k1 != k2)
return false;
- return btf_equal_array(t1, t2);
-}
+ switch (k1) {
+ case BTF_KIND_UNKN: /* VOID */
+ return true;
+ case BTF_KIND_INT:
+ return btf_equal_int_tag(t1, t2);
+ case BTF_KIND_ENUM:
+ case BTF_KIND_ENUM64:
+ return btf_compat_enum(t1, t2);
+ case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
+ return btf_equal_common(t1, t2);
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_TYPE_TAG:
+ if (t1->info != t2->info || t1->name_off != t2->name_off)
+ return false;
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_ARRAY: {
+ struct btf_array *a1, *a2;
-/* Check if given two types are identical STRUCT/UNION definitions */
-static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
-{
- const struct btf_member *m1, *m2;
- struct btf_type *t1, *t2;
- int n, i;
+ if (!btf_compat_array(t1, t2))
+ return false;
- t1 = btf_type_by_id(d->btf, id1);
- t2 = btf_type_by_id(d->btf, id2);
+ a1 = btf_array(t1);
+ a2 = btf_array(t1);
- if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
- return false;
+ if (a1->index_type != a2->index_type &&
+ !btf_dedup_identical_types(d, a1->index_type, a2->index_type, depth - 1))
+ return false;
- if (!btf_shallow_equal_struct(t1, t2))
- return false;
+ if (a1->type != a2->type &&
+ !btf_dedup_identical_types(d, a1->type, a2->type, depth - 1))
+ return false;
- m1 = btf_members(t1);
- m2 = btf_members(t2);
- for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
- if (m1->type != m2->type &&
- !btf_dedup_identical_arrays(d, m1->type, m2->type) &&
- !btf_dedup_identical_structs(d, m1->type, m2->type))
+ return true;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m1, *m2;
+ int i, n;
+
+ if (!btf_shallow_equal_struct(t1, t2))
return false;
+
+ m1 = btf_members(t1);
+ m2 = btf_members(t2);
+ for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
+ if (m1->type == m2->type)
+ continue;
+ if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1))
+ return false;
+ }
+ return true;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p1, *p2;
+ int i, n;
+
+ if (!btf_compat_fnproto(t1, t2))
+ return false;
+
+ if (t1->type != t2->type &&
+ !btf_dedup_identical_types(d, t1->type, t2->type, depth - 1))
+ return false;
+
+ p1 = btf_params(t1);
+ p2 = btf_params(t2);
+ for (i = 0, n = btf_vlen(t1); i < n; i++, p1++, p2++) {
+ if (p1->type == p2->type)
+ continue;
+ if (!btf_dedup_identical_types(d, p1->type, p2->type, depth - 1))
+ return false;
+ }
+ return true;
+ }
+ default:
+ return false;
}
- return true;
}
+
/*
* Check equivalence of BTF type graph formed by candidate struct/union (we'll
* call it "candidate graph" in this description for brevity) to a type graph
@@ -4508,19 +4627,13 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
* different fields within the *same* struct. This breaks type
* equivalence check, which makes an assumption that candidate
* types sub-graph has a consistent and deduped-by-compiler
- * types within a single CU. So work around that by explicitly
- * allowing identical array types here.
+ * types within a single CU. And similar situation can happen
+ * with struct/union sometimes, and event with pointers.
+ * So accommodate cases like this doing a structural
+ * comparison recursively, but avoiding being stuck in endless
+ * loops by limiting the depth up to which we check.
*/
- if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
- return 1;
- /* It turns out that similar situation can happen with
- * struct/union sometimes, sigh... Handle the case where
- * structs/unions are exactly the same, down to the referenced
- * type IDs. Anything more complicated (e.g., if referenced
- * types are different, but equivalent) is *way more*
- * complicated and requires a many-to-many equivalence mapping.
- */
- if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
+ if (btf_dedup_identical_types(d, hypot_type_id, cand_id, 16))
return 1;
return 0;
}
@@ -5268,7 +5381,10 @@ struct btf *btf__load_vmlinux_btf(void)
pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",
sysfs_btf_path);
} else {
- btf = btf__parse(sysfs_btf_path, NULL);
+ btf = btf_parse_raw_mmap(sysfs_btf_path, NULL);
+ if (IS_ERR(btf))
+ btf = btf__parse(sysfs_btf_path, NULL);
+
if (!btf) {
err = -errno;
pr_warn("failed to read kernel BTF from '%s': %s\n",
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6b85060f07b3..e9c641a2fb20 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -60,6 +60,8 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+#define MAX_EVENT_NAME_LEN 64
+
#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
#define BPF_INSN_SZ (sizeof(struct bpf_insn))
@@ -284,7 +286,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)
old_errno = errno;
va_start(args, format);
- __libbpf_pr(level, format, args);
+ print_fn(level, format, args);
va_end(args);
errno = old_errno;
@@ -896,7 +898,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
return -LIBBPF_ERRNO__FORMAT;
}
- if (sec_off + prog_sz > sec_sz) {
+ if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) {
pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
sec_name, sec_off);
return -LIBBPF_ERRNO__FORMAT;
@@ -1725,15 +1727,6 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam
return ERR_PTR(-ENOENT);
}
-/* Some versions of Android don't provide memfd_create() in their libc
- * implementation, so avoid complications and just go straight to Linux
- * syscall.
- */
-static int sys_memfd_create(const char *name, unsigned flags)
-{
- return syscall(__NR_memfd_create, name, flags);
-}
-
#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC 0x0001U
#endif
@@ -9455,6 +9448,30 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log
return 0;
}
+struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog)
+{
+ if (prog->func_info_rec_size != sizeof(struct bpf_func_info))
+ return libbpf_err_ptr(-EOPNOTSUPP);
+ return prog->func_info;
+}
+
+__u32 bpf_program__func_info_cnt(const struct bpf_program *prog)
+{
+ return prog->func_info_cnt;
+}
+
+struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog)
+{
+ if (prog->line_info_rec_size != sizeof(struct bpf_line_info))
+ return libbpf_err_ptr(-EOPNOTSUPP);
+ return prog->line_info;
+}
+
+__u32 bpf_program__line_info_cnt(const struct bpf_program *prog)
+{
+ return prog->line_info_cnt;
+}
+
#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
.sec = (char *)sec_pfx, \
.prog_type = BPF_PROG_TYPE_##ptype, \
@@ -11121,16 +11138,16 @@ static const char *tracefs_available_filter_functions_addrs(void)
: TRACEFS"/available_filter_functions_addrs";
}
-static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
- const char *kfunc_name, size_t offset)
+static void gen_probe_legacy_event_name(char *buf, size_t buf_sz,
+ const char *name, size_t offset)
{
static int index = 0;
int i;
- snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
- __sync_fetch_and_add(&index, 1));
+ snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(),
+ __sync_fetch_and_add(&index, 1), name, offset);
- /* sanitize binary_path in the probe name */
+ /* sanitize name in the probe name */
for (i = 0; buf[i]; i++) {
if (!isalnum(buf[i]))
buf[i] = '_';
@@ -11255,9 +11272,9 @@ int probe_kern_syscall_wrapper(int token_fd)
return pfd >= 0 ? 1 : 0;
} else { /* legacy mode */
- char probe_name[128];
+ char probe_name[MAX_EVENT_NAME_LEN];
- gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
+ gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
return 0;
@@ -11313,10 +11330,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
func_name, offset,
-1 /* pid */, 0 /* ref_ctr_off */);
} else {
- char probe_name[256];
+ char probe_name[MAX_EVENT_NAME_LEN];
- gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
- func_name, offset);
+ gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
+ func_name, offset);
legacy_probe = strdup(probe_name);
if (!legacy_probe)
@@ -11860,20 +11877,6 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru
return ret;
}
-static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
- const char *binary_path, uint64_t offset)
-{
- int i;
-
- snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
-
- /* sanitize binary_path in the probe name */
- for (i = 0; buf[i]; i++) {
- if (!isalnum(buf[i]))
- buf[i] = '_';
- }
-}
-
static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
const char *binary_path, size_t offset)
{
@@ -12297,13 +12300,14 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
func_offset, pid, ref_ctr_off);
} else {
- char probe_name[PATH_MAX + 64];
+ char probe_name[MAX_EVENT_NAME_LEN];
if (ref_ctr_off)
return libbpf_err_ptr(-EINVAL);
- gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
- binary_path, func_offset);
+ gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
+ strrchr(binary_path, '/') ? : binary_path,
+ func_offset);
legacy_probe = strdup(probe_name);
if (!legacy_probe)
@@ -13371,7 +13375,6 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
attr.sample_type = PERF_SAMPLE_RAW;
- attr.sample_period = sample_period;
attr.wakeup_events = sample_period;
p.attr = &attr;
@@ -14099,6 +14102,12 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
}
link = map_skel->link;
+ if (!link) {
+ pr_warn("map '%s': BPF map skeleton link is uninitialized\n",
+ bpf_map__name(map));
+ continue;
+ }
+
if (*link)
continue;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index e0605403f977..1137e7d2e1b5 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -940,6 +940,12 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le
LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size);
LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size);
+LIBBPF_API struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog);
+LIBBPF_API __u32 bpf_program__func_info_cnt(const struct bpf_program *prog);
+
+LIBBPF_API struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog);
+LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog);
+
/**
* @brief **bpf_program__set_attach_target()** sets BTF-based attach target
* for supported BPF program types:
@@ -1283,6 +1289,7 @@ enum bpf_tc_attach_point {
BPF_TC_INGRESS = 1 << 0,
BPF_TC_EGRESS = 1 << 1,
BPF_TC_CUSTOM = 1 << 2,
+ BPF_TC_QDISC = 1 << 3,
};
#define BPF_TC_PARENT(a, b) \
@@ -1297,9 +1304,11 @@ struct bpf_tc_hook {
int ifindex;
enum bpf_tc_attach_point attach_point;
__u32 parent;
+ __u32 handle;
+ const char *qdisc;
size_t :0;
};
-#define bpf_tc_hook__last_field parent
+#define bpf_tc_hook__last_field qdisc
struct bpf_tc_opts {
size_t sz;
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index d8b71f22f197..1205f9a4fe04 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -437,6 +437,10 @@ LIBBPF_1.6.0 {
bpf_linker__add_fd;
bpf_linker__new_fd;
bpf_object__prepare;
+ bpf_program__func_info;
+ bpf_program__func_info_cnt;
+ bpf_program__line_info;
+ bpf_program__line_info_cnt;
btf__add_decl_attr;
btf__add_type_attr;
} LIBBPF_1.5.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 76669c73dcd1..477a3b3389a0 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -667,6 +667,15 @@ static inline int sys_dup3(int oldfd, int newfd, int flags)
return syscall(__NR_dup3, oldfd, newfd, flags);
}
+/* Some versions of Android don't provide memfd_create() in their libc
+ * implementation, so avoid complications and just go straight to Linux
+ * syscall.
+ */
+static inline int sys_memfd_create(const char *name, unsigned flags)
+{
+ return syscall(__NR_memfd_create, name, flags);
+}
+
/* Point *fixed_fd* to the same file that *tmp_fd* points to.
* Regardless of success, *tmp_fd* is closed.
* Whatever *fixed_fd* pointed to is closed silently.
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 800e0ef09c37..a469e5d4fee7 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -573,7 +573,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz,
snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz);
- fd = memfd_create(filename, 0);
+ fd = sys_memfd_create(filename, 0);
if (fd < 0) {
ret = -errno;
pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret));
@@ -1376,7 +1376,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj
} else {
if (!secs_match(dst_sec, src_sec)) {
pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name);
- return -1;
+ return -EINVAL;
}
/* "license" and "version" sections are deduped */
@@ -2223,7 +2223,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
}
} else if (!secs_match(dst_sec, src_sec)) {
pr_warn("sections %s are not compatible\n", src_sec->sec_name);
- return -1;
+ return -EINVAL;
}
/* shdr->sh_link points to SYMTAB */
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 68a2def17175..c997e69d507f 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -529,9 +529,9 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
}
-typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
+typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook);
-static int clsact_config(struct libbpf_nla_req *req)
+static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
{
req->tc.tcm_parent = TC_H_CLSACT;
req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
@@ -539,6 +539,16 @@ static int clsact_config(struct libbpf_nla_req *req)
return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
}
+static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
+{
+ const char *qdisc = OPTS_GET(hook, qdisc, NULL);
+
+ req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT);
+ req->tc.tcm_handle = OPTS_GET(hook, handle, 0);
+
+ return nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1);
+}
+
static int attach_point_to_config(struct bpf_tc_hook *hook,
qdisc_config_t *config)
{
@@ -552,6 +562,9 @@ static int attach_point_to_config(struct bpf_tc_hook *hook,
return 0;
case BPF_TC_CUSTOM:
return -EOPNOTSUPP;
+ case BPF_TC_QDISC:
+ *config = &qdisc_config;
+ return 0;
default:
return -EINVAL;
}
@@ -596,7 +609,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
req.tc.tcm_family = AF_UNSPEC;
req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
- ret = config(&req);
+ ret = config(&req, hook);
if (ret < 0)
return ret;
@@ -639,6 +652,7 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
case BPF_TC_INGRESS:
case BPF_TC_EGRESS:
return libbpf_err(__bpf_tc_detach(hook, NULL, true));
+ case BPF_TC_QDISC:
case BPF_TC_INGRESS | BPF_TC_EGRESS:
return libbpf_err(tc_qdisc_delete(hook));
case BPF_TC_CUSTOM:
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index 975e265eab3b..06663f9ea581 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype,
minlen = nla_attr_minlen[pt->type];
if (libbpf_nla_len(nla) < minlen)
- return -1;
+ return -EINVAL;
if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen)
- return -1;
+ return -EINVAL;
if (pt->type == LIBBPF_NLA_STRING) {
char *data = libbpf_nla_data(nla);
if (data[libbpf_nla_len(nla) - 1] != '\0')
- return -1;
+ return -EINVAL;
}
return 0;
@@ -118,19 +118,18 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
if (policy) {
err = validate_nla(nla, maxtype, policy);
if (err < 0)
- goto errout;
+ return err;
}
- if (tb[type])
+ if (tb[type]) {
pr_warn("Attribute of type %#x found multiple times in message, "
"previous attribute is being ignored.\n", type);
+ }
tb[type] = nla;
}
- err = 0;
-errout:
- return err;
+ return 0;
}
/**
diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README
index 9999c1effdb6..88870b0bceea 100644
--- a/tools/memory-model/Documentation/README
+++ b/tools/memory-model/Documentation/README
@@ -23,8 +23,11 @@ o You are familiar with the Linux-kernel concurrency primitives
that you need, and just want to get started with LKMM litmus
tests: litmus-tests.txt
-o You would like to access lock-protected shared variables without
- having their corresponding locks held: locking.txt
+o You need to locklessly access shared variables that are otherwise
+ protected by a lock: locking.txt
+
+ This locking.txt file expands on the "Locking" section in
+ recipes.txt, but is self-contained.
o You are familiar with Linux-kernel concurrency, and would
like a detailed intuitive understanding of LKMM, including
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 6dc8b3642458..34aa3172071b 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -1896,7 +1896,7 @@ following respects:
3. The srcu_down_read() and srcu_up_read() primitives work
exactly like srcu_read_lock() and srcu_read_unlock(), except
- that matching calls don't have to execute on the same CPU.
+ that matching calls don't have to execute within the same context.
(The names are meant to be suggestive of operations on
semaphores.) Since the matching is determined by the domain
pointer and index value, these primitives make it possible for
diff --git a/tools/memory-model/Documentation/locking.txt b/tools/memory-model/Documentation/locking.txt
index 65c898c64a93..d6dc3cc34ab6 100644
--- a/tools/memory-model/Documentation/locking.txt
+++ b/tools/memory-model/Documentation/locking.txt
@@ -1,3 +1,8 @@
+[!] Note:
+ This file expands on the "Locking" section of recipes.txt,
+ focusing on locklessly accessing shared variables that are
+ otherwise protected by a lock.
+
Locking
=======
diff --git a/tools/memory-model/Documentation/ordering.txt b/tools/memory-model/Documentation/ordering.txt
index 9b0949d3f5ec..7ab3744929d8 100644
--- a/tools/memory-model/Documentation/ordering.txt
+++ b/tools/memory-model/Documentation/ordering.txt
@@ -223,7 +223,7 @@ The Linux kernel's compiler barrier is barrier(). This primitive
prohibits compiler code-motion optimizations that might move memory
references across the point in the code containing the barrier(), but
does not constrain hardware memory ordering. For example, this can be
-used to prevent to compiler from moving code across an infinite loop:
+used to prevent the compiler from moving code across an infinite loop:
WRITE_ONCE(x, 1);
while (dontstop)
@@ -274,7 +274,7 @@ different pieces of the concurrent algorithm. The variable stored to
by the smp_store_release(), in this case "y", will normally be used in
an acquire operation in other parts of the concurrent algorithm.
-To see the performance advantages, suppose that the above example read
+To see the performance advantages, suppose that the above example reads
from "x" instead of writing to it. Then an smp_wmb() could not guarantee
ordering, and an smp_mb() would be needed instead:
@@ -394,17 +394,17 @@ from the value returned by the rcu_dereference() or srcu_dereference()
to that subsequent memory access.
A call to rcu_dereference() for a given RCU-protected pointer is
-usually paired with a call to a call to rcu_assign_pointer() for that
-same pointer in much the same way that a call to smp_load_acquire() is
-paired with a call to smp_store_release(). Calls to rcu_dereference()
-and rcu_assign_pointer are often buried in other APIs, for example,
+usually paired with a call to rcu_assign_pointer() for that same pointer
+in much the same way that a call to smp_load_acquire() is paired with
+a call to smp_store_release(). Calls to rcu_dereference() and
+rcu_assign_pointer() are often buried in other APIs, for example,
the RCU list API members defined in include/linux/rculist.h. For more
information, please see the docbook headers in that file, the most
-recent LWN article on the RCU API (https://lwn.net/Articles/777036/),
+recent LWN article on the RCU API (https://lwn.net/Articles/988638/),
and of course the material in Documentation/RCU.
If the pointer value is manipulated between the rcu_dereference()
-that returned it and a later dereference(), please read
+that returned it and a later rcu_dereference(), please read
Documentation/RCU/rcu_dereference.rst. It can also be quite helpful to
review uses in the Linux kernel.
@@ -457,7 +457,7 @@ described earlier in this document.
These operations come in three categories:
o Marked writes, such as WRITE_ONCE() and atomic_set(). These
- primitives required the compiler to emit the corresponding store
+ primitives require the compiler to emit the corresponding store
instructions in the expected execution order, thus suppressing
a number of destructive optimizations. However, they provide no
hardware ordering guarantees, and in fact many CPUs will happily
@@ -465,7 +465,7 @@ o Marked writes, such as WRITE_ONCE() and atomic_set(). These
operations, unless these operations are to the same variable.
o Marked reads, such as READ_ONCE() and atomic_read(). These
- primitives required the compiler to emit the corresponding load
+ primitives require the compiler to emit the corresponding load
instructions in the expected execution order, thus suppressing
a number of destructive optimizations. However, they provide no
hardware ordering guarantees, and in fact many CPUs will happily
@@ -506,7 +506,7 @@ of the old value and the new value.
Unmarked C-language accesses are unordered, and are also subject to
any number of compiler optimizations, many of which can break your
-concurrent code. It is possible to used unmarked C-language accesses for
+concurrent code. It is possible to use unmarked C-language accesses for
shared variables that are subject to concurrent access, but great care
is required on an ongoing basis. The compiler-constraining barrier()
primitive can be helpful, as can the various ordering primitives discussed
diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt
index 03f58b11c252..52115ee5f393 100644
--- a/tools/memory-model/Documentation/recipes.txt
+++ b/tools/memory-model/Documentation/recipes.txt
@@ -61,6 +61,10 @@ usual) some things to be careful of:
Locking
-------
+[!] Note:
+ locking.txt expands on this section, providing more detail on
+ locklessly accessing lock-protected shared variables.
+
Locking is well-known and straightforward, at least if you don't think
about it too hard. And the basic rule is indeed quite simple: Any CPU that
has acquired a given lock sees any changes previously seen or made by any
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
index c5fdfd19df24..d691390620b3 100644
--- a/tools/memory-model/Documentation/references.txt
+++ b/tools/memory-model/Documentation/references.txt
@@ -46,8 +46,7 @@ o ARM Ltd. (Ed.). 2014. "ARM Architecture Reference Manual (ARMv8,
o Imagination Technologies, LTD. 2015. "MIPS(R) Architecture
For Programmers, Volume II-A: The MIPS64(R) Instruction,
- Set Reference Manual". Imagination Technologies,
- LTD. https://imgtec.com/?do-download=4302.
+ Set Reference Manual". Imagination Technologies, LTD.
o Shaked Flur, Kathryn E. Gray, Christopher Pulte, Susmit
Sarkar, Ali Sezgin, Luc Maranget, Will Deacon, and Peter
diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt
index 21f06c1d1b70..2df148630cdc 100644
--- a/tools/memory-model/Documentation/simple.txt
+++ b/tools/memory-model/Documentation/simple.txt
@@ -134,7 +134,7 @@ Packaged primitives: Sequence locking
Lockless programming is considered by many to be more difficult than
lock-based programming, but there are a few lockless design patterns that
have been built out into an API. One of these APIs is sequence locking.
-Although this APIs can be used in extremely complex ways, there are simple
+Although this API can be used in extremely complex ways, there are simple
and effective ways of using it that avoid the need to pay attention to
memory ordering.
@@ -205,7 +205,7 @@ If you want to keep things simple, use the initialization and read-out
operations from the previous section only when there are no racing
accesses. Otherwise, use only fully ordered operations when accessing
or modifying the variable. This approach guarantees that code prior
-to a given access to that variable will be seen by all CPUs has having
+to a given access to that variable will be seen by all CPUs as having
happened before any code following any later access to that same variable.
Please note that per-CPU functions are not atomic operations and
diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_array.j2
index 0ec8660d621a..b21476629679 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_array.j2
@@ -2,8 +2,10 @@
{% if annotate %}
/* member {{ name }} (variable-length array) */
{% endif %}
+{% if maxsize != "0" %}
if (value->{{ name }}.count > {{ maxsize }})
return false;
+{% endif %}
if (xdr_stream_encode_u32(xdr, value->{{ name }}.count) != XDR_UNIT)
return false;
for (u32 i = 0; i < value->{{ name }}.count; i++)
diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_array.j2
index 0ec8660d621a..b21476629679 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_array.j2
@@ -2,8 +2,10 @@
{% if annotate %}
/* member {{ name }} (variable-length array) */
{% endif %}
+{% if maxsize != "0" %}
if (value->{{ name }}.count > {{ maxsize }})
return false;
+{% endif %}
if (xdr_stream_encode_u32(xdr, value->{{ name }}.count) != XDR_UNIT)
return false;
for (u32 i = 0; i < value->{{ name }}.count; i++)
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_array.j2
index 51ad736d2530..53dfaf9cec68 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_array.j2
@@ -4,8 +4,10 @@
{% endif %}
if (xdr_stream_decode_u32(xdr, &count) < 0)
return false;
+{% if maxsize != "0" %}
if (count > {{ maxsize }})
return false;
+{% endif %}
for (u32 i = 0; i < count; i++) {
if (xdrgen_decode_{{ type }}(xdr, &ptr->{{ name }}.items[i]) < 0)
return false;
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index f3269ce39e5b..90686e241157 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -20,13 +20,30 @@ CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \
$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \
$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h)
CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
+CFLAGS_lockd_netlink:=$(call get_hdr_inc,_LINUX_LOCKD_NETLINK_H,lockd_netlink.h)
CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h)
CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h)
CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
CFLAGS_nl80211:=$(call get_hdr_inc,__LINUX_NL802121_H,nl80211.h)
CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h)
CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
+CFLAGS_ovpn:=$(call get_hdr_inc,_LINUX_OVPN_H,ovpn.h)
CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_rt-addr:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+ $(call get_hdr_inc,__LINUX_IF_ADDR_H,if_addr.h)
+CFLAGS_rt-link:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+ $(call get_hdr_inc,_LINUX_IF_LINK_H,if_link.h)
+CFLAGS_rt-neigh:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+ $(call get_hdr_inc,__LINUX_NEIGHBOUR_H,neighbour.h)
+CFLAGS_rt-route:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h)
+CFLAGS_rt-rule:=$(call get_hdr_inc,__LINUX_FIB_RULES_H,fib_rules.h)
+CFLAGS_tc:= $(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+ $(call get_hdr_inc,__LINUX_PKT_SCHED_H,pkt_sched.h) \
+ $(call get_hdr_inc,__LINUX_PKT_CLS_H,pkt_cls.h) \
+ $(call get_hdr_inc,_TC_CT_H,tc_act/tc_ct.h) \
+ $(call get_hdr_inc,_TC_MIRRED_H,tc_act/tc_mirred.h) \
+ $(call get_hdr_inc,_TC_SKBEDIT_H,tc_act/tc_skbedit.h) \
+ $(call get_hdr_inc,_TC_TUNNEL_KEY_H,tc_act/tc_tunnel_key.h)
CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h)
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
index 21f9e299dc75..86e1e4a959a7 100644
--- a/tools/net/ynl/generated/Makefile
+++ b/tools/net/ynl/generated/Makefile
@@ -22,10 +22,9 @@ TOOL:=../pyynl/ynl_gen_c.py
TOOL_RST:=../pyynl/ynl_gen_rst.py
SPECS_DIR:=../../../../Documentation/netlink/specs
-GENS_PATHS=$(shell grep -nrI --files-without-match \
- 'protocol: netlink' \
- $(SPECS_DIR))
-GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS})
+SPECS_PATHS=$(wildcard $(SPECS_DIR)/*.yaml)
+GENS_UNSUP=conntrack nftables
+GENS=$(filter-out ${GENS_UNSUP},$(patsubst $(SPECS_DIR)/%.yaml,%,${SPECS_PATHS}))
SRCS=$(patsubst %,%-user.c,${GENS})
HDRS=$(patsubst %,%-user.h,${GENS})
OBJS=$(patsubst %,%-user.o,${GENS})
diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h
index 3c09a7bbfba5..824777d7e05e 100644
--- a/tools/net/ynl/lib/ynl-priv.h
+++ b/tools/net/ynl/lib/ynl-priv.h
@@ -25,6 +25,7 @@ enum ynl_policy_type {
YNL_PT_UINT,
YNL_PT_NUL_STR,
YNL_PT_BITFIELD32,
+ YNL_PT_SUBMSG,
};
enum ynl_parse_result {
@@ -42,7 +43,10 @@ typedef int (*ynl_parse_cb_t)(const struct nlmsghdr *nlh,
struct ynl_parse_arg *yarg);
struct ynl_policy_attr {
- enum ynl_policy_type type;
+ enum ynl_policy_type type:8;
+ __u8 is_submsg:1;
+ __u8 is_selector:1;
+ __u16 selector_type;
unsigned int len;
const char *name;
const struct ynl_policy_nest *nest;
@@ -94,12 +98,17 @@ struct ynl_ntf_base_type {
unsigned char data[] __attribute__((aligned(8)));
};
+struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id, __u16 flags);
+struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id);
+
struct nlmsghdr *
ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
struct nlmsghdr *
ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr);
+int ynl_submsg_failed(struct ynl_parse_arg *yarg, const char *field_name,
+ const char *sel_name);
/* YNL specific helpers used by the auto-generated code */
@@ -204,11 +213,15 @@ static inline void *ynl_attr_data_end(const struct nlattr *attr)
NLMSG_HDRLEN + fixed_hdr_sz); attr; \
(attr) = ynl_attr_next(ynl_nlmsg_end_addr(nlh), attr))
-#define ynl_attr_for_each_nested(attr, outer) \
+#define ynl_attr_for_each_nested_off(attr, outer, offset) \
for ((attr) = ynl_attr_first(outer, outer->nla_len, \
- sizeof(struct nlattr)); attr; \
+ sizeof(struct nlattr) + offset); \
+ attr; \
(attr) = ynl_attr_next(ynl_attr_data_end(outer), attr))
+#define ynl_attr_for_each_nested(attr, outer) \
+ ynl_attr_for_each_nested_off(attr, outer, 0)
+
#define ynl_attr_for_each_payload(start, len, attr) \
for ((attr) = ynl_attr_first(start, len, 0); attr; \
(attr) = ynl_attr_next(start + len, attr))
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index c4da34048ef8..2a169c3c0797 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -45,8 +45,39 @@
#define perr(_ys, _msg) __yerr(&(_ys)->err, errno, _msg)
/* -- Netlink boiler plate */
+static bool
+ynl_err_walk_is_sel(const struct ynl_policy_nest *policy,
+ const struct nlattr *attr)
+{
+ unsigned int type = ynl_attr_type(attr);
+
+ return policy && type <= policy->max_attr &&
+ policy->table[type].is_selector;
+}
+
+static const struct ynl_policy_nest *
+ynl_err_walk_sel_policy(const struct ynl_policy_attr *policy_attr,
+ const struct nlattr *selector)
+{
+ const struct ynl_policy_nest *policy = policy_attr->nest;
+ const char *sel;
+ unsigned int i;
+
+ if (!policy_attr->is_submsg)
+ return policy;
+
+ sel = ynl_attr_get_str(selector);
+ for (i = 0; i <= policy->max_attr; i++) {
+ if (!strcmp(sel, policy->table[i].name))
+ return policy->table[i].nest;
+ }
+
+ return NULL;
+}
+
static int
-ynl_err_walk_report_one(const struct ynl_policy_nest *policy, unsigned int type,
+ynl_err_walk_report_one(const struct ynl_policy_nest *policy,
+ const struct nlattr *selector, unsigned int type,
char *str, int str_sz, int *n)
{
if (!policy) {
@@ -67,9 +98,34 @@ ynl_err_walk_report_one(const struct ynl_policy_nest *policy, unsigned int type,
return 1;
}
- if (*n < str_sz)
- *n += snprintf(str, str_sz - *n,
- ".%s", policy->table[type].name);
+ if (*n < str_sz) {
+ int sz;
+
+ sz = snprintf(str, str_sz - *n,
+ ".%s", policy->table[type].name);
+ *n += sz;
+ str += sz;
+ }
+
+ if (policy->table[type].is_submsg) {
+ if (!selector) {
+ if (*n < str_sz)
+ *n += snprintf(str, str_sz, "(!selector)");
+ return 1;
+ }
+
+ if (ynl_attr_type(selector) !=
+ policy->table[type].selector_type) {
+ if (*n < str_sz)
+ *n += snprintf(str, str_sz, "(!=selector)");
+ return 1;
+ }
+
+ if (*n < str_sz)
+ *n += snprintf(str, str_sz - *n, "(%s)",
+ ynl_attr_get_str(selector));
+ }
+
return 0;
}
@@ -78,6 +134,8 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
const struct ynl_policy_nest *policy, char *str, int str_sz,
const struct ynl_policy_nest **nest_pol)
{
+ const struct ynl_policy_nest *next_pol;
+ const struct nlattr *selector = NULL;
unsigned int astart_off, aend_off;
const struct nlattr *attr;
unsigned int data_len;
@@ -96,6 +154,10 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
ynl_attr_for_each_payload(start, data_len, attr) {
astart_off = (char *)attr - (char *)start;
aend_off = (char *)ynl_attr_data_end(attr) - (char *)start;
+
+ if (ynl_err_walk_is_sel(policy, attr))
+ selector = attr;
+
if (aend_off <= off)
continue;
@@ -109,16 +171,20 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
type = ynl_attr_type(attr);
- if (ynl_err_walk_report_one(policy, type, str, str_sz, &n))
+ if (ynl_err_walk_report_one(policy, selector, type, str, str_sz, &n))
+ return n;
+
+ next_pol = ynl_err_walk_sel_policy(&policy->table[type], selector);
+ if (!next_pol)
return n;
if (!off) {
if (nest_pol)
- *nest_pol = policy->table[type].nest;
+ *nest_pol = next_pol;
return n;
}
- if (!policy->table[type].nest) {
+ if (!next_pol) {
if (n < str_sz)
n += snprintf(str, str_sz, "!nest");
return n;
@@ -128,7 +194,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
start = ynl_attr_data(attr);
end = start + ynl_attr_data_len(attr);
- return n + ynl_err_walk(ys, start, end, off, policy->table[type].nest,
+ return n + ynl_err_walk(ys, start, end, off, next_pol,
&str[n], str_sz - n, nest_pol);
}
@@ -191,12 +257,12 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ",
str ? " (" : "");
- start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+ start = ynl_nlmsg_data_offset(ys->nlh, ys->req_hdr_len);
end = ynl_nlmsg_end_addr(ys->nlh);
off = ys->err.attr_offs;
off -= sizeof(struct nlmsghdr);
- off -= ys->family->hdr_len;
+ off -= ys->req_hdr_len;
n += ynl_err_walk(ys, start, end, off, ys->req_policy,
&bad_attr[n], sizeof(bad_attr) - n, NULL);
@@ -216,14 +282,14 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ",
bad_attr[0] ? ", " : (str ? " (" : ""));
- start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+ start = ynl_nlmsg_data_offset(ys->nlh, ys->req_hdr_len);
end = ynl_nlmsg_end_addr(ys->nlh);
nest_pol = ys->req_policy;
if (tb[NLMSGERR_ATTR_MISS_NEST]) {
off = ynl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]);
off -= sizeof(struct nlmsghdr);
- off -= ys->family->hdr_len;
+ off -= ys->req_hdr_len;
n += ynl_err_walk(ys, start, end, off, ys->req_policy,
&miss_attr[n], sizeof(miss_attr) - n,
@@ -231,7 +297,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
}
n2 = 0;
- ynl_err_walk_report_one(nest_pol, type, &miss_attr[n],
+ ynl_err_walk_report_one(nest_pol, NULL, type, &miss_attr[n],
sizeof(miss_attr) - n, &n2);
n += n2;
@@ -384,6 +450,15 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
return 0;
}
+int ynl_submsg_failed(struct ynl_parse_arg *yarg, const char *field_name,
+ const char *sel_name)
+{
+ yerr(yarg->ys, YNL_ERROR_SUBMSG_KEY,
+ "Parsing error: Sub-message key not set (msg %s, key %s)",
+ field_name, sel_name);
+ return YNL_PARSE_CB_ERROR;
+}
+
/* Generic code */
static void ynl_err_reset(struct ynl_sock *ys)
@@ -451,14 +526,14 @@ ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags,
return nlh;
}
-void ynl_msg_start_req(struct ynl_sock *ys, __u32 id)
+struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id, __u16 flags)
{
- ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK);
+ return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | flags);
}
-void ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
+struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
{
- ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
+ return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
}
struct nlmsghdr *
@@ -663,6 +738,7 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
struct sockaddr_nl addr;
struct ynl_sock *ys;
socklen_t addrlen;
+ int sock_type;
int one = 1;
ys = malloc(sizeof(*ys) + 2 * YNL_SOCKET_BUFFER_SIZE);
@@ -675,7 +751,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
ys->rx_buf = &ys->raw_buf[YNL_SOCKET_BUFFER_SIZE];
ys->ntf_last_next = &ys->ntf_first;
- ys->socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ sock_type = yf->is_classic ? yf->classic_id : NETLINK_GENERIC;
+
+ ys->socket = socket(AF_NETLINK, SOCK_RAW, sock_type);
if (ys->socket < 0) {
__perr(yse, "failed to create a netlink socket");
goto err_free_sock;
@@ -708,8 +786,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
ys->portid = addr.nl_pid;
ys->seq = random();
-
- if (ynl_sock_read_family(ys, yf->name)) {
+ if (yf->is_classic) {
+ ys->family_id = yf->classic_id;
+ } else if (ynl_sock_read_family(ys, yf->name)) {
if (yse)
memcpy(yse, &ys->err, sizeof(*yse));
goto err_close_sock;
@@ -791,13 +870,21 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
struct ynl_parse_arg yarg = { .ys = ys, };
const struct ynl_ntf_info *info;
struct ynl_ntf_base_type *rsp;
- struct genlmsghdr *gehdr;
+ __u32 cmd;
int ret;
- gehdr = ynl_nlmsg_data(nlh);
- if (gehdr->cmd >= ys->family->ntf_info_size)
+ if (ys->family->is_classic) {
+ cmd = nlh->nlmsg_type;
+ } else {
+ struct genlmsghdr *gehdr;
+
+ gehdr = ynl_nlmsg_data(nlh);
+ cmd = gehdr->cmd;
+ }
+
+ if (cmd >= ys->family->ntf_info_size)
return YNL_PARSE_CB_ERROR;
- info = &ys->family->ntf_info[gehdr->cmd];
+ info = &ys->family->ntf_info[cmd];
if (!info->cb)
return YNL_PARSE_CB_ERROR;
@@ -811,7 +898,7 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
goto err_free;
rsp->family = nlh->nlmsg_type;
- rsp->cmd = gehdr->cmd;
+ rsp->cmd = cmd;
*ys->ntf_last_next = rsp;
ys->ntf_last_next = &rsp->next;
@@ -863,18 +950,23 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg)
static int
ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd)
{
- struct genlmsghdr *gehdr;
+ if (ys->family->is_classic) {
+ if (nlh->nlmsg_type != rsp_cmd)
+ return ynl_ntf_parse(ys, nlh);
+ } else {
+ struct genlmsghdr *gehdr;
+
+ if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
+ yerr(ys, YNL_ERROR_INV_RESP,
+ "Kernel responded with truncated message");
+ return -1;
+ }
- if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
- yerr(ys, YNL_ERROR_INV_RESP,
- "Kernel responded with truncated message");
- return -1;
+ gehdr = ynl_nlmsg_data(nlh);
+ if (gehdr->cmd != rsp_cmd)
+ return ynl_ntf_parse(ys, nlh);
}
- gehdr = ynl_nlmsg_data(nlh);
- if (gehdr->cmd != rsp_cmd)
- return ynl_ntf_parse(ys, nlh);
-
return 0;
}
diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h
index 6cd570b283ea..db7c0591a63f 100644
--- a/tools/net/ynl/lib/ynl.h
+++ b/tools/net/ynl/lib/ynl.h
@@ -2,6 +2,7 @@
#ifndef __YNL_C_H
#define __YNL_C_H 1
+#include <stdbool.h>
#include <stddef.h>
#include <linux/genetlink.h>
#include <linux/types.h>
@@ -22,6 +23,7 @@ enum ynl_error_code {
YNL_ERROR_INV_RESP,
YNL_ERROR_INPUT_INVALID,
YNL_ERROR_INPUT_TOO_BIG,
+ YNL_ERROR_SUBMSG_KEY,
};
/**
@@ -48,6 +50,8 @@ struct ynl_family {
/* private: */
const char *name;
size_t hdr_len;
+ bool is_classic;
+ __u16 classic_id;
const struct ynl_ntf_info *ntf_info;
unsigned int ntf_info_size;
};
@@ -77,11 +81,25 @@ struct ynl_sock {
struct nlmsghdr *nlh;
const struct ynl_policy_nest *req_policy;
+ size_t req_hdr_len;
unsigned char *tx_buf;
unsigned char *rx_buf;
unsigned char raw_buf[];
};
+/**
+ * struct ynl_string - parsed individual string
+ * @len: length of the string (excluding terminating character)
+ * @str: value of the string
+ *
+ * Parsed and nul-terminated string. This struct is only used for arrays of
+ * strings. Non-array string members are placed directly in respective types.
+ */
+struct ynl_string {
+ unsigned int len;
+ char str[];
+};
+
struct ynl_sock *
ynl_sock_create(const struct ynl_family *yf, struct ynl_error *e);
void ynl_sock_destroy(struct ynl_sock *ys);
diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py
index 794e3c7dcc65..33ccc5c1843b 100755
--- a/tools/net/ynl/pyynl/cli.py
+++ b/tools/net/ynl/pyynl/cli.py
@@ -144,16 +144,17 @@ def main():
ops = [ (item[0], json.loads(item[1]), args.flags or []) for item in args.multi ]
reply = ynl.do_multi(ops)
output(reply)
- except NlError as e:
- print(e)
- exit(1)
- if args.ntf:
- try:
+ if args.ntf:
for msg in ynl.poll_ntf(duration=args.duration):
output(msg)
- except KeyboardInterrupt:
- pass
+ except NlError as e:
+ print(e)
+ exit(1)
+ except KeyboardInterrupt:
+ pass
+ except BrokenPipeError:
+ pass
if __name__ == "__main__":
diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py
index af7fddd7b085..cab6b576c876 100755
--- a/tools/net/ynl/pyynl/ethtool.py
+++ b/tools/net/ynl/pyynl/ethtool.py
@@ -338,16 +338,24 @@ def main():
print('Capabilities:')
[print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])]
- print(f'PTP Hardware Clock: {tsinfo["phc-index"]}')
+ print(f'PTP Hardware Clock: {tsinfo.get("phc-index", "none")}')
- print('Hardware Transmit Timestamp Modes:')
- [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])]
+ if 'tx-types' in tsinfo:
+ print('Hardware Transmit Timestamp Modes:')
+ [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])]
+ else:
+ print('Hardware Transmit Timestamp Modes: none')
+
+ if 'rx-filters' in tsinfo:
+ print('Hardware Receive Filter Modes:')
+ [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])]
+ else:
+ print('Hardware Receive Filter Modes: none')
- print('Hardware Receive Filter Modes:')
- [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])]
+ if 'stats' in tsinfo and tsinfo['stats']:
+ print('Statistics:')
+ [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()]
- print('Statistics:')
- [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()]
return
print(f'Settings for {args.device}:')
diff --git a/tools/net/ynl/pyynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py
index 9137b83e580a..71518b9842ee 100644
--- a/tools/net/ynl/pyynl/lib/__init__.py
+++ b/tools/net/ynl/pyynl/lib/__init__.py
@@ -1,8 +1,9 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
- SpecFamily, SpecOperation
+ SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat
from .ynl import YnlFamily, Netlink, NlError
__all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
- "SpecFamily", "SpecOperation", "YnlFamily", "Netlink", "NlError"]
+ "SpecFamily", "SpecOperation", "SpecSubMessage", "SpecSubMessageFormat",
+ "YnlFamily", "Netlink", "NlError"]
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index dcc2c6b298d6..55b59f6c79b8 100644
--- a/tools/net/ynl/pyynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -594,7 +594,7 @@ class YnlFamily(SpecFamily):
scalar_selector = self._get_scalar(attr, value["selector"])
attr_payload = struct.pack("II", scalar_value, scalar_selector)
elif attr['type'] == 'sub-message':
- msg_format = self._resolve_selector(attr, search_attrs)
+ msg_format, _ = self._resolve_selector(attr, search_attrs)
attr_payload = b''
if msg_format.fixed_header:
attr_payload += self._encode_struct(msg_format.fixed_header, value)
@@ -712,10 +712,10 @@ class YnlFamily(SpecFamily):
raise Exception(f"No message format for '{value}' in sub-message spec '{sub_msg}'")
spec = sub_msg_spec.formats[value]
- return spec
+ return spec, value
def _decode_sub_msg(self, attr, attr_spec, search_attrs):
- msg_format = self._resolve_selector(attr_spec, search_attrs)
+ msg_format, _ = self._resolve_selector(attr_spec, search_attrs)
decoded = {}
offset = 0
if msg_format.fixed_header:
@@ -787,7 +787,7 @@ class YnlFamily(SpecFamily):
return rsp
- def _decode_extack_path(self, attrs, attr_set, offset, target):
+ def _decode_extack_path(self, attrs, attr_set, offset, target, search_attrs):
for attr in attrs:
try:
attr_spec = attr_set.attrs_by_val[attr.type]
@@ -801,26 +801,37 @@ class YnlFamily(SpecFamily):
if offset + attr.full_len <= target:
offset += attr.full_len
continue
- if attr_spec['type'] != 'nest':
+
+ pathname = attr_spec.name
+ if attr_spec['type'] == 'nest':
+ sub_attrs = self.attr_sets[attr_spec['nested-attributes']]
+ search_attrs = SpaceAttrs(sub_attrs, search_attrs.lookup(attr_spec['name']))
+ elif attr_spec['type'] == 'sub-message':
+ msg_format, value = self._resolve_selector(attr_spec, search_attrs)
+ if msg_format is None:
+ raise Exception(f"Can't resolve sub-message of {attr_spec['name']} for extack")
+ sub_attrs = self.attr_sets[msg_format.attr_set]
+ pathname += f"({value})"
+ else:
raise Exception(f"Can't dive into {attr.type} ({attr_spec['name']}) for extack")
offset += 4
- subpath = self._decode_extack_path(NlAttrs(attr.raw),
- self.attr_sets[attr_spec['nested-attributes']],
- offset, target)
+ subpath = self._decode_extack_path(NlAttrs(attr.raw), sub_attrs,
+ offset, target, search_attrs)
if subpath is None:
return None
- return '.' + attr_spec.name + subpath
+ return '.' + pathname + subpath
return None
- def _decode_extack(self, request, op, extack):
+ def _decode_extack(self, request, op, extack, vals):
if 'bad-attr-offs' not in extack:
return
msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set), op)
offset = self.nlproto.msghdr_size() + self._struct_size(op.fixed_header)
+ search_attrs = SpaceAttrs(op.attr_set, vals)
path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset,
- extack['bad-attr-offs'])
+ extack['bad-attr-offs'], search_attrs)
if path:
del extack['bad-attr-offs']
extack['bad-attr'] = path
@@ -1012,7 +1023,7 @@ class YnlFamily(SpecFamily):
for (method, vals, flags) in ops:
op = self.ops[method]
msg = self._encode_message(op, vals, flags, req_seq)
- reqs_by_seq[req_seq] = (op, msg, flags)
+ reqs_by_seq[req_seq] = (op, vals, msg, flags)
payload += msg
req_seq += 1
@@ -1027,9 +1038,9 @@ class YnlFamily(SpecFamily):
self._recv_dbg_print(reply, nms)
for nl_msg in nms:
if nl_msg.nl_seq in reqs_by_seq:
- (op, req_msg, req_flags) = reqs_by_seq[nl_msg.nl_seq]
+ (op, vals, req_msg, req_flags) = reqs_by_seq[nl_msg.nl_seq]
if nl_msg.extack:
- self._decode_extack(req_msg, op, nl_msg.extack)
+ self._decode_extack(req_msg, op, nl_msg.extack, vals)
else:
op = None
req_flags = []
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index 30c0a34b2784..76032e01c2e7 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -14,6 +14,7 @@ import yaml
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry
+from lib import SpecSubMessage, SpecSubMessageFormat
def c_upper(name):
@@ -56,11 +57,20 @@ class Type(SpecAttr):
self.request = False
self.reply = False
+ self.is_selector = False
+
if 'len' in attr:
self.len = attr['len']
if 'nested-attributes' in attr:
- self.nested_attrs = attr['nested-attributes']
+ nested = attr['nested-attributes']
+ elif 'sub-message' in attr:
+ nested = attr['sub-message']
+ else:
+ nested = None
+
+ if nested:
+ self.nested_attrs = nested
if self.nested_attrs == family.name:
self.nested_render_name = c_lower(f"{family.ident_name}")
else:
@@ -119,7 +129,9 @@ class Type(SpecAttr):
return c_upper(value)
def resolve(self):
- if 'name-prefix' in self.attr:
+ if 'parent-sub-message' in self.attr:
+ enum_name = self.attr['parent-sub-message'].enum_name
+ elif 'name-prefix' in self.attr:
enum_name = f"{self.attr['name-prefix']}{self.name}"
else:
enum_name = f"{self.attr_set.name_prefix}{self.name}"
@@ -142,19 +154,19 @@ class Type(SpecAttr):
return self.is_recursive() and not ri.op
def presence_type(self):
- return 'bit'
+ return 'present'
def presence_member(self, space, type_filter):
if self.presence_type() != type_filter:
return
- if self.presence_type() == 'bit':
+ if self.presence_type() == 'present':
pfx = '__' if space == 'user' else ''
return f"{pfx}u32 {self.c_name}:1;"
- if self.presence_type() == 'len':
+ if self.presence_type() in {'len', 'count'}:
pfx = '__' if space == 'user' else ''
- return f"{pfx}u32 {self.c_name}_len;"
+ return f"{pfx}u32 {self.c_name};"
def _complex_member_type(self, ri):
return None
@@ -163,7 +175,7 @@ class Type(SpecAttr):
return False
def _free_lines(self, ri, var, ref):
- if self.is_multi_val() or self.presence_type() == 'len':
+ if self.is_multi_val() or self.presence_type() in {'count', 'len'}:
return [f'free({var}->{ref}{self.c_name});']
return []
@@ -175,21 +187,21 @@ class Type(SpecAttr):
def arg_member(self, ri):
member = self._complex_member_type(ri)
if member:
- arg = [member + ' *' + self.c_name]
+ spc = ' ' if member[-1] != '*' else ''
+ arg = [member + spc + '*' + self.c_name]
if self.presence_type() == 'count':
arg += ['unsigned int n_' + self.c_name]
return arg
raise Exception(f"Struct member not implemented for class type {self.type}")
def struct_member(self, ri):
- if self.is_multi_val():
- ri.cw.p(f"unsigned int n_{self.c_name};")
member = self._complex_member_type(ri)
if member:
ptr = '*' if self.is_multi_val() else ''
if self.is_recursive_for_op(ri):
ptr = '*'
- ri.cw.p(f"{member} {ptr}{self.c_name};")
+ spc = ' ' if member[-1] != '*' else ''
+ ri.cw.p(f"{member}{spc}{ptr}{self.c_name};")
return
members = self.arg_member(ri)
for one in members:
@@ -215,10 +227,9 @@ class Type(SpecAttr):
cw.p(f'[{self.enum_name}] = {"{"} .name = "{self.name}", {typol}{"}"},')
def _attr_put_line(self, ri, var, line):
- if self.presence_type() == 'bit':
- ri.cw.p(f"if ({var}->_present.{self.c_name})")
- elif self.presence_type() == 'len':
- ri.cw.p(f"if ({var}->_present.{self.c_name}_len)")
+ presence = self.presence_type()
+ if presence in {'present', 'len'}:
+ ri.cw.p(f"if ({var}->_{presence}.{self.c_name})")
ri.cw.p(f"{line};")
def _attr_put_simple(self, ri, var, put_type):
@@ -248,7 +259,7 @@ class Type(SpecAttr):
if not self.is_multi_val():
ri.cw.p("if (ynl_attr_validate(yarg, attr))")
ri.cw.p("return YNL_PARSE_CB_ERROR;")
- if self.presence_type() == 'bit':
+ if self.presence_type() == 'present':
ri.cw.p(f"{var}->_present.{self.c_name} = 1;")
if init_lines:
@@ -279,7 +290,8 @@ class Type(SpecAttr):
presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}"
# Every layer below last is a nest, so we know it uses bit presence
# last layer is "self" and may be a complex type
- if i == len(ref) - 1 and self.presence_type() != 'bit':
+ if i == len(ref) - 1 and self.presence_type() != 'present':
+ presence = f"{var}->{'.'.join(ref[:i] + [''])}_{self.presence_type()}.{ref[i]}"
continue
code.append(presence + ' = 1;')
ref_path = '.'.join(ref[:-1])
@@ -355,26 +367,10 @@ class TypeScalar(Type):
if 'byte-order' in attr:
self.byte_order_comment = f" /* {attr['byte-order']} */"
- if 'enum' in self.attr:
- enum = self.family.consts[self.attr['enum']]
- low, high = enum.value_range()
- if 'min' not in self.checks:
- if low != 0 or self.type[0] == 's':
- self.checks['min'] = low
- if 'max' not in self.checks:
- self.checks['max'] = high
-
- if 'min' in self.checks and 'max' in self.checks:
- if self.get_limit('min') > self.get_limit('max'):
- raise Exception(f'Invalid limit for "{self.name}" min: {self.get_limit("min")} max: {self.get_limit("max")}')
- self.checks['range'] = True
-
- low = min(self.get_limit('min', 0), self.get_limit('max', 0))
- high = max(self.get_limit('min', 0), self.get_limit('max', 0))
- if low < 0 and self.type[0] == 'u':
- raise Exception(f'Invalid limit for "{self.name}" negative limit for unsigned type')
- if low < -32768 or high > 32767:
- self.checks['full-range'] = True
+ # Classic families have some funny enums, don't bother
+ # computing checks, since we only need them for kernel policies
+ if not family.is_classic():
+ self._init_checks()
# Added by resolve():
self.is_bitfield = None
@@ -399,6 +395,31 @@ class TypeScalar(Type):
else:
self.type_name = '__' + self.type
+ def _init_checks(self):
+ if 'enum' in self.attr:
+ enum = self.family.consts[self.attr['enum']]
+ low, high = enum.value_range()
+ if low == None and high == None:
+ self.checks['sparse'] = True
+ else:
+ if 'min' not in self.checks:
+ if low != 0 or self.type[0] == 's':
+ self.checks['min'] = low
+ if 'max' not in self.checks:
+ self.checks['max'] = high
+
+ if 'min' in self.checks and 'max' in self.checks:
+ if self.get_limit('min') > self.get_limit('max'):
+ raise Exception(f'Invalid limit for "{self.name}" min: {self.get_limit("min")} max: {self.get_limit("max")}')
+ self.checks['range'] = True
+
+ low = min(self.get_limit('min', 0), self.get_limit('max', 0))
+ high = max(self.get_limit('min', 0), self.get_limit('max', 0))
+ if low < 0 and self.type[0] == 'u':
+ raise Exception(f'Invalid limit for "{self.name}" negative limit for unsigned type')
+ if low < -32768 or high > 32767:
+ self.checks['full-range'] = True
+
def _attr_policy(self, policy):
if 'flags-mask' in self.checks or self.is_bitfield:
if self.is_bitfield:
@@ -417,6 +438,8 @@ class TypeScalar(Type):
return f"NLA_POLICY_MIN({policy}, {self.get_limit_str('min')})"
elif 'max' in self.checks:
return f"NLA_POLICY_MAX({policy}, {self.get_limit_str('max')})"
+ elif 'sparse' in self.checks:
+ return f"NLA_POLICY_VALIDATE_FN({policy}, &{c_lower(self.enum_name)}_validate)"
return super()._attr_policy(policy)
def _attr_typol(self):
@@ -463,7 +486,10 @@ class TypeString(Type):
ri.cw.p(f"char *{self.c_name};")
def _attr_typol(self):
- return f'.type = YNL_PT_NUL_STR, '
+ typol = f'.type = YNL_PT_NUL_STR, '
+ if self.is_selector:
+ typol += '.is_selector = 1, '
+ return typol
def _attr_policy(self, policy):
if 'exact-len' in self.checks:
@@ -488,7 +514,7 @@ class TypeString(Type):
self._attr_put_simple(ri, var, 'str')
def _attr_get(self, ri, var):
- len_mem = var + '->_present.' + self.c_name + '_len'
+ len_mem = var + '->_len.' + self.c_name
return [f"{len_mem} = len;",
f"{var}->{self.c_name} = malloc(len + 1);",
f"memcpy({var}->{self.c_name}, ynl_attr_get_str(attr), len);",
@@ -497,10 +523,10 @@ class TypeString(Type):
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
- return [f"{presence}_len = strlen({self.c_name});",
- f"{member} = malloc({presence}_len + 1);",
- f'memcpy({member}, {self.c_name}, {presence}_len);',
- f'{member}[{presence}_len] = 0;']
+ return [f"{presence} = strlen({self.c_name});",
+ f"{member} = malloc({presence} + 1);",
+ f'memcpy({member}, {self.c_name}, {presence});',
+ f'{member}[{presence}] = 0;']
class TypeBinary(Type):
@@ -539,20 +565,71 @@ class TypeBinary(Type):
def attr_put(self, ri, var):
self._attr_put_line(ri, var, f"ynl_attr_put(nlh, {self.enum_name}, " +
- f"{var}->{self.c_name}, {var}->_present.{self.c_name}_len)")
+ f"{var}->{self.c_name}, {var}->_len.{self.c_name})")
+
+ def _attr_get(self, ri, var):
+ len_mem = var + '->_len.' + self.c_name
+ return [f"{len_mem} = len;",
+ f"{var}->{self.c_name} = malloc(len);",
+ f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \
+ ['len = ynl_attr_data_len(attr);'], \
+ ['unsigned int len;']
+
+ def _setter_lines(self, ri, member, presence):
+ return [f"{presence} = len;",
+ f"{member} = malloc({presence});",
+ f'memcpy({member}, {self.c_name}, {presence});']
+
+
+class TypeBinaryStruct(TypeBinary):
+ def struct_member(self, ri):
+ ri.cw.p(f'struct {c_lower(self.get("struct"))} *{self.c_name};')
def _attr_get(self, ri, var):
- len_mem = var + '->_present.' + self.c_name + '_len'
+ struct_sz = 'sizeof(struct ' + c_lower(self.get("struct")) + ')'
+ len_mem = var + '->_' + self.presence_type() + '.' + self.c_name
return [f"{len_mem} = len;",
+ f"if (len < {struct_sz})",
+ f"{var}->{self.c_name} = calloc(1, {struct_sz});",
+ "else",
+ f"{var}->{self.c_name} = malloc(len);",
+ f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \
+ ['len = ynl_attr_data_len(attr);'], \
+ ['unsigned int len;']
+
+
+class TypeBinaryScalarArray(TypeBinary):
+ def arg_member(self, ri):
+ return [f'__{self.get("sub-type")} *{self.c_name}', 'size_t count']
+
+ def presence_type(self):
+ return 'count'
+
+ def struct_member(self, ri):
+ ri.cw.p(f'__{self.get("sub-type")} *{self.c_name};')
+
+ def attr_put(self, ri, var):
+ presence = self.presence_type()
+ ri.cw.block_start(line=f"if ({var}->_{presence}.{self.c_name})")
+ ri.cw.p(f"i = {var}->_{presence}.{self.c_name} * sizeof(__{self.get('sub-type')});")
+ ri.cw.p(f"ynl_attr_put(nlh, {self.enum_name}, " +
+ f"{var}->{self.c_name}, i);")
+ ri.cw.block_end()
+
+ def _attr_get(self, ri, var):
+ len_mem = var + '->_count.' + self.c_name
+ return [f"{len_mem} = len / sizeof(__{self.get('sub-type')});",
+ f"len = {len_mem} * sizeof(__{self.get('sub-type')});",
f"{var}->{self.c_name} = malloc(len);",
f"memcpy({var}->{self.c_name}, ynl_attr_data(attr), len);"], \
['len = ynl_attr_data_len(attr);'], \
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
- return [f"{presence}_len = len;",
- f"{member} = malloc({presence}_len);",
- f'memcpy({member}, {self.c_name}, {presence}_len);']
+ return [f"{presence} = count;",
+ f"count *= sizeof(__{self.get('sub-type')});",
+ f"{member} = malloc(count);",
+ f'memcpy({member}, {self.c_name}, count);']
class TypeBitfield32(Type):
@@ -608,7 +685,11 @@ class TypeNest(Type):
f"{self.enum_name}, {at}{var}->{self.c_name})")
def _attr_get(self, ri, var):
- get_lines = [f"if ({self.nested_render_name}_parse(&parg, attr))",
+ pns = self.family.pure_nested_structs[self.nested_attrs]
+ args = ["&parg", "attr"]
+ for sel in pns.external_selectors():
+ args.append(f'{var}->{sel.name}')
+ get_lines = [f"if ({self.nested_render_name}_parse({', '.join(args)}))",
"return YNL_PARSE_CB_ERROR;"]
init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;",
f"parg.data = &{var}->{self.c_name};"]
@@ -638,22 +719,40 @@ class TypeMultiAttr(Type):
def _complex_member_type(self, ri):
if 'type' not in self.attr or self.attr['type'] == 'nest':
return self.nested_struct_type
+ elif self.attr['type'] == 'binary' and 'struct' in self.attr:
+ return None # use arg_member()
+ elif self.attr['type'] == 'string':
+ return 'struct ynl_string *'
elif self.attr['type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
return scalar_pfx + self.attr['type']
else:
raise Exception(f"Sub-type {self.attr['type']} not supported yet")
+ def arg_member(self, ri):
+ if self.type == 'binary' and 'struct' in self.attr:
+ return [f'struct {c_lower(self.attr["struct"])} *{self.c_name}',
+ f'unsigned int n_{self.c_name}']
+ return super().arg_member(ri)
+
def free_needs_iter(self):
- return 'type' not in self.attr or self.attr['type'] == 'nest'
+ return self.attr['type'] in {'nest', 'string'}
def _free_lines(self, ri, var, ref):
lines = []
if self.attr['type'] in scalars:
lines += [f"free({var}->{ref}{self.c_name});"]
+ elif self.attr['type'] == 'binary':
+ lines += [f"free({var}->{ref}{self.c_name});"]
+ elif self.attr['type'] == 'string':
+ lines += [
+ f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)",
+ f"free({var}->{ref}{self.c_name}[i]);",
+ f"free({var}->{ref}{self.c_name});",
+ ]
elif 'type' not in self.attr or self.attr['type'] == 'nest':
lines += [
- f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)",
+ f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)",
f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);',
f"free({var}->{ref}{self.c_name});",
]
@@ -673,18 +772,22 @@ class TypeMultiAttr(Type):
def attr_put(self, ri, var):
if self.attr['type'] in scalars:
put_type = self.type
- ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
+ ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
+ elif self.attr['type'] == 'binary' and 'struct' in self.attr:
+ ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
+ ri.cw.p(f"ynl_attr_put(nlh, {self.enum_name}, &{var}->{self.c_name}[i], sizeof(struct {c_lower(self.attr['struct'])}));")
+ elif self.attr['type'] == 'string':
+ ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
+ ri.cw.p(f"ynl_attr_put_str(nlh, {self.enum_name}, {var}->{self.c_name}[i]->str);")
elif 'type' not in self.attr or self.attr['type'] == 'nest':
- ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
+ ri.cw.p(f"for (i = 0; i < {var}->_count.{self.c_name}; i++)")
self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " +
f"{self.enum_name}, &{var}->{self.c_name}[i])")
else:
raise Exception(f"Put of MultiAttr sub-type {self.attr['type']} not supported yet")
def _setter_lines(self, ri, member, presence):
- # For multi-attr we have a count, not presence, hack up the presence
- presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name
return [f"{member} = {self.c_name};",
f"{presence} = n_{self.c_name};"]
@@ -702,12 +805,22 @@ class TypeArrayNest(Type):
elif self.attr['sub-type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
return scalar_pfx + self.attr['sub-type']
+ elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
+ return None # use arg_member()
else:
raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet")
+ def arg_member(self, ri):
+ if self.sub_type == 'binary' and 'exact-len' in self.checks:
+ return [f'unsigned char (*{self.c_name})[{self.checks["exact-len"]}]',
+ f'unsigned int n_{self.c_name}']
+ return super().arg_member(ri)
+
def _attr_typol(self):
if self.attr['sub-type'] in scalars:
return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, '
+ elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
+ return f'.type = YNL_PT_BINARY, .len = {self.checks["exact-len"]}, '
else:
return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
@@ -717,10 +830,31 @@ class TypeArrayNest(Type):
'ynl_attr_for_each_nested(attr2, attr) {',
'\tif (ynl_attr_validate(yarg, attr2))',
'\t\treturn YNL_PARSE_CB_ERROR;',
- f'\t{var}->n_{self.c_name}++;',
+ f'\t{var}->_count.{self.c_name}++;',
'}']
return get_lines, None, local_vars
+ def attr_put(self, ri, var):
+ ri.cw.p(f'array = ynl_attr_nest_start(nlh, {self.enum_name});')
+ if self.sub_type in scalars:
+ put_type = self.sub_type
+ ri.cw.block_start(line=f'for (i = 0; i < {var}->_count.{self.c_name}; i++)')
+ ri.cw.p(f"ynl_attr_put_{put_type}(nlh, i, {var}->{self.c_name}[i]);")
+ ri.cw.block_end()
+ elif self.sub_type == 'binary' and 'exact-len' in self.checks:
+ ri.cw.p(f'for (i = 0; i < {var}->_count.{self.c_name}; i++)')
+ ri.cw.p(f"ynl_attr_put(nlh, i, {var}->{self.c_name}[i], {self.checks['exact-len']});")
+ elif self.sub_type == 'nest':
+ ri.cw.p(f'for (i = 0; i < {var}->_count.{self.c_name}; i++)')
+ ri.cw.p(f"{self.nested_render_name}_put(nlh, i, &{var}->{self.c_name}[i]);")
+ else:
+ raise Exception(f"Put for ArrayNest sub-type {self.attr['sub-type']} not supported, yet")
+ ri.cw.p('ynl_attr_nest_end(nlh, array);')
+
+ def _setter_lines(self, ri, member, presence):
+ return [f"{member} = {self.c_name};",
+ f"{presence} = n_{self.c_name};"]
+
class TypeNestTypeValue(Type):
def _complex_member_type(self, ri):
@@ -752,14 +886,71 @@ class TypeNestTypeValue(Type):
return get_lines, init_lines, local_vars
+class TypeSubMessage(TypeNest):
+ def __init__(self, family, attr_set, attr, value):
+ super().__init__(family, attr_set, attr, value)
+
+ self.selector = Selector(attr, attr_set)
+
+ def _attr_typol(self):
+ typol = f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
+ typol += '.is_submsg = 1, '
+ # Reverse-parsing of the policy (ynl_err_walk() in ynl.c) does not
+ # support external selectors. No family uses sub-messages with external
+ # selector for requests so this is fine for now.
+ if not self.selector.is_external():
+ typol += f'.selector_type = {self.attr_set[self["selector"]].value} '
+ return typol
+
+ def _attr_get(self, ri, var):
+ sel = c_lower(self['selector'])
+ if self.selector.is_external():
+ sel_var = f"_sel_{sel}"
+ else:
+ sel_var = f"{var}->{sel}"
+ get_lines = [f'if (!{sel_var})',
+ f'return ynl_submsg_failed(yarg, "%s", "%s");' %
+ (self.name, self['selector']),
+ f"if ({self.nested_render_name}_parse(&parg, {sel_var}, attr))",
+ "return YNL_PARSE_CB_ERROR;"]
+ init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;",
+ f"parg.data = &{var}->{self.c_name};"]
+ return get_lines, init_lines, None
+
+
+class Selector:
+ def __init__(self, msg_attr, attr_set):
+ self.name = msg_attr["selector"]
+
+ if self.name in attr_set:
+ self.attr = attr_set[self.name]
+ self.attr.is_selector = True
+ self._external = False
+ else:
+ # The selector will need to get passed down thru the structs
+ self.attr = None
+ self._external = True
+
+ def set_attr(self, attr):
+ self.attr = attr
+
+ def is_external(self):
+ return self._external
+
+
class Struct:
- def __init__(self, family, space_name, type_list=None, inherited=None):
+ def __init__(self, family, space_name, type_list=None, fixed_header=None,
+ inherited=None, submsg=None):
self.family = family
self.space_name = space_name
self.attr_set = family.attr_sets[space_name]
# Use list to catch comparisons with empty sets
self._inherited = inherited if inherited is not None else []
self.inherited = []
+ self.fixed_header = None
+ if fixed_header:
+ self.fixed_header = 'struct ' + c_lower(fixed_header)
+ self.submsg = submsg
self.nested = type_list is None
if family.name == c_lower(space_name):
@@ -809,6 +1000,19 @@ class Struct:
raise Exception("Inheriting different members not supported")
self.inherited = [c_lower(x) for x in sorted(self._inherited)]
+ def external_selectors(self):
+ sels = []
+ for name, attr in self.attr_list:
+ if isinstance(attr, TypeSubMessage) and attr.selector.is_external():
+ sels.append(attr.selector)
+ return sels
+
+ def free_needs_iter(self):
+ for _, attr in self.attr_list:
+ if attr.free_needs_iter():
+ return True
+ return False
+
class EnumEntry(SpecEnumEntry):
def __init__(self, enum_set, yaml, prev, value_start):
@@ -862,7 +1066,7 @@ class EnumSet(SpecEnumSet):
high = max([x.value for x in self.entries.values()])
if high - low + 1 != len(self.entries):
- raise Exception("Can't get value range for a noncontiguous enum")
+ return None, None
return low, high
@@ -909,18 +1113,25 @@ class AttrSet(SpecAttrSet):
elif elem['type'] == 'string':
t = TypeString(self.family, self, elem, value)
elif elem['type'] == 'binary':
- t = TypeBinary(self.family, self, elem, value)
+ if 'struct' in elem:
+ t = TypeBinaryStruct(self.family, self, elem, value)
+ elif elem.get('sub-type') in scalars:
+ t = TypeBinaryScalarArray(self.family, self, elem, value)
+ else:
+ t = TypeBinary(self.family, self, elem, value)
elif elem['type'] == 'bitfield32':
t = TypeBitfield32(self.family, self, elem, value)
elif elem['type'] == 'nest':
t = TypeNest(self.family, self, elem, value)
elif elem['type'] == 'indexed-array' and 'sub-type' in elem:
- if elem["sub-type"] in ['nest', 'u32']:
+ if elem["sub-type"] in ['binary', 'nest', 'u32']:
t = TypeArrayNest(self.family, self, elem, value)
else:
raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}')
elif elem['type'] == 'nest-type-value':
t = TypeNestTypeValue(self.family, self, elem, value)
+ elif elem['type'] == 'sub-message':
+ t = TypeSubMessage(self.family, self, elem, value)
else:
raise Exception(f"No typed class for type {elem['type']}")
@@ -932,6 +1143,14 @@ class AttrSet(SpecAttrSet):
class Operation(SpecOperation):
def __init__(self, family, yaml, req_value, rsp_value):
+ # Fill in missing operation properties (for fixed hdr-only msgs)
+ for mode in ['do', 'dump', 'event']:
+ for direction in ['request', 'reply']:
+ try:
+ yaml[mode][direction].setdefault('attributes', [])
+ except KeyError:
+ pass
+
super().__init__(family, yaml, req_value, rsp_value)
self.render_name = c_lower(family.ident_name + '_' + self.name)
@@ -957,6 +1176,16 @@ class Operation(SpecOperation):
self.has_ntf = True
+class SubMessage(SpecSubMessage):
+ def __init__(self, family, yaml):
+ super().__init__(family, yaml)
+
+ self.render_name = c_lower(family.ident_name + '-' + yaml['name'])
+
+ def resolve(self):
+ self.resolve_up(super())
+
+
class Family(SpecFamily):
def __init__(self, file_name, exclude_ops):
# Added by resolve:
@@ -993,9 +1222,6 @@ class Family(SpecFamily):
def resolve(self):
self.resolve_up(super())
- if self.yaml.get('protocol', 'genetlink') not in {'genetlink', 'genetlink-c', 'genetlink-legacy'}:
- raise Exception("Codegen only supported for genetlink")
-
self.c_name = c_lower(self.ident_name)
if 'name-prefix' in self.yaml['operations']:
self.op_prefix = c_upper(self.yaml['operations']['name-prefix'])
@@ -1018,7 +1244,7 @@ class Family(SpecFamily):
# dict space-name -> 'request': set(attrs), 'reply': set(attrs)
self.root_sets = dict()
- # dict space-name -> set('request', 'reply')
+ # dict space-name -> Struct
self.pure_nested_structs = dict()
self._mark_notify()
@@ -1027,6 +1253,7 @@ class Family(SpecFamily):
self._load_root_sets()
self._load_nested_sets()
self._load_attr_use()
+ self._load_selector_passing()
self._load_hooks()
self.kernel_policy = self.yaml.get('kernel-policy', 'split')
@@ -1042,6 +1269,12 @@ class Family(SpecFamily):
def new_operation(self, elem, req_value, rsp_value):
return Operation(self, elem, req_value, rsp_value)
+ def new_sub_message(self, elem):
+ return SubMessage(self, elem)
+
+ def is_classic(self):
+ return self.proto == 'netlink-raw'
+
def _mark_notify(self):
for op in self.msgs.values():
if 'notify' in op:
@@ -1091,20 +1324,85 @@ class Family(SpecFamily):
for _, spec in self.attr_sets[name].items():
if 'nested-attributes' in spec:
nested = spec['nested-attributes']
- # If the unknown nest we hit is recursive it's fine, it'll be a pointer
- if self.pure_nested_structs[nested].recursive:
- continue
- if nested not in pns_key_seen:
- # Dicts are sorted, this will make struct last
- struct = self.pure_nested_structs.pop(name)
- self.pure_nested_structs[name] = struct
- finished = False
- break
+ elif 'sub-message' in spec:
+ nested = spec.sub_message
+ else:
+ continue
+
+ # If the unknown nest we hit is recursive it's fine, it'll be a pointer
+ if self.pure_nested_structs[nested].recursive:
+ continue
+ if nested not in pns_key_seen:
+ # Dicts are sorted, this will make struct last
+ struct = self.pure_nested_structs.pop(name)
+ self.pure_nested_structs[name] = struct
+ finished = False
+ break
if finished:
pns_key_seen.add(name)
else:
pns_key_list.append(name)
+ def _load_nested_set_nest(self, spec):
+ inherit = set()
+ nested = spec['nested-attributes']
+ if nested not in self.root_sets:
+ if nested not in self.pure_nested_structs:
+ self.pure_nested_structs[nested] = \
+ Struct(self, nested, inherited=inherit,
+ fixed_header=spec.get('fixed-header'))
+ else:
+ raise Exception(f'Using attr set as root and nested not supported - {nested}')
+
+ if 'type-value' in spec:
+ if nested in self.root_sets:
+ raise Exception("Inheriting members to a space used as root not supported")
+ inherit.update(set(spec['type-value']))
+ elif spec['type'] == 'indexed-array':
+ inherit.add('idx')
+ self.pure_nested_structs[nested].set_inherited(inherit)
+
+ return nested
+
+ def _load_nested_set_submsg(self, spec):
+ # Fake the struct type for the sub-message itself
+ # its not a attr_set but codegen wants attr_sets.
+ submsg = self.sub_msgs[spec["sub-message"]]
+ nested = submsg.name
+
+ attrs = []
+ for name, fmt in submsg.formats.items():
+ attr = {
+ "name": name,
+ "parent-sub-message": spec,
+ }
+ if 'attribute-set' in fmt:
+ attr |= {
+ "type": "nest",
+ "nested-attributes": fmt['attribute-set'],
+ }
+ if 'fixed-header' in fmt:
+ attr |= { "fixed-header": fmt["fixed-header"] }
+ elif 'fixed-header' in fmt:
+ attr |= {
+ "type": "binary",
+ "struct": fmt["fixed-header"],
+ }
+ else:
+ attr["type"] = "flag"
+ attrs.append(attr)
+
+ self.attr_sets[nested] = AttrSet(self, {
+ "name": nested,
+ "name-pfx": self.name + '-' + spec.name + '-',
+ "attributes": attrs
+ })
+
+ if nested not in self.pure_nested_structs:
+ self.pure_nested_structs[nested] = Struct(self, nested, submsg=submsg)
+
+ return nested
+
def _load_nested_sets(self):
attr_set_queue = list(self.root_sets.keys())
attr_set_seen = set(self.root_sets.keys())
@@ -1112,61 +1410,61 @@ class Family(SpecFamily):
while len(attr_set_queue):
a_set = attr_set_queue.pop(0)
for attr, spec in self.attr_sets[a_set].items():
- if 'nested-attributes' not in spec:
+ if 'nested-attributes' in spec:
+ nested = self._load_nested_set_nest(spec)
+ elif 'sub-message' in spec:
+ nested = self._load_nested_set_submsg(spec)
+ else:
continue
- nested = spec['nested-attributes']
if nested not in attr_set_seen:
attr_set_queue.append(nested)
attr_set_seen.add(nested)
- inherit = set()
- if nested not in self.root_sets:
- if nested not in self.pure_nested_structs:
- self.pure_nested_structs[nested] = Struct(self, nested, inherited=inherit)
- else:
- raise Exception(f'Using attr set as root and nested not supported - {nested}')
-
- if 'type-value' in spec:
- if nested in self.root_sets:
- raise Exception("Inheriting members to a space used as root not supported")
- inherit.update(set(spec['type-value']))
- elif spec['type'] == 'indexed-array':
- inherit.add('idx')
- self.pure_nested_structs[nested].set_inherited(inherit)
-
for root_set, rs_members in self.root_sets.items():
for attr, spec in self.attr_sets[root_set].items():
if 'nested-attributes' in spec:
nested = spec['nested-attributes']
+ elif 'sub-message' in spec:
+ nested = spec.sub_message
+ else:
+ nested = None
+
+ if nested:
if attr in rs_members['request']:
self.pure_nested_structs[nested].request = True
if attr in rs_members['reply']:
self.pure_nested_structs[nested].reply = True
- if spec.is_multi_val():
- child = self.pure_nested_structs.get(nested)
- child.in_multi_val = True
+ if spec.is_multi_val():
+ child = self.pure_nested_structs.get(nested)
+ child.in_multi_val = True
self._sort_pure_types()
# Propagate the request / reply / recursive
for attr_set, struct in reversed(self.pure_nested_structs.items()):
for _, spec in self.attr_sets[attr_set].items():
- if 'nested-attributes' in spec:
- child_name = spec['nested-attributes']
- struct.child_nests.add(child_name)
- child = self.pure_nested_structs.get(child_name)
- if child:
- if not child.recursive:
- struct.child_nests.update(child.child_nests)
- child.request |= struct.request
- child.reply |= struct.reply
- if spec.is_multi_val():
- child.in_multi_val = True
if attr_set in struct.child_nests:
struct.recursive = True
+ if 'nested-attributes' in spec:
+ child_name = spec['nested-attributes']
+ elif 'sub-message' in spec:
+ child_name = spec.sub_message
+ else:
+ continue
+
+ struct.child_nests.add(child_name)
+ child = self.pure_nested_structs.get(child_name)
+ if child:
+ if not child.recursive:
+ struct.child_nests.update(child.child_nests)
+ child.request |= struct.request
+ child.reply |= struct.reply
+ if spec.is_multi_val():
+ child.in_multi_val = True
+
self._sort_pure_types()
def _load_attr_use(self):
@@ -1185,6 +1483,30 @@ class Family(SpecFamily):
if attr in rs_members['reply']:
spec.set_reply()
+ def _load_selector_passing(self):
+ def all_structs():
+ for k, v in reversed(self.pure_nested_structs.items()):
+ yield k, v
+ for k, _ in self.root_sets.items():
+ yield k, None # we don't have a struct, but it must be terminal
+
+ for attr_set, struct in all_structs():
+ for _, spec in self.attr_sets[attr_set].items():
+ if 'nested-attributes' in spec:
+ child_name = spec['nested-attributes']
+ elif 'sub-message' in spec:
+ child_name = spec.sub_message
+ else:
+ continue
+
+ child = self.pure_nested_structs.get(child_name)
+ for selector in child.external_selectors():
+ if selector.name in self.attr_sets[attr_set]:
+ sel_attr = self.attr_sets[attr_set][selector.name]
+ selector.set_attr(sel_attr)
+ else:
+ raise Exception("Passing selector thru more than one layer not supported")
+
def _load_global_policy(self):
global_set = set()
attr_set_name = None
@@ -1234,12 +1556,19 @@ class RenderInfo:
self.op_mode = op_mode
self.op = op
- self.fixed_hdr = None
+ fixed_hdr = op.fixed_header if op else None
+ self.fixed_hdr_len = 'ys->family->hdr_len'
if op and op.fixed_header:
- self.fixed_hdr = 'struct ' + c_lower(op.fixed_header)
+ if op.fixed_header != family.fixed_header:
+ if family.is_classic():
+ self.fixed_hdr_len = f"sizeof(struct {c_lower(fixed_hdr)})"
+ else:
+ raise Exception(f"Per-op fixed header not supported, yet")
+
# 'do' and 'dump' response parsing is identical
self.type_consistent = True
+ self.type_oneside = False
if op_mode != 'do' and 'dump' in op:
if 'do' in op:
if ('reply' in op['do']) != ('reply' in op["dump"]):
@@ -1247,7 +1576,8 @@ class RenderInfo:
elif 'reply' in op['do'] and op["do"]["reply"] != op["dump"]["reply"]:
self.type_consistent = False
else:
- self.type_consistent = False
+ self.type_consistent = True
+ self.type_oneside = True
self.attr_set = attr_set
if not self.attr_set:
@@ -1265,15 +1595,26 @@ class RenderInfo:
self.struct = dict()
if op_mode == 'notify':
- op_mode = 'do'
+ op_mode = 'do' if 'do' in op else 'dump'
for op_dir in ['request', 'reply']:
if op:
type_list = []
if op_dir in op[op_mode]:
type_list = op[op_mode][op_dir]['attributes']
- self.struct[op_dir] = Struct(family, self.attr_set, type_list=type_list)
+ self.struct[op_dir] = Struct(family, self.attr_set,
+ fixed_header=fixed_hdr,
+ type_list=type_list)
if op_mode == 'event':
- self.struct['reply'] = Struct(family, self.attr_set, type_list=op['event']['attributes'])
+ self.struct['reply'] = Struct(family, self.attr_set,
+ fixed_header=fixed_hdr,
+ type_list=op['event']['attributes'])
+
+ def type_empty(self, key):
+ return len(self.struct[key].attr_list) == 0 and \
+ self.struct['request'].fixed_header is None
+
+ def needs_nlflags(self, direction):
+ return self.op_mode == 'do' and direction == 'request' and self.family.is_classic()
class CodeWriter:
@@ -1331,6 +1672,7 @@ class CodeWriter:
if self._silent_block:
ind += 1
self._silent_block = line.endswith(')') and CodeWriter._is_cond(line)
+ self._silent_block |= line.strip() == 'else'
if line[0] == '#':
ind = 0
if add_ind:
@@ -1541,7 +1883,9 @@ def op_prefix(ri, direction, deref=False):
suffix += f"{direction_to_suffix[direction]}"
else:
if direction == 'request':
- suffix += '_req_dump'
+ suffix += '_req'
+ if not ri.type_oneside:
+ suffix += '_dump'
else:
if ri.type_consistent:
if deref:
@@ -1585,11 +1929,37 @@ def print_dump_prototype(ri):
print_prototype(ri, "request")
+def put_typol_submsg(cw, struct):
+ cw.block_start(line=f'const struct ynl_policy_attr {struct.render_name}_policy[] =')
+
+ i = 0
+ for name, arg in struct.member_list():
+ nest = ""
+ if arg.type == 'nest':
+ nest = f" .nest = &{arg.nested_render_name}_nest,"
+ cw.p('[%d] = { .type = YNL_PT_SUBMSG, .name = "%s",%s },' %
+ (i, name, nest))
+ i += 1
+
+ cw.block_end(line=';')
+ cw.nl()
+
+ cw.block_start(line=f'const struct ynl_policy_nest {struct.render_name}_nest =')
+ cw.p(f'.max_attr = {i - 1},')
+ cw.p(f'.table = {struct.render_name}_policy,')
+ cw.block_end(line=';')
+ cw.nl()
+
+
def put_typol_fwd(cw, struct):
cw.p(f'extern const struct ynl_policy_nest {struct.render_name}_nest;')
def put_typol(cw, struct):
+ if struct.submsg:
+ put_typol_submsg(cw, struct)
+ return
+
type_max = struct.attr_set.max_name
cw.block_start(line=f'const struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =')
@@ -1675,13 +2045,24 @@ def put_req_nested(ri, struct):
local_vars = []
init_lines = []
- local_vars.append('struct nlattr *nest;')
- init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);")
-
+ if struct.submsg is None:
+ local_vars.append('struct nlattr *nest;')
+ init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);")
+ if struct.fixed_header:
+ local_vars.append('void *hdr;')
+ struct_sz = f'sizeof({struct.fixed_header})'
+ init_lines.append(f"hdr = ynl_nlmsg_put_extra_header(nlh, {struct_sz});")
+ init_lines.append(f"memcpy(hdr, &obj->_hdr, {struct_sz});")
+
+ has_anest = False
+ has_count = False
for _, arg in struct.member_list():
- if arg.presence_type() == 'count':
- local_vars.append('unsigned int i;')
- break
+ has_anest |= arg.type == 'indexed-array'
+ has_count |= arg.presence_type() == 'count'
+ if has_anest:
+ local_vars.append('struct nlattr *array;')
+ if has_count:
+ local_vars.append('unsigned int i;')
put_req_nested_prototype(ri, struct, suffix='')
ri.cw.block_start()
@@ -1693,7 +2074,8 @@ def put_req_nested(ri, struct):
for _, arg in struct.member_list():
arg.attr_put(ri, "obj")
- ri.cw.p("ynl_attr_nest_end(nlh, nest);")
+ if struct.submsg is None:
+ ri.cw.p("ynl_attr_nest_end(nlh, nest);")
ri.cw.nl()
ri.cw.p('return 0;')
@@ -1702,19 +2084,27 @@ def put_req_nested(ri, struct):
def _multi_parse(ri, struct, init_lines, local_vars):
+ if struct.fixed_header:
+ local_vars += ['void *hdr;']
if struct.nested:
- iter_line = "ynl_attr_for_each_nested(attr, nested)"
+ if struct.fixed_header:
+ iter_line = f"ynl_attr_for_each_nested_off(attr, nested, sizeof({struct.fixed_header}))"
+ else:
+ iter_line = "ynl_attr_for_each_nested(attr, nested)"
else:
- if ri.fixed_hdr:
- local_vars += ['void *hdr;']
iter_line = "ynl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)"
+ if ri.op.fixed_header != ri.family.fixed_header:
+ if ri.family.is_classic():
+ iter_line = f"ynl_attr_for_each(attr, nlh, sizeof({struct.fixed_header}))"
+ else:
+ raise Exception(f"Per-op fixed header not supported, yet")
array_nests = set()
multi_attrs = set()
needs_parg = False
for arg, aspec in struct.member_list():
if aspec['type'] == 'indexed-array' and 'sub-type' in aspec:
- if aspec["sub-type"] == 'nest':
+ if aspec["sub-type"] in {'binary', 'nest'}:
local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
array_nests.add(arg)
elif aspec['sub-type'] in scalars:
@@ -1725,6 +2115,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
if 'multi-attr' in aspec:
multi_attrs.add(arg)
needs_parg |= 'nested-attributes' in aspec
+ needs_parg |= 'sub-message' in aspec
if array_nests or multi_attrs:
local_vars.append('int i;')
if needs_parg:
@@ -1746,9 +2137,14 @@ def _multi_parse(ri, struct, init_lines, local_vars):
for arg in struct.inherited:
ri.cw.p(f'dst->{arg} = {arg};')
- if ri.fixed_hdr:
- ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
- ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({ri.fixed_hdr}));")
+ if struct.fixed_header:
+ if struct.nested:
+ ri.cw.p('hdr = ynl_attr_data(nested);')
+ elif ri.family.is_classic():
+ ri.cw.p('hdr = ynl_nlmsg_data(nlh);')
+ else:
+ ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
+ ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({struct.fixed_header}));")
for anest in sorted(all_multi):
aspec = struct[anest]
ri.cw.p(f"if (dst->{aspec.c_name})")
@@ -1773,7 +2169,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.block_start(line=f"if (n_{aspec.c_name})")
ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
- ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
+ ri.cw.p(f"dst->_count.{aspec.c_name} = n_{aspec.c_name};")
ri.cw.p('i = 0;')
if 'nested-attributes' in aspec:
ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
@@ -1784,6 +2180,9 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p('return YNL_PARSE_CB_ERROR;')
elif aspec.sub_type in scalars:
ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.sub_type}(attr);")
+ elif aspec.sub_type == 'binary' and 'exact-len' in aspec.checks:
+ # Length is validated by typol
+ ri.cw.p(f'memcpy(dst->{aspec.c_name}[i], ynl_attr_data(attr), {aspec.checks["exact-len"]});')
else:
raise Exception(f"Nest parsing type not supported in {aspec['name']}")
ri.cw.p('i++;')
@@ -1795,7 +2194,7 @@ def _multi_parse(ri, struct, init_lines, local_vars):
aspec = struct[anest]
ri.cw.block_start(line=f"if (n_{aspec.c_name})")
ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
- ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};")
+ ri.cw.p(f"dst->_count.{aspec.c_name} = n_{aspec.c_name};")
ri.cw.p('i = 0;')
if 'nested-attributes' in aspec:
ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;")
@@ -1807,8 +2206,22 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p('return YNL_PARSE_CB_ERROR;')
elif aspec.type in scalars:
ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.type}(attr);")
+ elif aspec.type == 'binary' and 'struct' in aspec:
+ ri.cw.p('size_t len = ynl_attr_data_len(attr);')
+ ri.cw.nl()
+ ri.cw.p(f'if (len > sizeof(dst->{aspec.c_name}[0]))')
+ ri.cw.p(f'len = sizeof(dst->{aspec.c_name}[0]);')
+ ri.cw.p(f"memcpy(&dst->{aspec.c_name}[i], ynl_attr_data(attr), len);")
+ elif aspec.type == 'string':
+ ri.cw.p('unsigned int len;')
+ ri.cw.nl()
+ ri.cw.p('len = strnlen(ynl_attr_get_str(attr), ynl_attr_data_len(attr));')
+ ri.cw.p(f'dst->{aspec.c_name}[i] = malloc(sizeof(struct ynl_string) + len + 1);')
+ ri.cw.p(f"dst->{aspec.c_name}[i]->len = len;")
+ ri.cw.p(f"memcpy(dst->{aspec.c_name}[i]->str, ynl_attr_get_str(attr), len);")
+ ri.cw.p(f"dst->{aspec.c_name}[i]->str[len] = 0;")
else:
- raise Exception('Nest parsing type not supported yet')
+ raise Exception(f'Nest parsing of type {aspec.type} not supported yet')
ri.cw.p('i++;')
ri.cw.block_end()
ri.cw.block_end()
@@ -1823,9 +2236,49 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.nl()
+def parse_rsp_submsg(ri, struct):
+ parse_rsp_nested_prototype(ri, struct, suffix='')
+
+ var = 'dst'
+ local_vars = {'const struct nlattr *attr = nested;',
+ f'{struct.ptr_name}{var} = yarg->data;',
+ 'struct ynl_parse_arg parg;'}
+
+ for _, arg in struct.member_list():
+ _, _, l_vars = arg._attr_get(ri, var)
+ local_vars |= set(l_vars) if l_vars else set()
+
+ ri.cw.block_start()
+ ri.cw.write_func_lvar(list(local_vars))
+ ri.cw.p('parg.ys = yarg->ys;')
+ ri.cw.nl()
+
+ first = True
+ for name, arg in struct.member_list():
+ kw = 'if' if first else 'else if'
+ first = False
+
+ ri.cw.block_start(line=f'{kw} (!strcmp(sel, "{name}"))')
+ get_lines, init_lines, _ = arg._attr_get(ri, var)
+ for line in init_lines or []:
+ ri.cw.p(line)
+ for line in get_lines:
+ ri.cw.p(line)
+ if arg.presence_type() == 'present':
+ ri.cw.p(f"{var}->_present.{arg.c_name} = 1;")
+ ri.cw.block_end()
+ ri.cw.p('return 0;')
+ ri.cw.block_end()
+ ri.cw.nl()
+
+
def parse_rsp_nested_prototype(ri, struct, suffix=';'):
func_args = ['struct ynl_parse_arg *yarg',
'const struct nlattr *nested']
+ for sel in struct.external_selectors():
+ func_args.append('const char *_sel_' + sel.name)
+ if struct.submsg:
+ func_args.insert(1, 'const char *sel')
for arg in struct.inherited:
func_args.append('__u32 ' + arg)
@@ -1834,6 +2287,9 @@ def parse_rsp_nested_prototype(ri, struct, suffix=';'):
def parse_rsp_nested(ri, struct):
+ if struct.submsg:
+ return parse_rsp_submsg(ri, struct)
+
parse_rsp_nested_prototype(ri, struct, suffix='')
local_vars = ['const struct nlattr *attr;',
@@ -1886,7 +2342,7 @@ def print_req(ri):
ret_err = 'NULL'
local_vars += [f'{type_name(ri, rdir(direction))} *rsp;']
- if ri.fixed_hdr:
+ if ri.struct["request"].fixed_header:
local_vars += ['size_t hdr_len;',
'void *hdr;']
@@ -1899,14 +2355,18 @@ def print_req(ri):
ri.cw.block_start()
ri.cw.write_func_lvar(local_vars)
- ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
+ if ri.family.is_classic():
+ ri.cw.p(f"nlh = ynl_msg_start_req(ys, {ri.op.enum_name}, req->_nlmsg_flags);")
+ else:
+ ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;")
+ ri.cw.p(f"ys->req_hdr_len = {ri.fixed_hdr_len};")
if 'reply' in ri.op[ri.op_mode]:
ri.cw.p(f"yrs.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
ri.cw.nl()
- if ri.fixed_hdr:
+ if ri.struct['request'].fixed_header:
ri.cw.p("hdr_len = sizeof(req->_hdr);")
ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);")
ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);")
@@ -1952,7 +2412,7 @@ def print_dump(ri):
'struct nlmsghdr *nlh;',
'int err;']
- if ri.fixed_hdr:
+ if ri.struct['request'].fixed_header:
local_vars += ['size_t hdr_len;',
'void *hdr;']
@@ -1968,9 +2428,12 @@ def print_dump(ri):
else:
ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};')
ri.cw.nl()
- ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
+ if ri.family.is_classic():
+ ri.cw.p(f"nlh = ynl_msg_start_dump(ys, {ri.op.enum_name});")
+ else:
+ ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
- if ri.fixed_hdr:
+ if ri.struct['request'].fixed_header:
ri.cw.p("hdr_len = sizeof(req->_hdr);")
ri.cw.p("hdr = ynl_nlmsg_put_extra_header(nlh, hdr_len);")
ri.cw.p("memcpy(hdr, &req->_hdr, hdr_len);")
@@ -1978,6 +2441,7 @@ def print_dump(ri):
if "request" in ri.op[ri.op_mode]:
ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;")
+ ri.cw.p(f"ys->req_hdr_len = {ri.fixed_hdr_len};")
ri.cw.nl()
for _, attr in ri.struct["request"].member_list():
attr.attr_put(ri, "req")
@@ -2023,32 +2487,45 @@ def print_free_prototype(ri, direction, suffix=';'):
ri.cw.write_func_prot('void', f"{name}_free", [f"struct {struct_name} *{arg}"], suffix=suffix)
+def print_nlflags_set(ri, direction):
+ name = op_prefix(ri, direction)
+ ri.cw.write_func_prot(f'static inline void', f"{name}_set_nlflags",
+ [f"struct {name} *req", "__u16 nl_flags"])
+ ri.cw.block_start()
+ ri.cw.p('req->_nlmsg_flags = nl_flags;')
+ ri.cw.block_end()
+ ri.cw.nl()
+
+
def _print_type(ri, direction, struct):
suffix = f'_{ri.type_name}{direction_to_suffix[direction]}'
if not direction and ri.type_name_conflict:
suffix += '_'
- if ri.op_mode == 'dump':
+ if ri.op_mode == 'dump' and not ri.type_oneside:
suffix += '_dump'
ri.cw.block_start(line=f"struct {ri.family.c_name}{suffix}")
- if ri.fixed_hdr:
- ri.cw.p(ri.fixed_hdr + ' _hdr;')
+ if ri.needs_nlflags(direction):
+ ri.cw.p('__u16 _nlmsg_flags;')
+ ri.cw.nl()
+ if struct.fixed_header:
+ ri.cw.p(struct.fixed_header + ' _hdr;')
ri.cw.nl()
- meta_started = False
- for _, attr in struct.member_list():
- for type_filter in ['len', 'bit']:
+ for type_filter in ['present', 'len', 'count']:
+ meta_started = False
+ for _, attr in struct.member_list():
line = attr.presence_member(ri.ku_space, type_filter)
if line:
if not meta_started:
ri.cw.block_start(line=f"struct")
meta_started = True
ri.cw.p(line)
- if meta_started:
- ri.cw.block_end(line='_present;')
- ri.cw.nl()
+ if meta_started:
+ ri.cw.block_end(line=f'_{type_filter};')
+ ri.cw.nl()
for arg in struct.inherited:
ri.cw.p(f"__u32 {arg};")
@@ -2072,6 +2549,9 @@ def print_type_helpers(ri, direction, deref=False):
print_free_prototype(ri, direction)
ri.cw.nl()
+ if ri.needs_nlflags(direction):
+ print_nlflags_set(ri, direction)
+
if ri.ku_space == 'user' and direction == 'request':
for _, attr in ri.struct[direction].member_list():
attr.setter(ri, ri.attr_set, direction, deref=deref)
@@ -2079,7 +2559,7 @@ def print_type_helpers(ri, direction, deref=False):
def print_req_type_helpers(ri):
- if len(ri.struct["request"].attr_list) == 0:
+ if ri.type_empty("request"):
return
print_alloc_wrapper(ri, "request")
print_type_helpers(ri, "request")
@@ -2102,7 +2582,7 @@ def print_parse_prototype(ri, direction, terminate=True):
def print_req_type(ri):
- if len(ri.struct["request"].attr_list) == 0:
+ if ri.type_empty("request"):
return
print_type(ri, "request")
@@ -2140,11 +2620,9 @@ def print_wrapped_type(ri):
def _free_type_members_iter(ri, struct):
- for _, attr in struct.member_list():
- if attr.free_needs_iter():
- ri.cw.p('unsigned int i;')
- ri.cw.nl()
- break
+ if struct.free_needs_iter():
+ ri.cw.p('unsigned int i;')
+ ri.cw.nl()
def _free_type_members(ri, var, struct, ref=''):
@@ -2281,6 +2759,46 @@ def print_kernel_policy_ranges(family, cw):
cw.nl()
+def print_kernel_policy_sparse_enum_validates(family, cw):
+ first = True
+ for _, attr_set in family.attr_sets.items():
+ if attr_set.subset_of:
+ continue
+
+ for _, attr in attr_set.items():
+ if not attr.request:
+ continue
+ if not attr.enum_name:
+ continue
+ if 'sparse' not in attr.checks:
+ continue
+
+ if first:
+ cw.p('/* Sparse enums validation callbacks */')
+ first = False
+
+ sign = '' if attr.type[0] == 'u' else '_signed'
+ suffix = 'ULL' if attr.type[0] == 'u' else 'LL'
+ cw.write_func_prot('static int', f'{c_lower(attr.enum_name)}_validate',
+ ['const struct nlattr *attr', 'struct netlink_ext_ack *extack'])
+ cw.block_start()
+ cw.block_start(line=f'switch (nla_get_{attr["type"]}(attr))')
+ enum = family.consts[attr['enum']]
+ first_entry = True
+ for entry in enum.entries.values():
+ if first_entry:
+ first_entry = False
+ else:
+ cw.p('fallthrough;')
+ cw.p(f'case {entry.c_name}:')
+ cw.p('return 0;')
+ cw.block_end()
+ cw.p('NL_SET_ERR_MSG_ATTR(extack, attr, "invalid enum value");')
+ cw.p('return -EINVAL;')
+ cw.block_end()
+ cw.nl()
+
+
def print_kernel_op_table_fwd(family, cw, terminate):
exported = not kernel_can_gen_family_struct(family)
@@ -2740,7 +3258,11 @@ def render_uapi(family, cw):
def _render_user_ntf_entry(ri, op):
- ri.cw.block_start(line=f"[{op.enum_name}] = ")
+ if not ri.family.is_classic():
+ ri.cw.block_start(line=f"[{op.enum_name}] = ")
+ else:
+ crud_op = ri.family.req_by_value[op.rsp_value]
+ ri.cw.block_start(line=f"[{crud_op.enum_name}] = ")
ri.cw.p(f".alloc_sz\t= sizeof({type_name(ri, 'event')}),")
ri.cw.p(f".cb\t\t= {op_prefix(ri, 'reply', deref=True)}_parse,")
ri.cw.p(f".policy\t\t= &{ri.struct['reply'].render_name}_nest,")
@@ -2755,7 +3277,7 @@ def render_user_family(family, cw, prototype):
return
if family.ntfs:
- cw.block_start(line=f"static const struct ynl_ntf_info {family['name']}_ntf_info[] = ")
+ cw.block_start(line=f"static const struct ynl_ntf_info {family.c_name}_ntf_info[] = ")
for ntf_op_name, ntf_op in family.ntfs.items():
if 'notify' in ntf_op:
op = family.ops[ntf_op['notify']]
@@ -2775,13 +3297,19 @@ def render_user_family(family, cw, prototype):
cw.block_start(f'{symbol} = ')
cw.p(f'.name\t\t= "{family.c_name}",')
- if family.fixed_header:
+ if family.is_classic():
+ cw.p(f'.is_classic\t= true,')
+ cw.p(f'.classic_id\t= {family.get("protonum")},')
+ if family.is_classic():
+ if family.fixed_header:
+ cw.p(f'.hdr_len\t= sizeof(struct {c_lower(family.fixed_header)}),')
+ elif family.fixed_header:
cw.p(f'.hdr_len\t= sizeof(struct genlmsghdr) + sizeof(struct {c_lower(family.fixed_header)}),')
else:
cw.p('.hdr_len\t= sizeof(struct genlmsghdr),')
if family.ntfs:
- cw.p(f".ntf_info\t= {family['name']}_ntf_info,")
- cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family['name']}_ntf_info),")
+ cw.p(f".ntf_info\t= {family.c_name}_ntf_info,")
+ cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family.c_name}_ntf_info),")
cw.block_end(line=';')
@@ -2888,7 +3416,7 @@ def main():
cw.p(f'#include "{hdr_file}"')
cw.p('#include "ynl.h"')
headers = []
- for definition in parsed['definitions']:
+ for definition in parsed['definitions'] + parsed['attribute-sets']:
if 'header' in definition:
headers.append(definition['header'])
if args.mode == 'user':
@@ -2942,6 +3470,7 @@ def main():
print_kernel_family_struct_hdr(parsed, cw)
else:
print_kernel_policy_ranges(parsed, cw)
+ print_kernel_policy_sparse_enum_validates(parsed, cw)
for _, struct in sorted(parsed.pure_nested_structs.items()):
if struct.request:
@@ -3010,7 +3539,7 @@ def main():
ri = RenderInfo(cw, parsed, args.mode, op, 'dump')
print_req_type(ri)
print_req_type_helpers(ri)
- if not ri.type_consistent:
+ if not ri.type_consistent or ri.type_oneside:
print_rsp_type(ri)
print_wrapped_type(ri)
print_dump_prototype(ri)
@@ -3048,8 +3577,7 @@ def main():
has_recursive_nests = True
if has_recursive_nests:
cw.nl()
- for name in parsed.pure_nested_structs:
- struct = Struct(parsed, name)
+ for struct in parsed.pure_nested_structs.values():
put_typol(cw, struct)
for name in parsed.root_sets:
struct = Struct(parsed, name)
@@ -3088,7 +3616,7 @@ def main():
if 'dump' in op:
cw.p(f"/* {op.enum_name} - dump */")
ri = RenderInfo(cw, parsed, args.mode, op, "dump")
- if not ri.type_consistent:
+ if not ri.type_consistent or ri.type_oneside:
parse_rsp_msg(ri, deref=True)
print_req_free(ri)
print_dump_type_free(ri)
diff --git a/tools/net/ynl/pyynl/ynl_gen_rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py
index 6c56d0d726b4..0cb6348e28d3 100755
--- a/tools/net/ynl/pyynl/ynl_gen_rst.py
+++ b/tools/net/ynl/pyynl/ynl_gen_rst.py
@@ -392,7 +392,7 @@ def parse_arguments() -> argparse.Namespace:
def parse_yaml_file(filename: str) -> str:
- """Transform the YAML specified by filename into a rst-formmated string"""
+ """Transform the YAML specified by filename into an RST-formatted string"""
with open(filename, "r", encoding="utf-8") as spec_file:
yaml_data = yaml.safe_load(spec_file)
content = parse_yaml(yaml_data)
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
index dda6686257a7..7f5fca7682d7 100644
--- a/tools/net/ynl/samples/.gitignore
+++ b/tools/net/ynl/samples/.gitignore
@@ -2,4 +2,8 @@ ethtool
devlink
netdev
ovs
-page-pool \ No newline at end of file
+page-pool
+rt-addr
+rt-link
+rt-route
+tc
diff --git a/tools/net/ynl/samples/devlink.c b/tools/net/ynl/samples/devlink.c
index d2611d7ebab4..ac9dfb01f280 100644
--- a/tools/net/ynl/samples/devlink.c
+++ b/tools/net/ynl/samples/devlink.c
@@ -22,6 +22,7 @@ int main(int argc, char **argv)
ynl_dump_foreach(devs, d) {
struct devlink_info_get_req *info_req;
struct devlink_info_get_rsp *info_rsp;
+ unsigned i;
printf("%s/%s:\n", d->bus_name, d->dev_name);
@@ -34,11 +35,11 @@ int main(int argc, char **argv)
if (!info_rsp)
goto err_free_devs;
- if (info_rsp->_present.info_driver_name_len)
+ if (info_rsp->_len.info_driver_name)
printf(" driver: %s\n", info_rsp->info_driver_name);
- if (info_rsp->n_info_version_running)
+ if (info_rsp->_count.info_version_running)
printf(" running fw:\n");
- for (unsigned i = 0; i < info_rsp->n_info_version_running; i++)
+ for (i = 0; i < info_rsp->_count.info_version_running; i++)
printf(" %s: %s\n",
info_rsp->info_version_running[i].info_version_name,
info_rsp->info_version_running[i].info_version_value);
diff --git a/tools/net/ynl/samples/rt-addr.c b/tools/net/ynl/samples/rt-addr.c
new file mode 100644
index 000000000000..2edde5c36b18
--- /dev/null
+++ b/tools/net/ynl/samples/rt-addr.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-addr-user.h"
+
+static void rt_addr_print(struct rt_addr_getaddr_rsp *a)
+{
+ char ifname[IF_NAMESIZE];
+ char addr_str[64];
+ const char *addr;
+ const char *name;
+
+ name = if_indextoname(a->_hdr.ifa_index, ifname);
+ if (name)
+ printf("%16s: ", name);
+
+ switch (a->_len.address) {
+ case 4:
+ addr = inet_ntop(AF_INET, a->address,
+ addr_str, sizeof(addr_str));
+ break;
+ case 16:
+ addr = inet_ntop(AF_INET6, a->address,
+ addr_str, sizeof(addr_str));
+ break;
+ default:
+ addr = NULL;
+ break;
+ }
+ if (addr)
+ printf("%s", addr);
+ else
+ printf("[%d]", a->_len.address);
+
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct rt_addr_getaddr_list *rsp;
+ struct rt_addr_getaddr_req *req;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_rt_addr_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ req = rt_addr_getaddr_req_alloc();
+ if (!req)
+ goto err_destroy;
+
+ rsp = rt_addr_getaddr_dump(ys, req);
+ rt_addr_getaddr_req_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no addresses reported\n");
+ ynl_dump_foreach(rsp, addr)
+ rt_addr_print(addr);
+ rt_addr_getaddr_list_free(rsp);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/rt-link.c b/tools/net/ynl/samples/rt-link.c
new file mode 100644
index 000000000000..acdd4b4a0f74
--- /dev/null
+++ b/tools/net/ynl/samples/rt-link.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-link-user.h"
+
+static void rt_link_print(struct rt_link_getlink_rsp *r)
+{
+ unsigned int i;
+
+ printf("%3d: ", r->_hdr.ifi_index);
+
+ if (r->_len.ifname)
+ printf("%16s: ", r->ifname);
+
+ if (r->_present.mtu)
+ printf("mtu %5d ", r->mtu);
+
+ if (r->linkinfo._len.kind)
+ printf("kind %-8s ", r->linkinfo.kind);
+ else
+ printf(" %8s ", "");
+
+ if (r->prop_list._count.alt_ifname) {
+ printf("altname ");
+ for (i = 0; i < r->prop_list._count.alt_ifname; i++)
+ printf("%s ", r->prop_list.alt_ifname[i]->str);
+ printf(" ");
+ }
+
+ if (r->linkinfo._present.data && r->linkinfo.data._present.netkit) {
+ struct rt_link_linkinfo_netkit_attrs *netkit;
+ const char *name;
+
+ netkit = &r->linkinfo.data.netkit;
+ printf("primary %d ", netkit->primary);
+
+ name = NULL;
+ if (netkit->_present.policy)
+ name = rt_link_netkit_policy_str(netkit->policy);
+ if (name)
+ printf("policy %s ", name);
+ }
+
+ printf("\n");
+}
+
+static int rt_link_create_netkit(struct ynl_sock *ys)
+{
+ struct rt_link_getlink_ntf *ntf_gl;
+ struct rt_link_newlink_req *req;
+ struct ynl_ntf_base_type *ntf;
+ int ret;
+
+ req = rt_link_newlink_req_alloc();
+ if (!req) {
+ fprintf(stderr, "Can't alloc req\n");
+ return -1;
+ }
+
+ /* rtnetlink doesn't provide info about the created object.
+ * It expects us to set the ECHO flag and the dig the info out
+ * of the notifications...
+ */
+ rt_link_newlink_req_set_nlflags(req, NLM_F_CREATE | NLM_F_ECHO);
+
+ rt_link_newlink_req_set_linkinfo_kind(req, "netkit");
+
+ /* Test error messages */
+ rt_link_newlink_req_set_linkinfo_data_netkit_policy(req, 10);
+ ret = rt_link_newlink(ys, req);
+ if (ret) {
+ printf("Testing error message for policy being bad:\n\t%s\n", ys->err.msg);
+ } else {
+ fprintf(stderr, "Warning: unexpected success creating netkit with bad attrs\n");
+ goto created;
+ }
+
+ rt_link_newlink_req_set_linkinfo_data_netkit_policy(req, NETKIT_DROP);
+
+ ret = rt_link_newlink(ys, req);
+created:
+ rt_link_newlink_req_free(req);
+ if (ret) {
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+ return -1;
+ }
+
+ if (!ynl_has_ntf(ys)) {
+ fprintf(stderr,
+ "Warning: interface created but received no notification, won't delete the interface\n");
+ return 0;
+ }
+
+ ntf = ynl_ntf_dequeue(ys);
+ if (ntf->cmd != RTM_NEWLINK) {
+ fprintf(stderr,
+ "Warning: unexpected notification type, won't delete the interface\n");
+ return 0;
+ }
+ ntf_gl = (void *)ntf;
+ ret = ntf_gl->obj._hdr.ifi_index;
+ ynl_ntf_free(ntf);
+
+ return ret;
+}
+
+static void rt_link_del(struct ynl_sock *ys, int ifindex)
+{
+ struct rt_link_dellink_req *req;
+
+ req = rt_link_dellink_req_alloc();
+ if (!req) {
+ fprintf(stderr, "Can't alloc req\n");
+ return;
+ }
+
+ req->_hdr.ifi_index = ifindex;
+ if (rt_link_dellink(ys, req))
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+ else
+ fprintf(stderr,
+ "Trying to delete a Netkit interface (ifindex %d)\n",
+ ifindex);
+
+ rt_link_dellink_req_free(req);
+}
+
+int main(int argc, char **argv)
+{
+ struct rt_link_getlink_req_dump *req;
+ struct rt_link_getlink_list *rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int created = 0;
+
+ ys = ynl_sock_create(&ynl_rt_link_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ if (argc > 1) {
+ fprintf(stderr, "Trying to create a Netkit interface\n");
+ created = rt_link_create_netkit(ys);
+ if (created < 0)
+ goto err_destroy;
+ }
+
+ req = rt_link_getlink_req_dump_alloc();
+ if (!req)
+ goto err_del_ifc;
+
+ rsp = rt_link_getlink_dump(ys, req);
+ rt_link_getlink_req_dump_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no links reported\n");
+ ynl_dump_foreach(rsp, link)
+ rt_link_print(link);
+ rt_link_getlink_list_free(rsp);
+
+ if (created)
+ rt_link_del(ys, created);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_del_ifc:
+ if (created)
+ rt_link_del(ys, created);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/rt-route.c b/tools/net/ynl/samples/rt-route.c
new file mode 100644
index 000000000000..7427104a96df
--- /dev/null
+++ b/tools/net/ynl/samples/rt-route.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-route-user.h"
+
+static void rt_route_print(struct rt_route_getroute_rsp *r)
+{
+ char ifname[IF_NAMESIZE];
+ char route_str[64];
+ const char *route;
+ const char *name;
+
+ /* Ignore local */
+ if (r->_hdr.rtm_table == RT_TABLE_LOCAL)
+ return;
+
+ if (r->_present.oif) {
+ name = if_indextoname(r->oif, ifname);
+ if (name)
+ printf("oif: %-16s ", name);
+ }
+
+ if (r->_len.dst) {
+ route = inet_ntop(r->_hdr.rtm_family, r->dst,
+ route_str, sizeof(route_str));
+ printf("dst: %s/%d", route, r->_hdr.rtm_dst_len);
+ }
+
+ if (r->_len.gateway) {
+ route = inet_ntop(r->_hdr.rtm_family, r->gateway,
+ route_str, sizeof(route_str));
+ printf("gateway: %s ", route);
+ }
+
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct rt_route_getroute_req_dump *req;
+ struct rt_route_getroute_list *rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_rt_route_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ req = rt_route_getroute_req_dump_alloc();
+ if (!req)
+ goto err_destroy;
+
+ rsp = rt_route_getroute_dump(ys, req);
+ rt_route_getroute_req_dump_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no routeesses reported\n");
+ ynl_dump_foreach(rsp, route)
+ rt_route_print(route);
+ rt_route_getroute_list_free(rsp);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/tc.c b/tools/net/ynl/samples/tc.c
new file mode 100644
index 000000000000..0bfff0fdd792
--- /dev/null
+++ b/tools/net/ynl/samples/tc.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <net/if.h>
+
+#include "tc-user.h"
+
+static void tc_qdisc_print(struct tc_getqdisc_rsp *q)
+{
+ char ifname[IF_NAMESIZE];
+ const char *name;
+
+ name = if_indextoname(q->_hdr.tcm_ifindex, ifname);
+ if (name)
+ printf("%16s: ", name);
+
+ if (q->_len.kind) {
+ printf("%s ", q->kind);
+
+ if (q->options._present.fq_codel) {
+ struct tc_fq_codel_attrs *fq_codel;
+ struct tc_fq_codel_xstats *stats;
+
+ fq_codel = &q->options.fq_codel;
+ stats = q->stats2.app.fq_codel;
+
+ if (fq_codel->_present.limit)
+ printf("limit: %dp ", fq_codel->limit);
+ if (fq_codel->_present.target)
+ printf("target: %dms ",
+ (fq_codel->target + 500) / 1000);
+ if (q->stats2.app._len.fq_codel)
+ printf("new_flow_cnt: %d ",
+ stats->qdisc_stats.new_flow_count);
+ }
+ }
+
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct tc_getqdisc_req_dump *req;
+ struct tc_getqdisc_list *rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_tc_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ req = tc_getqdisc_req_dump_alloc();
+ if (!req)
+ goto err_destroy;
+
+ rsp = tc_getqdisc_dump(ys, req);
+ tc_getqdisc_req_dump_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no addresses reported\n");
+ ynl_dump_foreach(rsp, qdisc)
+ tc_qdisc_print(qdisc);
+ tc_getqdisc_list_free(rsp);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 3ce7b54003c2..98c4713c1b09 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -189,6 +189,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
op2 = ins.opcode.bytes[1];
op3 = ins.opcode.bytes[2];
+ /*
+ * XXX hack, decoder is buggered and thinks 0xea is 7 bytes long.
+ */
+ if (op1 == 0xea) {
+ insn->len = 1;
+ insn->type = INSN_BUG;
+ return 0;
+ }
+
if (ins.rex_prefix.nbytes) {
rex = ins.rex_prefix.bytes[0];
rex_w = X86_REX_W(rex) >> 3;
@@ -842,12 +851,14 @@ int arch_decode_hint_reg(u8 sp_reg, int *base)
bool arch_is_retpoline(struct symbol *sym)
{
- return !strncmp(sym->name, "__x86_indirect_", 15);
+ return !strncmp(sym->name, "__x86_indirect_", 15) ||
+ !strncmp(sym->name, "__pi___x86_indirect_", 20);
}
bool arch_is_rethunk(struct symbol *sym)
{
- return !strcmp(sym->name, "__x86_return_thunk");
+ return !strcmp(sym->name, "__x86_return_thunk") ||
+ !strcmp(sym->name, "__pi___x86_return_thunk");
}
bool arch_is_embedded_insn(struct symbol *sym)
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 727a3a4fd9d7..ca5d77db692a 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -572,6 +572,34 @@ err:
return -1;
}
+static int mark_group_syms(struct elf *elf)
+{
+ struct section *symtab, *sec;
+ struct symbol *sym;
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (!symtab) {
+ ERROR("no .symtab");
+ return -1;
+ }
+
+ list_for_each_entry(sec, &elf->sections, list) {
+ if (sec->sh.sh_type == SHT_GROUP &&
+ sec->sh.sh_link == symtab->idx) {
+ sym = find_symbol_by_index(elf, sec->sh.sh_info);
+ if (!sym) {
+ ERROR("%s: can't find SHT_GROUP signature symbol",
+ sec->name);
+ return -1;
+ }
+
+ sym->group_sec = sec;
+ }
+ }
+
+ return 0;
+}
+
/*
* @sym's idx has changed. Update the relocs which reference it.
*/
@@ -745,7 +773,7 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
/*
* Move the first global symbol, as per sh_info, into a new, higher
- * symbol index. This fees up a spot for a new local symbol.
+ * symbol index. This frees up a spot for a new local symbol.
*/
first_non_local = symtab->sh.sh_info;
old = find_symbol_by_index(elf, first_non_local);
@@ -763,6 +791,11 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
if (elf_update_sym_relocs(elf, old))
return NULL;
+ if (old->group_sec) {
+ old->group_sec->sh.sh_info = new_idx;
+ mark_sec_changed(elf, old->group_sec, true);
+ }
+
new_idx = first_non_local;
}
@@ -1035,6 +1068,9 @@ struct elf *elf_open_read(const char *name, int flags)
if (read_symbols(elf))
goto err;
+ if (mark_group_syms(elf))
+ goto err;
+
if (read_relocs(elf))
goto err;
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index c7c4e87ebe88..0a2fa3ac0079 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -72,6 +72,7 @@ struct symbol {
u8 ignore : 1;
struct list_head pv_target;
struct reloc *relocs;
+ struct section *group_sec;
};
struct reloc {
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 279ab2ab4abe..b558ab98719f 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -3,6 +3,7 @@ perf-bench-y += sched-pipe.o
perf-bench-y += sched-seccomp-notify.o
perf-bench-y += syscall.o
perf-bench-y += mem-functions.o
+perf-bench-y += futex.o
perf-bench-y += futex-hash.o
perf-bench-y += futex-wake.o
perf-bench-y += futex-wake-parallel.o
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index b472eded521b..fdf133c9520f 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -18,9 +18,11 @@
#include <stdlib.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/prctl.h>
#include <linux/zalloc.h>
#include <sys/time.h>
#include <sys/mman.h>
+#include <sys/prctl.h>
#include <perf/cpumap.h>
#include "../util/mutex.h"
@@ -50,9 +52,12 @@ struct worker {
static struct bench_futex_parameters params = {
.nfutexes = 1024,
.runtime = 10,
+ .nbuckets = -1,
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
+ OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
OPT_UINTEGER('f', "futexes", &params.nfutexes, "Specify amount of futexes per threads"),
@@ -118,6 +123,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
(int)bench__runtime.tv_sec);
+ futex_print_nbuckets(&params);
}
int bench_futex_hash(int argc, const char **argv)
@@ -161,6 +167,7 @@ int bench_futex_hash(int argc, const char **argv)
if (!params.fshared)
futex_flag = FUTEX_PRIVATE_FLAG;
+ futex_set_nbuckets_param(&params);
printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime);
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 0416120c091b..5144a158512c 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -41,10 +41,13 @@ static struct stats throughput_stats;
static struct cond thread_parent, thread_worker;
static struct bench_futex_parameters params = {
+ .nbuckets = -1,
.runtime = 10,
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
+ OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
OPT_BOOLEAN( 'M', "multi", &params.multi, "Use multiple futexes"),
@@ -67,6 +70,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
(int)bench__runtime.tv_sec);
+ futex_print_nbuckets(&params);
}
static void toggle_done(int sig __maybe_unused,
@@ -203,6 +207,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
mutex_init(&thread_lock);
cond_init(&thread_parent);
cond_init(&thread_worker);
+ futex_set_nbuckets_param(&params);
threads_starting = params.nthreads;
gettimeofday(&bench__start, NULL);
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index aad5bfc4fe18..a2f91ee1950b 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -42,6 +42,7 @@ static unsigned int threads_starting;
static int futex_flag = 0;
static struct bench_futex_parameters params = {
+ .nbuckets = -1,
/*
* How many tasks to requeue at a time.
* Default to 1 in order to make the kernel work more.
@@ -50,6 +51,8 @@ static struct bench_futex_parameters params = {
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
+ OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('q', "nrequeue", &params.nrequeue, "Specify amount of threads to requeue at once"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
@@ -77,6 +80,7 @@ static void print_summary(void)
params.nthreads,
requeuetime_avg / USEC_PER_MSEC,
rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
+ futex_print_nbuckets(&params);
}
static void *workerfn(void *arg __maybe_unused)
@@ -204,6 +208,8 @@ int bench_futex_requeue(int argc, const char **argv)
if (params.broadcast)
params.nrequeue = params.nthreads;
+ futex_set_nbuckets_param(&params);
+
printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
"%d at a time.\n\n", getpid(), params.nthreads,
params.fshared ? "shared":"private", &futex1,
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 4352e318631e..ee66482c29fd 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -57,9 +57,13 @@ static struct stats waketime_stats, wakeup_stats;
static unsigned int threads_starting;
static int futex_flag = 0;
-static struct bench_futex_parameters params;
+static struct bench_futex_parameters params = {
+ .nbuckets = -1,
+};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
+ OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakers", &params.nwakes, "Specify amount of waking threads"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
@@ -218,6 +222,7 @@ static void print_summary(void)
params.nthreads,
waketime_avg / USEC_PER_MSEC,
rel_stddev_stats(waketime_stddev, waketime_avg));
+ futex_print_nbuckets(&params);
}
@@ -291,6 +296,8 @@ int bench_futex_wake_parallel(int argc, const char **argv)
if (!params.fshared)
futex_flag = FUTEX_PRIVATE_FLAG;
+ futex_set_nbuckets_param(&params);
+
printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
"futex %p), %d threads waking up %d at a time.\n\n",
getpid(), params.nthreads, params.fshared ? "shared":"private",
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 49b3c89b0b35..8d6107f7cd94 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -42,6 +42,7 @@ static unsigned int threads_starting;
static int futex_flag = 0;
static struct bench_futex_parameters params = {
+ .nbuckets = -1,
/*
* How many wakeups to do at a time.
* Default to 1 in order to make the kernel work more.
@@ -50,6 +51,8 @@ static struct bench_futex_parameters params = {
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
+ OPT_BOOLEAN( 'I', "immutable", &params.buckets_immutable, "Make the hash buckets immutable"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakes", &params.nwakes, "Specify amount of threads to wake at once"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
@@ -93,6 +96,7 @@ static void print_summary(void)
params.nthreads,
waketime_avg / USEC_PER_MSEC,
rel_stddev_stats(waketime_stddev, waketime_avg));
+ futex_print_nbuckets(&params);
}
static void block_threads(pthread_t *w, struct perf_cpu_map *cpu)
diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c
new file mode 100644
index 000000000000..26382e4d8d4c
--- /dev/null
+++ b/tools/perf/bench/futex.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/prctl.h>
+#include <sys/prctl.h>
+
+#include "futex.h"
+
+void futex_set_nbuckets_param(struct bench_futex_parameters *params)
+{
+ unsigned long flags;
+ int ret;
+
+ if (params->nbuckets < 0)
+ return;
+
+ flags = params->buckets_immutable ? FH_FLAG_IMMUTABLE : 0;
+ ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, flags);
+ if (ret) {
+ printf("Requesting %d hash buckets failed: %d/%m\n",
+ params->nbuckets, ret);
+ err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
+ }
+}
+
+void futex_print_nbuckets(struct bench_futex_parameters *params)
+{
+ char *futex_hash_mode;
+ int ret;
+
+ ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
+ if (params->nbuckets >= 0) {
+ if (ret != params->nbuckets) {
+ if (ret < 0) {
+ printf("Can't query number of buckets: %m\n");
+ err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
+ }
+ printf("Requested number of hash buckets does not currently used.\n");
+ printf("Requested: %d in usage: %d\n", params->nbuckets, ret);
+ err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
+ }
+ if (params->nbuckets == 0) {
+ ret = asprintf(&futex_hash_mode, "Futex hashing: global hash");
+ } else {
+ ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE);
+ if (ret < 0) {
+ printf("Can't check if the hash is immutable: %m\n");
+ err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
+ }
+ ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets %s",
+ params->nbuckets,
+ ret == 1 ? "(immutable)" : "");
+ }
+ } else {
+ if (ret <= 0) {
+ ret = asprintf(&futex_hash_mode, "Futex hashing: global hash");
+ } else {
+ ret = asprintf(&futex_hash_mode, "Futex hashing: auto resized to %d buckets",
+ ret);
+ }
+ }
+ if (ret < 0)
+ err(EXIT_FAILURE, "ENOMEM, futex_hash_mode");
+ printf("%s\n", futex_hash_mode);
+ free(futex_hash_mode);
+}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index ebdc2b032afc..9c9a73f9d865 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -25,6 +25,8 @@ struct bench_futex_parameters {
unsigned int nfutexes;
unsigned int nwakes;
unsigned int nrequeue;
+ int nbuckets;
+ bool buckets_immutable;
};
/**
@@ -143,4 +145,7 @@ futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
val, opflags);
}
+void futex_set_nbuckets_param(struct bench_futex_parameters *params);
+void futex_print_nbuckets(struct bench_futex_parameters *params);
+
#endif /* _FUTEX_H */
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 857f6646cc23..e9fab20e9330 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -186,7 +186,7 @@ done
# diff with extra ignore lines
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
-check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
+check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"'
check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c
index 9d0ce88e90e4..456ce64ad822 100644
--- a/tools/perf/util/amd-sample-raw.c
+++ b/tools/perf/util/amd-sample-raw.c
@@ -9,7 +9,7 @@
#include <inttypes.h>
#include <linux/string.h>
-#include "../../arch/x86/include/asm/amd-ibs.h"
+#include "../../arch/x86/include/asm/amd/ibs.h"
#include "debug.h"
#include "session.h"
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c
index 68b9ea86b86c..af0e558f231c 100644
--- a/tools/power/acpi/common/cmfsize.c
+++ b/tools/power/acpi/common/cmfsize.c
@@ -3,7 +3,7 @@
*
* Module Name: cmfsize - Common get file size function
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c
index 6a0cdba6fdfd..3d63626d80e7 100644
--- a/tools/power/acpi/common/getopt.c
+++ b/tools/power/acpi/common/getopt.c
@@ -3,7 +3,7 @@
*
* Module Name: getopt
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index 9d70d8c945af..9741e7503591 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -3,7 +3,7 @@
*
* Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -19,7 +19,7 @@ ACPI_MODULE_NAME("oslinuxtbl")
typedef struct osl_table_info {
struct osl_table_info *next;
u32 instance;
- char signature[ACPI_NAMESEG_SIZE];
+ char signature[ACPI_NAMESEG_SIZE] ACPI_NONSTRING;
} osl_table_info;
diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c
index 39f3bffd9355..b9bb83116549 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixdir.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixdir - Unix directory access interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c
index 2b7d56252684..b93ebc9371a5 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixmap.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixmap - Unix OSL for file mappings
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 46429417c71a..36f27491713c 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -3,7 +3,7 @@
*
* Module Name: osunixxf - UNIX OSL interfaces
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h
index 643e3e722340..fe0d23c4f2ed 100644
--- a/tools/power/acpi/tools/acpidump/acpidump.h
+++ b/tools/power/acpi/tools/acpidump/acpidump.h
@@ -3,7 +3,7 @@
*
* Module Name: acpidump.h - Include file for acpi_dump utility
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index 0742b00b61a1..bf30143efbdc 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -3,7 +3,7 @@
*
* Module Name: apdump - Dump routines for ACPI tables (acpidump)
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
index 13817f9112c0..75db0091e275 100644
--- a/tools/power/acpi/tools/acpidump/apfiles.c
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -3,7 +3,7 @@
*
* Module Name: apfiles - File-related functions for acpidump utility
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
@@ -103,7 +103,7 @@ int ap_open_output_file(char *pathname)
int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance)
{
- char filename[ACPI_NAMESEG_SIZE + 16];
+ char filename[ACPI_NAMESEG_SIZE + 16] ACPI_NONSTRING;
char instance_str[16];
ACPI_FILE file;
acpi_size actual;
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index 666a9675e743..9f3850e3af5b 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -3,7 +3,7 @@
*
* Module Name: apmain - Main module for the acpidump utility
*
- * Copyright (C) 2000 - 2023, Intel Corp.
+ * Copyright (C) 2000 - 2025, Intel Corp.
*
*****************************************************************************/
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 835123add0ed..be8dfac14076 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -2,6 +2,7 @@
# Makefile for cpupower
#
# Copyright (C) 2005,2006 Dominik Brodowski <linux@dominikbrodowski.net>
+# Copyright (C) 2025 Francesco Poli <invernomuto@paranoici.org>
#
# Based largely on the Makefile for udev by:
#
@@ -71,6 +72,7 @@ bindir ?= /usr/bin
sbindir ?= /usr/sbin
mandir ?= /usr/man
libdir ?= /usr/lib
+libexecdir ?= /usr/libexec
includedir ?= /usr/include
localedir ?= /usr/share/locale
docdir ?= /usr/share/doc/packages/cpupower
@@ -83,6 +85,7 @@ CP = cp -fpR
INSTALL = /usr/bin/install -c
INSTALL_PROGRAM = ${INSTALL}
INSTALL_DATA = ${INSTALL} -m 644
+SETPERM_DATA = chmod 644
#bash completion scripts get sourced and so they should be rw only.
INSTALL_SCRIPT = ${INSTALL} -m 644
@@ -302,6 +305,13 @@ install-tools: $(OUTPUT)cpupower
$(INSTALL_PROGRAM) $(OUTPUT)cpupower $(DESTDIR)${bindir}
$(INSTALL) -d $(DESTDIR)${bash_completion_dir}
$(INSTALL_SCRIPT) cpupower-completion.sh '$(DESTDIR)${bash_completion_dir}/cpupower'
+ $(INSTALL) -d $(DESTDIR)${confdir}
+ $(INSTALL_DATA) cpupower-service.conf '$(DESTDIR)${confdir}'
+ $(INSTALL) -d $(DESTDIR)${libexecdir}
+ $(INSTALL_PROGRAM) cpupower.sh '$(DESTDIR)${libexecdir}/cpupower'
+ $(INSTALL) -d $(DESTDIR)${libdir}/systemd/system
+ sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${libdir}/systemd/system/cpupower.service'
+ $(SETPERM_DATA) '$(DESTDIR)${libdir}/systemd/system/cpupower.service'
install-man:
$(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1
@@ -336,6 +346,9 @@ uninstall:
- rm -f $(DESTDIR)${includedir}/cpufreq.h
- rm -f $(DESTDIR)${includedir}/cpuidle.h
- rm -f $(DESTDIR)${bindir}/utils/cpupower
+ - rm -f $(DESTDIR)${confdir}cpupower-service.conf
+ - rm -f $(DESTDIR)${libexecdir}/cpupower
+ - rm -f $(DESTDIR)${libdir}/systemd/system/cpupower.service
- rm -f $(DESTDIR)${mandir}/man1/cpupower.1
- rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1
- rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1
diff --git a/tools/power/cpupower/README b/tools/power/cpupower/README
index 2678ed81d311..9de449469568 100644
--- a/tools/power/cpupower/README
+++ b/tools/power/cpupower/README
@@ -59,6 +59,10 @@ $ sudo make install
-----------------------------------------------------------------------
| man pages | /usr/man |
-----------------------------------------------------------------------
+| systemd service | /usr/lib/systemd/system |
+-----------------------------------------------------------------------
+| systemd support script | /usr/libexec |
+-----------------------------------------------------------------------
To put it in other words it makes build results available system-wide,
enabling any user to simply start using it without any additional steps
@@ -109,6 +113,10 @@ The files will be installed to the following dirs:
-----------------------------------------------------------------------
| man pages | ${DESTDIR}/usr/man |
-----------------------------------------------------------------------
+| systemd service | ${DESTDIR}/usr/lib/systemd/system |
+-----------------------------------------------------------------------
+| systemd support script | ${DESTDIR}/usr/libexec |
+-----------------------------------------------------------------------
If you look at the table for the default 'make' output dirs you will
notice that the only difference with the non-default case is the
@@ -173,6 +181,26 @@ The issue is that binary cannot find the 'libcpupower' library. So, we
shall point to the lib dir:
sudo LD_LIBRARY_PATH=lib64/ ./bin/cpupower
+systemd service
+---------------
+
+A systemd service is also provided to run the cpupower utility at boot with
+settings read from a configuration file.
+
+If you want systemd to find the new service after the installation, the service
+unit must have been installed in one of the system unit search path directories
+(such as '/usr/lib/systemd/system/', which is the default location) and (unless
+you are willing to wait for the next reboot) you need to issue the following
+command:
+
+$ sudo systemctl daemon-reload
+
+If you want to enable this systemd service, edit '/etc/cpupower-service.conf'
+(uncommenting at least one of the options, depending on your preferences)
+and then issue the following command:
+
+$ sudo systemctl enable --now cpupower.service
+
THANKS
------
diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile
index 741f21477432..81db39a03efb 100644
--- a/tools/power/cpupower/bindings/python/Makefile
+++ b/tools/power/cpupower/bindings/python/Makefile
@@ -1,22 +1,20 @@
# SPDX-License-Identifier: GPL-2.0-only
# Makefile for libcpupower's Python bindings
#
-# This Makefile expects you have already run the makefile for cpupower to build
-# the .o files in the lib directory for the bindings to be created.
+# This Makefile expects you have already run `make install-lib` in the lib
+# directory for the bindings to be created.
CC := gcc
HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi)
HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi)
-LIB_DIR := ../../lib
PY_INCLUDE = $(firstword $(shell python-config --includes))
-OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o)
INSTALL_DIR = $(shell python3 -c "import site; print(site.getsitepackages()[0])")
all: _raw_pylibcpupower.so
_raw_pylibcpupower.so: raw_pylibcpupower_wrap.o
- $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so
+ $(CC) -shared -lcpupower raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so
raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c
$(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE)
diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README
index 952e2e02fd32..2a4896b648b7 100644
--- a/tools/power/cpupower/bindings/python/README
+++ b/tools/power/cpupower/bindings/python/README
@@ -5,18 +5,21 @@ libcpupower (aside from the libcpupower object files).
requirements
------------
-* You need the object files in the libcpupower directory compiled by
-cpupower's makefile.
+* If you are building completely from upstream; please install libcpupower by
+running `make install-lib` within the cpupower directory. This installs the
+libcpupower.so file and symlinks needed. Otherwise, please make sure a symlink
+to libcpupower.so exists in your library path from your distribution's
+packages.
* The SWIG program must be installed.
-* The Python's development libraries installed.
+* The Python's development libraries must be installed.
Please check that your version of SWIG is compatible with the version of Python
installed on your machine by checking the SWIG changelog on their website.
https://swig.org/
Note that while SWIG itself is GPL v3+ licensed; the resulting output,
-the bindings code: is permissively licensed + the license of libcpupower's .o
-files. For these bindings that means GPL v2.
+the bindings code: is permissively licensed + the license of libcpupower's
+library files. For these bindings that means GPL v2.
Please see https://swig.org/legal.html and the discussion [1] for more details.
diff --git a/tools/power/cpupower/cpupower-service.conf b/tools/power/cpupower/cpupower-service.conf
new file mode 100644
index 000000000000..02eabe8e3614
--- /dev/null
+++ b/tools/power/cpupower/cpupower-service.conf
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2012, Sébastien Luttringer
+# Copyright (C) 2024-2025, Francesco Poli <invernomuto@paranoici.org>
+
+# Configuration file for cpupower.service systemd service unit
+#
+# Edit this file (uncommenting at least one of the options, depending on
+# your preferences) and then enable cpupower.service, if you want cpupower
+# to run at boot with these settings.
+
+# --- CPU clock frequency ---
+
+# Define CPU governor
+# Valid governors: ondemand, performance, powersave, conservative, userspace
+#GOVERNOR='ondemand'
+
+# Limit frequency range
+# Valid suffixes: Hz, kHz (default), MHz, GHz, THz
+#MIN_FREQ="2.25GHz"
+#MAX_FREQ="3GHz"
+
+# Set a specific frequency
+# Requires userspace governor to be available.
+# If this option is set, all the previous frequency options are ignored
+#FREQ=
+
+# --- CPU policy ---
+
+# Set a register on supported Intel processore which allows software to convey
+# its policy for the relative importance of performance versus energy savings to
+# the processor. See man CPUPOWER-SET(1) for additional details
+#PERF_BIAS=
diff --git a/tools/power/cpupower/cpupower.service.in b/tools/power/cpupower/cpupower.service.in
new file mode 100644
index 000000000000..fbd5b8c14270
--- /dev/null
+++ b/tools/power/cpupower/cpupower.service.in
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2012-2020, Sébastien Luttringer
+# Copyright (C) 2024-2025, Francesco Poli <invernomuto@paranoici.org>
+
+[Unit]
+Description=Apply cpupower configuration
+ConditionVirtualization=!container
+
+[Service]
+Type=oneshot
+EnvironmentFile=-___CDIR___cpupower-service.conf
+ExecStart=___LDIR___/cpupower
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
diff --git a/tools/power/cpupower/cpupower.sh b/tools/power/cpupower/cpupower.sh
new file mode 100644
index 000000000000..a37dd4cfdb2b
--- /dev/null
+++ b/tools/power/cpupower/cpupower.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2012, Sébastien Luttringer
+# Copyright (C) 2024, Francesco Poli <invernomuto@paranoici.org>
+
+ESTATUS=0
+
+# apply CPU clock frequency options
+if test -n "$FREQ"
+then
+ cpupower frequency-set -f "$FREQ" > /dev/null || ESTATUS=1
+elif test -n "${GOVERNOR}${MIN_FREQ}${MAX_FREQ}"
+then
+ cpupower frequency-set \
+ ${GOVERNOR:+ -g "$GOVERNOR"} \
+ ${MIN_FREQ:+ -d "$MIN_FREQ"} ${MAX_FREQ:+ -u "$MAX_FREQ"} \
+ > /dev/null || ESTATUS=1
+fi
+
+# apply CPU policy options
+if test -n "$PERF_BIAS"
+then
+ cpupower set -b "$PERF_BIAS" > /dev/null || ESTATUS=1
+fi
+
+exit $ESTATUS
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index e2261f33a082..1555b51a7d55 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -4017,7 +4017,8 @@ def parseKernelLog(data):
'PM: early restore of devices complete after.*'],
'resume_complete': ['PM: resume of devices complete after.*',
'PM: restore of devices complete after.*'],
- 'post_resume': [r'.*Restarting tasks \.\.\..*'],
+ 'post_resume': [r'.*Restarting tasks \.\.\..*',
+ 'Done restarting tasks.*'],
}
# action table (expected events that occur and show up in dmesg)
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index eaa420ac848d..0ce251b8d466 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -16,7 +16,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.22";
+static const char *version_str = "v1.23";
static const int supported_api_ver = 3;
static struct isst_if_platform_info isst_platform_info;
@@ -26,6 +26,7 @@ static FILE *outf;
static int cpu_model;
static int cpu_stepping;
+static int extended_family;
#define MAX_CPUS_IN_ONE_REQ 512
static short max_target_cpus;
@@ -143,6 +144,14 @@ int is_icx_platform(void)
return 0;
}
+static int is_dmr_plus_platform(void)
+{
+ if (extended_family == 0x04)
+ return 1;
+
+ return 0;
+}
+
static int update_cpu_model(void)
{
unsigned int ebx, ecx, edx;
@@ -150,6 +159,7 @@ static int update_cpu_model(void)
__cpuid(1, fms, ebx, ecx, edx);
family = (fms >> 8) & 0xf;
+ extended_family = (fms >> 20) & 0x0f;
cpu_model = (fms >> 4) & 0xf;
if (family == 6 || family == 0xf)
cpu_model += ((fms >> 16) & 0xf) << 4;
@@ -1517,7 +1527,8 @@ display_result:
usleep(2000);
/* Adjusting uncore freq */
- isst_adjust_uncore_freq(id, tdp_level, &ctdp_level);
+ if (!is_dmr_plus_platform())
+ isst_adjust_uncore_freq(id, tdp_level, &ctdp_level);
fprintf(stderr, "Option is set to online/offline\n");
ctdp_level.core_cpumask_size =
diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
index da53aaa27fc9..4f389e1c0525 100644
--- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c
+++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
@@ -227,6 +227,7 @@ static int tpmi_get_ctdp_control(struct isst_id *id, int config_index,
static int tpmi_get_tdp_info(struct isst_id *id, int config_index,
struct isst_pkg_ctdp_level_info *ctdp_level)
{
+ struct isst_perf_level_fabric_info fabric_info;
struct isst_perf_level_data_info info;
int ret;
@@ -253,6 +254,17 @@ static int tpmi_get_tdp_info(struct isst_id *id, int config_index,
ctdp_level->uncore_p1 = info.p1_fabric_freq_mhz;
ctdp_level->uncore_pm = info.pm_fabric_freq_mhz;
+ fabric_info.socket_id = id->pkg;
+ fabric_info.power_domain_id = id->punit;
+ fabric_info.level = config_index;
+
+ ret = tpmi_process_ioctl(ISST_IF_GET_PERF_LEVEL_FABRIC_INFO, &fabric_info);
+ if (ret != -1) {
+ ctdp_level->uncore1_p0 = fabric_info.p0_fabric_freq_mhz[1];
+ ctdp_level->uncore1_p1 = fabric_info.p1_fabric_freq_mhz[1];
+ ctdp_level->uncore1_pm = fabric_info.pm_fabric_freq_mhz[1];
+ }
+
debug_printf
("cpu:%d ctdp:%d CONFIG_TDP_GET_TDP_INFO tdp_ratio:%d pkg_tdp:%d ctdp_level->t_proc_hot:%d\n",
id->cpu, config_index, ctdp_level->tdp_ratio, ctdp_level->pkg_tdp,
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index da5a59a4c545..e4884eb02837 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -460,6 +460,26 @@ void isst_ctdp_display_information(struct isst_id *id, FILE *outf, int tdp_level
format_and_print(outf, level + 2, header, value);
}
+ if (ctdp_level->uncore1_p1) {
+ snprintf(header, sizeof(header), "uncore-1-frequency-base(MHz)");
+ snprintf(value, sizeof(value), "%d",
+ ctdp_level->uncore1_p1 * isst_get_disp_freq_multiplier());
+ format_and_print(outf, level + 2, header, value);
+ }
+ if (ctdp_level->uncore1_pm) {
+ snprintf(header, sizeof(header), "uncore-1-frequency-min(MHz)");
+ snprintf(value, sizeof(value), "%d",
+ ctdp_level->uncore1_pm * isst_get_disp_freq_multiplier());
+ format_and_print(outf, level + 2, header, value);
+ }
+
+ if (ctdp_level->uncore1_p0) {
+ snprintf(header, sizeof(header), "uncore-1-frequency-max(MHz)");
+ snprintf(value, sizeof(value), "%d",
+ ctdp_level->uncore1_p0 * isst_get_disp_freq_multiplier());
+ format_and_print(outf, level + 2, header, value);
+ }
+
if (ctdp_level->mem_freq) {
snprintf(header, sizeof(header), "max-mem-frequency(MHz)");
snprintf(value, sizeof(value), "%d",
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 39ee75677c2c..960f647cfc2d 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -147,6 +147,9 @@ struct isst_pkg_ctdp_level_info {
int uncore_p0;
int uncore_p1;
int uncore_pm;
+ int uncore1_p0;
+ int uncore1_p1;
+ int uncore1_pm;
int sse_p1;
int avx2_p1;
int avx512_p1;
diff --git a/tools/sched_ext/Makefile b/tools/sched_ext/Makefile
index ca3815e572d8..d68780e2e03d 100644
--- a/tools/sched_ext/Makefile
+++ b/tools/sched_ext/Makefile
@@ -61,8 +61,8 @@ SCXOBJ_DIR := $(OBJ_DIR)/sched_ext
BINDIR := $(OUTPUT_DIR)/bin
BPFOBJ := $(BPFOBJ_DIR)/libbpf.a
ifneq ($(CROSS_COMPILE),)
-HOST_BUILD_DIR := $(OBJ_DIR)/host
-HOST_OUTPUT_DIR := host-tools
+HOST_BUILD_DIR := $(OBJ_DIR)/host/obj
+HOST_OUTPUT_DIR := $(OBJ_DIR)/host
HOST_INCLUDE_DIR := $(HOST_OUTPUT_DIR)/include
else
HOST_BUILD_DIR := $(OBJ_DIR)
@@ -98,7 +98,7 @@ ifneq ($(LLVM),)
CFLAGS += -Wno-unused-command-line-argument
endif
-LDFLAGS = -lelf -lz -lpthread
+LDFLAGS += -lelf -lz -lpthread
IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
@@ -136,14 +136,25 @@ $(MAKE_DIRS):
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(APIDIR)/linux/bpf.h \
| $(OBJ_DIR)/libbpf
- $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(OBJ_DIR)/libbpf/ \
+ $(Q)$(MAKE) $(submake_extras) CROSS_COMPILE=$(CROSS_COMPILE) \
+ -C $(BPFDIR) OUTPUT=$(OBJ_DIR)/libbpf/ \
EXTRA_CFLAGS='-g -O0 -fPIC' \
+ LDFLAGS="$(LDFLAGS)" \
DESTDIR=$(OUTPUT_DIR) prefix= all install_headers
+$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ $(APIDIR)/linux/bpf.h \
+ | $(HOST_BUILD_DIR)/libbpf
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
+ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
+ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD=$(HOSTLD) \
+ EXTRA_CFLAGS='-g -O0 -fPIC' \
+ DESTDIR=$(HOST_OUTPUT_DIR) prefix= all install_headers
+
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
- ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \
+ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD=$(HOSTLD) \
EXTRA_CFLAGS='-g -O0' \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
@@ -185,7 +196,7 @@ $(addprefix $(BINDIR)/,$(c-sched-targets)): \
$(SCX_COMMON_DEPS)
$(eval sched=$(notdir $@))
$(CC) $(CFLAGS) -c $(sched).c -o $(SCXOBJ_DIR)/$(sched).o
- $(CC) -o $@ $(SCXOBJ_DIR)/$(sched).o $(HOST_BPFOBJ) $(LDFLAGS)
+ $(CC) -o $@ $(SCXOBJ_DIR)/$(sched).o $(BPFOBJ) $(LDFLAGS)
$(c-sched-targets): %: $(BINDIR)/%
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index 8787048c6762..d4e21558e982 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -48,6 +48,8 @@ static inline void ___vmlinux_h_sanity_check___(void)
s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym;
s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym;
+s32 scx_bpf_select_cpu_and(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+ const struct cpumask *cpus_allowed, u64 flags) __ksym __weak;
void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak;
void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index 26c40ca4f36c..c3cd9a17d48e 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -784,8 +784,8 @@ static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST));
bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING",
scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING));
- bpf_printk("%35s: %lld", "SCX_EV_ENQ_SLICE_DFL",
- scx_read_event(&events, SCX_EV_ENQ_SLICE_DFL));
+ bpf_printk("%35s: %lld", "SCX_EV_REFILL_SLICE_DFL",
+ scx_read_event(&events, SCX_EV_REFILL_SLICE_DFL));
bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION",
scx_read_event(&events, SCX_EV_BYPASS_DURATION));
bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH",
diff --git a/tools/sched_ext/scx_show_state.py b/tools/sched_ext/scx_show_state.py
index b800d4f5f2e9..7cdcc6729ea4 100644
--- a/tools/sched_ext/scx_show_state.py
+++ b/tools/sched_ext/scx_show_state.py
@@ -24,19 +24,19 @@ def read_atomic(name):
def read_static_key(name):
return prog[name].key.enabled.counter.value_()
-def ops_state_str(state):
- return prog['scx_ops_enable_state_str'][state].string_().decode()
+def state_str(state):
+ return prog['scx_enable_state_str'][state].string_().decode()
ops = prog['scx_ops']
-enable_state = read_atomic("scx_ops_enable_state_var")
+enable_state = read_atomic("scx_enable_state_var")
print(f'ops : {ops.name.string_().decode()}')
-print(f'enabled : {read_static_key("__scx_ops_enabled")}')
+print(f'enabled : {read_static_key("__scx_enabled")}')
print(f'switching_all : {read_int("scx_switching_all")}')
print(f'switched_all : {read_static_key("__scx_switched_all")}')
-print(f'enable_state : {ops_state_str(enable_state)} ({enable_state})')
+print(f'enable_state : {state_str(enable_state)} ({enable_state})')
print(f'in_softlockup : {prog["scx_in_softlockup"].value_()}')
-print(f'breather_depth: {read_atomic("scx_ops_breather_depth")}')
-print(f'bypass_depth : {prog["scx_ops_bypass_depth"].value_()}')
+print(f'breather_depth: {read_atomic("scx_breather_depth")}')
+print(f'bypass_depth : {prog["scx_bypass_depth"].value_()}')
print(f'nr_rejected : {read_atomic("scx_nr_rejected")}')
print(f'enable_seq : {read_atomic("scx_enable_seq")}')
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c
index 35ea65c54ffa..827507844e8f 100644
--- a/tools/testing/crypto/chacha20-s390/test-cipher.c
+++ b/tools/testing/crypto/chacha20-s390/test-cipher.c
@@ -50,7 +50,7 @@ struct skcipher_def {
/* Perform cipher operations with the chacha lib */
static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
{
- u32 chacha_state[CHACHA_STATE_WORDS];
+ struct chacha_state chacha_state;
u8 iv[16], key[32];
u64 start, end;
@@ -66,10 +66,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
}
/* Encrypt */
- chacha_init(chacha_state, (u32 *)key, iv);
+ chacha_init(&chacha_state, (u32 *)key, iv);
start = ktime_get_ns();
- chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20);
+ chacha_crypt_arch(&chacha_state, cipher, plain, data_size, 20);
end = ktime_get_ns();
@@ -81,10 +81,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
pr_info("lib encryption took: %lld nsec", end - start);
/* Decrypt */
- chacha_init(chacha_state, (u32 *)key, iv);
+ chacha_init(&chacha_state, (u32 *)key, iv);
start = ktime_get_ns();
- chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20);
+ chacha_crypt_arch(&chacha_state, revert, cipher, data_size, 20);
end = ktime_get_ns();
if (debug)
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index 422e186cf3cf..e70c502a16df 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -10,6 +10,7 @@ CONFIG_KUNIT_EXAMPLE_TEST=y
CONFIG_KUNIT_ALL_TESTS=y
CONFIG_FORTIFY_SOURCE=y
+CONFIG_INIT_STACK_ALL_PATTERN=y
CONFIG_IIO=y
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
index 10ff65689dd8..80fa4e354a17 100644
--- a/tools/testing/kunit/kunit_json.py
+++ b/tools/testing/kunit/kunit_json.py
@@ -39,10 +39,20 @@ def _get_group_json(test: Test, common_fields: JsonObj) -> JsonObj:
status = _status_map.get(subtest.status, "FAIL")
test_cases.append({"name": subtest.name, "status": status})
+ test_counts = test.counts
+ counts_json = {
+ "tests": test_counts.total(),
+ "passed": test_counts.passed,
+ "failed": test_counts.failed,
+ "crashed": test_counts.crashed,
+ "skipped": test_counts.skipped,
+ "errors": test_counts.errors,
+ }
test_group = {
"name": test.name,
"sub_groups": sub_groups,
"test_cases": test_cases,
+ "misc": counts_json
}
test_group.update(common_fields)
return test_group
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index d3f39bc1ceec..260d8d9aa1db 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -14,6 +14,7 @@ import os
import shlex
import shutil
import signal
+import sys
import threading
from typing import Iterator, List, Optional, Tuple
from types import FrameType
@@ -201,6 +202,13 @@ def _default_qemu_config_path(arch: str) -> str:
return config_path
options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')]
+
+ if arch == 'help':
+ print('um')
+ for option in options:
+ print(option)
+ sys.exit()
+
raise ConfigError(arch + ' is not a valid arch, options are ' + str(sorted(options)))
def _get_qemu_ops(config_path: str,
diff --git a/tools/testing/kunit/qemu_configs/powerpc.py b/tools/testing/kunit/qemu_configs/powerpc.py
index 7ec38d4131f7..5b4c895d5d5a 100644
--- a/tools/testing/kunit/qemu_configs/powerpc.py
+++ b/tools/testing/kunit/qemu_configs/powerpc.py
@@ -3,6 +3,7 @@ from ..qemu_config import QemuArchParams
QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
kconfig='''
CONFIG_PPC64=y
+CONFIG_CPU_BIG_ENDIAN=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_HVC_CONSOLE=y''',
diff --git a/tools/testing/kunit/qemu_configs/powerpc32.py b/tools/testing/kunit/qemu_configs/powerpc32.py
new file mode 100644
index 000000000000..88bd60dbb948
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/powerpc32.py
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
+ kconfig='''
+CONFIG_PPC32=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_ADB_CUDA=y
+CONFIG_SERIAL_PMACZILOG=y
+CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
+''',
+ qemu_arch='ppc',
+ kernel_path='vmlinux',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'g3beige', '-cpu', 'max'])
diff --git a/tools/testing/kunit/qemu_configs/powerpcle.py b/tools/testing/kunit/qemu_configs/powerpcle.py
new file mode 100644
index 000000000000..7ddee8af4bd7
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/powerpcle.py
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='powerpc',
+ kconfig='''
+CONFIG_PPC64=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_HVC_CONSOLE=y
+''',
+ qemu_arch='ppc64',
+ kernel_path='vmlinux',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-M', 'pseries', '-cpu', 'power8'])
diff --git a/tools/testing/kunit/qemu_configs/riscv32.py b/tools/testing/kunit/qemu_configs/riscv32.py
new file mode 100644
index 000000000000..b79ba0ae30f8
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/riscv32.py
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='riscv',
+ kconfig='''
+CONFIG_NONPORTABLE=y
+CONFIG_ARCH_RV32I=y
+CONFIG_ARCH_VIRT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+''',
+ qemu_arch='riscv32',
+ kernel_path='arch/riscv/boot/Image',
+ kernel_command_line='console=ttyS0',
+ extra_qemu_params=['-machine', 'virt'])
diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py
index 256d9573b446..2019550a1b69 100644
--- a/tools/testing/kunit/qemu_configs/sparc.py
+++ b/tools/testing/kunit/qemu_configs/sparc.py
@@ -2,6 +2,8 @@ from ..qemu_config import QemuArchParams
QEMU_ARCH = QemuArchParams(linux_arch='sparc',
kconfig='''
+CONFIG_KUNIT_FAULT_TEST=n
+CONFIG_SPARC32=y
CONFIG_SERIAL_SUNZILOG=y
CONFIG_SERIAL_SUNZILOG_CONSOLE=y
''',
diff --git a/tools/testing/kunit/qemu_configs/sparc64.py b/tools/testing/kunit/qemu_configs/sparc64.py
new file mode 100644
index 000000000000..53d4e5a8c972
--- /dev/null
+++ b/tools/testing/kunit/qemu_configs/sparc64.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from ..qemu_config import QemuArchParams
+
+QEMU_ARCH = QemuArchParams(linux_arch='sparc',
+ kconfig='''
+CONFIG_64BIT=y
+CONFIG_SPARC64=y
+CONFIG_PCI=y
+CONFIG_SERIAL_SUNSU=y
+CONFIG_SERIAL_SUNSU_CONSOLE=y
+''',
+ qemu_arch='sparc64',
+ kernel_path='arch/sparc/boot/image',
+ kernel_command_line='console=ttyS0 kunit_shutdown=poweroff',
+ extra_qemu_params=[])
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c77c8c8e3d9b..6aa11cd3db42 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -48,6 +48,7 @@ TARGETS += ipc
TARGETS += ir
TARGETS += kcmp
TARGETS += kexec
+TARGETS += kselftest_harness
TARGETS += kvm
TARGETS += landlock
TARGETS += lib
@@ -66,11 +67,13 @@ TARGETS += mseal_system_mappings
TARGETS += nci
TARGETS += net
TARGETS += net/af_unix
+TARGETS += net/can
TARGETS += net/forwarding
TARGETS += net/hsr
TARGETS += net/mptcp
TARGETS += net/netfilter
TARGETS += net/openvswitch
+TARGETS += net/ovpn
TARGETS += net/packetdrill
TARGETS += net/rds
TARGETS += net/tcp_ao
@@ -121,6 +124,7 @@ TARGETS += user_events
TARGETS += vDSO
TARGETS += mm
TARGETS += x86
+TARGETS += x86/bugs
TARGETS += zram
#Please keep the TARGETS list alphabetically sorted
# Run "make quicktest=1 run_tests" or
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 22029e60eff3..c4c72ee2ef55 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -21,6 +21,8 @@ CFLAGS += $(KHDR_INCLUDES)
CFLAGS += -I$(top_srcdir)/tools/include
+OUTPUT ?= $(CURDIR)
+
export CFLAGS
export top_srcdir
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
index 285c47dd42f6..eb19dcc37a75 100644
--- a/tools/testing/selftests/arm64/abi/tpidr2.c
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -169,8 +169,10 @@ static int sys_clone(unsigned long clone_flags, unsigned long newsp,
child_tidptr);
}
+#define __STACK_SIZE (8 * 1024 * 1024)
+
/*
- * If we clone with CLONE_SETTLS then the value in the parent should
+ * If we clone with CLONE_VM then the value in the parent should
* be unchanged and the child should start with zero and be able to
* set its own value.
*/
@@ -179,11 +181,19 @@ static int write_clone_read(void)
int parent_tid, child_tid;
pid_t parent, waiting;
int ret, status;
+ void *stack;
parent = getpid();
set_tpidr2(parent);
- ret = sys_clone(CLONE_SETTLS, 0, &parent_tid, 0, &child_tid);
+ stack = malloc(__STACK_SIZE);
+ if (!stack) {
+ putstr("# malloc() failed\n");
+ return 0;
+ }
+
+ ret = sys_clone(CLONE_VM, (unsigned long)stack + __STACK_SIZE,
+ &parent_tid, 0, &child_tid);
if (ret == -1) {
putstr("# clone() failed\n");
putnum(errno);
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index 4930e03a7b99..191c47ca0ed8 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -439,10 +439,17 @@ static bool check_ptrace_values_sve(pid_t child, struct test_config *config)
pass = false;
}
- if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
- ksft_print_msg("Mismatch in SVE header size: %d != %lu\n",
- sve->size, SVE_PT_SIZE(vq, sve->flags));
- pass = false;
+ if (svcr_in & SVCR_SM) {
+ if (sve->size != sizeof(sve)) {
+ ksft_print_msg("NT_ARM_SVE reports data with PSTATE.SM\n");
+ pass = false;
+ }
+ } else {
+ if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
+ ksft_print_msg("Mismatch in SVE header size: %d != %lu\n",
+ sve->size, SVE_PT_SIZE(vq, sve->flags));
+ pass = false;
+ }
}
/* The registers might be in completely different formats! */
@@ -515,10 +522,17 @@ static bool check_ptrace_values_ssve(pid_t child, struct test_config *config)
pass = false;
}
- if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
- ksft_print_msg("Mismatch in SSVE header size: %d != %lu\n",
- sve->size, SVE_PT_SIZE(vq, sve->flags));
- pass = false;
+ if (!(svcr_in & SVCR_SM)) {
+ if (sve->size != sizeof(sve)) {
+ ksft_print_msg("NT_ARM_SSVE reports data without PSTATE.SM\n");
+ pass = false;
+ }
+ } else {
+ if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
+ ksft_print_msg("Mismatch in SSVE header size: %d != %lu\n",
+ sve->size, SVE_PT_SIZE(vq, sve->flags));
+ pass = false;
+ }
}
/* The registers might be in completely different formats! */
@@ -891,18 +905,11 @@ static void set_initial_values(struct test_config *config)
{
int vq = __sve_vq_from_vl(vl_in(config));
int sme_vq = __sve_vq_from_vl(config->sme_vl_in);
- bool sm_change;
svcr_in = config->svcr_in;
svcr_expected = config->svcr_expected;
svcr_out = 0;
- if (sme_supported() &&
- (svcr_in & SVCR_SM) != (svcr_expected & SVCR_SM))
- sm_change = true;
- else
- sm_change = false;
-
fill_random(&v_in, sizeof(v_in));
memcpy(v_expected, v_in, sizeof(v_in));
memset(v_out, 0, sizeof(v_out));
@@ -953,12 +960,7 @@ static void set_initial_values(struct test_config *config)
if (fpmr_supported()) {
fill_random(&fpmr_in, sizeof(fpmr_in));
fpmr_in &= FPMR_SAFE_BITS;
-
- /* Entering or exiting streaming mode clears FPMR */
- if (sm_change)
- fpmr_expected = 0;
- else
- fpmr_expected = fpmr_in;
+ fpmr_expected = fpmr_in;
} else {
fpmr_in = 0;
fpmr_expected = 0;
@@ -1195,18 +1197,8 @@ static void sve_write(pid_t child, struct test_config *config)
static bool za_write_supported(struct test_config *config)
{
- if (config->sme_vl_in != config->sme_vl_expected) {
- /* Changing the SME VL exits streaming mode. */
- if (config->svcr_expected & SVCR_SM) {
- return false;
- }
- } else {
- /* Otherwise we can't change streaming mode */
- if ((config->svcr_in & SVCR_SM) !=
- (config->svcr_expected & SVCR_SM)) {
- return false;
- }
- }
+ if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM))
+ return false;
return true;
}
@@ -1224,10 +1216,8 @@ static void za_write_expected(struct test_config *config)
memset(zt_expected, 0, sizeof(zt_expected));
}
- /* Changing the SME VL flushes ZT, SVE state and exits SM */
+ /* Changing the SME VL flushes ZT, SVE state */
if (config->sme_vl_in != config->sme_vl_expected) {
- svcr_expected &= ~SVCR_SM;
-
sve_vq = __sve_vq_from_vl(vl_expected(config));
memset(z_expected, 0, __SVE_ZREGS_SIZE(sve_vq));
memset(p_expected, 0, __SVE_PREGS_SIZE(sve_vq));
diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST
index f748f2c33b22..1789a61d0a9b 100644
--- a/tools/testing/selftests/bpf/DENYLIST
+++ b/tools/testing/selftests/bpf/DENYLIST
@@ -1,5 +1,6 @@
# TEMPORARY
# Alphabetical order
+dynptr/test_probe_read_user_str_dynptr # disabled until https://patchwork.kernel.org/project/linux-mm/patch/20250422131449.57177-1-mykyta.yatsenko5@gmail.com/ makes it into the bpf-next
get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge
stacktrace_build_id
stacktrace_build_id_nmi
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index 6d8feda27ce9..12e99c0277a8 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,3 +1 @@
-fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
-fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 66bb50356be0..cf5ed3bee573 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -34,6 +34,9 @@ OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0)
LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
+SKIP_DOCS ?=
+SKIP_LLVM ?=
+
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
@@ -172,6 +175,7 @@ override OUTPUT := $(patsubst %/,%,$(OUTPUT))
endif
endif
+ifneq ($(SKIP_LLVM),1)
ifeq ($(feature-llvm),1)
LLVM_CFLAGS += -DHAVE_LLVM_SUPPORT
LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets
@@ -180,13 +184,14 @@ ifeq ($(feature-llvm),1)
# Prefer linking statically if it's available, otherwise fallback to shared
ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static)
LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
- LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ LLVM_LDLIBS += $(filter-out -lxml2,$(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)))
LLVM_LDLIBS += -lstdc++
else
LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS))
endif
LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
endif
+endif
SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
@@ -358,7 +363,9 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin
endif
+ifneq ($(SKIP_DOCS),1)
all: docs
+endif
docs:
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
@@ -673,9 +680,6 @@ ifneq ($2:$(OUTPUT),:$(shell pwd))
$(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
endif
-$(OUTPUT)/$(TRUNNER_BINARY): LDLIBS += $$(LLVM_LDLIBS)
-$(OUTPUT)/$(TRUNNER_BINARY): LDFLAGS += $$(LLVM_LDFLAGS)
-
# some X.test.o files have runtime dependencies on Y.bpf.o files
$(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
@@ -686,7 +690,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(OUTPUT)/veristat \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
- $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@
$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \
$(OUTPUT)/$(if $2,$2/)bpftool
@@ -811,6 +815,7 @@ $(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.ske
$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
+$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -831,6 +836,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_local_storage_create.o \
$(OUTPUT)/bench_htab_mem.o \
$(OUTPUT)/bench_bpf_crypto.o \
+ $(OUTPUT)/bench_sockmap.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 1bd403a5ef7b..ddd73d06a1eb 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -283,6 +283,7 @@ extern struct argp bench_local_storage_create_argp;
extern struct argp bench_htab_mem_argp;
extern struct argp bench_trigger_batch_argp;
extern struct argp bench_crypto_argp;
+extern struct argp bench_sockmap_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -297,6 +298,7 @@ static const struct argp_child bench_parsers[] = {
{ &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
{ &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
{ &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
+ { &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 },
{},
};
@@ -526,6 +528,12 @@ extern const struct bench bench_trig_uprobe_multi_push;
extern const struct bench bench_trig_uretprobe_multi_push;
extern const struct bench bench_trig_uprobe_multi_ret;
extern const struct bench bench_trig_uretprobe_multi_ret;
+#ifdef __x86_64__
+extern const struct bench bench_trig_uprobe_nop5;
+extern const struct bench bench_trig_uretprobe_nop5;
+extern const struct bench bench_trig_uprobe_multi_nop5;
+extern const struct bench bench_trig_uretprobe_multi_nop5;
+#endif
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
@@ -549,6 +557,7 @@ extern const struct bench bench_local_storage_create;
extern const struct bench bench_htab_mem;
extern const struct bench bench_crypto_encrypt;
extern const struct bench bench_crypto_decrypt;
+extern const struct bench bench_sockmap;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -586,6 +595,12 @@ static const struct bench *benchs[] = {
&bench_trig_uretprobe_multi_push,
&bench_trig_uprobe_multi_ret,
&bench_trig_uretprobe_multi_ret,
+#ifdef __x86_64__
+ &bench_trig_uprobe_nop5,
+ &bench_trig_uretprobe_nop5,
+ &bench_trig_uprobe_multi_nop5,
+ &bench_trig_uretprobe_multi_nop5,
+#endif
/* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
&bench_rb_custom,
@@ -609,6 +624,7 @@ static const struct bench *benchs[] = {
&bench_htab_mem,
&bench_crypto_encrypt,
&bench_crypto_decrypt,
+ &bench_sockmap,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
index 926ee822143e..297e32390cd1 100644
--- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
+++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
@@ -279,6 +279,7 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
}
got = read(fd, buf, sizeof(buf) - 1);
+ close(fd);
if (got <= 0) {
*value = 0;
return;
@@ -286,8 +287,6 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
buf[got] = 0;
*value = strtoull(buf, NULL, 0);
-
- close(fd);
}
static void htab_mem_measure(struct bench_res *res)
diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
new file mode 100644
index 000000000000..8ebf563a67a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <error.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/sendfile.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <argp.h>
+#include "bench.h"
+#include "bench_sockmap_prog.skel.h"
+
+#define FILE_SIZE (128 * 1024)
+#define DATA_REPEAT_SIZE 10
+
+static const char snd_data[DATA_REPEAT_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+/* c1 <-> [p1, p2] <-> c2
+ * RX bench(BPF_SK_SKB_STREAM_VERDICT):
+ * ARG_FW_RX_PASS:
+ * send(p2) -> recv(c2) -> bpf skb passthrough -> recv(c2)
+ * ARG_FW_RX_VERDICT_EGRESS:
+ * send(c1) -> verdict skb to tx queuec of p2 -> recv(c2)
+ * ARG_FW_RX_VERDICT_INGRESS:
+ * send(c1) -> verdict skb to rx queuec of c2 -> recv(c2)
+ *
+ * TX bench(BPF_SK_MSG_VERDIC):
+ * ARG_FW_TX_PASS:
+ * send(p2) -> bpf msg passthrough -> send(p2) -> recv(c2)
+ * ARG_FW_TX_VERDICT_INGRESS:
+ * send(p2) -> verdict msg to rx queue of c2 -> recv(c2)
+ * ARG_FW_TX_VERDICT_EGRESS:
+ * send(p1) -> verdict msg to tx queue of p2 -> recv(c2)
+ */
+enum SOCKMAP_ARG_FLAG {
+ ARG_FW_RX_NORMAL = 11000,
+ ARG_FW_RX_PASS,
+ ARG_FW_RX_VERDICT_EGRESS,
+ ARG_FW_RX_VERDICT_INGRESS,
+ ARG_FW_TX_NORMAL,
+ ARG_FW_TX_PASS,
+ ARG_FW_TX_VERDICT_INGRESS,
+ ARG_FW_TX_VERDICT_EGRESS,
+ ARG_CTL_RX_STRP,
+ ARG_CONSUMER_DELAY_TIME,
+ ARG_PRODUCER_DURATION,
+};
+
+#define TXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_TX_NORMAL)
+
+#define TXMODE_BPF_INGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_INGRESS)
+
+#define TXMODE_BPF_EGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_EGRESS)
+
+#define TXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_TX_PASS)
+
+#define TXMODE_BPF() ( \
+ TXMODE_BPF_PASS() || \
+ TXMODE_BPF_INGRESS() || \
+ TXMODE_BPF_EGRESS())
+
+#define TXMODE() ( \
+ TXMODE_NORMAL() || \
+ TXMODE_BPF())
+
+#define RXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_RX_NORMAL)
+
+#define RXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_RX_PASS)
+
+#define RXMODE_BPF_VERDICT_EGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_EGRESS)
+
+#define RXMODE_BPF_VERDICT_INGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_INGRESS)
+
+#define RXMODE_BPF_VERDICT() ( \
+ RXMODE_BPF_VERDICT_INGRESS() || \
+ RXMODE_BPF_VERDICT_EGRESS())
+
+#define RXMODE_BPF() ( \
+ RXMODE_BPF_PASS() || \
+ RXMODE_BPF_VERDICT())
+
+#define RXMODE() ( \
+ RXMODE_NORMAL() || \
+ RXMODE_BPF())
+
+static struct socmap_ctx {
+ struct bench_sockmap_prog *skel;
+ enum SOCKMAP_ARG_FLAG mode;
+ #define c1 fds[0]
+ #define p1 fds[1]
+ #define c2 fds[2]
+ #define p2 fds[3]
+ #define sfd fds[4]
+ int fds[5];
+ long send_calls;
+ long read_calls;
+ long prod_send;
+ long user_read;
+ int file_size;
+ int delay_consumer;
+ int prod_run_time;
+ int strp_size;
+} ctx = {
+ .prod_send = 0,
+ .user_read = 0,
+ .file_size = FILE_SIZE,
+ .mode = ARG_FW_RX_VERDICT_EGRESS,
+ .fds = {0},
+ .delay_consumer = 0,
+ .prod_run_time = 0,
+ .strp_size = 0,
+};
+
+static void bench_sockmap_prog_destroy(void)
+{
+ int i;
+
+ for (i = 0; i < sizeof(ctx.fds); i++) {
+ if (ctx.fds[0] > 0)
+ close(ctx.fds[i]);
+ }
+
+ bench_sockmap_prog__destroy(ctx.skel);
+}
+
+static void init_addr(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static bool set_non_block(int fd, bool blocking)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+
+ if (flags == -1)
+ return false;
+ flags = blocking ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK);
+ return (fcntl(fd, F_SETFL, flags) == 0);
+}
+
+static int create_pair(int *c, int *p, int type)
+{
+ struct sockaddr_storage addr;
+ int err, cfd, pfd;
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+
+ err = getsockname(ctx.sfd, (struct sockaddr *)&addr, &addr_len);
+ if (err) {
+ fprintf(stderr, "getsockname error %d\n", errno);
+ return err;
+ }
+ cfd = socket(AF_INET, type, 0);
+ if (cfd < 0) {
+ fprintf(stderr, "socket error %d\n", errno);
+ return err;
+ }
+
+ err = connect(cfd, (struct sockaddr *)&addr, addr_len);
+ if (err && errno != EINPROGRESS) {
+ fprintf(stderr, "connect error %d\n", errno);
+ return err;
+ }
+
+ pfd = accept(ctx.sfd, NULL, NULL);
+ if (pfd < 0) {
+ fprintf(stderr, "accept error %d\n", errno);
+ return err;
+ }
+ *c = cfd;
+ *p = pfd;
+ return 0;
+}
+
+static int create_sockets(void)
+{
+ struct sockaddr_storage addr;
+ int err, one = 1;
+ socklen_t addr_len;
+
+ init_addr(&addr, &addr_len);
+ ctx.sfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (ctx.sfd < 0) {
+ fprintf(stderr, "socket error:%d\n", errno);
+ return ctx.sfd;
+ }
+ err = setsockopt(ctx.sfd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err) {
+ fprintf(stderr, "setsockopt error:%d\n", errno);
+ return err;
+ }
+
+ err = bind(ctx.sfd, (struct sockaddr *)&addr, addr_len);
+ if (err) {
+ fprintf(stderr, "bind error:%d\n", errno);
+ return err;
+ }
+
+ err = listen(ctx.sfd, SOMAXCONN);
+ if (err) {
+ fprintf(stderr, "listen error:%d\n", errno);
+ return err;
+ }
+
+ err = create_pair(&ctx.c1, &ctx.p1, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 1 error\n");
+ return err;
+ }
+
+ err = create_pair(&ctx.c2, &ctx.p2, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 2 error\n");
+ return err;
+ }
+ printf("create socket fd c1:%d p1:%d c2:%d p2:%d\n",
+ ctx.c1, ctx.p1, ctx.c2, ctx.p2);
+ return 0;
+}
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 2 || env.producer_cnt != 1 ||
+ !env.affinity)
+ goto err;
+ return;
+err:
+ fprintf(stderr, "argument '-c 2 -p 1 -a' is necessary");
+ exit(1);
+}
+
+static int setup_rx_sockmap(void)
+{
+ int verdict, pass, parser, map;
+ int zero = 0, one = 1;
+ int err;
+
+ parser = bpf_program__fd(ctx.skel->progs.prog_skb_parser);
+ verdict = bpf_program__fd(ctx.skel->progs.prog_skb_verdict);
+ pass = bpf_program__fd(ctx.skel->progs.prog_skb_pass);
+ map = bpf_map__fd(ctx.skel->maps.sock_map_rx);
+
+ if (ctx.strp_size != 0) {
+ ctx.skel->bss->pkt_size = ctx.strp_size;
+ err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return err;
+ }
+
+ if (RXMODE_BPF_VERDICT())
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ else if (RXMODE_BPF_PASS())
+ err = bpf_prog_attach(pass, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (RXMODE_BPF_PASS())
+ return bpf_map_update_elem(map, &zero, &ctx.c2, BPF_NOEXIST);
+
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ if (err < 0)
+ return err;
+
+ if (RXMODE_BPF_VERDICT_INGRESS()) {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ } else {
+ err = bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ }
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int setup_tx_sockmap(void)
+{
+ int zero = 0, one = 1;
+ int prog, map;
+ int err;
+
+ map = bpf_map__fd(ctx.skel->maps.sock_map_tx);
+ prog = TXMODE_BPF_PASS() ?
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_pass) :
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_verdict);
+
+ err = bpf_prog_attach(prog, map, BPF_SK_MSG_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (TXMODE_BPF_EGRESS()) {
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ } else {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &zero, &ctx.p2, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ }
+
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void setup(void)
+{
+ int err;
+
+ ctx.skel = bench_sockmap_prog__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "error loading skel\n");
+ exit(1);
+ }
+
+ if (create_sockets()) {
+ fprintf(stderr, "create_net_mode error\n");
+ goto err;
+ }
+
+ if (RXMODE_BPF()) {
+ err = setup_rx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_rx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else if (TXMODE_BPF()) {
+ err = setup_tx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_tx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else {
+ fprintf(stderr, "unknown sockmap bench mode: %d\n", ctx.mode);
+ goto err;
+ }
+
+ return;
+
+err:
+ bench_sockmap_prog_destroy();
+ exit(1);
+}
+
+static void measure(struct bench_res *res)
+{
+ res->drops = atomic_swap(&ctx.prod_send, 0);
+ res->hits = atomic_swap(&ctx.skel->bss->process_byte, 0);
+ res->false_hits = atomic_swap(&ctx.user_read, 0);
+ res->important_hits = atomic_swap(&ctx.send_calls, 0);
+ res->important_hits |= atomic_swap(&ctx.read_calls, 0) << 32;
+}
+
+static void verify_data(int *check_pos, char *buf, int rcv)
+{
+ for (int i = 0 ; i < rcv; i++) {
+ if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
+ fprintf(stderr, "verify data fail");
+ exit(1);
+ }
+ (*check_pos)++;
+ if (*check_pos >= FILE_SIZE)
+ *check_pos = 0;
+ }
+}
+
+static void *consumer(void *input)
+{
+ int rcv, sent;
+ int check_pos = 0;
+ int tid = (long)input;
+ int recv_buf_size = FILE_SIZE;
+ char *buf = malloc(recv_buf_size);
+ int delay_read = ctx.delay_consumer;
+
+ if (!buf) {
+ fprintf(stderr, "fail to init read buffer");
+ return NULL;
+ }
+
+ while (true) {
+ if (tid == 1) {
+ /* consumer 1 is unused for tx test and stream verdict test */
+ if (RXMODE_BPF() || TXMODE())
+ return NULL;
+ /* it's only for RX_NORMAL which service as reserve-proxy mode */
+ rcv = read(ctx.p1, buf, recv_buf_size);
+ if (rcv < 0) {
+ fprintf(stderr, "fail to read p1");
+ return NULL;
+ }
+
+ sent = send(ctx.p2, buf, recv_buf_size, 0);
+ if (sent < 0) {
+ fprintf(stderr, "fail to send p2");
+ return NULL;
+ }
+ } else {
+ if (delay_read != 0) {
+ if (delay_read < 0)
+ return NULL;
+ sleep(delay_read);
+ delay_read = 0;
+ }
+ /* read real endpoint by consumer 0 */
+ atomic_inc(&ctx.read_calls);
+ rcv = read(ctx.c2, buf, recv_buf_size);
+ if (rcv < 0 && errno != EAGAIN) {
+ fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno);
+ return NULL;
+ }
+ verify_data(&check_pos, buf, rcv);
+ atomic_add(&ctx.user_read, rcv);
+ }
+ }
+
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ int off = 0, fp, need_sent, sent;
+ int file_size = ctx.file_size;
+ struct timespec ts1, ts2;
+ int target;
+ FILE *file;
+
+ file = tmpfile();
+ if (!file) {
+ fprintf(stderr, "create file for sendfile");
+ return NULL;
+ }
+
+ /* we need simple verify */
+ for (int i = 0; i < file_size; i++) {
+ if (fwrite(&snd_data[off], sizeof(char), 1, file) != 1) {
+ fprintf(stderr, "init tmpfile error");
+ return NULL;
+ }
+ if (++off >= sizeof(snd_data))
+ off = 0;
+ }
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+
+ fp = fileno(file);
+ need_sent = file_size;
+ clock_gettime(CLOCK_MONOTONIC, &ts1);
+
+ if (RXMODE_BPF_VERDICT())
+ target = ctx.c1;
+ else if (TXMODE_BPF_EGRESS())
+ target = ctx.p1;
+ else
+ target = ctx.p2;
+ set_non_block(target, true);
+ while (true) {
+ if (ctx.prod_run_time) {
+ clock_gettime(CLOCK_MONOTONIC, &ts2);
+ if (ts2.tv_sec - ts1.tv_sec > ctx.prod_run_time)
+ return NULL;
+ }
+
+ errno = 0;
+ atomic_inc(&ctx.send_calls);
+ sent = sendfile(target, fp, NULL, need_sent);
+ if (sent < 0) {
+ if (errno != EAGAIN && errno != ENOMEM && errno != ENOBUFS) {
+ fprintf(stderr, "sendfile return %d, errorno %d:%s\n",
+ sent, errno, strerror(errno));
+ return NULL;
+ }
+ continue;
+ } else if (sent < need_sent) {
+ need_sent -= sent;
+ atomic_add(&ctx.prod_send, sent);
+ continue;
+ }
+ atomic_add(&ctx.prod_send, need_sent);
+ need_sent = file_size;
+ lseek(fp, 0, SEEK_SET);
+ }
+
+ return NULL;
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double speed_mbs, prod_mbs, bpf_mbs, send_hz, read_hz;
+
+ prod_mbs = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
+ speed_mbs = res->false_hits / 1000000.0 / (delta_ns / 1000000000.0);
+ bpf_mbs = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+ send_hz = (res->important_hits & 0xFFFFFFFF) / (delta_ns / 1000000000.0);
+ read_hz = (res->important_hits >> 32) / (delta_ns / 1000000000.0);
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
+ printf("Send Speed %8.3lf MB/s (%8.3lf calls/s), BPF Speed %8.3lf MB/s, "
+ "Rcv Speed %8.3lf MB/s (%8.3lf calls/s)\n",
+ prod_mbs, send_hz, bpf_mbs, speed_mbs, read_hz);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ double verdict_mbs_mean = 0.0;
+ long verdict_total = 0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ verdict_mbs_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ verdict_total += res[i].hits / 1000000.0;
+ }
+
+ printf("Summary: total trans %8.3lu MB \u00B1 %5.3lf MB/s\n",
+ verdict_total, verdict_mbs_mean);
+}
+
+static const struct argp_option opts[] = {
+ { "rx-normal", ARG_FW_RX_NORMAL, NULL, 0,
+ "simple reserve-proxy mode, no bfp enabled"},
+ { "rx-pass", ARG_FW_RX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "rx-strp", ARG_CTL_RX_STRP, "Byte", 0,
+ "enable strparser and set the encapsulation size"},
+ { "rx-verdict-egress", ARG_FW_RX_VERDICT_EGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "rx-verdict-ingress", ARG_FW_RX_VERDICT_INGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "tx-normal", ARG_FW_TX_NORMAL, NULL, 0,
+ "simple c-s mode, no bfp enabled"},
+ { "tx-pass", ARG_FW_TX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "tx-verdict-ingress", ARG_FW_TX_VERDICT_INGRESS, NULL, 0,
+ "forward msg to ingress queue of another socket"},
+ { "tx-verdict-egress", ARG_FW_TX_VERDICT_EGRESS, NULL, 0,
+ "forward msg to egress queue of another socket"},
+ { "delay-consumer", ARG_CONSUMER_DELAY_TIME, "SEC", 0,
+ "delay consumer start"},
+ { "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
+ "producer duration"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_FW_RX_NORMAL...ARG_FW_TX_VERDICT_EGRESS:
+ ctx.mode = key;
+ break;
+ case ARG_CONSUMER_DELAY_TIME:
+ ctx.delay_consumer = strtol(arg, NULL, 10);
+ break;
+ case ARG_PRODUCER_DURATION:
+ ctx.prod_run_time = strtol(arg, NULL, 10);
+ break;
+ case ARG_CTL_RX_STRP:
+ ctx.strp_size = strtol(arg, NULL, 10);
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_sockmap_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* Benchmark performance of creating bpf local storage */
+const struct bench bench_sockmap = {
+ .name = "sockmap",
+ .argp = &bench_sockmap_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 32e9f194d449..82327657846e 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -333,6 +333,20 @@ static void *uprobe_producer_ret(void *input)
return NULL;
}
+#ifdef __x86_64__
+__nocf_check __weak void uprobe_target_nop5(void)
+{
+ asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
+}
+
+static void *uprobe_producer_nop5(void *input)
+{
+ while (true)
+ uprobe_target_nop5();
+ return NULL;
+}
+#endif
+
static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
{
size_t uprobe_offset;
@@ -448,6 +462,28 @@ static void uretprobe_multi_ret_setup(void)
usetup(true, true /* use_multi */, &uprobe_target_ret);
}
+#ifdef __x86_64__
+static void uprobe_nop5_setup(void)
+{
+ usetup(false, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_nop5_setup(void)
+{
+ usetup(true, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uprobe_multi_nop5_setup(void)
+{
+ usetup(false, true /* use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_multi_nop5_setup(void)
+{
+ usetup(true, true /* use_multi */, &uprobe_target_nop5);
+}
+#endif
+
const struct bench bench_trig_syscall_count = {
.name = "trig-syscall-count",
.validate = trigger_validate,
@@ -506,3 +542,9 @@ BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
+#ifdef __x86_64__
+BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
+BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
+BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
+BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
+#endif
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
index af169f831f2f..03f55405484b 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret}
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5}
do
summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-15s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 6535c8ae3c46..5e512a1d09d1 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -591,4 +591,9 @@ extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym
extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym;
extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym;
+struct bpf_iter_dmabuf;
+extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
+
#endif
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index c378d5d07e02..f74e1ea0ad3b 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -22,6 +22,8 @@ CONFIG_CRYPTO_AES=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
CONFIG_DUMMY=y
CONFIG_DYNAMIC_FTRACE=y
CONFIG_FPROBE=y
@@ -71,8 +73,10 @@ CONFIG_NET_IPGRE=y
CONFIG_NET_IPGRE_DEMUX=y
CONFIG_NET_IPIP=y
CONFIG_NET_MPLS_GSO=y
+CONFIG_NET_SCH_BPF=y
CONFIG_NET_SCH_FQ=y
CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_SCH_HTB=y
CONFIG_NET_SCHED=y
CONFIG_NETDEVSIM=y
CONFIG_NETFILTER=y
@@ -106,6 +110,7 @@ CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_SYN_COOKIES=y
CONFIG_TEST_BPF=m
+CONFIG_UDMABUF=y
CONFIG_USERFAULTFD=y
CONFIG_VSOCKETS=y
CONFIG_VXLAN=y
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
index 3720b7611523..e1495a4bbc99 100644
--- a/tools/testing/selftests/bpf/config.aarch64
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -158,7 +158,6 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_TUN=y
CONFIG_UNIX=y
CONFIG_UPROBES=y
-CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
index 706931a8c2c6..26c3bc2ce11d 100644
--- a/tools/testing/selftests/bpf/config.s390x
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -128,7 +128,6 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_TUN=y
CONFIG_UNIX=y
CONFIG_UPROBES=y
-CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
index 7565fc7690c2..0223fce4db2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
@@ -51,9 +51,11 @@ static void test_arena_spin_lock_size(int size)
struct arena_spin_lock *skel;
pthread_t thread_id[16];
int prog_fd, i, err;
+ int nthreads;
void *ret;
- if (get_nprocs() < 2) {
+ nthreads = MIN(get_nprocs(), ARRAY_SIZE(thread_id));
+ if (nthreads < 2) {
test__skip();
return;
}
@@ -66,25 +68,25 @@ static void test_arena_spin_lock_size(int size)
goto end;
}
skel->bss->cs_count = size;
- skel->bss->limit = repeat * 16;
+ skel->bss->limit = repeat * nthreads;
- ASSERT_OK(pthread_barrier_init(&barrier, NULL, 16), "barrier init");
+ ASSERT_OK(pthread_barrier_init(&barrier, NULL, nthreads), "barrier init");
prog_fd = bpf_program__fd(skel->progs.prog);
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < nthreads; i++) {
err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
if (!ASSERT_OK(err, "pthread_create"))
goto end_barrier;
}
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < nthreads; i++) {
if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
goto end_barrier;
if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
goto end_barrier;
}
- ASSERT_EQ(skel->bss->counter, repeat * 16, "check counter value");
+ ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value");
end_barrier:
pthread_barrier_destroy(&barrier);
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 329c7862b52d..cabc51c2ca6b 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -122,6 +122,85 @@ cleanup:
test_attach_probe_manual__destroy(skel);
}
+/* attach uprobe/uretprobe long event name testings */
+static void test_attach_uprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ struct test_attach_probe_manual *skel;
+ ssize_t uprobe_offset;
+ char path[PATH_MAX] = {0};
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ goto cleanup;
+
+ if (!ASSERT_GT(readlink("/proc/self/exe", path, PATH_MAX - 1), 0, "readlink"))
+ goto cleanup;
+
+ /* manual-attach uprobe/uretprobe */
+ uprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ uprobe_opts.ref_ctr_offset = 0;
+ uprobe_opts.retprobe = false;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ path,
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_uprobe = uprobe_link;
+
+ uprobe_opts.retprobe = true;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ path,
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_uretprobe = uretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+/* attach kprobe/kretprobe long event name testings */
+static void test_attach_kprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ struct bpf_link *kprobe_link, *kretprobe_link;
+ struct test_attach_probe_manual *skel;
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ /* manual-attach kprobe/kretprobe */
+ kprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_kprobe = kprobe_link;
+
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_kretprobe = kretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
static void test_attach_probe_auto(struct test_attach_probe *skel)
{
struct bpf_link *uprobe_err_link;
@@ -323,6 +402,11 @@ void test_attach_probe(void)
if (test__start_subtest("uprobe-ref_ctr"))
test_uprobe_ref_ctr(skel);
+ if (test__start_subtest("uprobe-long_name"))
+ test_attach_uprobe_long_event_name();
+ if (test__start_subtest("kprobe-long_name"))
+ test_attach_kprobe_long_event_name();
+
cleanup:
test_attach_probe__destroy(skel);
ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index dbd13f8e42a7..dd6512fa652b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -63,6 +63,12 @@ static void test_bpf_nf_ct(int mode)
.repeat = 1,
);
+ if (SYS_NOFAIL("iptables-legacy --version")) {
+ fprintf(stdout, "Missing required iptables-legacy tool\n");
+ test__skip();
+ return;
+ }
+
skel = test_bpf_nf__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
new file mode 100644
index 000000000000..730357cd0c9a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "bpf_qdisc_fifo.skel.h"
+#include "bpf_qdisc_fq.skel.h"
+#include "bpf_qdisc_fail__incompl_ops.skel.h"
+
+#define LO_IFINDEX 1
+
+static const unsigned int total_bytes = 10 * 1024 * 1024;
+
+static void do_test(char *qdisc)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_ROOT,
+ .handle = 0x8000000,
+ .qdisc = qdisc);
+ int srv_fd = -1, cli_fd = -1;
+ int err;
+
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(err, "attach qdisc"))
+ return;
+
+ srv_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_OK_FD(srv_fd, "start server"))
+ goto done;
+
+ cli_fd = connect_to_fd(srv_fd, 0);
+ if (!ASSERT_OK_FD(cli_fd, "connect to client"))
+ goto done;
+
+ err = send_recv_data(srv_fd, cli_fd, total_bytes);
+ ASSERT_OK(err, "send_recv_data");
+
+done:
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+
+ bpf_tc_hook_destroy(&hook);
+}
+
+static void test_fifo(void)
+{
+ struct bpf_qdisc_fifo *fifo_skel;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ do_test("bpf_fifo");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_fq(void)
+{
+ struct bpf_qdisc_fq *fq_skel;
+
+ fq_skel = bpf_qdisc_fq__open_and_load();
+ if (!ASSERT_OK_PTR(fq_skel, "bpf_qdisc_fq__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fq__attach(fq_skel), "bpf_qdisc_fq__attach"))
+ goto out;
+
+ do_test("bpf_fq");
+out:
+ bpf_qdisc_fq__destroy(fq_skel);
+}
+
+static void test_qdisc_attach_to_mq(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ hook.ifindex = if_nametoindex("veth0");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ err = bpf_tc_hook_create(&hook);
+ ASSERT_OK(err, "attach qdisc");
+
+ bpf_tc_hook_destroy(&hook);
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_qdisc_attach_to_non_root(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "tc qdisc add dev lo root handle 1: htb");
+ SYS(out_del_htb, "tc class add dev lo parent 1: classid 1:1 htb rate 75Kbit");
+
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_ERR(err, "attach qdisc"))
+ bpf_tc_hook_destroy(&hook);
+
+out_del_htb:
+ SYS(out, "tc qdisc delete dev lo root htb");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_incompl_ops(void)
+{
+ struct bpf_qdisc_fail__incompl_ops *skel;
+ struct bpf_link *link;
+
+ skel = bpf_qdisc_fail__incompl_ops__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.test);
+ if (!ASSERT_ERR_PTR(link, "bpf_map__attach_struct_ops"))
+ bpf_link__destroy(link);
+
+ bpf_qdisc_fail__incompl_ops__destroy(skel);
+}
+
+static int get_default_qdisc(char *qdisc_name)
+{
+ FILE *f;
+ int num;
+
+ f = fopen("/proc/sys/net/core/default_qdisc", "r");
+ if (!f)
+ return -errno;
+
+ num = fscanf(f, "%s", qdisc_name);
+ fclose(f);
+
+ return num == 1 ? 0 : -EFAULT;
+}
+
+static void test_default_qdisc_attach_to_mq(void)
+{
+ char default_qdisc[IFNAMSIZ] = {};
+ struct bpf_qdisc_fifo *fifo_skel;
+ struct netns_obj *netns = NULL;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ err = get_default_qdisc(default_qdisc);
+ if (!ASSERT_OK(err, "read sysctl net.core.default_qdisc"))
+ goto out;
+
+ err = write_sysctl("/proc/sys/net/core/default_qdisc", "bpf_fifo");
+ if (!ASSERT_OK(err, "write sysctl net.core.default_qdisc"))
+ goto out;
+
+ netns = netns_new("bpf_qdisc_ns", true);
+ if (!ASSERT_OK_PTR(netns, "netns_new"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ ASSERT_EQ(fifo_skel->bss->init_called, true, "init_called");
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ netns_free(netns);
+ if (default_qdisc[0])
+ write_sysctl("/proc/sys/net/core/default_qdisc", default_qdisc);
+
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+void test_ns_bpf_qdisc(void)
+{
+ if (test__start_subtest("fifo"))
+ test_fifo();
+ if (test__start_subtest("fq"))
+ test_fq();
+ if (test__start_subtest("attach to mq"))
+ test_qdisc_attach_to_mq();
+ if (test__start_subtest("attach to non root"))
+ test_qdisc_attach_to_non_root();
+ if (test__start_subtest("incompl_ops"))
+ test_incompl_ops();
+}
+
+void serial_test_bpf_qdisc_default(void)
+{
+ test_default_qdisc_attach_to_mq();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
index d9024c7a892a..5bc15bb6b7ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
@@ -440,6 +440,105 @@ cleanup:
btf__free(btf1);
}
+/* Ensure module split BTF dedup worked correctly; when dedup fails badly
+ * core kernel types are in split BTF also, so ensure that references to
+ * such types point at base - not split - BTF.
+ *
+ * bpf_testmod_test_write() has multiple core kernel type parameters;
+ *
+ * ssize_t
+ * bpf_testmod_test_write(struct file *file, struct kobject *kobj,
+ * struct bin_attribute *bin_attr,
+ * char *buf, loff_t off, size_t len);
+ *
+ * Ensure each of the FUNC_PROTO params is a core kernel type.
+ *
+ * Do the same for
+ *
+ * __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk);
+ *
+ * ...and
+ *
+ * __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb);
+ *
+ */
+const char *mod_funcs[] = {
+ "bpf_testmod_test_write",
+ "bpf_kfunc_call_test3",
+ "bpf_kfunc_call_test_pass_ctx"
+};
+
+static void test_split_module(void)
+{
+ struct btf *vmlinux_btf, *btf1 = NULL;
+ int i, nr_base_types;
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux_btf"))
+ return;
+ nr_base_types = btf__type_cnt(vmlinux_btf);
+ if (!ASSERT_GT(nr_base_types, 0, "nr_base_types"))
+ goto cleanup;
+
+ btf1 = btf__parse_split("/sys/kernel/btf/bpf_testmod", vmlinux_btf);
+ if (!ASSERT_OK_PTR(btf1, "split_btf"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mod_funcs); i++) {
+ const struct btf_param *p;
+ const struct btf_type *t;
+ __u16 vlen;
+ __u32 id;
+ int j;
+
+ id = btf__find_by_name_kind(btf1, mod_funcs[i], BTF_KIND_FUNC);
+ if (!ASSERT_GE(id, nr_base_types, "func_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "func_id_type"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, t->type);
+ if (!ASSERT_OK_PTR(t, "func_proto_id_type"))
+ goto cleanup;
+ if (!ASSERT_EQ(btf_is_func_proto(t), true, "is_func_proto"))
+ goto cleanup;
+ vlen = btf_vlen(t);
+
+ for (j = 0, p = btf_params(t); j < vlen; j++, p++) {
+ /* bpf_testmod uses resilient split BTF, so any
+ * reference types will be added to split BTF and their
+ * associated targets will be base BTF types; for example
+ * for a "struct sock *" the PTR will be in split BTF
+ * while the "struct sock" will be in base.
+ *
+ * In some cases like loff_t we have to resolve
+ * multiple typedefs hence the while() loop below.
+ *
+ * Note that resilient split BTF generation depends
+ * on pahole version, so we do not assert that
+ * reference types are in split BTF, as if pahole
+ * does not support resilient split BTF they will
+ * also be base BTF types.
+ */
+ id = p->type;
+ do {
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "param_ref_type"))
+ goto cleanup;
+ if (!btf_is_mod(t) && !btf_is_ptr(t) && !btf_is_typedef(t))
+ break;
+ id = t->type;
+ } while (true);
+
+ if (!ASSERT_LT(id, nr_base_types, "verify_base_type"))
+ goto cleanup;
+ }
+ }
+cleanup:
+ btf__free(btf1);
+ btf__free(vmlinux_btf);
+}
+
void test_btf_dedup_split()
{
if (test__start_subtest("split_simple"))
@@ -450,4 +549,6 @@ void test_btf_dedup_split()
test_split_fwd_resolve();
if (test__start_subtest("split_dup_struct_in_cu"))
test_split_dup_struct_in_cu();
+ if (test__start_subtest("split_module"))
+ test_split_module();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
index eef1158676ed..3696fb9a05ed 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -12,10 +12,11 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
vfprintf(ctx, fmt, args);
}
-void test_btf_split() {
+static void __test_btf_split(bool multi)
+{
struct btf_dump *d = NULL;
const struct btf_type *t;
- struct btf *btf1, *btf2;
+ struct btf *btf1, *btf2, *btf3 = NULL;
int str_off, i, err;
btf1 = btf__new_empty();
@@ -63,14 +64,46 @@ void test_btf_split() {
ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen");
ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name");
+ if (multi) {
+ btf3 = btf__new_empty_split(btf2);
+ if (!ASSERT_OK_PTR(btf3, "multi_split_btf"))
+ goto cleanup;
+ } else {
+ btf3 = btf2;
+ }
+
+ btf__add_union(btf3, "u1", 16); /* [5] union u1 { */
+ btf__add_field(btf3, "f1", 4, 0, 0); /* struct s2 f1; */
+ btf__add_field(btf3, "uf2", 1, 0, 0); /* int f2; */
+ /* } */
+
+ if (multi) {
+ t = btf__type_by_id(btf2, 5);
+ ASSERT_NULL(t, "multisplit_type_in_first_split");
+ }
+
+ t = btf__type_by_id(btf3, 5);
+ if (!ASSERT_OK_PTR(t, "split_union_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_union(t), true, "split_union_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "split_union_vlen");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "u1", "split_union_name");
+ ASSERT_EQ(btf__type_cnt(btf3), 6, "split_type_cnt");
+
+ t = btf__type_by_id(btf3, 1);
+ if (!ASSERT_OK_PTR(t, "split_base_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_int(t), true, "split_base_int");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "int", "split_base_type_name");
+
/* BTF-to-C dump of split BTF */
dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
return;
- d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL);
+ d = btf_dump__new(btf3, btf_dump_printf, dump_buf_file, NULL);
if (!ASSERT_OK_PTR(d, "btf_dump__new"))
goto cleanup;
- for (i = 1; i < btf__type_cnt(btf2); i++) {
+ for (i = 1; i < btf__type_cnt(btf3); i++) {
err = btf_dump__dump_type(d, i);
ASSERT_OK(err, "dump_type_ok");
}
@@ -79,12 +112,15 @@ void test_btf_split() {
ASSERT_STREQ(dump_buf,
"struct s1 {\n"
" int f1;\n"
-"};\n"
-"\n"
+"};\n\n"
"struct s2 {\n"
" struct s1 f1;\n"
" int f2;\n"
" int *f3;\n"
+"};\n\n"
+"union u1 {\n"
+" struct s2 f1;\n"
+" int uf2;\n"
"};\n\n", "c_dump");
cleanup:
@@ -94,4 +130,14 @@ cleanup:
btf_dump__free(d);
btf__free(btf1);
btf__free(btf2);
+ if (btf2 != btf3)
+ btf__free(btf3);
+}
+
+void test_btf_split(void)
+{
+ if (test__start_subtest("single_split"))
+ __test_btf_split(false);
+ if (test__start_subtest("multi_split"))
+ __test_btf_split(true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
new file mode 100644
index 000000000000..3923e64c4c1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2025 Isovalent */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static void test_btf_mmap_sysfs(const char *path, struct btf *base)
+{
+ struct stat st;
+ __u64 btf_size, end;
+ void *raw_data = NULL;
+ int fd = -1;
+ long page_size;
+ struct btf *btf = NULL;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ if (!ASSERT_GE(page_size, 0, "get_page_size"))
+ goto cleanup;
+
+ if (!ASSERT_OK(stat(path, &st), "stat_btf"))
+ goto cleanup;
+
+ btf_size = st.st_size;
+ end = (btf_size + page_size - 1) / page_size * page_size;
+
+ fd = open(path, O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open_btf"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_OK_PTR(raw_data, "mmap_btf"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1,
+ "mprotect_writable"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1,
+ "mprotect_executable"))
+ goto cleanup;
+
+ /* Check padding is zeroed */
+ for (int i = btf_size; i < end; i++) {
+ if (((__u8 *)raw_data)[i] != 0) {
+ PRINT_FAIL("tail of BTF is not zero at page offset %d\n", i);
+ goto cleanup;
+ }
+ }
+
+ btf = btf__new_split(raw_data, btf_size, base);
+ if (!ASSERT_OK_PTR(btf, "parse_btf"))
+ goto cleanup;
+
+cleanup:
+ btf__free(btf);
+ if (raw_data && raw_data != MAP_FAILED)
+ munmap(raw_data, btf_size);
+ if (fd >= 0)
+ close(fd);
+}
+
+void test_btf_sysfs(void)
+{
+ test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
new file mode 100644
index 000000000000..6c2b0c3dbcd8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "dmabuf_iter.skel.h"
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
+#include <linux/udmabuf.h>
+
+static int udmabuf = -1;
+static const char udmabuf_test_buffer_name[DMA_BUF_NAME_LEN] = "udmabuf_test_buffer_for_iter";
+static size_t udmabuf_test_buffer_size;
+static int sysheap_dmabuf = -1;
+static const char sysheap_test_buffer_name[DMA_BUF_NAME_LEN] = "sysheap_test_buffer_for_iter";
+static size_t sysheap_test_buffer_size;
+
+static int create_udmabuf(void)
+{
+ struct udmabuf_create create;
+ int dev_udmabuf, memfd, local_udmabuf;
+
+ udmabuf_test_buffer_size = 10 * getpagesize();
+
+ if (!ASSERT_LE(sizeof(udmabuf_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ memfd = memfd_create("memfd_test", MFD_ALLOW_SEALING);
+ if (!ASSERT_OK_FD(memfd, "memfd_create"))
+ return -1;
+
+ if (!ASSERT_OK(ftruncate(memfd, udmabuf_test_buffer_size), "ftruncate"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK), "seal"))
+ goto close_memfd;
+
+ dev_udmabuf = open("/dev/udmabuf", O_RDONLY);
+ if (!ASSERT_OK_FD(dev_udmabuf, "open udmabuf"))
+ goto close_memfd;
+
+ memset(&create, 0, sizeof(create));
+ create.memfd = memfd;
+ create.flags = UDMABUF_FLAGS_CLOEXEC;
+ create.offset = 0;
+ create.size = udmabuf_test_buffer_size;
+
+ local_udmabuf = ioctl(dev_udmabuf, UDMABUF_CREATE, &create);
+ close(dev_udmabuf);
+ if (!ASSERT_OK_FD(local_udmabuf, "udmabuf_create"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(ioctl(local_udmabuf, DMA_BUF_SET_NAME_B, udmabuf_test_buffer_name), "name"))
+ goto close_udmabuf;
+
+ return local_udmabuf;
+
+close_udmabuf:
+ close(local_udmabuf);
+close_memfd:
+ close(memfd);
+ return -1;
+}
+
+static int create_sys_heap_dmabuf(void)
+{
+ sysheap_test_buffer_size = 20 * getpagesize();
+
+ struct dma_heap_allocation_data data = {
+ .len = sysheap_test_buffer_size,
+ .fd = 0,
+ .fd_flags = O_RDWR | O_CLOEXEC,
+ .heap_flags = 0,
+ };
+ int heap_fd, ret;
+
+ if (!ASSERT_LE(sizeof(sysheap_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ heap_fd = open("/dev/dma_heap/system", O_RDONLY);
+ if (!ASSERT_OK_FD(heap_fd, "open dma heap"))
+ return -1;
+
+ ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);
+ close(heap_fd);
+ if (!ASSERT_OK(ret, "syheap alloc"))
+ return -1;
+
+ if (!ASSERT_OK(ioctl(data.fd, DMA_BUF_SET_NAME_B, sysheap_test_buffer_name), "name"))
+ goto close_sysheap_dmabuf;
+
+ return data.fd;
+
+close_sysheap_dmabuf:
+ close(data.fd);
+ return -1;
+}
+
+static int create_test_buffers(void)
+{
+ udmabuf = create_udmabuf();
+ sysheap_dmabuf = create_sys_heap_dmabuf();
+
+ if (udmabuf < 0 || sysheap_dmabuf < 0)
+ return -1;
+
+ return 0;
+}
+
+static void destroy_test_buffers(void)
+{
+ close(udmabuf);
+ udmabuf = -1;
+
+ close(sysheap_dmabuf);
+ sysheap_dmabuf = -1;
+}
+
+enum Fields { INODE, SIZE, NAME, EXPORTER, FIELD_COUNT };
+struct DmabufInfo {
+ unsigned long inode;
+ unsigned long size;
+ char name[DMA_BUF_NAME_LEN];
+ char exporter[32];
+};
+
+static bool check_dmabuf_info(const struct DmabufInfo *bufinfo,
+ unsigned long size,
+ const char *name, const char *exporter)
+{
+ return size == bufinfo->size &&
+ !strcmp(name, bufinfo->name) &&
+ !strcmp(exporter, bufinfo->exporter);
+}
+
+static void subtest_dmabuf_iter_check_no_infinite_reads(struct dmabuf_iter *skel)
+{
+ int iter_fd;
+ char buf[256];
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ while (read(iter_fd, buf, sizeof(buf)) > 0)
+ ; /* Read out all contents */
+
+ /* Next reads should return 0 */
+ ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read");
+
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel)
+{
+ bool found_test_sysheap_dmabuf = false;
+ bool found_test_udmabuf = false;
+ struct DmabufInfo bufinfo;
+ size_t linesize = 0;
+ char *line = NULL;
+ FILE *iter_file;
+ int iter_fd, f = INODE;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ iter_file = fdopen(iter_fd, "r");
+ if (!ASSERT_OK_PTR(iter_file, "fdopen"))
+ goto close_iter_fd;
+
+ while (getline(&line, &linesize, iter_file) != -1) {
+ if (f % FIELD_COUNT == INODE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.inode), 1,
+ "read inode");
+ } else if (f % FIELD_COUNT == SIZE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.size), 1,
+ "read size");
+ } else if (f % FIELD_COUNT == NAME) {
+ ASSERT_EQ(sscanf(line, "%s", bufinfo.name), 1,
+ "read name");
+ } else if (f % FIELD_COUNT == EXPORTER) {
+ ASSERT_EQ(sscanf(line, "%31s", bufinfo.exporter), 1,
+ "read exporter");
+
+ if (check_dmabuf_info(&bufinfo,
+ sysheap_test_buffer_size,
+ sysheap_test_buffer_name,
+ "system"))
+ found_test_sysheap_dmabuf = true;
+ else if (check_dmabuf_info(&bufinfo,
+ udmabuf_test_buffer_size,
+ udmabuf_test_buffer_name,
+ "udmabuf"))
+ found_test_udmabuf = true;
+ }
+ ++f;
+ }
+
+ ASSERT_EQ(f % FIELD_COUNT, INODE, "number of fields");
+
+ ASSERT_TRUE(found_test_sysheap_dmabuf, "found_test_sysheap_dmabuf");
+ ASSERT_TRUE(found_test_udmabuf, "found_test_udmabuf");
+
+ free(line);
+ fclose(iter_file);
+close_iter_fd:
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ char key[DMA_BUF_NAME_LEN];
+ int err, fd;
+ bool found;
+
+ /* No need to attach it, just run it directly */
+ fd = bpf_program__fd(skel->progs.iter_dmabuf_for_each);
+
+ err = bpf_prog_test_run_opts(fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, key), "get next key"))
+ return;
+
+ do {
+ ASSERT_OK(bpf_map_lookup_elem(map_fd, key, &found), "lookup");
+ ASSERT_TRUE(found, "found test buffer");
+ } while (bpf_map_get_next_key(map_fd, key, key));
+}
+
+void test_dmabuf_iter(void)
+{
+ struct dmabuf_iter *skel = NULL;
+ int map_fd;
+ const bool f = false;
+
+ skel = dmabuf_iter__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dmabuf_iter__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.testbuf_hash);
+ if (!ASSERT_OK_FD(map_fd, "map_fd"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, udmabuf_test_buffer_name, &f, BPF_ANY),
+ "insert udmabuf"))
+ goto destroy_skel;
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, sysheap_test_buffer_name, &f, BPF_ANY),
+ "insert sysheap buffer"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(create_test_buffers(), "create_test_buffers"))
+ goto destroy;
+
+ if (!ASSERT_OK(dmabuf_iter__attach(skel), "skel_attach"))
+ goto destroy;
+
+ if (test__start_subtest("no_infinite_reads"))
+ subtest_dmabuf_iter_check_no_infinite_reads(skel);
+ if (test__start_subtest("default_iter"))
+ subtest_dmabuf_iter_check_default_iter(skel);
+ if (test__start_subtest("open_coded"))
+ subtest_dmabuf_iter_check_open_coded(skel, map_fd);
+
+destroy:
+ destroy_test_buffers();
+destroy_skel:
+ dmabuf_iter__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index e29cc16124c2..62e7ec775f24 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -33,10 +33,19 @@ static struct {
{"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
{"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
{"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP},
+ {"test_probe_read_user_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_user_str_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_str_dynptr", SETUP_XDP_PROG},
+ {"test_copy_from_user_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_str_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP},
};
static void verify_success(const char *prog_name, enum test_setup_type setup_type)
{
+ char user_data[384] = {[0 ... 382] = 'a', '\0'};
struct dynptr_success *skel;
struct bpf_program *prog;
struct bpf_link *link;
@@ -58,6 +67,10 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
if (!ASSERT_OK(err, "dynptr_success__load"))
goto cleanup;
+ skel->bss->user_ptr = user_data;
+ skel->data->test_len[0] = sizeof(user_data);
+ memcpy(skel->bss->expected_str, user_data, sizeof(user_data));
+
switch (setup_type) {
case SETUP_SYSCALL_SLEEP:
link = bpf_program__attach(prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
new file mode 100644
index 000000000000..ca46fdd6e1ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <test_progs.h>
+#include "fd_htab_lookup.skel.h"
+
+struct htab_op_ctx {
+ int fd;
+ int loop;
+ unsigned int entries;
+ bool stop;
+};
+
+#define ERR_TO_RETVAL(where, err) ((void *)(long)(((where) << 12) | (-err)))
+
+static void *htab_lookup_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0, value;
+ int inner_fd, err;
+
+ err = bpf_map_lookup_elem(ctx->fd, &key, &value);
+ if (err) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, err);
+ }
+
+ inner_fd = bpf_map_get_fd_by_id(value);
+ if (inner_fd < 0) {
+ /* The old map has been freed */
+ if (inner_fd == -ENOENT)
+ continue;
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, inner_fd);
+ }
+
+ err = bpf_map_lookup_elem(inner_fd, &zero, &value);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+
+ if (value != key) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(4, -EINVAL);
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void *htab_update_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (inner_fd < 0) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, inner_fd);
+ }
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, err);
+ }
+
+ err = bpf_map_update_elem(ctx->fd, &key, &inner_fd, BPF_EXIST);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+ }
+ }
+
+ return NULL;
+}
+
+static int setup_htab(int fd, unsigned int entries)
+{
+ unsigned int i;
+
+ for (i = 0; i < entries; i++) {
+ unsigned int key = i, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "new array"))
+ return -1;
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (!ASSERT_OK(err, "init array")) {
+ close(inner_fd);
+ return -1;
+ }
+
+ err = bpf_map_update_elem(fd, &key, &inner_fd, 0);
+ if (!ASSERT_OK(err, "init outer")) {
+ close(inner_fd);
+ return -1;
+ }
+ close(inner_fd);
+ }
+
+ return 0;
+}
+
+static int get_int_from_env(const char *name, int dft)
+{
+ const char *value;
+
+ value = getenv(name);
+ if (!value)
+ return dft;
+
+ return atoi(value);
+}
+
+void test_fd_htab_lookup(void)
+{
+ unsigned int i, wr_nr = 8, rd_nr = 16;
+ pthread_t tids[wr_nr + rd_nr];
+ struct fd_htab_lookup *skel;
+ struct htab_op_ctx ctx;
+ int err;
+
+ skel = fd_htab_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fd_htab_lookup__open_and_load"))
+ return;
+
+ ctx.fd = bpf_map__fd(skel->maps.outer_map);
+ ctx.loop = get_int_from_env("FD_HTAB_LOOP_NR", 5);
+ ctx.stop = false;
+ ctx.entries = 8;
+
+ err = setup_htab(ctx.fd, ctx.entries);
+ if (err)
+ goto destroy;
+
+ memset(tids, 0, sizeof(tids));
+ for (i = 0; i < wr_nr; i++) {
+ err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+ for (i = 0; i < rd_nr; i++) {
+ err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+
+reap:
+ for (i = 0; i < wr_nr + rd_nr; i++) {
+ void *ret = NULL;
+ char desc[32];
+
+ if (!tids[i])
+ continue;
+
+ snprintf(desc, sizeof(desc), "thread %u", i + 1);
+ err = pthread_join(tids[i], &ret);
+ ASSERT_OK(err, desc);
+ ASSERT_EQ(ret, NULL, desc);
+ }
+destroy:
+ fd_htab_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index e59af2aa6601..e40114620751 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -37,6 +37,7 @@ static noinline void uprobe_func(void)
static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr,
ssize_t offset, ssize_t entry_offset)
{
+ ssize_t ref_ctr_offset = entry_offset /* ref_ctr_offset for uprobes */;
struct bpf_link_info info;
__u32 len = sizeof(info);
char buf[PATH_MAX];
@@ -97,6 +98,7 @@ again:
case BPF_PERF_EVENT_UPROBE:
case BPF_PERF_EVENT_URETPROBE:
ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset");
+ ASSERT_EQ(info.perf_event.uprobe.ref_ctr_offset, ref_ctr_offset, "uprobe_ref_ctr_offset");
ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1,
"name_len");
@@ -241,20 +243,32 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
.retprobe = type == BPF_PERF_EVENT_URETPROBE,
.bpf_cookie = PERF_EVENT_COOKIE,
);
+ const char *sema[1] = {
+ "uprobe_link_info_sema_1",
+ };
+ __u64 *ref_ctr_offset;
struct bpf_link *link;
int link_fd, err;
+ err = elf_resolve_syms_offsets("/proc/self/exe", 1, sema,
+ (unsigned long **) &ref_ctr_offset, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object"))
+ return;
+
+ opts.ref_ctr_offset = *ref_ctr_offset;
link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run,
0, /* self pid */
UPROBE_FILE, uprobe_offset,
&opts);
if (!ASSERT_OK_PTR(link, "attach_uprobe"))
- return;
+ goto out;
link_fd = bpf_link__fd(link);
- err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0);
+ err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, *ref_ctr_offset);
ASSERT_OK(err, "verify_perf_link_info");
bpf_link__destroy(link);
+out:
+ free(ref_ctr_offset);
}
static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies)
diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
index 8e13a3416a21..1de14b111931 100644
--- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
@@ -104,7 +104,7 @@ void test_kmem_cache_iter(void)
goto destroy;
memset(buf, 0, sizeof(buf));
- while (read(iter_fd, buf, sizeof(buf) > 0)) {
+ while (read(iter_fd, buf, sizeof(buf)) > 0) {
/* Read out all contents */
printf("%s", buf);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 77d07e0a4a55..5266c7022863 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -7,6 +7,7 @@
#include "linked_list.skel.h"
#include "linked_list_fail.skel.h"
+#include "linked_list_peek.skel.h"
static char log_buf[1024 * 1024];
@@ -805,3 +806,8 @@ void test_linked_list(void)
test_linked_list_success(LIST_IN_LIST, true);
test_linked_list_success(TEST_ALL, false);
}
+
+void test_linked_list_peek(void)
+{
+ RUN_TESTS(linked_list_peek);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c
index 9818f06c97c5..d8f3d7a45fe9 100644
--- a/tools/testing/selftests/bpf/prog_tests/rbtree.c
+++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c
@@ -8,6 +8,7 @@
#include "rbtree_fail.skel.h"
#include "rbtree_btf_fail__wrong_node_type.skel.h"
#include "rbtree_btf_fail__add_wrong_type.skel.h"
+#include "rbtree_search.skel.h"
static void test_rbtree_add_nodes(void)
{
@@ -187,3 +188,8 @@ void test_rbtree_fail(void)
{
RUN_TESTS(rbtree_fail);
}
+
+void test_rbtree_search(void)
+{
+ RUN_TESTS(rbtree_search);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 0b9bd1d6f7cc..10a0ab954b8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -37,8 +37,10 @@ configure_stack(void)
tc = popen("tc -V", "r");
if (CHECK_FAIL(!tc))
return false;
- if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc)))
+ if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) {
+ pclose(tc);
return false;
+ }
if (strstr(tc_version, ", libbpf "))
prog = "test_sk_assign_libbpf.bpf.o";
else
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
index d56e18b25528..a4517bee34d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -7,14 +7,433 @@
#define TEST_NS "sock_iter_batch_netns"
+static const int init_batch_size = 16;
static const int nr_soreuse = 4;
+struct iter_out {
+ int idx;
+ __u64 cookie;
+} __packed;
+
+struct sock_count {
+ __u64 cookie;
+ int count;
+};
+
+static int insert(__u64 cookie, struct sock_count counts[], int counts_len)
+{
+ int insert = -1;
+ int i = 0;
+
+ for (; i < counts_len; i++) {
+ if (!counts[i].cookie) {
+ insert = i;
+ } else if (counts[i].cookie == cookie) {
+ insert = i;
+ break;
+ }
+ }
+ if (insert < 0)
+ return insert;
+
+ counts[insert].cookie = cookie;
+ counts[insert].count++;
+
+ return counts[insert].count;
+}
+
+static int read_n(int iter_fd, int n, struct sock_count counts[],
+ int counts_len)
+{
+ struct iter_out out;
+ int nread = 1;
+ int i = 0;
+
+ for (; nread > 0 && (n < 0 || i < n); i++) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread"))
+ break;
+ ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert");
+ }
+
+ ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n");
+
+ return i;
+}
+
+static __u64 socket_cookie(int fd)
+{
+ __u64 cookie;
+ socklen_t cookie_len = sizeof(cookie);
+
+ if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie,
+ &cookie_len), "getsockopt(SO_COOKIE)"))
+ return 0;
+ return cookie;
+}
+
+static bool was_seen(int fd, struct sock_count counts[], int counts_len)
+{
+ __u64 cookie = socket_cookie(fd);
+ int i = 0;
+
+ for (; cookie && i < counts_len; i++)
+ if (cookie == counts[i].cookie)
+ return true;
+
+ return false;
+}
+
+static int get_seen_socket(int *fds, struct sock_count counts[], int n)
+{
+ int i = 0;
+
+ for (; i < n; i++)
+ if (was_seen(fds[i], counts, n))
+ return i;
+ return -1;
+}
+
+static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n)
+{
+ int i, nread, iter_fd;
+ int nth_sock_idx = -1;
+ struct iter_out out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ return -1;
+
+ for (; n >= 0; n--) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_GE(nread, 1, "nread"))
+ goto done;
+ }
+
+ for (i = 0; i < fds_len && nth_sock_idx < 0; i++)
+ if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie)
+ nth_sock_idx = i;
+done:
+ close(iter_fd);
+ return nth_sock_idx;
+}
+
+static int get_seen_count(int fd, struct sock_count counts[], int n)
+{
+ __u64 cookie = socket_cookie(fd);
+ int count = 0;
+ int i = 0;
+
+ for (; cookie && !count && i < n; i++)
+ if (cookie == counts[i].cookie)
+ count = counts[i].count;
+
+ return count;
+}
+
+static void check_n_were_seen_once(int *fds, int fds_len, int n,
+ struct sock_count counts[], int counts_len)
+{
+ int seen_once = 0;
+ int seen_cnt;
+ int i = 0;
+
+ for (; i < fds_len; i++) {
+ /* Skip any sockets that were closed or that weren't seen
+ * exactly once.
+ */
+ if (fds[i] < 0)
+ continue;
+ seen_cnt = get_seen_count(fds[i], counts, counts_len);
+ if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt"))
+ seen_once++;
+ }
+
+ ASSERT_EQ(seen_once, n, "seen_once");
+}
+
+static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_seen_socket(socks, counts, counts_len);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_unseen(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_all(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx, i;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ for (i = 0; i < socks_len - 1; i++) {
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+}
+
+static void add_some(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Double the number of sockets in the bucket. */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+static void force_realloc(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socket just to initialize the batch. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Double the number of sockets in the bucket to force a realloc on the
+ * next read.
+ */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket from the first set was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+struct test_case {
+ void (*test)(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd);
+ const char *description;
+ int init_socks;
+ int max_socks;
+ int sock_type;
+ int family;
+};
+
+static struct test_case resume_tests[] = {
+ {
+ .description = "udp: resume after removing a seen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "udp: resume after removing one unseen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "udp: resume after removing all unseen sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "udp: resume after adding a few sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "udp: force a realloc to occur",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list, needing to be added to the next batch to force
+ * a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+};
+
+static void do_resume_test(struct test_case *tc)
+{
+ struct sock_iter_batch *skel = NULL;
+ static const __u16 port = 10001;
+ struct bpf_link *link = NULL;
+ struct sock_count *counts;
+ int err, iter_fd = -1;
+ const char *addr;
+ int *fds = NULL;
+ int local_port;
+
+ counts = calloc(tc->max_socks, sizeof(*counts));
+ if (!ASSERT_OK_PTR(counts, "counts"))
+ goto done;
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ /* Prepare a bucket of sockets in the kernel hashtable */
+ addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1";
+ fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
+ goto done;
+ local_port = get_socket_local_port(*fds);
+ if (!ASSERT_GE(local_port, 0, "get_socket_local_port"))
+ goto done;
+ skel->rodata->ports[0] = ntohs(local_port);
+ skel->rodata->sf = tc->family;
+
+ err = sock_iter_batch__load(skel);
+ if (!ASSERT_OK(err, "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ?
+ skel->progs.iter_tcp_soreuse :
+ skel->progs.iter_udp_soreuse,
+ NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
+ counts, tc->max_socks, link, iter_fd);
+done:
+ free(counts);
+ free_fds(fds, tc->init_socks);
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+}
+
+static void do_resume_tests(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(resume_tests); i++) {
+ if (test__start_subtest(resume_tests[i].description)) {
+ do_resume_test(&resume_tests[i]);
+ }
+ }
+}
+
static void do_test(int sock_type, bool onebyone)
{
int err, i, nread, to_read, total_read, iter_fd = -1;
- int first_idx, second_idx, indices[nr_soreuse];
+ struct iter_out outputs[nr_soreuse];
struct bpf_link *link = NULL;
struct sock_iter_batch *skel;
+ int first_idx, second_idx;
int *fds[2] = {};
skel = sock_iter_batch__open();
@@ -34,6 +453,7 @@ static void do_test(int sock_type, bool onebyone)
goto done;
skel->rodata->ports[i] = ntohs(local_port);
}
+ skel->rodata->sf = AF_INET6;
err = sock_iter_batch__load(skel);
if (!ASSERT_OK(err, "sock_iter_batch__load"))
@@ -55,38 +475,38 @@ static void do_test(int sock_type, bool onebyone)
* from a bucket and leave one socket out from
* that bucket on purpose.
*/
- to_read = (nr_soreuse - 1) * sizeof(*indices);
+ to_read = (nr_soreuse - 1) * sizeof(*outputs);
total_read = 0;
first_idx = -1;
do {
- nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
- if (nread <= 0 || nread % sizeof(*indices))
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
break;
total_read += nread;
if (first_idx == -1)
- first_idx = indices[0];
- for (i = 0; i < nread / sizeof(*indices); i++)
- ASSERT_EQ(indices[i], first_idx, "first_idx");
+ first_idx = outputs[0].idx;
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, first_idx, "first_idx");
} while (total_read < to_read);
- ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread");
+ ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread");
ASSERT_EQ(total_read, to_read, "total_read");
free_fds(fds[first_idx], nr_soreuse);
fds[first_idx] = NULL;
/* Read the "whole" second bucket */
- to_read = nr_soreuse * sizeof(*indices);
+ to_read = nr_soreuse * sizeof(*outputs);
total_read = 0;
second_idx = !first_idx;
do {
- nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
- if (nread <= 0 || nread % sizeof(*indices))
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
break;
total_read += nread;
- for (i = 0; i < nread / sizeof(*indices); i++)
- ASSERT_EQ(indices[i], second_idx, "second_idx");
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, second_idx, "second_idx");
} while (total_read <= to_read);
ASSERT_EQ(nread, 0, "nread");
/* Both so_reuseport ports should be in different buckets, so
@@ -128,6 +548,7 @@ void test_sock_iter_batch(void)
do_test(SOCK_DGRAM, true);
do_test(SOCK_DGRAM, false);
}
+ do_resume_tests();
close_netns(nstoken);
done:
diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
index 1bdfb79ef009..e02cabcc814e 100644
--- a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
@@ -3,6 +3,7 @@
#ifndef __SOCKET_HELPERS__
#define __SOCKET_HELPERS__
+#include <sys/un.h>
#include <linux/vm_sockets.h>
/* include/linux/net.h */
@@ -169,6 +170,15 @@ static inline void init_addr_loopback6(struct sockaddr_storage *ss,
*len = sizeof(*addr6);
}
+static inline void init_addr_loopback_unix(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_un *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->sun_family = AF_UNIX;
+ *len = sizeof(sa_family_t);
+}
+
static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
socklen_t *len)
{
@@ -190,6 +200,9 @@ static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
case AF_INET6:
init_addr_loopback6(ss, len);
return;
+ case AF_UNIX:
+ init_addr_loopback_unix(ss, len);
+ return;
case AF_VSOCK:
init_addr_loopback_vsock(ss, len);
return;
@@ -315,21 +328,27 @@ static inline int create_pair(int family, int sotype, int *p0, int *p1)
{
__close_fd int s, c = -1, p = -1;
struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
+ socklen_t len;
int err;
s = socket_loopback(family, sotype);
if (s < 0)
return s;
- err = xgetsockname(s, sockaddr(&addr), &len);
- if (err)
- return err;
-
c = xsocket(family, sotype, 0);
if (c < 0)
return c;
+ init_addr_loopback(family, &addr, &len);
+ err = xbind(c, sockaddr(&addr), len);
+ if (err)
+ return err;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
err = connect(c, sockaddr(&addr), len);
if (err) {
if (errno != EINPROGRESS) {
@@ -391,4 +410,59 @@ static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
return err;
}
+static inline const char *socket_kind_to_str(int sock_fd)
+{
+ socklen_t opt_len;
+ int domain, type;
+
+ opt_len = sizeof(domain);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_DOMAIN, &domain, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_DOMAIN)");
+
+ opt_len = sizeof(type);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &type, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_TYPE)");
+
+ switch (domain) {
+ case AF_INET:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp4";
+ case SOCK_DGRAM:
+ return "udp4";
+ }
+ break;
+ case AF_INET6:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp6";
+ case SOCK_DGRAM:
+ return "udp6";
+ }
+ break;
+ case AF_UNIX:
+ switch (type) {
+ case SOCK_STREAM:
+ return "u_str";
+ case SOCK_DGRAM:
+ return "u_dgr";
+ case SOCK_SEQPACKET:
+ return "u_seq";
+ }
+ break;
+ case AF_VSOCK:
+ switch (type) {
+ case SOCK_STREAM:
+ return "v_str";
+ case SOCK_DGRAM:
+ return "v_dgr";
+ case SOCK_SEQPACKET:
+ return "v_seq";
+ }
+ break;
+ }
+
+ return "???";
+}
+
#endif // __SOCKET_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index 3e5571dd578d..d815efac52fd 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -5,12 +5,15 @@
#define MAX_TEST_NAME 80
+#define u32(v) ((u32){(v)})
+#define u64(v) ((u64){(v)})
+
#define __always_unused __attribute__((__unused__))
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_delete"); \
__ret; \
})
@@ -18,7 +21,7 @@
#define xbpf_map_lookup_elem(fd, key, val) \
({ \
int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_lookup"); \
__ret; \
})
@@ -26,7 +29,7 @@
#define xbpf_map_update_elem(fd, key, val, flags) \
({ \
int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_update"); \
__ret; \
})
@@ -35,7 +38,7 @@
({ \
int __ret = \
bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_attach(" #type ")"); \
__ret; \
})
@@ -43,7 +46,7 @@
#define xbpf_prog_detach2(prog, target, type) \
({ \
int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_detach2(" #type ")"); \
__ret; \
})
@@ -66,21 +69,15 @@
__ret; \
})
-static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+static inline int add_to_sockmap(int mapfd, int fd1, int fd2)
{
- u64 value;
- u32 key;
int err;
- key = 0;
- value = fd1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = xbpf_map_update_elem(mapfd, &u32(0), &u64(fd1), BPF_NOEXIST);
if (err)
return err;
- key = 1;
- value = fd2;
- return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ return xbpf_map_update_elem(mapfd, &u32(1), &u64(fd2), BPF_NOEXIST);
}
#endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 0a99fd404f6d..b6c471da5c28 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -3,76 +3,62 @@
/*
* Tests for sockmap/sockhash holding kTLS sockets.
*/
-
+#include <error.h>
#include <netinet/tcp.h>
+#include <linux/tls.h>
#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_ktls.skel.h"
#define MAX_TEST_NAME 80
#define TCP_ULP 31
-static int tcp_server(int family)
+static int init_ktls_pairs(int c, int p)
{
- int err, s;
-
- s = socket(family, SOCK_STREAM, 0);
- if (!ASSERT_GE(s, 0, "socket"))
- return -1;
-
- err = listen(s, SOMAXCONN);
- if (!ASSERT_OK(err, "listen"))
- return -1;
-
- return s;
-}
+ int err;
+ struct tls12_crypto_info_aes_gcm_128 crypto_rx;
+ struct tls12_crypto_info_aes_gcm_128 crypto_tx;
-static int disconnect(int fd)
-{
- struct sockaddr unspec = { AF_UNSPEC };
+ err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
- return connect(fd, &unspec, sizeof(unspec));
+ err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
+
+ memset(&crypto_rx, 0, sizeof(crypto_rx));
+ memset(&crypto_tx, 0, sizeof(crypto_tx));
+ crypto_rx.info.version = TLS_1_2_VERSION;
+ crypto_tx.info.version = TLS_1_2_VERSION;
+ crypto_rx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ crypto_tx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+ err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_tx, sizeof(crypto_tx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_TX)"))
+ goto out;
+
+ err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_rx, sizeof(crypto_rx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_RX)"))
+ goto out;
+ return 0;
+out:
+ return -1;
}
-/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */
-static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
+static int create_ktls_pairs(int family, int sotype, int *c, int *p)
{
- struct sockaddr_storage addr = {0};
- socklen_t len = sizeof(addr);
- int err, cli, srv, zero = 0;
-
- srv = tcp_server(family);
- if (srv == -1)
- return;
-
- err = getsockname(srv, (struct sockaddr *)&addr, &len);
- if (!ASSERT_OK(err, "getsockopt"))
- goto close_srv;
+ int err;
- cli = socket(family, SOCK_STREAM, 0);
- if (!ASSERT_GE(cli, 0, "socket"))
- goto close_srv;
-
- err = connect(cli, (struct sockaddr *)&addr, len);
- if (!ASSERT_OK(err, "connect"))
- goto close_cli;
-
- err = bpf_map_update_elem(map, &zero, &cli, 0);
- if (!ASSERT_OK(err, "bpf_map_update_elem"))
- goto close_cli;
-
- err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
- if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
- goto close_cli;
-
- err = bpf_map_delete_elem(map, &zero);
- if (!ASSERT_OK(err, "bpf_map_delete_elem"))
- goto close_cli;
-
- err = disconnect(cli);
+ err = create_pair(family, sotype, c, p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ return -1;
-close_cli:
- close(cli);
-close_srv:
- close(srv);
+ err = init_ktls_pairs(*c, *p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ return -1;
+ return 0;
}
static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map)
@@ -145,6 +131,189 @@ static const char *fmt_test_name(const char *subtest_name, int family,
return test_name;
}
+static void test_sockmap_ktls_offload(int family, int sotype)
+{
+ int err;
+ int c = 0, p = 0, sent, recvd;
+ char msg[12] = "hello world\0";
+ char rcv[13];
+
+ err = create_ktls_pairs(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_ktls_pairs()"))
+ goto out;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_OK(err, "send(msg)"))
+ goto out;
+
+ recvd = recv(p, rcv, sizeof(rcv), 0);
+ if (!ASSERT_OK(err, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sent, "length mismatch"))
+ goto out;
+
+ ASSERT_OK(memcmp(msg, rcv, sizeof(msg)), "data mismatch");
+
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+}
+
+static void test_sockmap_ktls_tx_cork(int family, int sotype, bool push)
+{
+ int err, off;
+ int i, j;
+ int start_push = 0, push_len = 0;
+ int c = 0, p = 0, one = 1, sent, recvd;
+ int prog_fd, map_fd;
+ char msg[12] = "hello world\0";
+ char rcv[20] = {0};
+ struct test_sockmap_ktls *skel;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ skel->bss->cork_byte = sizeof(msg);
+ if (push) {
+ start_push = 1;
+ push_len = 2;
+ }
+ skel->bss->push_start = start_push;
+ skel->bss->push_end = push_len;
+
+ off = sizeof(msg) / 2;
+ sent = send(c, msg, off, 0);
+ if (!ASSERT_EQ(sent, off, "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "expected no data"))
+ goto out;
+
+ /* send remaining msg */
+ sent = send(c, msg + off, sizeof(msg) - off, 0);
+ if (!ASSERT_EQ(sent, sizeof(msg) - off, "send remaining data"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_OK(err, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sizeof(msg) + push_len, "check length mismatch"))
+ goto out;
+
+ for (i = 0, j = 0; i < recvd;) {
+ /* skip checking the data that has been pushed in */
+ if (i >= start_push && i <= start_push + push_len - 1) {
+ i++;
+ continue;
+ }
+ if (!ASSERT_EQ(rcv[i], msg[j], "data mismatch"))
+ goto out;
+ i++;
+ j++;
+ }
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
+static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push)
+{
+ int c = -1, p = -1, one = 1, two = 2;
+ struct test_sockmap_ktls *skel;
+ unsigned char *data = NULL;
+ struct msghdr msg = {0};
+ struct iovec iov[2];
+ int prog_fd, map_fd;
+ int txrx_buf = 1024;
+ int iov_length = 8192;
+ int err;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ err = setsockopt(c, SOL_SOCKET, SO_RCVBUFFORCE, &txrx_buf, sizeof(int));
+ err |= setsockopt(p, SOL_SOCKET, SO_SNDBUFFORCE, &txrx_buf, sizeof(int));
+ if (!ASSERT_OK(err, "set buf limit"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy_redir);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &two, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+ goto out;
+
+ skel->bss->apply_bytes = 1024;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ data = calloc(iov_length, sizeof(char));
+ if (!data)
+ goto out;
+
+ iov[0].iov_base = data;
+ iov[0].iov_len = iov_length;
+ iov[1].iov_base = data;
+ iov[1].iov_len = iov_length;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 2;
+
+ for (;;) {
+ err = sendmsg(c, &msg, MSG_DONTWAIT);
+ if (err <= 0)
+ break;
+ }
+
+out:
+ if (data)
+ free(data);
+ if (c != -1)
+ close(c);
+ if (p != -1)
+ close(p);
+
+ test_sockmap_ktls__destroy(skel);
+}
+
static void run_tests(int family, enum bpf_map_type map_type)
{
int map;
@@ -153,18 +322,30 @@ static void run_tests(int family, enum bpf_map_type map_type)
if (!ASSERT_GE(map, 0, "bpf_map_create"))
return;
- if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type)))
- test_sockmap_ktls_disconnect_after_delete(family, map);
if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type)))
test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map);
close(map);
}
+static void run_ktls_test(int family, int sotype)
+{
+ if (test__start_subtest("tls simple offload"))
+ test_sockmap_ktls_offload(family, sotype);
+ if (test__start_subtest("tls tx cork"))
+ test_sockmap_ktls_tx_cork(family, sotype, false);
+ if (test__start_subtest("tls tx cork with push"))
+ test_sockmap_ktls_tx_cork(family, sotype, true);
+ if (test__start_subtest("tls tx egress with no buf"))
+ test_sockmap_ktls_tx_no_buf(family, sotype, true);
+}
+
void test_sockmap_ktls(void)
{
run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH);
+ run_ktls_test(AF_INET, SOCK_STREAM);
+ run_ktls_test(AF_INET6, SOCK_STREAM);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 4ee1148d22be..1d98eee7a2c3 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1366,237 +1366,6 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
-static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
- int sock_mapfd, int nop_mapfd,
- int verd_mapfd, enum redir_mode mode,
- int send_flags)
-{
- const char *log_prefix = redir_mode_str(mode);
- unsigned int pass;
- int err, n;
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
-
- err = add_to_sockmap(sock_mapfd, peer0, peer1);
- if (err)
- return;
-
- if (nop_mapfd >= 0) {
- err = add_to_sockmap(nop_mapfd, cli0, cli1);
- if (err)
- return;
- }
-
- /* Last byte is OOB data when send_flags has MSG_OOB bit set */
- n = xsend(cli1, "ab", 2, send_flags);
- if (n >= 0 && n < 2)
- FAIL("%s: incomplete send", log_prefix);
- if (n < 2)
- return;
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- return;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
- n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
- if (n < 0)
- FAIL_ERRNO("%s: recv_timeout", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
-
- if (send_flags & MSG_OOB) {
- /* Check that we can't read OOB while in sockmap */
- errno = 0;
- n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
- if (n != -1 || errno != EOPNOTSUPP)
- FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d",
- log_prefix, n, errno);
-
- /* Remove peer1 from sockmap */
- xbpf_map_delete_elem(sock_mapfd, &(int){ 1 });
-
- /* Check that OOB was dropped on redirect */
- errno = 0;
- n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
- if (n != -1 || errno != EINVAL)
- FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d",
- log_prefix, n, errno);
- }
-}
-
-static void unix_redir_to_connected(int sotype, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int sfd[2];
-
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- return;
- c0 = sfd[0], p0 = sfd[1];
-
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- goto close0;
- c1 = sfd[0], p1 = sfd[1];
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close0:
- xclose(c0);
- xclose(p0);
-}
-
-static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int sotype)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
- skel->bss->test_ingress = true;
- unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int sotype)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(AF_UNIX);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
- unix_skb_redir_to_connected(skel, map, sotype);
-}
-
-/* Returns two connected loopback vsock sockets */
-static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
-{
- return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1);
-}
-
-static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
- enum redir_mode mode, int sotype)
-{
- const char *log_prefix = redir_mode_str(mode);
- char a = 'a', b = 'b';
- int u0, u1, v0, v1;
- int sfd[2];
- unsigned int pass;
- int err, n;
- u32 key;
-
- zero_verdict_count(verd_mapfd);
-
- if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
- return;
-
- u0 = sfd[0];
- u1 = sfd[1];
-
- err = vsock_socketpair_connectible(sotype, &v0, &v1);
- if (err) {
- FAIL("vsock_socketpair_connectible() failed");
- goto close_uds;
- }
-
- err = add_to_sockmap(sock_mapfd, u0, v0);
- if (err) {
- FAIL("add_to_sockmap failed");
- goto close_vsock;
- }
-
- n = write(v1, &a, sizeof(a));
- if (n < 0)
- FAIL_ERRNO("%s: write", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete write", log_prefix);
- if (n < 1)
- goto out;
-
- n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
- if (n < 0)
- FAIL("%s: recv() err, errno=%d", log_prefix, errno);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
- if (b != a)
- FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- goto out;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-out:
- key = 0;
- bpf_map_delete_elem(sock_mapfd, &key);
- key = 1;
- bpf_map_delete_elem(sock_mapfd, &key);
-
-close_vsock:
- close(v0);
- close(v1);
-
-close_uds:
- close(u0);
- close(u1);
-}
-
-static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map,
- int sotype)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
- skel->bss->test_ingress = true;
- vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(AF_VSOCK);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
-
- vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
- vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
-}
-
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -1637,224 +1406,6 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
-static int inet_socketpair(int family, int type, int *s, int *c)
-{
- return create_pair(family, type | SOCK_NONBLOCK, s, c);
-}
-
-static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
- enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int err;
-
- err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
- if (err)
- return;
- err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
- if (err)
- goto close_cli0;
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close_cli0:
- xclose(c0);
- xclose(p0);
-}
-
-static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
- skel->bss->test_ingress = true;
- udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(family);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
- udp_skb_redir_to_connected(skel, map, family);
-}
-
-static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int sfd[2];
- int err;
-
- if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
- return;
- c0 = sfd[0], p0 = sfd[1];
-
- err = inet_socketpair(family, type, &p1, &c1);
- if (err)
- goto close;
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close:
- xclose(c0);
- xclose(p0);
-}
-
-static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_EGRESS);
- inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
- REDIR_EGRESS);
- skel->bss->test_ingress = true;
- inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_INGRESS);
- inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
- REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
- int nop_mapfd, int verd_mapfd,
- enum redir_mode mode, int send_flags)
-{
- int c0, c1, p0, p1;
- int sfd[2];
- int err;
-
- err = inet_socketpair(family, type, &p0, &c0);
- if (err)
- return;
-
- if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
- goto close_cli0;
- c1 = sfd[0], p1 = sfd[1];
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd,
- verd_mapfd, mode, send_flags);
-
- xclose(c1);
- xclose(p1);
-close_cli0:
- xclose(c0);
- xclose(p0);
-}
-
-static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int nop_map = bpf_map__fd(skel->maps.nop_map);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, -1, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, -1, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
-
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
-
- /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, MSG_OOB);
-
- skel->bss->test_ingress = true;
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, -1, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, -1, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
-
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
-
- /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, MSG_OOB);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
-{
- const char *family_name, *map_name;
- struct netns_obj *netns;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(family);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
-
- netns = netns_new("sockmap_listen", true);
- if (!ASSERT_OK_PTR(netns, "netns_new"))
- return;
-
- inet_unix_skb_redir_to_connected(skel, map, family);
- unix_inet_skb_redir_to_connected(skel, map, family);
-
- netns_free(netns);
-}
-
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
@@ -1863,8 +1414,6 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_redir(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
- test_udp_redir(skel, map, family);
- test_udp_unix_redir(skel, map, family);
}
void serial_test_sockmap_listen(void)
@@ -1880,16 +1429,10 @@ void serial_test_sockmap_listen(void)
skel->bss->test_sockmap = true;
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
- test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
- test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
- test_vsock_redir(skel, skel->maps.sock_map);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
- test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
- test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
- test_vsock_redir(skel, skel->maps.sock_hash);
test_sockmap_listen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
new file mode 100644
index 000000000000..9c461d93113d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for sockmap/sockhash redirection.
+ *
+ * BPF_MAP_TYPE_SOCKMAP
+ * BPF_MAP_TYPE_SOCKHASH
+ * x
+ * sk_msg-to-egress
+ * sk_msg-to-ingress
+ * sk_skb-to-egress
+ * sk_skb-to-ingress
+ * x
+ * AF_INET, SOCK_STREAM
+ * AF_INET6, SOCK_STREAM
+ * AF_INET, SOCK_DGRAM
+ * AF_INET6, SOCK_DGRAM
+ * AF_UNIX, SOCK_STREAM
+ * AF_UNIX, SOCK_DGRAM
+ * AF_VSOCK, SOCK_STREAM
+ * AF_VSOCK, SOCK_SEQPACKET
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <linux/string.h>
+#include <linux/vm_sockets.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "linux/const.h"
+#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_sockmap_redir.skel.h"
+
+/* The meaning of SUPPORTED is "will redirect packet as expected".
+ */
+#define SUPPORTED _BITUL(0)
+
+/* Note on sk_skb-to-ingress ->af_vsock:
+ *
+ * Peer socket may receive the packet some time after the return from sendmsg().
+ * In a typical usage scenario, recvmsg() will block until the redirected packet
+ * appears in the destination queue, or timeout if the packet was dropped. By
+ * that point, the verdict map has already been updated to reflect what has
+ * happened.
+ *
+ * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg()
+ * takes place. Which means we may race the execution of the verdict logic and
+ * read map_verd before it has been updated, i.e. we might observe
+ * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1.
+ *
+ * This confuses the selftest logic: if there was no packet dropped, where's the
+ * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0
+ * (which implies the verdict program has not been ran) just re-read the verdict
+ * map again.
+ */
+#define UNSUPPORTED_RACY_VERD _BITUL(1)
+
+enum prog_type {
+ SK_MSG_EGRESS,
+ SK_MSG_INGRESS,
+ SK_SKB_EGRESS,
+ SK_SKB_INGRESS,
+};
+
+enum {
+ SEND_INNER = 0,
+ SEND_OUTER,
+};
+
+enum {
+ RECV_INNER = 0,
+ RECV_OUTER,
+};
+
+struct maps {
+ int in;
+ int out;
+ int verd;
+};
+
+struct combo_spec {
+ enum prog_type prog_type;
+ const char *in, *out;
+};
+
+struct redir_spec {
+ const char *name;
+ int idx_send;
+ int idx_recv;
+ enum prog_type prog_type;
+};
+
+struct socket_spec {
+ int family;
+ int sotype;
+ int send_flags;
+ int in[2];
+ int out[2];
+};
+
+static int socket_spec_pairs(struct socket_spec *s)
+{
+ return create_socket_pairs(s->family, s->sotype,
+ &s->in[0], &s->out[0],
+ &s->in[1], &s->out[1]);
+}
+
+static void socket_spec_close(struct socket_spec *s)
+{
+ xclose(s->in[0]);
+ xclose(s->in[1]);
+ xclose(s->out[0]);
+ xclose(s->out[1]);
+}
+
+static void get_redir_params(struct redir_spec *redir,
+ struct test_sockmap_redir *skel, int *prog_fd,
+ enum bpf_attach_type *attach_type,
+ int *redirect_flags)
+{
+ enum prog_type type = redir->prog_type;
+ struct bpf_program *prog;
+ bool sk_msg;
+
+ sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS;
+ prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict;
+
+ *prog_fd = bpf_program__fd(prog);
+ *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT;
+
+ if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS)
+ *redirect_flags = BPF_F_INGRESS;
+ else
+ *redirect_flags = 0;
+}
+
+static void try_recv(const char *prefix, int fd, int flags, bool expect_success)
+{
+ ssize_t n;
+ char buf;
+
+ errno = 0;
+ n = recv(fd, &buf, 1, flags);
+ if (n < 0 && expect_success)
+ FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n);
+ if (!n && !expect_success)
+ FAIL("%s: expected failure: retval=%zd", prefix, n);
+}
+
+static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out,
+ int sd_recv, int map_verd, int status)
+{
+ unsigned int drop, pass;
+ char recv_buf;
+ ssize_t n;
+
+get_verdict:
+ if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass))
+ return;
+
+ if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) {
+ sched_yield();
+ goto get_verdict;
+ }
+
+ if (pass != 0) {
+ FAIL("unsupported: wanted verdict pass 0, have %u", pass);
+ return;
+ }
+
+ /* If nothing was dropped, packet should have reached the peer */
+ if (drop == 0) {
+ errno = 0;
+ n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1)
+ FAIL_ERRNO("unsupported: packet missing, retval=%zd", n);
+ }
+
+ /* Ensure queues are empty */
+ try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false);
+ if (sd_in != sd_send)
+ try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false);
+
+ try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false);
+ if (sd_recv != sd_out)
+ try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false);
+}
+
+static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer,
+ int sd_in, int sd_out, int sd_recv,
+ struct maps *maps, int status)
+{
+ unsigned int drop, pass;
+ char *send_buf = "ab";
+ char recv_buf = '\0';
+ ssize_t n, len = 1;
+
+ /* Zero out the verdict map */
+ if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) ||
+ xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY))
+ return;
+
+ if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST))
+ return;
+
+ if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST))
+ goto del_in;
+
+ /* Last byte is OOB data when send_flags has MSG_OOB bit set */
+ if (send_flags & MSG_OOB)
+ len++;
+ n = send(sd_send, send_buf, len, send_flags);
+ if (n >= 0 && n < len)
+ FAIL("incomplete send");
+ if (n < 0) {
+ /* sk_msg redirect combo not supported? */
+ if (status & SUPPORTED || errno != EACCES)
+ FAIL_ERRNO("send");
+ goto out;
+ }
+
+ if (!(status & SUPPORTED)) {
+ handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv,
+ maps->verd, status);
+ goto out;
+ }
+
+ errno = 0;
+ n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1) {
+ FAIL_ERRNO("recv_timeout()");
+ goto out;
+ }
+
+ /* Check verdict _after_ recv(); af_vsock may need time to catch up */
+ if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass))
+ goto out;
+
+ if (drop != 0 || pass != 1)
+ FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u",
+ drop, pass);
+
+ if (recv_buf != send_buf[0])
+ FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]);
+
+ if (send_flags & MSG_OOB) {
+ /* Fail reading OOB while in sockmap */
+ try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ /* Remove sd_out from sockmap */
+ xbpf_map_delete_elem(maps->out, &u32(0));
+
+ /* Check that OOB was dropped on redirect */
+ try_recv("recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ goto del_in;
+ }
+out:
+ xbpf_map_delete_elem(maps->out, &u32(0));
+del_in:
+ xbpf_map_delete_elem(maps->in, &u32(0));
+}
+
+static int is_redir_supported(enum prog_type type, const char *in,
+ const char *out)
+{
+ /* Matching based on strings returned by socket_kind_to_str():
+ * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq
+ * Plus a wildcard: any
+ * Not in use: u_seq, v_dgr
+ */
+ struct combo_spec *c, combos[] = {
+ /* Send to local: TCP -> any, but vsock */
+ { SK_MSG_INGRESS, "tcp", "tcp" },
+ { SK_MSG_INGRESS, "tcp", "udp" },
+ { SK_MSG_INGRESS, "tcp", "u_str" },
+ { SK_MSG_INGRESS, "tcp", "u_dgr" },
+
+ /* Send to egress: TCP -> TCP */
+ { SK_MSG_EGRESS, "tcp", "tcp" },
+
+ /* Ingress to egress: any -> any */
+ { SK_SKB_EGRESS, "any", "any" },
+
+ /* Ingress to local: any -> any, but vsock */
+ { SK_SKB_INGRESS, "any", "tcp" },
+ { SK_SKB_INGRESS, "any", "udp" },
+ { SK_SKB_INGRESS, "any", "u_str" },
+ { SK_SKB_INGRESS, "any", "u_dgr" },
+ };
+
+ for (c = combos; c < combos + ARRAY_SIZE(combos); c++) {
+ if (c->prog_type == type &&
+ (!strcmp(c->in, "any") || strstarts(in, c->in)) &&
+ (!strcmp(c->out, "any") || strstarts(out, c->out)))
+ return SUPPORTED;
+ }
+
+ return 0;
+}
+
+static int get_support_status(enum prog_type type, const char *in,
+ const char *out)
+{
+ int status = is_redir_supported(type, in, out);
+
+ if (type == SK_SKB_INGRESS && strstarts(out, "v_"))
+ status |= UNSUPPORTED_RACY_VERD;
+
+ return status;
+}
+
+static void test_socket(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps, struct socket_spec *s_in,
+ struct socket_spec *s_out)
+{
+ int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status;
+ const char *in_str, *out_str;
+ char s[MAX_TEST_NAME];
+
+ fd_in = s_in->in[0];
+ fd_out = s_out->out[0];
+ fd_send = s_in->in[redir->idx_send];
+ fd_peer = s_in->in[redir->idx_send ^ 1];
+ fd_recv = s_out->out[redir->idx_recv];
+ flags = s_in->send_flags;
+
+ in_str = socket_kind_to_str(fd_in);
+ out_str = socket_kind_to_str(fd_out);
+ status = get_support_status(redir->prog_type, in_str, out_str);
+
+ snprintf(s, sizeof(s),
+ "%-4s %-17s %-5s %s %-5s%6s",
+ /* hash sk_skb-to-ingress u_str → v_str (OOB) */
+ type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash",
+ redir->name,
+ in_str,
+ status & SUPPORTED ? "→" : " ",
+ out_str,
+ (flags & MSG_OOB) ? "(OOB)" : "");
+
+ if (!test__start_subtest(s))
+ return;
+
+ test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv,
+ maps, status);
+}
+
+static void test_redir(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps)
+{
+ struct socket_spec *s, sockets[] = {
+ { AF_INET, SOCK_STREAM },
+ // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */
+ { AF_INET6, SOCK_STREAM },
+ { AF_INET, SOCK_DGRAM },
+ { AF_INET6, SOCK_DGRAM },
+ { AF_UNIX, SOCK_STREAM },
+ { AF_UNIX, SOCK_STREAM, MSG_OOB },
+ { AF_UNIX, SOCK_DGRAM },
+ // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */
+ { AF_VSOCK, SOCK_STREAM },
+ // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */
+ { AF_VSOCK, SOCK_SEQPACKET },
+ };
+
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ if (socket_spec_pairs(s))
+ goto out;
+
+ /* Intra-proto */
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ test_socket(type, redir, maps, s, s);
+
+ /* Cross-proto */
+ for (int i = 0; i < ARRAY_SIZE(sockets); i++) {
+ for (int j = 0; j < ARRAY_SIZE(sockets); j++) {
+ struct socket_spec *out = &sockets[j];
+ struct socket_spec *in = &sockets[i];
+
+ /* Skip intra-proto and between variants */
+ if (out->send_flags ||
+ (in->family == out->family &&
+ in->sotype == out->sotype))
+ continue;
+
+ test_socket(type, redir, maps, in, out);
+ }
+ }
+out:
+ while (--s >= sockets)
+ socket_spec_close(s);
+}
+
+static void test_map(enum bpf_map_type type)
+{
+ struct redir_spec *r, redirs[] = {
+ { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS },
+ { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS },
+ { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS },
+ { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS },
+ };
+
+ for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) {
+ enum bpf_attach_type attach_type;
+ struct test_sockmap_redir *skel;
+ struct maps maps;
+ int prog_fd;
+
+ skel = test_sockmap_redir__open_and_load();
+ if (!skel) {
+ FAIL("open_and_load");
+ return;
+ }
+
+ switch (type) {
+ case BPF_MAP_TYPE_SOCKMAP:
+ maps.in = bpf_map__fd(skel->maps.nop_map);
+ maps.out = bpf_map__fd(skel->maps.sock_map);
+ break;
+ case BPF_MAP_TYPE_SOCKHASH:
+ maps.in = bpf_map__fd(skel->maps.nop_hash);
+ maps.out = bpf_map__fd(skel->maps.sock_hash);
+ break;
+ default:
+ FAIL("Unsupported bpf_map_type");
+ return;
+ }
+
+ skel->bss->redirect_type = type;
+ maps.verd = bpf_map__fd(skel->maps.verdict_map);
+ get_redir_params(r, skel, &prog_fd, &attach_type,
+ &skel->bss->redirect_flags);
+
+ if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0))
+ return;
+
+ test_redir(type, r, &maps);
+
+ if (xbpf_prog_detach2(prog_fd, maps.in, attach_type))
+ return;
+
+ test_sockmap_redir__destroy(skel);
+ }
+}
+
+void serial_test_sockmap_redir(void)
+{
+ test_map(BPF_MAP_TYPE_SOCKMAP);
+ test_map(BPF_MAP_TYPE_SOCKHASH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index c85798966aec..76d72a59365e 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -56,6 +56,8 @@
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"
+#define MAC_SRC_FWD "00:33:44:55:66:77"
+#define MAC_SRC "00:44:55:66:77:88"
#define IFADDR_STR_LEN 18
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
@@ -207,11 +209,10 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
int err;
if (result->dev_mode == MODE_VETH) {
- SYS(fail, "ip link add src type veth peer name src_fwd");
- SYS(fail, "ip link add dst type veth peer name dst_fwd");
-
- SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
- SYS(fail, "ip link set dst address " MAC_DST);
+ SYS(fail, "ip link add src address " MAC_SRC " type veth "
+ "peer name src_fwd address " MAC_SRC_FWD);
+ SYS(fail, "ip link add dst address " MAC_DST " type veth "
+ "peer name dst_fwd address " MAC_DST_FWD);
} else if (result->dev_mode == MODE_NETKIT) {
err = create_netkit(NETKIT_L3, "src", "src_fwd");
if (!ASSERT_OK(err, "create_ifindex_src"))
diff --git a/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
new file mode 100644
index 000000000000..7d1b478c99a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms Inc. */
+#include <test_progs.h>
+#include "test_btf_ext.skel.h"
+#include "btf_helpers.h"
+
+static void subtest_line_func_info(void)
+{
+ struct test_btf_ext *skel;
+ struct bpf_prog_info info;
+ struct bpf_line_info line_info[128], *libbpf_line_info;
+ struct bpf_func_info func_info[128], *libbpf_func_info;
+ __u32 info_len = sizeof(info), libbbpf_line_info_cnt, libbbpf_func_info_cnt;
+ int err, fd;
+
+ skel = test_btf_ext__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.line_info = ptr_to_u64(&line_info);
+ info.nr_line_info = sizeof(line_info);
+ info.line_info_rec_size = sizeof(*line_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_line_info"))
+ goto out;
+
+ libbpf_line_info = bpf_program__line_info(skel->progs.global_func);
+ libbbpf_line_info_cnt = bpf_program__line_info_cnt(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.func_info = ptr_to_u64(&func_info);
+ info.nr_func_info = sizeof(func_info);
+ info.func_info_rec_size = sizeof(*func_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_func_info"))
+ goto out;
+
+ libbpf_func_info = bpf_program__func_info(skel->progs.global_func);
+ libbbpf_func_info_cnt = bpf_program__func_info_cnt(skel->progs.global_func);
+
+ if (!ASSERT_OK_PTR(libbpf_line_info, "bpf_program__line_info"))
+ goto out;
+ if (!ASSERT_EQ(libbbpf_line_info_cnt, info.nr_line_info, "line_info_cnt"))
+ goto out;
+ if (!ASSERT_OK_PTR(libbpf_func_info, "bpf_program__func_info"))
+ goto out;
+ if (!ASSERT_EQ(libbbpf_func_info_cnt, info.nr_func_info, "func_info_cnt"))
+ goto out;
+ ASSERT_MEMEQ(libbpf_line_info, line_info, libbbpf_line_info_cnt * sizeof(*line_info),
+ "line_info");
+ ASSERT_MEMEQ(libbpf_func_info, func_info, libbbpf_func_info_cnt * sizeof(*func_info),
+ "func_info");
+out:
+ test_btf_ext__destroy(skel);
+}
+
+void test_btf_ext(void)
+{
+ if (test__start_subtest("line_func_info"))
+ subtest_line_func_info();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
index a95b42bf744a..47b56c258f3f 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -63,6 +63,9 @@ static void test_set_global_vars_succeeds(void)
" -G \"var_eb = EB2\" "\
" -G \"var_ec = EC2\" "\
" -G \"var_b = 1\" "\
+ " -G \"struct1.struct2.u.var_u8 = 170\" "\
+ " -G \"union1.struct3.var_u8_l = 0xaa\" "\
+ " -G \"union1.struct3.var_u8_h = 0xaa\" "\
"-vl2 > %s", fix->veristat, fix->tmpfile);
read(fix->fd, fix->output, fix->sz);
@@ -78,6 +81,8 @@ static void test_set_global_vars_succeeds(void)
__CHECK_STR("_w=12 ", "var_eb = EB2");
__CHECK_STR("_w=13 ", "var_ec = EC2");
__CHECK_STR("_w=1 ", "var_b = 1");
+ __CHECK_STR("_w=170 ", "struct1.struct2.u.var_u8 = 170");
+ __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa");
out:
teardown_fixture(fix);
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index e66a57970d28..c9da06741104 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -14,6 +14,7 @@
#include "verifier_bounds_deduction_non_const.skel.h"
#include "verifier_bounds_mix_sign_unsign.skel.h"
#include "verifier_bpf_get_stack.skel.h"
+#include "verifier_bpf_trap.skel.h"
#include "verifier_bswap.skel.h"
#include "verifier_btf_ctx_access.skel.h"
#include "verifier_btf_unreliable_prog.skel.h"
@@ -148,6 +149,7 @@ void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction);
void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); }
void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); }
+void test_verifier_bpf_trap(void) { RUN(verifier_bpf_trap); }
void test_verifier_bswap(void) { RUN(verifier_bswap); }
void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); }
void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 3d47878ef6bf..19f92affc2da 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -351,9 +351,10 @@ void test_xdp_metadata(void)
struct xdp_metadata2 *bpf_obj2 = NULL;
struct xdp_metadata *bpf_obj = NULL;
struct bpf_program *new_prog, *prog;
+ struct bpf_devmap_val devmap_e = {};
+ struct bpf_map *prog_arr, *devmap;
struct nstoken *tok = NULL;
__u32 queue_id = QUEUE_ID;
- struct bpf_map *prog_arr;
struct xsk tx_xsk = {};
struct xsk rx_xsk = {};
__u32 val, key = 0;
@@ -409,6 +410,13 @@ void test_xdp_metadata(void)
bpf_program__set_ifindex(prog, rx_ifindex);
bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+ /* Make sure we can load a dev-bound program that performs
+ * XDP_REDIRECT into a devmap.
+ */
+ new_prog = bpf_object__find_program_by_name(bpf_obj->obj, "redirect");
+ bpf_program__set_ifindex(new_prog, rx_ifindex);
+ bpf_program__set_flags(new_prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
goto out;
@@ -423,6 +431,18 @@ void test_xdp_metadata(void)
"update prog_arr"))
goto out;
+ /* Make sure we can't add dev-bound programs to devmaps. */
+ devmap = bpf_object__find_map_by_name(bpf_obj->obj, "dev_map");
+ if (!ASSERT_OK_PTR(devmap, "no dev_map found"))
+ goto out;
+
+ devmap_e.bpf_prog.fd = val;
+ if (!ASSERT_ERR(bpf_map__update_elem(devmap, &key, sizeof(key),
+ &devmap_e, sizeof(devmap_e),
+ BPF_ANY),
+ "update dev_map"))
+ goto out;
+
/* Attach BPF program to RX interface. */
ret = bpf_xdp_attach(rx_ifindex,
diff --git a/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
new file mode 100644
index 000000000000..079bf3794b3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+long process_byte = 0;
+int verdict_dir = 0;
+int dropped = 0;
+int pkt_size = 0;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return pkt_size;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ int one = 1;
+ int ret = bpf_sk_redirect_map(skb, &sock_map_rx, one, verdict_dir);
+
+ if (ret == SK_DROP)
+ dropped++;
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return ret;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_pass(struct __sk_buff *skb)
+{
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+ int one = 1;
+
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return bpf_msg_redirect_map(msg, &sock_map_tx, one, verdict_dir);
+}
+
+SEC("sk_msg")
+int prog_skmsg_pass(struct sk_msg_md *msg)
+{
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
index fb8dc0768999..d67466c1ff77 100644
--- a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h
+++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
@@ -32,6 +32,7 @@ extern unsigned long CONFIG_NR_CPUS __kconfig;
struct __qspinlock {
union {
atomic_t val;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
struct {
u8 locked;
u8 pending;
@@ -40,6 +41,17 @@ struct __qspinlock {
u16 locked_pending;
u16 tail;
};
+#else
+ struct {
+ u16 tail;
+ u16 locked_pending;
+ };
+ struct {
+ u8 reserved[2];
+ u8 pending;
+ u8 locked;
+ };
+#endif
};
};
@@ -95,9 +107,6 @@ struct arena_qnode {
#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)
-#define likely(x) __builtin_expect(!!(x), 1)
-#define unlikely(x) __builtin_expect(!!(x), 0)
-
struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES];
static inline u32 encode_tail(int cpu, int idx)
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 863df7c0fdd0..6e208e24ba3b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -225,8 +225,9 @@
#define CAN_USE_BPF_ST
#endif
-#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
#define CAN_USE_LOAD_ACQ_STORE_REL
#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
new file mode 100644
index 000000000000..3754f581b328
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_QDISC_COMMON_H
+#define _BPF_QDISC_COMMON_H
+
+#define NET_XMIT_SUCCESS 0x00
+#define NET_XMIT_DROP 0x01 /* skb dropped */
+#define NET_XMIT_CN 0x02 /* congestion notification */
+
+#define TC_PRIO_CONTROL 7
+#define TC_PRIO_MAX 15
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+
+struct bpf_sk_buff_ptr;
+
+static struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
+{
+ return (struct qdisc_skb_cb *)skb->cb;
+}
+
+static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
+{
+ return qdisc_skb_cb(skb)->pkt_len;
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
new file mode 100644
index 000000000000..f188062ed730
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops")
+int BPF_PROG(bpf_qdisc_test_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops")
+struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch)
+{
+ return NULL;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch)
+{
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops test = {
+ .enqueue = (void *)bpf_qdisc_test_enqueue,
+ .dequeue = (void *)bpf_qdisc_test_dequeue,
+ .reset = (void *)bpf_qdisc_test_reset,
+ .destroy = (void *)bpf_qdisc_test_destroy,
+ .id = "bpf_qdisc_test",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
new file mode 100644
index 000000000000..1de2be3e370b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct skb_node {
+ struct sk_buff __kptr * skb;
+ struct bpf_list_node node;
+};
+
+private(A) struct bpf_spin_lock q_fifo_lock;
+private(A) struct bpf_list_head q_fifo __contains(skb_node, node);
+
+bool init_called;
+
+SEC("struct_ops/bpf_fifo_enqueue")
+int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct skb_node *skbn;
+ u32 pkt_len;
+
+ if (sch->q.qlen == sch->limit)
+ goto drop;
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn)
+ goto drop;
+
+ pkt_len = qdisc_pkt_len(skb);
+
+ sch->q.qlen++;
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&q_fifo_lock);
+ bpf_list_push_back(&q_fifo, &skbn->node);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ sch->qstats.backlog += pkt_len;
+ return NET_XMIT_SUCCESS;
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops/bpf_fifo_dequeue")
+struct sk_buff *BPF_PROG(bpf_fifo_dequeue, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+ if (!node)
+ return NULL;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+ if (!skb)
+ return NULL;
+
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fifo_init")
+int BPF_PROG(bpf_fifo_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ sch->limit = 1000;
+ init_called = true;
+ return 0;
+}
+
+SEC("struct_ops/bpf_fifo_reset")
+void BPF_PROG(bpf_fifo_reset, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct skb_node *skbn;
+ int i;
+
+ bpf_for(i, 0, sch->q.qlen) {
+ struct sk_buff *skb = NULL;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ if (!node)
+ break;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_kfree_skb(skb);
+ bpf_obj_drop(skbn);
+ }
+ sch->q.qlen = 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fifo_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fifo = {
+ .enqueue = (void *)bpf_fifo_enqueue,
+ .dequeue = (void *)bpf_fifo_dequeue,
+ .init = (void *)bpf_fifo_init,
+ .reset = (void *)bpf_fifo_reset,
+ .destroy = (void *)bpf_fifo_destroy,
+ .id = "bpf_fifo",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
new file mode 100644
index 000000000000..1a3233a275c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
@@ -0,0 +1,756 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* bpf_fq is intended for testing the bpf qdisc infrastructure and not a direct
+ * copy of sch_fq. bpf_fq implements the scheduling algorithm of sch_fq before
+ * 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") was
+ * introduced. It gives each flow a fair chance to transmit packets in a
+ * round-robin fashion. Note that for flow pacing, bpf_fq currently only
+ * respects skb->tstamp but not skb->sk->sk_pacing_rate. In addition, if there
+ * are multiple bpf_fq instances, they will have a shared view of flows and
+ * configuration since some key data structure such as fq_prio_flows,
+ * fq_nonprio_flows, and fq_bpf_data are global.
+ *
+ * To use bpf_fq alone without running selftests, use the following commands.
+ *
+ * 1. Register bpf_fq to the kernel
+ * bpftool struct_ops register bpf_qdisc_fq.bpf.o /sys/fs/bpf
+ * 2. Add bpf_fq to an interface
+ * tc qdisc add dev <interface name> root handle <handle> bpf_fq
+ * 3. Delete bpf_fq attached to the interface
+ * tc qdisc delete dev <interface name> root
+ * 4. Unregister bpf_fq
+ * bpftool struct_ops unregister name fq
+ *
+ * The qdisc name, bpf_fq, used in tc commands is defined by Qdisc_ops.id.
+ * The struct_ops_map_name, fq, used in the bpftool command is the name of the
+ * Qdisc_ops.
+ *
+ * SEC(".struct_ops")
+ * struct Qdisc_ops fq = {
+ * ...
+ * .id = "bpf_fq",
+ * };
+ */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_SEC 1000000000L
+
+#define NUM_QUEUE (1 << 20)
+
+struct fq_bpf_data {
+ u32 quantum;
+ u32 initial_quantum;
+ u32 flow_refill_delay;
+ u32 flow_plimit;
+ u64 horizon;
+ u32 orphan_mask;
+ u32 timer_slack;
+ u64 time_next_delayed_flow;
+ u64 unthrottle_latency_ns;
+ u8 horizon_drop;
+ u32 new_flow_cnt;
+ u32 old_flow_cnt;
+ u64 ktime_cache;
+};
+
+enum {
+ CLS_RET_PRIO = 0,
+ CLS_RET_NONPRIO = 1,
+ CLS_RET_ERR = 2,
+};
+
+struct skb_node {
+ u64 tstamp;
+ struct sk_buff __kptr * skb;
+ struct bpf_rb_node node;
+};
+
+struct fq_flow_node {
+ int credit;
+ u32 qlen;
+ u64 age;
+ u64 time_next_packet;
+ struct bpf_list_node list_node;
+ struct bpf_rb_node rb_node;
+ struct bpf_rb_root queue __contains(skb_node, node);
+ struct bpf_spin_lock lock;
+ struct bpf_refcount refcount;
+};
+
+struct dequeue_nonprio_ctx {
+ bool stop_iter;
+ u64 expire;
+ u64 now;
+};
+
+struct remove_flows_ctx {
+ bool gc_only;
+ u32 reset_cnt;
+ u32 reset_max;
+};
+
+struct unset_throttled_flows_ctx {
+ bool unset_all;
+ u64 now;
+};
+
+struct fq_stashed_flow {
+ struct fq_flow_node __kptr * flow;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, NUM_QUEUE);
+} fq_nonprio_flows SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, 1);
+} fq_prio_flows SEC(".maps");
+
+private(A) struct bpf_spin_lock fq_delayed_lock;
+private(A) struct bpf_rb_root fq_delayed __contains(fq_flow_node, rb_node);
+
+private(B) struct bpf_spin_lock fq_new_flows_lock;
+private(B) struct bpf_list_head fq_new_flows __contains(fq_flow_node, list_node);
+
+private(C) struct bpf_spin_lock fq_old_flows_lock;
+private(C) struct bpf_list_head fq_old_flows __contains(fq_flow_node, list_node);
+
+private(D) struct fq_bpf_data q;
+
+/* Wrapper for bpf_kptr_xchg that expects NULL dst */
+static void bpf_kptr_xchg_back(void *map_val, void *ptr)
+{
+ void *ret;
+
+ ret = bpf_kptr_xchg(map_val, ptr);
+ if (ret)
+ bpf_obj_drop(ret);
+}
+
+static bool skbn_tstamp_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct skb_node *skbn_a;
+ struct skb_node *skbn_b;
+
+ skbn_a = container_of(a, struct skb_node, node);
+ skbn_b = container_of(b, struct skb_node, node);
+
+ return skbn_a->tstamp < skbn_b->tstamp;
+}
+
+static bool fn_time_next_packet_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct fq_flow_node *flow_a;
+ struct fq_flow_node *flow_b;
+
+ flow_a = container_of(a, struct fq_flow_node, rb_node);
+ flow_b = container_of(b, struct fq_flow_node, rb_node);
+
+ return flow_a->time_next_packet < flow_b->time_next_packet;
+}
+
+static void
+fq_flows_add_head(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_add_tail(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_back(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_remove_front(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct bpf_list_node **node, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ *node = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ *flow_cnt -= 1;
+}
+
+static bool
+fq_flows_is_empty(struct bpf_list_head *head, struct bpf_spin_lock *lock)
+{
+ struct bpf_list_node *node;
+
+ bpf_spin_lock(lock);
+ node = bpf_list_pop_front(head);
+ if (node) {
+ bpf_list_push_front(head, node);
+ bpf_spin_unlock(lock);
+ return false;
+ }
+ bpf_spin_unlock(lock);
+
+ return true;
+}
+
+/* flow->age is used to denote the state of the flow (not-detached, detached, throttled)
+ * as well as the timestamp when the flow is detached.
+ *
+ * 0: not-detached
+ * 1 - (~0ULL-1): detached
+ * ~0ULL: throttled
+ */
+static void fq_flow_set_detached(struct fq_flow_node *flow)
+{
+ flow->age = bpf_jiffies64();
+}
+
+static bool fq_flow_is_detached(struct fq_flow_node *flow)
+{
+ return flow->age != 0 && flow->age != ~0ULL;
+}
+
+static bool sk_listener(struct sock *sk)
+{
+ return (1 << sk->__sk_common.skc_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
+}
+
+static void fq_gc(void);
+
+static int fq_new_flow(void *flow_map, struct fq_stashed_flow **sflow, u64 hash)
+{
+ struct fq_stashed_flow tmp = {};
+ struct fq_flow_node *flow;
+ int ret;
+
+ flow = bpf_obj_new(typeof(*flow));
+ if (!flow)
+ return -ENOMEM;
+
+ flow->credit = q.initial_quantum,
+ flow->qlen = 0,
+ flow->age = 1,
+ flow->time_next_packet = 0,
+
+ ret = bpf_map_update_elem(flow_map, &hash, &tmp, 0);
+ if (ret == -ENOMEM || ret == -E2BIG) {
+ fq_gc();
+ bpf_map_update_elem(&fq_nonprio_flows, &hash, &tmp, 0);
+ }
+
+ *sflow = bpf_map_lookup_elem(flow_map, &hash);
+ if (!*sflow) {
+ bpf_obj_drop(flow);
+ return -ENOMEM;
+ }
+
+ bpf_kptr_xchg_back(&(*sflow)->flow, flow);
+ return 0;
+}
+
+static int
+fq_classify(struct sk_buff *skb, struct fq_stashed_flow **sflow)
+{
+ struct sock *sk = skb->sk;
+ int ret = CLS_RET_NONPRIO;
+ u64 hash = 0;
+
+ if ((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL) {
+ *sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ ret = CLS_RET_PRIO;
+ } else {
+ if (!sk || sk_listener(sk)) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ /* Avoid collision with an existing flow hash, which
+ * only uses the lower 32 bits of hash, by setting the
+ * upper half of hash to 1.
+ */
+ hash |= (1ULL << 32);
+ } else if (sk->__sk_common.skc_state == TCP_CLOSE) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ hash |= (1ULL << 32);
+ } else {
+ hash = sk->__sk_common.skc_hash;
+ }
+ *sflow = bpf_map_lookup_elem(&fq_nonprio_flows, &hash);
+ }
+
+ if (!*sflow)
+ ret = fq_new_flow(&fq_nonprio_flows, sflow, hash) < 0 ?
+ CLS_RET_ERR : CLS_RET_NONPRIO;
+
+ return ret;
+}
+
+static bool fq_packet_beyond_horizon(struct sk_buff *skb)
+{
+ return (s64)skb->tstamp > (s64)(q.ktime_cache + q.horizon);
+}
+
+SEC("struct_ops/bpf_fq_enqueue")
+int BPF_PROG(bpf_fq_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct fq_flow_node *flow = NULL, *flow_copy;
+ struct fq_stashed_flow *sflow;
+ u64 time_to_send, jiffies;
+ struct skb_node *skbn;
+ int ret;
+
+ if (sch->q.qlen >= sch->limit)
+ goto drop;
+
+ if (!skb->tstamp) {
+ time_to_send = q.ktime_cache = bpf_ktime_get_ns();
+ } else {
+ if (fq_packet_beyond_horizon(skb)) {
+ q.ktime_cache = bpf_ktime_get_ns();
+ if (fq_packet_beyond_horizon(skb)) {
+ if (q.horizon_drop)
+ goto drop;
+
+ skb->tstamp = q.ktime_cache + q.horizon;
+ }
+ }
+ time_to_send = skb->tstamp;
+ }
+
+ ret = fq_classify(skb, &sflow);
+ if (ret == CLS_RET_ERR)
+ goto drop;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ goto drop;
+
+ if (ret == CLS_RET_NONPRIO) {
+ if (flow->qlen >= q.flow_plimit) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ if (fq_flow_is_detached(flow)) {
+ flow_copy = bpf_refcount_acquire(flow);
+
+ jiffies = bpf_jiffies64();
+ if ((s64)(jiffies - (flow_copy->age + q.flow_refill_delay)) > 0) {
+ if (flow_copy->credit < q.quantum)
+ flow_copy->credit = q.quantum;
+ }
+ flow_copy->age = 0;
+ fq_flows_add_tail(&fq_new_flows, &fq_new_flows_lock, flow_copy,
+ &q.new_flow_cnt);
+ }
+ }
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ skbn->tstamp = skb->tstamp = time_to_send;
+
+ sch->qstats.backlog += qdisc_pkt_len(skb);
+
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&flow->lock);
+ bpf_rbtree_add(&flow->queue, &skbn->node, skbn_tstamp_less);
+ bpf_spin_unlock(&flow->lock);
+
+ flow->qlen++;
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ sch->q.qlen++;
+ return NET_XMIT_SUCCESS;
+
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ sch->qstats.drops++;
+ return NET_XMIT_DROP;
+}
+
+static int fq_unset_throttled_flows(u32 index, struct unset_throttled_flows_ctx *ctx)
+{
+ struct bpf_rb_node *node = NULL;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_delayed_lock);
+
+ node = bpf_rbtree_first(&fq_delayed);
+ if (!node) {
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ if (!ctx->unset_all && flow->time_next_packet > ctx->now) {
+ q.time_next_delayed_flow = flow->time_next_packet;
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ node = bpf_rbtree_remove(&fq_delayed, &flow->rb_node);
+
+ bpf_spin_unlock(&fq_delayed_lock);
+
+ if (!node)
+ return 1;
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ flow->age = 0;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+
+ return 0;
+}
+
+static void fq_flow_set_throttled(struct fq_flow_node *flow)
+{
+ flow->age = ~0ULL;
+
+ if (q.time_next_delayed_flow > flow->time_next_packet)
+ q.time_next_delayed_flow = flow->time_next_packet;
+
+ bpf_spin_lock(&fq_delayed_lock);
+ bpf_rbtree_add(&fq_delayed, &flow->rb_node, fn_time_next_packet_less);
+ bpf_spin_unlock(&fq_delayed_lock);
+}
+
+static void fq_check_throttled(u64 now)
+{
+ struct unset_throttled_flows_ctx ctx = {
+ .unset_all = false,
+ .now = now,
+ };
+ unsigned long sample;
+
+ if (q.time_next_delayed_flow > now)
+ return;
+
+ sample = (unsigned long)(now - q.time_next_delayed_flow);
+ q.unthrottle_latency_ns -= q.unthrottle_latency_ns >> 3;
+ q.unthrottle_latency_ns += sample >> 3;
+
+ q.time_next_delayed_flow = ~0ULL;
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &ctx, 0);
+}
+
+static struct sk_buff*
+fq_dequeue_nonprio_flows(u32 index, struct dequeue_nonprio_ctx *ctx)
+{
+ u64 time_next_packet, time_to_send;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct bpf_list_head *head;
+ struct bpf_list_node *node;
+ struct bpf_spin_lock *lock;
+ struct fq_flow_node *flow;
+ struct skb_node *skbn;
+ bool is_empty;
+ u32 *cnt;
+
+ if (q.new_flow_cnt) {
+ head = &fq_new_flows;
+ lock = &fq_new_flows_lock;
+ cnt = &q.new_flow_cnt;
+ } else if (q.old_flow_cnt) {
+ head = &fq_old_flows;
+ lock = &fq_old_flows_lock;
+ cnt = &q.old_flow_cnt;
+ } else {
+ if (q.time_next_delayed_flow != ~0ULL)
+ ctx->expire = q.time_next_delayed_flow;
+ goto break_loop;
+ }
+
+ fq_flows_remove_front(head, lock, &node, cnt);
+ if (!node)
+ goto break_loop;
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ if (flow->credit <= 0) {
+ flow->credit += q.quantum;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ return NULL;
+ }
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ is_empty = fq_flows_is_empty(&fq_old_flows, &fq_old_flows_lock);
+ if (head == &fq_new_flows && !is_empty) {
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ } else {
+ fq_flow_set_detached(flow);
+ bpf_obj_drop(flow);
+ }
+ return NULL;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ time_to_send = skbn->tstamp;
+
+ time_next_packet = (time_to_send > flow->time_next_packet) ?
+ time_to_send : flow->time_next_packet;
+ if (ctx->now < time_next_packet) {
+ bpf_spin_unlock(&flow->lock);
+ flow->time_next_packet = time_next_packet;
+ fq_flow_set_throttled(flow);
+ return NULL;
+ }
+
+ rb_node = bpf_rbtree_remove(&flow->queue, rb_node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto add_flow_and_break;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+ if (!skb)
+ goto add_flow_and_break;
+
+ flow->credit -= qdisc_skb_cb(skb)->pkt_len;
+ flow->qlen--;
+
+add_flow_and_break:
+ fq_flows_add_head(head, lock, flow, cnt);
+
+break_loop:
+ ctx->stop_iter = true;
+ return skb;
+}
+
+static struct sk_buff *fq_dequeue_prio(void)
+{
+ struct fq_flow_node *flow = NULL;
+ struct fq_stashed_flow *sflow;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+ u64 hash = 0;
+
+ sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ if (!sflow)
+ return NULL;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ return NULL;
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ goto out;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ rb_node = bpf_rbtree_remove(&flow->queue, &skbn->node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto out;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+out:
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fq_dequeue")
+struct sk_buff *BPF_PROG(bpf_fq_dequeue, struct Qdisc *sch)
+{
+ struct dequeue_nonprio_ctx cb_ctx = {};
+ struct sk_buff *skb = NULL;
+ int i;
+
+ if (!sch->q.qlen)
+ goto out;
+
+ skb = fq_dequeue_prio();
+ if (skb)
+ goto dequeue;
+
+ q.ktime_cache = cb_ctx.now = bpf_ktime_get_ns();
+ fq_check_throttled(q.ktime_cache);
+ bpf_for(i, 0, sch->limit) {
+ skb = fq_dequeue_nonprio_flows(i, &cb_ctx);
+ if (cb_ctx.stop_iter)
+ break;
+ };
+
+ if (skb) {
+dequeue:
+ sch->q.qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ return skb;
+ }
+
+ if (cb_ctx.expire)
+ bpf_qdisc_watchdog_schedule(sch, cb_ctx.expire, q.timer_slack);
+out:
+ return NULL;
+}
+
+static int fq_remove_flows_in_list(u32 index, void *ctx)
+{
+ struct bpf_list_node *node;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_new_flows_lock);
+ node = bpf_list_pop_front(&fq_new_flows);
+ bpf_spin_unlock(&fq_new_flows_lock);
+ if (!node) {
+ bpf_spin_lock(&fq_old_flows_lock);
+ node = bpf_list_pop_front(&fq_old_flows);
+ bpf_spin_unlock(&fq_old_flows_lock);
+ if (!node)
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ bpf_obj_drop(flow);
+
+ return 0;
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+/* limit number of collected flows per round */
+#define FQ_GC_MAX 8
+#define FQ_GC_AGE (3*CONFIG_HZ)
+
+static bool fq_gc_candidate(struct fq_flow_node *flow)
+{
+ u64 jiffies = bpf_jiffies64();
+
+ return fq_flow_is_detached(flow) &&
+ ((s64)(jiffies - (flow->age + FQ_GC_AGE)) > 0);
+}
+
+static int
+fq_remove_flows(struct bpf_map *flow_map, u64 *hash,
+ struct fq_stashed_flow *sflow, struct remove_flows_ctx *ctx)
+{
+ if (sflow->flow &&
+ (!ctx->gc_only || fq_gc_candidate(sflow->flow))) {
+ bpf_map_delete_elem(flow_map, hash);
+ ctx->reset_cnt++;
+ }
+
+ return ctx->reset_cnt < ctx->reset_max ? 0 : 1;
+}
+
+static void fq_gc(void)
+{
+ struct remove_flows_ctx cb_ctx = {
+ .gc_only = true,
+ .reset_cnt = 0,
+ .reset_max = FQ_GC_MAX,
+ };
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &cb_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_reset")
+void BPF_PROG(bpf_fq_reset, struct Qdisc *sch)
+{
+ struct unset_throttled_flows_ctx utf_ctx = {
+ .unset_all = true,
+ };
+ struct remove_flows_ctx rf_ctx = {
+ .gc_only = false,
+ .reset_cnt = 0,
+ .reset_max = NUM_QUEUE,
+ };
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &rf_ctx, 0);
+
+ rf_ctx.reset_cnt = 0;
+ bpf_for_each_map_elem(&fq_prio_flows, fq_remove_flows, &rf_ctx, 0);
+ fq_new_flow(&fq_prio_flows, &sflow, hash);
+
+ bpf_loop(NUM_QUEUE, fq_remove_flows_in_list, NULL, 0);
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &utf_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_init")
+int BPF_PROG(bpf_fq_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = sch->dev_queue->dev;
+ u32 psched_mtu = dev->mtu + dev->hard_header_len;
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ if (fq_new_flow(&fq_prio_flows, &sflow, hash) < 0)
+ return -ENOMEM;
+
+ sch->limit = 10000;
+ q.initial_quantum = 10 * psched_mtu;
+ q.quantum = 2 * psched_mtu;
+ q.flow_refill_delay = 40;
+ q.flow_plimit = 100;
+ q.horizon = 10ULL * NSEC_PER_SEC;
+ q.horizon_drop = 1;
+ q.orphan_mask = 1024 - 1;
+ q.timer_slack = 10 * NSEC_PER_USEC;
+ q.time_next_delayed_flow = ~0ULL;
+ q.unthrottle_latency_ns = 0ULL;
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ return 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fq_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fq = {
+ .enqueue = (void *)bpf_fq_enqueue,
+ .dequeue = (void *)bpf_fq_dequeue,
+ .reset = (void *)bpf_fq_reset,
+ .init = (void *)bpf_fq_init,
+ .destroy = (void *)bpf_fq_destroy,
+ .id = "bpf_fq",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 659694162739..17db400f0e0d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -128,6 +128,7 @@
#define sk_refcnt __sk_common.skc_refcnt
#define sk_state __sk_common.skc_state
#define sk_net __sk_common.skc_net
+#define sk_rcv_saddr __sk_common.skc_rcv_saddr
#define sk_v6_daddr __sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
#define sk_flags __sk_common.skc_flags
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
index 38f78d9345de..69f81cb555ca 100644
--- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -30,22 +30,27 @@ int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg)
/* trace_cgroup_mkdir(struct cgroup *cgrp, const char *path)
*
- * struct cgroup_rstat_cpu {
+ * struct css_rstat_cpu {
* ...
- * struct cgroup *updated_children;
+ * struct cgroup_subsys_state *updated_children;
* ...
* };
*
- * struct cgroup {
+ * struct cgroup_subsys_state {
+ * ...
+ * struct css_rstat_cpu __percpu *rstat_cpu;
* ...
- * struct cgroup_rstat_cpu __percpu *rstat_cpu;
+ * };
+ *
+ * struct cgroup {
+ * struct cgroup_subsys_state self;
* ...
* };
*/
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
{
- g = (__u64)cgrp->rstat_cpu->updated_children;
+ g = (__u64)cgrp->self.rstat_cpu->updated_children;
return 0;
}
@@ -56,7 +61,8 @@ int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
__u32 cpu;
cpu = bpf_get_smp_processor_id();
- rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(cgrp->rstat_cpu, cpu);
+ rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(
+ cgrp->self.rstat_cpu, cpu);
if (rstat) {
/* READ_ONCE */
*(volatile int *)rstat;
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
index c74362854948..ff189a736ad8 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -37,8 +37,9 @@ struct {
__type(value, struct attach_counter);
} attach_counters SEC(".maps");
-extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
-extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
+extern void css_rstat_updated(
+ struct cgroup_subsys_state *css, int cpu) __ksym;
+extern void css_rstat_flush(struct cgroup_subsys_state *css) __ksym;
static uint64_t cgroup_id(struct cgroup *cgrp)
{
@@ -75,7 +76,7 @@ int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
else if (create_percpu_attach_counter(cg_id, 1))
return 0;
- cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
+ css_rstat_updated(&dst_cgrp->self, bpf_get_smp_processor_id());
return 0;
}
@@ -141,7 +142,7 @@ int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
return 1;
/* Flush the stats to make sure we get the most updated numbers */
- cgroup_rstat_flush(cgrp);
+ css_rstat_flush(&cgrp->self);
total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
if (!total_counter) {
diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
new file mode 100644
index 000000000000..13cdb11fdeb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+/* From uapi/linux/dma-buf.h */
+#define DMA_BUF_NAME_LEN 32
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, DMA_BUF_NAME_LEN);
+ __type(value, bool);
+ __uint(max_entries, 5);
+} testbuf_hash SEC(".maps");
+
+/*
+ * Fields output by this iterator are delimited by newlines. Convert any
+ * newlines in user-provided printed strings to spaces.
+ */
+static void sanitize_string(char *src, size_t size)
+{
+ for (char *c = src; (size_t)(c - src) < size && *c; ++c)
+ if (*c == '\n')
+ *c = ' ';
+}
+
+SEC("iter/dmabuf")
+int dmabuf_collector(struct bpf_iter__dmabuf *ctx)
+{
+ const struct dma_buf *dmabuf = ctx->dmabuf;
+ struct seq_file *seq = ctx->meta->seq;
+ unsigned long inode = 0;
+ size_t size;
+ const char *pname, *exporter;
+ char name[DMA_BUF_NAME_LEN] = {'\0'};
+
+ if (!dmabuf)
+ return 0;
+
+ if (BPF_CORE_READ_INTO(&inode, dmabuf, file, f_inode, i_ino) ||
+ bpf_core_read(&size, sizeof(size), &dmabuf->size) ||
+ bpf_core_read(&pname, sizeof(pname), &dmabuf->name) ||
+ bpf_core_read(&exporter, sizeof(exporter), &dmabuf->exp_name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (pname) {
+ if (bpf_probe_read_kernel(name, sizeof(name), pname))
+ return 1;
+
+ /* Name strings can be provided by userspace */
+ sanitize_string(name, sizeof(name));
+ }
+
+ BPF_SEQ_PRINTF(seq, "%lu\n%llu\n%s\n%s\n", inode, size, name, exporter);
+ return 0;
+}
+
+SEC("syscall")
+int iter_dmabuf_for_each(const void *ctx)
+{
+ struct dma_buf *d;
+
+ bpf_for_each(dmabuf, d) {
+ char name[DMA_BUF_NAME_LEN];
+ const char *pname;
+ bool *found;
+ long len;
+ int i;
+
+ if (bpf_core_read(&pname, sizeof(pname), &d->name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (!pname)
+ continue;
+
+ len = bpf_probe_read_kernel_str(name, sizeof(name), pname);
+ if (len < 0)
+ return 1;
+
+ /*
+ * The entire name buffer is used as a map key.
+ * Zeroize any uninitialized trailing bytes after the NUL.
+ */
+ bpf_for(i, len, DMA_BUF_NAME_LEN)
+ name[i] = 0;
+
+ found = bpf_map_lookup_elem(&testbuf_hash, name);
+ if (found) {
+ bool t = true;
+
+ bpf_map_update_elem(&testbuf_hash, name, &t, BPF_EXIST);
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index e1fba28e4a86..a0391f9da2d4 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -680,3 +680,233 @@ out:
bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
return XDP_DROP;
}
+
+void *user_ptr;
+/* Contains the copy of the data pointed by user_ptr.
+ * Size 384 to make it not fit into a single kernel chunk when copying
+ * but less than the maximum bpf stack size (512).
+ */
+char expected_str[384];
+__u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257};
+
+typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr);
+
+/* Returns the offset just before the end of the maximum sized xdp fragment.
+ * Any write larger than 32 bytes will be split between 2 fragments.
+ */
+__u32 xdp_near_frag_end_offset(void)
+{
+ const __u32 headroom = 256;
+ const __u32 max_frag_size = __PAGE_SIZE - headroom - sizeof(struct skb_shared_info);
+
+ /* 32 bytes before the approximate end of the fragment */
+ return max_frag_size - 32;
+}
+
+/* Use __always_inline on test_dynptr_probe[_str][_xdp]() and callbacks
+ * of type bpf_read_dynptr_fn_t to prevent compiler from generating
+ * indirect calls that make program fail to load with "unknown opcode" error.
+ */
+static __always_inline void test_dynptr_probe(void *ptr, bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ int i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ err = bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_buf, 0, test_len[i], ptr);
+ if (len > sizeof(buf))
+ break;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_buf, 0, buf, len, 0);
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
+static __always_inline void test_dynptr_probe_str(void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ __u32 cnt, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_buf, 0, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
+static __always_inline void test_dynptr_probe_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+static __always_inline void test_dynptr_probe_str_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 cnt, off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ if (err)
+ return;
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+SEC("xdp")
+int test_probe_read_user_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(user_ptr, bpf_probe_read_user_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, user_ptr, bpf_probe_read_user_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(expected_str, bpf_probe_read_kernel_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, expected_str, bpf_probe_read_kernel_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_user_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(user_ptr, bpf_probe_read_user_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, user_ptr, bpf_probe_read_user_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(expected_str, bpf_probe_read_kernel_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, expected_str, bpf_probe_read_kernel_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_from_user_dynptr);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_from_user_str_dynptr);
+ return 0;
+}
+
+static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off,
+ u32 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_str_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_data_from_user_task);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_data_from_user_task_str);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fd_htab_lookup.c b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
new file mode 100644
index 000000000000..a4a9e1db626f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct inner_map_type);
+} outer_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 427b72954b87..76adf4a8f2da 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -7,8 +7,6 @@
#include "bpf_misc.h"
#include "bpf_compiler.h"
-#define unlikely(x) __builtin_expect(!!(x), 0)
-
static volatile int zero = 0;
int my_pid;
diff --git a/tools/testing/selftests/bpf/progs/linked_list_peek.c b/tools/testing/selftests/bpf/progs/linked_list_peek.c
new file mode 100644
index 000000000000..264e81bfb287
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list_peek.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_list_node l;
+ int key;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_list_head ghead __contains(node_data, l);
+
+#define list_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
+int zero = 0;
+
+SEC("syscall")
+__retval(0)
+long list_peek(void *ctx)
+{
+ struct bpf_list_node *l_n;
+ struct node_data *n;
+ int i, err = 0;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_front(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_back(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+ n->key = i;
+ bpf_spin_lock(&glock);
+ bpf_list_push_back(&ghead, &n->l);
+ bpf_spin_unlock(&glock);
+ }
+
+ bpf_spin_lock(&glock);
+
+ l_n = bpf_list_front(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != 0) {
+ err = __LINE__;
+ goto done;
+ }
+
+ l_n = bpf_list_back(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != NR_NODES - 1) {
+ err = __LINE__;
+ goto done;
+ }
+
+done:
+ bpf_spin_unlock(&glock);
+ return err;
+}
+
+#define TEST_FB(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_list_node *l_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock); \
+ l_n = bpf_list_##op(&ghead); \
+ if (l_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock); \
+ \
+ return !!jiffies; \
+}
+
+#define MSG "call bpf_list_{{(front|back).+}}; R0{{(_w)?}}=ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_FB(front, true)
+TEST_FB(back, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_list_head"
+TEST_FB(front, false)
+TEST_FB(back, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c
index 1f1dd547e4ee..cfc1f48e0d28 100644
--- a/tools/testing/selftests/bpf/progs/prepare.c
+++ b/tools/testing/selftests/bpf/progs/prepare.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2025 Meta */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-//#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c
index 5927054b6dd9..efa416f53968 100644
--- a/tools/testing/selftests/bpf/progs/raw_tp_null.c
+++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c
@@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL";
int tid;
int i;
-SEC("tp_btf/bpf_testmod_test_raw_tp_null")
+SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp")
int BPF_PROG(test_raw_tp_null, struct sk_buff *skb)
{
struct task_struct *task = bpf_get_current_task_btf();
diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
index 38d669957bf1..0d58114a4955 100644
--- a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
+++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
@@ -8,7 +8,7 @@
char _license[] SEC("license") = "GPL";
/* Ensure module parameter has PTR_MAYBE_NULL */
-SEC("tp_btf/bpf_testmod_test_raw_tp_null")
+SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp")
__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) {
asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all);
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index dbd5eee8e25e..4acb6af2dfe3 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -69,11 +69,11 @@ long rbtree_api_nolock_first(void *ctx)
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__retval(0)
long rbtree_api_remove_unadded_node(void *ctx)
{
struct node_data *n, *m;
- struct bpf_rb_node *res;
+ struct bpf_rb_node *res_n, *res_m;
n = bpf_obj_new(typeof(*n));
if (!n)
@@ -88,19 +88,20 @@ long rbtree_api_remove_unadded_node(void *ctx)
bpf_spin_lock(&glock);
bpf_rbtree_add(&groot, &n->node, less);
- /* This remove should pass verifier */
- res = bpf_rbtree_remove(&groot, &n->node);
- n = container_of(res, struct node_data, node);
+ res_n = bpf_rbtree_remove(&groot, &n->node);
- /* This remove shouldn't, m isn't in an rbtree */
- res = bpf_rbtree_remove(&groot, &m->node);
- m = container_of(res, struct node_data, node);
+ res_m = bpf_rbtree_remove(&groot, &m->node);
bpf_spin_unlock(&glock);
- if (n)
- bpf_obj_drop(n);
- if (m)
- bpf_obj_drop(m);
+ bpf_obj_drop(m);
+ if (res_n)
+ bpf_obj_drop(container_of(res_n, struct node_data, node));
+ if (res_m) {
+ bpf_obj_drop(container_of(res_m, struct node_data, node));
+ /* m was not added to the rbtree */
+ return 2;
+ }
+
return 0;
}
@@ -178,7 +179,7 @@ err_out:
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
long rbtree_api_add_release_unlock_escape(void *ctx)
{
struct node_data *n;
@@ -202,7 +203,7 @@ long rbtree_api_add_release_unlock_escape(void *ctx)
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
long rbtree_api_first_release_unlock_escape(void *ctx)
{
struct bpf_rb_node *res;
diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c
new file mode 100644
index 000000000000..098ef970fac1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_search.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_refcount ref;
+ struct bpf_rb_node r0;
+ struct bpf_rb_node r1;
+ int key0;
+ int key1;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock0;
+private(A) struct bpf_rb_root groot0 __contains(node_data, r0);
+
+private(B) struct bpf_spin_lock glock1;
+private(B) struct bpf_rb_root groot1 __contains(node_data, r1);
+
+#define rb_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
+int zero = 0;
+
+static bool less0(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r0);
+ node_b = rb_entry(b, struct node_data, r0);
+
+ return node_a->key0 < node_b->key0;
+}
+
+static bool less1(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r1);
+ node_b = rb_entry(b, struct node_data, r1);
+
+ return node_a->key1 < node_b->key1;
+}
+
+SEC("syscall")
+__retval(0)
+long rbtree_search(void *ctx)
+{
+ struct bpf_rb_node *rb_n, *rb_m, *gc_ns[NR_NODES];
+ long lookup_key = NR_NODES / 2;
+ struct node_data *n, *m;
+ int i, nr_gc = 0;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+
+ m = bpf_refcount_acquire(n);
+
+ n->key0 = i;
+ m->key1 = i;
+
+ bpf_spin_lock(&glock0);
+ bpf_rbtree_add(&groot0, &n->r0, less0);
+ bpf_spin_unlock(&glock0);
+
+ bpf_spin_lock(&glock1);
+ bpf_rbtree_add(&groot1, &m->r1, less1);
+ bpf_spin_unlock(&glock1);
+ }
+
+ n = NULL;
+ bpf_spin_lock(&glock0);
+ rb_n = bpf_rbtree_root(&groot0);
+ while (can_loop) {
+ if (!rb_n) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ n = rb_entry(rb_n, struct node_data, r0);
+ if (lookup_key == n->key0)
+ break;
+ if (nr_gc < NR_NODES)
+ gc_ns[nr_gc++] = rb_n;
+ if (lookup_key < n->key0)
+ rb_n = bpf_rbtree_left(&groot0, rb_n);
+ else
+ rb_n = bpf_rbtree_right(&groot0, rb_n);
+ }
+
+ if (!n || lookup_key != n->key0) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ gc_ns[i] = bpf_rbtree_remove(&groot0, rb_n);
+ }
+
+ m = bpf_refcount_acquire(n);
+ bpf_spin_unlock(&glock0);
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ if (rb_n) {
+ n = rb_entry(rb_n, struct node_data, r0);
+ bpf_obj_drop(n);
+ }
+ }
+
+ if (!m)
+ return __LINE__;
+
+ bpf_spin_lock(&glock1);
+ rb_m = bpf_rbtree_remove(&groot1, &m->r1);
+ bpf_spin_unlock(&glock1);
+ bpf_obj_drop(m);
+ if (!rb_m)
+ return __LINE__;
+ bpf_obj_drop(rb_entry(rb_m, struct node_data, r1));
+
+ return 0;
+}
+
+#define TEST_ROOT(dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_root_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+#define TEST_LR(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ struct node_data *n; \
+ __u64 jiffies = 0; \
+ \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (!rb_n) { \
+ bpf_spin_unlock(&glock0); \
+ return 1; \
+ } \
+ n = rb_entry(rb_n, struct node_data, r0); \
+ n = bpf_refcount_acquire(n); \
+ bpf_spin_unlock(&glock0); \
+ if (!n) \
+ return 1; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_##op(&groot0, &n->r0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+/*
+ * Use a spearate MSG macro instead of passing to TEST_XXX(..., MSG)
+ * to ensure the message itself is not in the bpf prog lineinfo
+ * which the verifier includes in its log.
+ * Otherwise, the test_loader will incorrectly match the prog lineinfo
+ * instead of the log generated by the verifier.
+ */
+#define MSG "call bpf_rbtree_root{{.+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_ROOT(true)
+#undef MSG
+#define MSG "call bpf_rbtree_{{(left|right).+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_LR(left, true)
+TEST_LR(right, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_rb_root"
+TEST_ROOT(false)
+TEST_LR(left, false)
+TEST_LR(right, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c
index 9adb5ba4cd4d..90f5656c3991 100644
--- a/tools/testing/selftests/bpf/progs/set_global_vars.c
+++ b/tools/testing/selftests/bpf/progs/set_global_vars.c
@@ -24,6 +24,44 @@ const volatile enum Enumu64 var_eb = EB1;
const volatile enum Enums64 var_ec = EC1;
const volatile bool var_b = false;
+struct Struct {
+ int:16;
+ __u16 filler;
+ struct {
+ const __u16 filler2;
+ };
+ struct Struct2 {
+ __u16 filler;
+ volatile struct {
+ const int:1;
+ union {
+ const volatile __u8 var_u8;
+ const volatile __s16 filler3;
+ const int:1;
+ } u;
+ };
+ } struct2;
+};
+
+const volatile __u32 stru = 0; /* same prefix as below */
+const volatile struct Struct struct1 = {.struct2 = {.u = {.var_u8 = 1}}};
+
+union Union {
+ __u16 var_u16;
+ struct Struct3 {
+ struct {
+ __u8 var_u8_l;
+ };
+ struct {
+ struct {
+ __u8 var_u8_h;
+ };
+ };
+ } struct3;
+};
+
+const volatile union Union union1 = {.var_u16 = -1};
+
char arr[4] = {0};
SEC("socket")
@@ -43,5 +81,8 @@ int test_set_globals(void *ctx)
a = var_eb;
a = var_ec;
a = var_b;
+ a = struct1.struct2.u.var_u8;
+ a = union1.var_u16;
+
return a;
}
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 0107a24b7522..d330b1511979 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -83,6 +83,14 @@ struct loop_ctx {
struct sock *sk;
};
+static bool sk_is_tcp(struct sock *sk)
+{
+ return (sk->__sk_common.skc_family == AF_INET ||
+ sk->__sk_common.skc_family == AF_INET6) &&
+ sk->sk_type == SOCK_STREAM &&
+ sk->sk_protocol == IPPROTO_TCP;
+}
+
static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
const struct sockopt_test *t,
int level)
@@ -91,6 +99,9 @@ static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
opt = t->opt;
+ if (opt == SO_TXREHASH && !sk_is_tcp(sk))
+ return 0;
+
if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)))
return 1;
/* kernel initialized txrehash to 255 */
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
index 96531b0d9d55..8f483337e103 100644
--- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -17,6 +17,12 @@ static bool ipv6_addr_loopback(const struct in6_addr *a)
a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0;
}
+static bool ipv4_addr_loopback(__be32 a)
+{
+ return a == bpf_ntohl(0x7f000001);
+}
+
+volatile const unsigned int sf;
volatile const __u16 ports[2];
unsigned int bucket[2];
@@ -26,16 +32,20 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
struct sock *sk = (struct sock *)ctx->sk_common;
struct inet_hashinfo *hinfo;
unsigned int hash;
+ __u64 sock_cookie;
struct net *net;
int idx;
if (!sk)
return 0;
+ sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
- if (sk->sk_family != AF_INET6 ||
+ if (sk->sk_family != sf ||
sk->sk_state != TCP_LISTEN ||
- !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr))
return 0;
if (sk->sk_num == ports[0])
@@ -52,6 +62,7 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
hinfo = net->ipv4.tcp_death_row.hashinfo;
bucket[idx] = hash & hinfo->lhash2_mask;
bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
return 0;
}
@@ -63,14 +74,18 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
{
struct sock *sk = (struct sock *)ctx->udp_sk;
struct udp_table *udptable;
+ __u64 sock_cookie;
int idx;
if (!sk)
return 0;
+ sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
- if (sk->sk_family != AF_INET6 ||
- !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ if (sk->sk_family != sf ||
+ sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr))
return 0;
if (sk->sk_num == ports[0])
@@ -84,6 +99,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
udptable = sk->sk_net.net->ipv4.udp_table;
bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask;
bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_ext.c b/tools/testing/selftests/bpf/progs/test_btf_ext.c
new file mode 100644
index 000000000000..cdf20331db04
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_ext.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms Inc. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__noinline static void f0(void)
+{
+ __u64 a = 1;
+
+ __sink(a);
+}
+
+SEC("xdp")
+__u64 global_func(struct xdp_md *xdp)
+{
+ f0();
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index 7f3c233943b3..03d7f89787a1 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -19,7 +19,7 @@ int BPF_PROG(handle_raw_tp,
__u32 raw_tp_bare_write_sz = 0;
-SEC("raw_tp/bpf_testmod_test_write_bare")
+SEC("raw_tp/bpf_testmod_test_write_bare_tp")
int BPF_PROG(handle_raw_tp_bare,
struct task_struct *task, struct bpf_testmod_test_write_ctx *write_ctx)
{
@@ -31,7 +31,7 @@ int raw_tp_writable_bare_in_val = 0;
int raw_tp_writable_bare_early_ret = 0;
int raw_tp_writable_bare_out_val = 0;
-SEC("raw_tp.w/bpf_testmod_test_writable_bare")
+SEC("raw_tp.w/bpf_testmod_test_writable_bare_tp")
int BPF_PROG(handle_raw_tp_writable_bare,
struct bpf_testmod_test_writable_ctx *writable)
{
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
new file mode 100644
index 000000000000..8bdb9987c0c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+int cork_byte;
+int push_start;
+int push_end;
+int apply_bytes;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_msg")
+int prog_sk_policy(struct sk_msg_md *msg)
+{
+ if (cork_byte > 0)
+ bpf_msg_cork_bytes(msg, cork_byte);
+ if (push_start > 0 && push_end > 0)
+ bpf_msg_push_data(msg, push_start, push_end, 0);
+
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_sk_policy_redir(struct sk_msg_md *msg)
+{
+ int two = 2;
+
+ bpf_msg_apply_bytes(msg, apply_bytes);
+ return bpf_msg_redirect_map(msg, &sock_map, two, 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_redir.c b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
new file mode 100644
index 000000000000..34d9f4f2f0a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_map, sock_map;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_hash, sock_hash;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, unsigned int);
+} verdict_map;
+
+/* Set by user space */
+int redirect_type;
+int redirect_flags;
+
+#define redirect_map(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_map, \
+ struct sk_msg_md * : bpf_msg_redirect_map \
+ )((__data), &sock_map, (__u32){0}, redirect_flags)
+
+#define redirect_hash(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_hash, \
+ struct sk_msg_md * : bpf_msg_redirect_hash \
+ )((__data), &sock_hash, &(__u32){0}, redirect_flags)
+
+#define DEFINE_PROG(__type, __param) \
+SEC("sk_" XSTR(__type)) \
+int prog_ ## __type ## _verdict(__param data) \
+{ \
+ unsigned int *count; \
+ int verdict; \
+ \
+ if (redirect_type == BPF_MAP_TYPE_SOCKMAP) \
+ verdict = redirect_map(data); \
+ else if (redirect_type == BPF_MAP_TYPE_SOCKHASH) \
+ verdict = redirect_hash(data); \
+ else \
+ verdict = redirect_type - __MAX_BPF_MAP_TYPE; \
+ \
+ count = bpf_map_lookup_elem(&verdict_map, &verdict); \
+ if (count) \
+ (*count)++; \
+ \
+ return verdict; \
+}
+
+DEFINE_PROG(skb, struct __sk_buff *);
+DEFINE_PROG(msg, struct sk_msg_md *);
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
index eb5cca1fce16..7d5293de1952 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -294,7 +294,9 @@ static int tcp_validate_sysctl(struct tcp_syncookie *ctx)
(ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6))
goto err;
- if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7)
+ if (!ctx->attrs.wscale_ok ||
+ !ctx->attrs.snd_wscale ||
+ ctx->attrs.snd_wscale >= BPF_SYNCOOKIE_WSCALE_MASK)
goto err;
if (!ctx->attrs.tstamp_ok)
diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
index 39ff06f2c834..cf0547a613ff 100644
--- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
+++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
@@ -6,14 +6,14 @@
#include "../test_kmods/bpf_testmod.h"
#include "bpf_misc.h"
-SEC("tp_btf/bpf_testmod_test_nullable_bare")
+SEC("tp_btf/bpf_testmod_test_nullable_bare_tp")
__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx)
{
return nullable_ctx->len;
}
-SEC("tp_btf/bpf_testmod_test_nullable_bare")
+SEC("tp_btf/bpf_testmod_test_nullable_bare_tp")
int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx)
{
if (nullable_ctx)
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
new file mode 100644
index 000000000000..35e2cdc00a01
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if __clang_major__ >= 21 && 0
+SEC("socket")
+__description("__builtin_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_builtin_trap_with_simple_c(void)
+{
+ __builtin_trap();
+}
+#endif
+
+SEC("socket")
+__description("__bpf_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_trap_with_simple_c(void)
+{
+ __bpf_trap();
+}
+
+SEC("socket")
+__description("__bpf_trap as the second-from-last insn")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void bpf_trap_at_func_end(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "call %[__bpf_trap];"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead code __bpf_trap in the middle of code")
+__success
+__naked void dead_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 0 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("reachable __bpf_trap in the middle of code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void live_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 1 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
index 28b939572cda..03942cec07e5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
@@ -65,4 +65,16 @@ __naked void ctx_access_u32_pointer_reject_8(void)
" ::: __clobber_all);
}
+SEC("fentry/bpf_fentry_test10")
+__description("btf_ctx_access const void pointer accept")
+__success __retval(0)
+__naked void ctx_access_const_void_pointer_accept(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 0); /* load 1st argument value (const void pointer) */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
index 77698d5a19e4..74f4f19c10b8 100644
--- a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
+++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
@@ -10,65 +10,81 @@
SEC("socket")
__description("load-acquire, 8-bit")
-__success __success_unpriv __retval(0x12)
+__success __success_unpriv __retval(0)
__naked void load_acquire_8(void)
{
asm volatile (
- "w1 = 0x12;"
+ "r0 = 0;"
+ "w1 = 0xfe;"
"*(u8 *)(r10 - 1) = w1;"
- ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1));
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u8 *)(r10 - 1));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1))
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -1))
: __clobber_all);
}
SEC("socket")
__description("load-acquire, 16-bit")
-__success __success_unpriv __retval(0x1234)
+__success __success_unpriv __retval(0)
__naked void load_acquire_16(void)
{
asm volatile (
- "w1 = 0x1234;"
+ "r0 = 0;"
+ "w1 = 0xfedc;"
"*(u16 *)(r10 - 2) = w1;"
- ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2));
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u16 *)(r10 - 2));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2))
+ BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -2))
: __clobber_all);
}
SEC("socket")
__description("load-acquire, 32-bit")
-__success __success_unpriv __retval(0x12345678)
+__success __success_unpriv __retval(0)
__naked void load_acquire_32(void)
{
asm volatile (
- "w1 = 0x12345678;"
+ "r0 = 0;"
+ "w1 = 0xfedcba09;"
"*(u32 *)(r10 - 4) = w1;"
- ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4));
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u32 *)(r10 - 4));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4))
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -4))
: __clobber_all);
}
SEC("socket")
__description("load-acquire, 64-bit")
-__success __success_unpriv __retval(0x1234567890abcdef)
+__success __success_unpriv __retval(0)
__naked void load_acquire_64(void)
{
asm volatile (
- "r1 = 0x1234567890abcdef ll;"
+ "r0 = 0;"
+ "r1 = 0xfedcba0987654321 ll;"
"*(u64 *)(r10 - 8) = r1;"
- ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8));
+ ".8byte %[load_acquire_insn];" // r2 = load_acquire((u64 *)(r10 - 8));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(load_acquire_insn,
- BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8))
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8))
: __clobber_all);
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 6662d4b39969..9fe5d255ee37 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -91,8 +91,7 @@ __naked int bpf_end_bswap(void)
::: __clobber_all);
}
-#if defined(ENABLE_ATOMICS_TESTS) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
SEC("?raw_tp")
__success __log_level(2)
@@ -138,7 +137,7 @@ __naked int bpf_store_release(void)
: __clobber_all);
}
-#endif /* load-acquire, store-release */
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
#endif /* v4 instruction */
SEC("?raw_tp")
@@ -179,4 +178,57 @@ __naked int state_loop_first_last_equal(void)
);
}
+__used __naked static void __bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r2 = 2314885393468386424 ll;"
+ "goto +0;"
+ "if r2 <= r10 goto +3;"
+ "if r1 >= -1835016 goto +0;"
+ "if r2 <= 8 goto +0;"
+ "if r3 <= 0 goto +0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("8: (bd) if r2 <= r10 goto pc+3")
+__msg("9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("10: (b5) if r2 <= 0x8 goto pc+0")
+__msg("mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3")
+__msg("mark_precise: frame1: regs=r2 stack= before 7: (05) goto pc+0")
+__naked void bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r3 = 0 ll;"
+ "call __bpf_cond_op_r10;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("3: (bf) r3 = r10")
+__msg("4: (bd) if r3 <= r2 goto pc+1")
+__msg("5: (b5) if r2 <= 0x8 goto pc+2")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (bd) if r3 <= r2 goto pc+1")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10")
+__naked void bpf_cond_op_not_r10(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r2 = 2314885393468386424 ll;"
+ "r3 = r10;"
+ "if r3 <= r2 goto +1;"
+ "if r2 <= 8 goto +2;"
+ "r0 = 2 ll;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c
index c0442d5bb049..72f1eb006074 100644
--- a/tools/testing/selftests/bpf/progs/verifier_store_release.c
+++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c
@@ -6,18 +6,21 @@
#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
-#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
SEC("socket")
__description("store-release, 8-bit")
-__success __success_unpriv __retval(0x12)
+__success __success_unpriv __retval(0)
__naked void store_release_8(void)
{
asm volatile (
+ "r0 = 0;"
"w1 = 0x12;"
".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1);
- "w0 = *(u8 *)(r10 - 1);"
+ "w2 = *(u8 *)(r10 - 1);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -27,13 +30,17 @@ __naked void store_release_8(void)
SEC("socket")
__description("store-release, 16-bit")
-__success __success_unpriv __retval(0x1234)
+__success __success_unpriv __retval(0)
__naked void store_release_16(void)
{
asm volatile (
+ "r0 = 0;"
"w1 = 0x1234;"
".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1);
- "w0 = *(u16 *)(r10 - 2);"
+ "w2 = *(u16 *)(r10 - 2);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -43,13 +50,17 @@ __naked void store_release_16(void)
SEC("socket")
__description("store-release, 32-bit")
-__success __success_unpriv __retval(0x12345678)
+__success __success_unpriv __retval(0)
__naked void store_release_32(void)
{
asm volatile (
+ "r0 = 0;"
"w1 = 0x12345678;"
".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1);
- "w0 = *(u32 *)(r10 - 4);"
+ "w2 = *(u32 *)(r10 - 4);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -59,13 +70,17 @@ __naked void store_release_32(void)
SEC("socket")
__description("store-release, 64-bit")
-__success __success_unpriv __retval(0x1234567890abcdef)
+__success __success_unpriv __retval(0)
__naked void store_release_64(void)
{
asm volatile (
+ "r0 = 0;"
"r1 = 0x1234567890abcdef ll;"
".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
- "r0 = *(u64 *)(r10 - 8);"
+ "r2 = *(u64 *)(r10 - 8);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
"exit;"
:
: __imm_insn(store_release_insn,
@@ -271,7 +286,7 @@ __naked void store_release_with_invalid_reg(void)
: __clobber_all);
}
-#else
+#else /* CAN_USE_LOAD_ACQ_STORE_REL */
SEC("socket")
__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test")
@@ -281,6 +296,6 @@ int dummy_test(void)
return 0;
}
-#endif
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
index 31ca229bb3c0..09bb8a038d52 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
@@ -19,6 +19,13 @@ struct {
__type(value, __u32);
} prog_arr SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} dev_map SEC(".maps");
+
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
@@ -95,4 +102,10 @@ int rx(struct xdp_md *ctx)
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
+SEC("xdp")
+int redirect(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&dev_map, ctx->rx_queue_index, XDP_PASS);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
index ccde6a4c6319..683306db8594 100644
--- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -4,6 +4,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/errno.h>
#include "xsk_xdp_common.h"
struct {
@@ -14,6 +16,7 @@ struct {
} xsk SEC(".maps");
static unsigned int idx;
+int adjust_value = 0;
int count = 0;
SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp)
@@ -70,4 +73,51 @@ SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp)
return bpf_redirect_map(&xsk, idx, XDP_DROP);
}
+SEC("xdp.frags") int xsk_xdp_adjust_tail(struct xdp_md *xdp)
+{
+ __u32 buff_len, curr_buff_len;
+ int ret;
+
+ buff_len = bpf_xdp_get_buff_len(xdp);
+ if (buff_len == 0)
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_tail(xdp, adjust_value);
+ if (ret < 0) {
+ /* Handle unsupported cases */
+ if (ret == -EOPNOTSUPP) {
+ /* Set adjust_value to -EOPNOTSUPP to indicate to userspace that this case
+ * is unsupported
+ */
+ adjust_value = -EOPNOTSUPP;
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+ }
+
+ return XDP_DROP;
+ }
+
+ curr_buff_len = bpf_xdp_get_buff_len(xdp);
+ if (curr_buff_len != buff_len + adjust_value)
+ return XDP_DROP;
+
+ if (curr_buff_len > buff_len) {
+ __u32 *pkt_data = (void *)(long)xdp->data;
+ __u32 len, words_to_end, seq_num;
+
+ len = curr_buff_len - PKT_HDR_ALIGN;
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ seq_num = words_to_end;
+
+ /* Convert sequence number to network byte order. Store this in the last 4 bytes of
+ * the packet. Use 'adjust_value' to determine the position at the end of the
+ * packet for storing the sequence number.
+ */
+ seq_num = __constant_htonl(words_to_end);
+ bpf_xdp_store_bytes(xdp, curr_buff_len - sizeof(seq_num), &seq_num,
+ sizeof(seq_num));
+ }
+
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index 3220f1d28697..e6c248e3ae54 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -134,6 +134,10 @@ bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) {
return bpf_testmod_test_struct_arg_result;
}
+__weak noinline void bpf_testmod_looooooooooooooooooooooooooooooong_name(void)
+{
+}
+
__bpf_kfunc void
bpf_testmod_test_mod_kfunc(int i)
{
@@ -413,7 +417,7 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
(void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2);
- (void)trace_bpf_testmod_test_raw_tp_null(NULL);
+ (void)trace_bpf_testmod_test_raw_tp_null_tp(NULL);
bpf_testmod_test_struct_ops3();
@@ -431,14 +435,14 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
if (bpf_testmod_loop_test(101) > 100)
trace_bpf_testmod_test_read(current, &ctx);
- trace_bpf_testmod_test_nullable_bare(NULL);
+ trace_bpf_testmod_test_nullable_bare_tp(NULL);
/* Magic number to enable writable tp */
if (len == 64) {
struct bpf_testmod_test_writable_ctx writable = {
.val = 1024,
};
- trace_bpf_testmod_test_writable_bare(&writable);
+ trace_bpf_testmod_test_writable_bare_tp(&writable);
if (writable.early_ret)
return snprintf(buf, len, "%d\n", writable.val);
}
@@ -470,7 +474,7 @@ bpf_testmod_test_write(struct file *file, struct kobject *kobj,
.len = len,
};
- trace_bpf_testmod_test_write_bare(current, &ctx);
+ trace_bpf_testmod_test_write_bare_tp(current, &ctx);
return -EIO; /* always fail */
}
@@ -1340,7 +1344,7 @@ static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct
*insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a));
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
- *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id),
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8);
*insn++ = prog->insnsi[0];
@@ -1379,7 +1383,7 @@ static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struc
*insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a));
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
- *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id),
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
*insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6);
*insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2);
*insn++ = BPF_EXIT_INSN();
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 49f2fc61061f..9551d8d5f8f9 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -1042,6 +1042,14 @@ void run_subtest(struct test_loader *tester,
emit_verifier_log(tester->log_buf, false /*force*/);
validate_msgs(tester->log_buf, &subspec->expect_msgs, emit_verifier_log);
+ /* Restore capabilities because the kernel will silently ignore requests
+ * for program info (such as xlated program text) if we are not
+ * bpf-capable. Also, for some reason test_verifier executes programs
+ * with all capabilities restored. Do the same here.
+ */
+ if (restore_capabilities(&caps))
+ goto tobj_cleanup;
+
if (subspec->expect_xlated.cnt) {
err = get_xlated_program_text(bpf_program__fd(tprog),
tester->log_buf, tester->log_buf_sz);
@@ -1067,12 +1075,6 @@ void run_subtest(struct test_loader *tester,
}
if (should_do_test_run(spec, subspec)) {
- /* For some reason test_verifier executes programs
- * with all capabilities restored. Do the same here.
- */
- if (restore_capabilities(&caps))
- goto tobj_cleanup;
-
/* Do bpf_map__attach_struct_ops() for each struct_ops map.
* This should trigger bpf_struct_ops->reg callback on kernel side.
*/
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 447b68509d76..27db34ecf3f5 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -734,7 +734,7 @@ static __u32 btf_raw_types[] = {
BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */
};
-static char bpf_vlog[UINT_MAX >> 8];
+static char bpf_vlog[UINT_MAX >> 5];
static int load_btf_spec(__u32 *types, int types_len,
const char *strings, int strings_len)
@@ -1559,10 +1559,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
test->errstr_unpriv : test->errstr;
opts.expected_attach_type = test->expected_attach_type;
- if (verbose)
- opts.log_level = verif_log_level | 4; /* force stats */
- else if (expected_ret == VERBOSE_ACCEPT)
+ if (expected_ret == VERBOSE_ACCEPT)
opts.log_level = 2;
+ else if (verbose)
+ opts.log_level = verif_log_level | 4; /* force stats */
else
opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
opts.prog_flags = pflags;
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index a18972ffdeb6..b2bb20b00952 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -1486,7 +1486,84 @@ static bool is_preset_supported(const struct btf_type *t)
return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t);
}
-static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t,
+const int btf_find_member(const struct btf *btf,
+ const struct btf_type *parent_type,
+ __u32 parent_offset,
+ const char *member_name,
+ int *member_tid,
+ __u32 *member_offset)
+{
+ int i;
+
+ if (!btf_is_composite(parent_type))
+ return -EINVAL;
+
+ for (i = 0; i < btf_vlen(parent_type); ++i) {
+ const struct btf_member *member;
+ const struct btf_type *member_type;
+ int tid;
+
+ member = btf_members(parent_type) + i;
+ tid = btf__resolve_type(btf, member->type);
+ if (tid < 0)
+ return -EINVAL;
+
+ member_type = btf__type_by_id(btf, tid);
+ if (member->name_off) {
+ const char *name = btf__name_by_offset(btf, member->name_off);
+
+ if (strcmp(member_name, name) == 0) {
+ if (btf_member_bitfield_size(parent_type, i) != 0) {
+ fprintf(stderr, "Bitfield presets are not supported %s\n",
+ name);
+ return -EINVAL;
+ }
+ *member_offset = parent_offset + member->offset;
+ *member_tid = tid;
+ return 0;
+ }
+ } else if (btf_is_composite(member_type)) {
+ int err;
+
+ err = btf_find_member(btf, member_type, parent_offset + member->offset,
+ member_name, member_tid, member_offset);
+ if (!err)
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t,
+ struct btf_var_secinfo *sinfo, const char *var)
+{
+ char expr[256], *saveptr;
+ const struct btf_type *base_type, *member_type;
+ int err, member_tid;
+ char *name;
+ __u32 member_offset = 0;
+
+ base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type));
+ snprintf(expr, sizeof(expr), "%s", var);
+ strtok_r(expr, ".", &saveptr);
+
+ while ((name = strtok_r(NULL, ".", &saveptr))) {
+ err = btf_find_member(btf, base_type, 0, name, &member_tid, &member_offset);
+ if (err) {
+ fprintf(stderr, "Could not find member %s for variable %s\n", name, var);
+ return err;
+ }
+ member_type = btf__type_by_id(btf, member_tid);
+ sinfo->offset += member_offset / 8;
+ sinfo->size = member_type->size;
+ sinfo->type = member_tid;
+ base_type = member_type;
+ }
+ return 0;
+}
+
+static int set_global_var(struct bpf_object *obj, struct btf *btf,
struct bpf_map *map, struct btf_var_secinfo *sinfo,
struct var_preset *preset)
{
@@ -1495,9 +1572,9 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct
long long value = preset->ivalue;
size_t size;
- base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type));
+ base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type));
if (!base_type) {
- fprintf(stderr, "Failed to resolve type %d\n", t->type);
+ fprintf(stderr, "Failed to resolve type %d\n", sinfo->type);
return -EINVAL;
}
if (!is_preset_supported(base_type)) {
@@ -1530,7 +1607,7 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct
if (value >= max_val || value < -max_val) {
fprintf(stderr,
"Variable %s value %lld is out of range [%lld; %lld]\n",
- btf__name_by_offset(btf, t->name_off), value,
+ btf__name_by_offset(btf, base_type->name_off), value,
is_signed ? -max_val : 0, max_val - 1);
return -EINVAL;
}
@@ -1583,14 +1660,20 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i
for (j = 0; j < n; ++j, ++sinfo) {
const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type);
const char *var_name;
+ int var_len;
if (!btf_is_var(var_type))
continue;
var_name = btf__name_by_offset(btf, var_type->name_off);
+ var_len = strlen(var_name);
for (k = 0; k < npresets; ++k) {
- if (strcmp(var_name, presets[k].name) != 0)
+ struct btf_var_secinfo tmp_sinfo;
+
+ if (strncmp(var_name, presets[k].name, var_len) != 0 ||
+ (presets[k].name[var_len] != '\0' &&
+ presets[k].name[var_len] != '.'))
continue;
if (presets[k].applied) {
@@ -1598,13 +1681,17 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i
var_name);
return -EINVAL;
}
+ tmp_sinfo = *sinfo;
+ err = adjust_var_secinfo(btf, var_type,
+ &tmp_sinfo, presets[k].name);
+ if (err)
+ return err;
- err = set_global_var(obj, btf, var_type, map, sinfo, presets + k);
+ err = set_global_var(obj, btf, map, &tmp_sinfo, presets + k);
if (err)
return err;
presets[k].applied = true;
- break;
}
}
}
diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h
index 5a6f36f07383..45810ff552da 100644
--- a/tools/testing/selftests/bpf/xsk_xdp_common.h
+++ b/tools/testing/selftests/bpf/xsk_xdp_common.h
@@ -4,6 +4,7 @@
#define XSK_XDP_COMMON_H_
#define MAX_SOCKETS 2
+#define PKT_HDR_ALIGN (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
struct xdp_info {
__u64 count;
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 11f047b8af75..0ced4026ee44 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -524,6 +524,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
test->nb_sockets = 1;
test->fail = false;
test->set_ring = false;
+ test->adjust_tail = false;
+ test->adjust_tail_support = false;
test->mtu = MAX_ETH_PKT_SIZE;
test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
@@ -757,14 +759,15 @@ static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
}
-static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+static void pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len)
{
- struct pkt_stream *pkt_stream;
+ ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+}
- pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
- test->ifobj_tx->xsk->pkt_stream = pkt_stream;
- pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream;
+static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+ pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len);
+ pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len);
}
static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
@@ -991,6 +994,31 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
return true;
}
+static bool is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx)
+{
+ struct bpf_map *data_map;
+ int adjust_value = 0;
+ int key = 0;
+ int ret;
+
+ data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
+ if (!data_map || !bpf_map__is_internal(data_map)) {
+ ksft_print_msg("Error: could not find bss section of XDP program\n");
+ exit_with_error(errno);
+ }
+
+ ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value);
+ if (ret) {
+ ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret);
+ exit_with_error(errno);
+ }
+
+ /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail
+ * helper is not supported. Skip the adjust_tail test case in this scenario.
+ */
+ return adjust_value != -EOPNOTSUPP;
+}
+
static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
u32 bytes_processed)
{
@@ -1767,8 +1795,13 @@ static void *worker_testapp_validate_rx(void *arg)
if (!err && ifobject->validation_func)
err = ifobject->validation_func(ifobject);
- if (err)
- report_failure(test);
+
+ if (err) {
+ if (test->adjust_tail && !is_adjust_tail_supported(ifobject->xdp_progs))
+ test->adjust_tail_support = false;
+ else
+ report_failure(test);
+ }
pthread_exit(NULL);
}
@@ -2515,6 +2548,71 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test)
return testapp_validate_traffic(test);
}
+static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail,
+ skel_tx->progs.xsk_xdp_adjust_tail,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ skel_rx->bss->adjust_value = adjust_value;
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
+{
+ int ret;
+
+ test->adjust_tail_support = true;
+ test->adjust_tail = true;
+ test->total_steps = 1;
+
+ pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
+ pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
+
+ ret = testapp_xdp_adjust_tail(test, value);
+ if (ret)
+ return ret;
+
+ if (!test->adjust_tail_support) {
+ ksft_test_result_skip("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
+ mode_string(test), busy_poll_string(test));
+ return TEST_SKIP;
+ }
+
+ return 0;
+}
+
+static int testapp_adjust_tail_shrink(struct test_spec *test)
+{
+ /* Shrink by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
+}
+
+static int testapp_adjust_tail_shrink_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Shrink by the frag size */
+ return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+static int testapp_adjust_tail_grow(struct test_spec *test)
+{
+ /* Grow by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
+}
+
+static int testapp_adjust_tail_grow_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */
+ return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
+ XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
static void run_pkt_test(struct test_spec *test)
{
int ret;
@@ -2621,6 +2719,10 @@ static const struct test_spec tests[] = {
{.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
{.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
{.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
+ {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
+ {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
+ {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
+ {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
};
static void print_tests(void)
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index e46e823f6a1a..67fc44b2813b 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -173,6 +173,8 @@ struct test_spec {
u16 nb_sockets;
bool fail;
bool set_ring;
+ bool adjust_tail;
+ bool adjust_tail_support;
enum test_mode mode;
char name[MAX_TEST_NAME_SIZE];
};
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index 137b2364a082..9984413be9f0 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -1,14 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
#include <fcntl.h>
+#include <inttypes.h>
#include <libgen.h>
#include <linux/limits.h>
#include <pthread.h>
#include <string.h>
+#include <sys/mount.h>
#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/un.h>
#include <unistd.h>
#include "../kselftest_harness.h"
+#include "../pidfd/pidfd.h"
#define STACKDUMP_FILE "stack_values"
#define STACKDUMP_SCRIPT "stackdump"
@@ -35,6 +41,7 @@ static void crashing_child(void)
FIXTURE(coredump)
{
char original_core_pattern[256];
+ pid_t pid_coredump_server;
};
FIXTURE_SETUP(coredump)
@@ -44,6 +51,7 @@ FIXTURE_SETUP(coredump)
char *dir;
int ret;
+ self->pid_coredump_server = -ESRCH;
file = fopen("/proc/sys/kernel/core_pattern", "r");
ASSERT_NE(NULL, file);
@@ -61,10 +69,17 @@ FIXTURE_TEARDOWN(coredump)
{
const char *reason;
FILE *file;
- int ret;
+ int ret, status;
unlink(STACKDUMP_FILE);
+ if (self->pid_coredump_server > 0) {
+ kill(self->pid_coredump_server, SIGTERM);
+ waitpid(self->pid_coredump_server, &status, 0);
+ }
+ unlink("/tmp/coredump.file");
+ unlink("/tmp/coredump.socket");
+
file = fopen("/proc/sys/kernel/core_pattern", "w");
if (!file) {
reason = "Unable to open core_pattern";
@@ -89,14 +104,14 @@ fail:
fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason);
}
-TEST_F(coredump, stackdump)
+TEST_F_TIMEOUT(coredump, stackdump, 120)
{
struct sigaction action = {};
unsigned long long stack;
char *test_dir, *line;
size_t line_length;
char buf[PATH_MAX];
- int ret, i;
+ int ret, i, status;
FILE *file;
pid_t pid;
@@ -129,6 +144,10 @@ TEST_F(coredump, stackdump)
/*
* Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file
*/
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
for (i = 0; i < 10; ++i) {
file = fopen(STACKDUMP_FILE, "r");
if (file)
@@ -138,14 +157,466 @@ TEST_F(coredump, stackdump)
ASSERT_NE(file, NULL);
/* Step 4: Make sure all stack pointer values are non-zero */
+ line = NULL;
for (i = 0; -1 != getline(&line, &line_length, file); ++i) {
stack = strtoull(line, NULL, 10);
ASSERT_NE(stack, 0);
}
+ free(line);
ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN);
fclose(file);
}
+TEST_F(coredump, socket)
+{
+ int fd, pidfd, ret, status;
+ FILE *file;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ char core_file[PATH_MAX];
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ ASSERT_NE(file, NULL);
+
+ ret = fprintf(file, "@/tmp/coredump.socket");
+ ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
+ ASSERT_EQ(fclose(file), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file;
+ socklen_t fd_peer_pidfd_len;
+
+ close(ipc_sockets[0]);
+
+ fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd_server < 0)
+ _exit(EXIT_FAILURE);
+
+ ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to bind coredump socket\n");
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ ret = listen(fd_server, 1);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to listen on coredump socket\n");
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "Failed to accept coredump socket connection\n");
+ close(fd_server);
+ _exit(EXIT_FAILURE);
+ }
+
+ fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
+ ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD,
+ &fd_peer_pidfd, &fd_peer_pidfd_len);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
+ close(fd_coredump);
+ close(fd_server);
+ _exit(EXIT_FAILURE);
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "Missing coredump information from coredumping task\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!(info.coredump_mask & PIDFD_COREDUMPED)) {
+ fprintf(stderr, "Received connection from non-coredumping task\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ fd_core_file = creat("/tmp/coredump.file", 0644);
+ if (fd_core_file < 0) {
+ fprintf(stderr, "Failed to create coredump file\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ for (;;) {
+ char buffer[4096];
+ ssize_t bytes_read, bytes_write;
+
+ bytes_read = read(fd_coredump, buffer, sizeof(buffer));
+ if (bytes_read < 0) {
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ close(fd_core_file);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (bytes_read == 0)
+ break;
+
+ bytes_write = write(fd_core_file, buffer, bytes_read);
+ if (bytes_read != bytes_write) {
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ close(fd_core_file);
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ close(fd_core_file);
+ _exit(EXIT_SUCCESS);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_TRUE(WCOREDUMP(status));
+
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_EQ(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_GT(st.st_size, 0);
+ /*
+ * We should somehow validate the produced core file.
+ * For now just allow for visual inspection
+ */
+ system("file /tmp/coredump.file");
+}
+
+TEST_F(coredump, socket_detect_userspace_client)
+{
+ int fd, pidfd, ret, status;
+ FILE *file;
+ pid_t pid, pid_coredump_server;
+ struct stat st;
+ char core_file[PATH_MAX];
+ struct pidfd_info info = {};
+ int ipc_sockets[2];
+ char c;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ ASSERT_NE(file, NULL);
+
+ ret = fprintf(file, "@/tmp/coredump.socket");
+ ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
+ ASSERT_EQ(fclose(file), 0);
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file;
+ socklen_t fd_peer_pidfd_len;
+
+ close(ipc_sockets[0]);
+
+ fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd_server < 0)
+ _exit(EXIT_FAILURE);
+
+ ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to bind coredump socket\n");
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ ret = listen(fd_server, 1);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to listen on coredump socket\n");
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ close(ipc_sockets[1]);
+
+ fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC);
+ if (fd_coredump < 0) {
+ fprintf(stderr, "Failed to accept coredump socket connection\n");
+ close(fd_server);
+ _exit(EXIT_FAILURE);
+ }
+
+ fd_peer_pidfd_len = sizeof(fd_peer_pidfd);
+ ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD,
+ &fd_peer_pidfd, &fd_peer_pidfd_len);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n");
+ close(fd_coredump);
+ close(fd_server);
+ _exit(EXIT_FAILURE);
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!(info.mask & PIDFD_INFO_COREDUMP)) {
+ fprintf(stderr, "Missing coredump information from coredumping task\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (info.coredump_mask & PIDFD_COREDUMPED) {
+ fprintf(stderr, "Received unexpected connection from coredumping task\n");
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ _exit(EXIT_FAILURE);
+ }
+
+ close(fd_coredump);
+ close(fd_server);
+ close(fd_peer_pidfd);
+ close(fd_core_file);
+ _exit(EXIT_SUCCESS);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ int fd_socket;
+ ssize_t ret;
+
+ fd_socket = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd_socket < 0)
+ _exit(EXIT_FAILURE);
+
+
+ ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0)
+ _exit(EXIT_FAILURE);
+
+ (void *)write(fd_socket, &(char){ 0 }, 1);
+ close(fd_socket);
+ _exit(EXIT_SUCCESS);
+ }
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP;
+ ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0);
+ ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0);
+ ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0);
+
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ ASSERT_NE(stat("/tmp/coredump.file", &st), 0);
+ ASSERT_EQ(errno, ENOENT);
+}
+
+TEST_F(coredump, socket_enoent)
+{
+ int pidfd, ret, status;
+ FILE *file;
+ pid_t pid;
+ char core_file[PATH_MAX];
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ ASSERT_NE(file, NULL);
+
+ ret = fprintf(file, "@/tmp/coredump.socket");
+ ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
+ ASSERT_EQ(fclose(file), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+}
+
+TEST_F(coredump, socket_no_listener)
+{
+ int pidfd, ret, status;
+ FILE *file;
+ pid_t pid, pid_coredump_server;
+ int ipc_sockets[2];
+ char c;
+ const struct sockaddr_un coredump_sk = {
+ .sun_family = AF_UNIX,
+ .sun_path = "/tmp/coredump.socket",
+ };
+ size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) +
+ sizeof("/tmp/coredump.socket");
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_EQ(ret, 0);
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ ASSERT_NE(file, NULL);
+
+ ret = fprintf(file, "@/tmp/coredump.socket");
+ ASSERT_EQ(ret, strlen("@/tmp/coredump.socket"));
+ ASSERT_EQ(fclose(file), 0);
+
+ pid_coredump_server = fork();
+ ASSERT_GE(pid_coredump_server, 0);
+ if (pid_coredump_server == 0) {
+ int fd_server;
+ socklen_t fd_peer_pidfd_len;
+
+ close(ipc_sockets[0]);
+
+ fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ if (fd_server < 0)
+ _exit(EXIT_FAILURE);
+
+ ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to bind coredump socket\n");
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0) {
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_FAILURE);
+ }
+
+ close(fd_server);
+ close(ipc_sockets[1]);
+ _exit(EXIT_SUCCESS);
+ }
+ self->pid_coredump_server = pid_coredump_server;
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0)
+ crashing_child();
+
+ pidfd = sys_pidfd_open(pid, 0);
+ ASSERT_GE(pidfd, 0);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_FALSE(WCOREDUMP(status));
+
+ waitpid(pid_coredump_server, &status, 0);
+ self->pid_coredump_server = -ESRCH;
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
index e350c521b467..9927b654fb8f 100755
--- a/tools/testing/selftests/cpufreq/cpufreq.sh
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -52,7 +52,14 @@ read_cpufreq_files_in_dir()
for file in $files; do
if [ -f $1/$file ]; then
printf "$file:"
- cat $1/$file
+ #file is readable ?
+ local rfile=$(ls -l $1/$file | awk '$1 ~ /^.*r.*/ { print $NF; }')
+
+ if [ ! -z $rfile ]; then
+ cat $1/$file
+ else
+ printf "$file is not readable\n"
+ fi
else
printf "\n"
read_cpufreq_files_in_dir "$1/$file"
@@ -83,10 +90,10 @@ update_cpufreq_files_in_dir()
for file in $files; do
if [ -f $1/$file ]; then
- # file is writable ?
- local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }')
+ # file is readable and writable ?
+ local rwfile=$(ls -l $1/$file | awk '$1 ~ /^.*rw.*/ { print $NF; }')
- if [ ! -z $wfile ]; then
+ if [ ! -z $rwfile ]; then
# scaling_setspeed is a special file and we
# should skip updating it
if [ $file != "scaling_setspeed" ]; then
@@ -244,9 +251,10 @@ do_suspend()
printf "Failed to suspend using RTC wake alarm\n"
return 1
fi
+ else
+ echo $filename > $SYSFS/power/state
fi
- echo $filename > $SYSFS/power/state
printf "Came out of $1\n"
printf "Do basic tests after finishing $1 to verify cpufreq state\n\n"
diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore
index ec746f374e85..d634d8395d90 100644
--- a/tools/testing/selftests/drivers/net/.gitignore
+++ b/tools/testing/selftests/drivers/net/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-xdp_helper
+napi_id_helper
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 0c95bd944d56..be780bcb73a3 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -3,12 +3,14 @@ CFLAGS += $(KHDR_INCLUDES)
TEST_INCLUDES := $(wildcard lib/py/*.py) \
$(wildcard lib/sh/*.sh) \
- ../../net/net_helper.sh \
../../net/lib.sh \
-TEST_GEN_FILES := xdp_helper
+TEST_GEN_FILES := \
+ napi_id_helper \
+# end of TEST_GEN_FILES
TEST_PROGS := \
+ napi_id.py \
netcons_basic.sh \
netcons_fragmented_msg.sh \
netcons_overflow.sh \
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 07cddb19ba35..df2c047ffa90 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -15,12 +15,11 @@ TEST_PROGS = \
iou-zcrx.py \
irq.py \
loopback.sh \
- nic_link_layer.py \
- nic_performance.py \
pp_alloc_fail.py \
rss_ctx.py \
rss_input_xfrm.py \
tso.py \
+ xsk_reconfig.py \
#
TEST_FILES := \
diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py
index 3947e9157115..7947650210a0 100755
--- a/tools/testing/selftests/drivers/net/hw/devmem.py
+++ b/tools/testing/selftests/drivers/net/hw/devmem.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+from os import path
from lib.py import ksft_run, ksft_exit
from lib.py import ksft_eq, KsftSkipEx
from lib.py import NetDrvEpEnv
@@ -10,8 +11,7 @@ from lib.py import ksft_disruptive
def require_devmem(cfg):
if not hasattr(cfg, "_devmem_probed"):
- port = rand_port()
- probe_command = f"./ncdevmem -f {cfg.ifname}"
+ probe_command = f"{cfg.bin_local} -f {cfg.ifname}"
cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0
cfg._devmem_probed = True
@@ -21,22 +21,55 @@ def require_devmem(cfg):
@ksft_disruptive
def check_rx(cfg) -> None:
- cfg.require_ipver("6")
require_devmem(cfg)
port = rand_port()
- listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}"
+ socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}"
+ listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7"
+
+ with bkg(listen_cmd, exit_wait=True) as ncdevmem:
+ wait_port_listen(port)
+ cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \
+ head -c 1K | {socat}", host=cfg.remote, shell=True)
+
+ ksft_eq(ncdevmem.ret, 0)
+
+
+@ksft_disruptive
+def check_tx(cfg) -> None:
+ require_devmem(cfg)
+
+ port = rand_port()
+ listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}"
with bkg(listen_cmd) as socat:
wait_port_listen(port)
- cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.addr_v['6']}]:{port}", host=cfg.remote, shell=True)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port}", host=cfg.remote, shell=True)
+
+ ksft_eq(socat.stdout.strip(), "hello\nworld")
+
+
+@ksft_disruptive
+def check_tx_chunks(cfg) -> None:
+ cfg.require_ipver("6")
+ require_devmem(cfg)
+
+ port = rand_port()
+ listen_cmd = f"socat -U - TCP6-LISTEN:{port}"
+
+ with bkg(listen_cmd, exit_wait=True) as socat:
+ wait_port_listen(port)
+ cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True)
ksft_eq(socat.stdout.strip(), "hello\nworld")
def main() -> None:
with NetDrvEpEnv(__file__) as cfg:
- ksft_run([check_rx],
+ cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem")
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ ksft_run([check_rx, check_tx, check_tx_chunks],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
index c26b4180eddd..62456df947bc 100644
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -37,8 +37,8 @@
#include <liburing.h>
-#define PAGE_SIZE (4096)
-#define AREA_SIZE (8192 * PAGE_SIZE)
+static long page_size;
+#define AREA_SIZE (8192 * page_size)
#define SEND_SIZE (512 * 4096)
#define min(a, b) \
({ \
@@ -66,7 +66,7 @@ static int cfg_oneshot_recvs;
static int cfg_send_size = SEND_SIZE;
static struct sockaddr_in6 cfg_addr;
-static char payload[SEND_SIZE] __attribute__((aligned(PAGE_SIZE)));
+static char *payload;
static void *area_ptr;
static void *ring_ptr;
static size_t ring_size;
@@ -114,8 +114,8 @@ static inline size_t get_refill_ring_size(unsigned int rq_entries)
ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe);
/* add space for the header (head/tail/etc.) */
- ring_size += PAGE_SIZE;
- return ALIGN_UP(ring_size, 4096);
+ ring_size += page_size;
+ return ALIGN_UP(ring_size, page_size);
}
static void setup_zcrx(struct io_uring *ring)
@@ -219,7 +219,7 @@ static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
connfd = cqe->res;
if (cfg_oneshot)
- add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+ add_recvzc_oneshot(ring, connfd, page_size);
else
add_recvzc(ring, connfd);
}
@@ -245,7 +245,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
if (cfg_oneshot) {
if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) {
- add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+ add_recvzc_oneshot(ring, connfd, page_size);
cfg_oneshot_recvs--;
}
} else if (!(cqe->flags & IORING_CQE_F_MORE)) {
@@ -260,7 +260,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
for (i = 0; i < n; i++) {
if (*(data + i) != payload[(received + i)])
- error(1, 0, "payload mismatch at ", i);
+ error(1, 0, "payload mismatch at %d", i);
}
received += n;
@@ -354,7 +354,7 @@ static void run_client(void)
chunk = min_t(ssize_t, cfg_payload_len, to_send);
res = send(fd, src, chunk, 0);
if (res < 0)
- error(1, 0, "send(): %d", sent);
+ error(1, 0, "send(): %zd", sent);
sent += res;
to_send -= res;
}
@@ -370,7 +370,7 @@ static void usage(const char *filepath)
static void parse_opts(int argc, char **argv)
{
- const int max_payload_len = sizeof(payload) -
+ const int max_payload_len = SEND_SIZE -
sizeof(struct ipv6hdr) -
sizeof(struct tcphdr) -
40 /* max tcp options */;
@@ -443,6 +443,13 @@ int main(int argc, char **argv)
const char *cfg_test = argv[argc - 1];
int i;
+ page_size = sysconf(_SC_PAGESIZE);
+ if (page_size < 0)
+ return 1;
+
+ if (posix_memalign((void **)&payload, page_size, SEND_SIZE))
+ return 1;
+
parse_opts(argc, argv);
for (i = 0; i < SEND_SIZE; i++)
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index 6a0378e06cab..9c03fd777f3d 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -5,13 +5,12 @@ import re
from os import path
from lib.py import ksft_run, ksft_exit
from lib.py import NetDrvEpEnv
-from lib.py import bkg, cmd, ethtool, wait_port_listen
+from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
-def _get_rx_ring_entries(cfg):
- output = ethtool(f"-g {cfg.ifname}", host=cfg.remote).stdout
- values = re.findall(r'RX:\s+(\d+)', output)
- return int(values[1])
+def _get_current_settings(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True, host=cfg.remote)[0]
+ return (output['rx'], output['hds-thresh'])
def _get_combined_channels(cfg):
@@ -20,8 +19,21 @@ def _get_combined_channels(cfg):
return int(values[1])
-def _set_flow_rule(cfg, chan):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port 9999 action {chan}", host=cfg.remote).stdout
+def _create_rss_ctx(cfg, chan):
+ output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout
+ values = re.search(r'New RSS context is (\d+)', output).group(1)
+ ctx_id = int(values)
+ return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote))
+
+
+def _set_flow_rule(cfg, port, chan):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout
+ values = re.search(r'ID (\d+)', output).group(1)
+ return int(values)
+
+
+def _set_flow_rule_rss(cfg, port, ctx_id):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
@@ -32,24 +44,29 @@ def test_zcrx(cfg) -> None:
combined_chans = _get_combined_channels(cfg)
if combined_chans < 2:
raise KsftSkipEx('at least 2 combined channels required')
- rx_ring = _get_rx_ring_entries(cfg)
-
- try:
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
-
- rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 12840"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(9999, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
- finally:
- ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(port, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
def test_zcrx_oneshot(cfg) -> None:
@@ -58,24 +75,61 @@ def test_zcrx_oneshot(cfg) -> None:
combined_chans = _get_combined_channels(cfg)
if combined_chans < 2:
raise KsftSkipEx('at least 2 combined channels required')
- rx_ring = _get_rx_ring_entries(cfg)
-
- try:
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
-
- rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 4096 -z 16384"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(9999, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
- finally:
- ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(port, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
+
+
+def test_zcrx_rss(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+
+ (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
+ flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(port, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
def main() -> None:
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
index 399789a9676a..b582885786f5 100644
--- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -9,7 +9,6 @@ try:
sys.path.append(KSFT_DIR.as_posix())
from net.lib.py import *
from drivers.net.lib.py import *
- from .linkconfig import LinkConfig
except ModuleNotFoundError as e:
ksft_pr("Failed importing `net` library from kernel sources")
ksft_pr(str(e))
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py
deleted file mode 100644
index 79fde603cbbc..000000000000
--- a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py
+++ /dev/null
@@ -1,222 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-from lib.py import cmd, ethtool, ip
-from lib.py import ksft_pr, ksft_eq, KsftSkipEx
-from typing import Optional
-import re
-import time
-import json
-
-#The LinkConfig class is implemented to handle the link layer configurations.
-#Required minimum ethtool version is 6.10
-
-class LinkConfig:
- """Class for handling the link layer configurations"""
- def __init__(self, cfg: object) -> None:
- self.cfg = cfg
- self.partner_netif = self.get_partner_netif_name()
-
- """Get the initial link configuration of local interface"""
- self.common_link_modes = self.get_common_link_modes()
-
- def get_partner_netif_name(self) -> Optional[str]:
- partner_netif = None
- try:
- if not self.verify_link_up():
- return None
- """Get partner interface name"""
- partner_json_output = ip("addr show", json=True, host=self.cfg.remote)
- for interface in partner_json_output:
- for addr in interface.get('addr_info', []):
- if addr.get('local') == self.cfg.remote_addr:
- partner_netif = interface['ifname']
- ksft_pr(f"Partner Interface name: {partner_netif}")
- if partner_netif is None:
- ksft_pr("Unable to get the partner interface name")
- except Exception as e:
- print(f"Unexpected error occurred while getting partner interface name: {e}")
- self.partner_netif = partner_netif
- return partner_netif
-
- def verify_link_up(self) -> bool:
- """Verify whether the local interface link is up"""
- with open(f"/sys/class/net/{self.cfg.ifname}/operstate", "r") as fp:
- link_state = fp.read().strip()
-
- if link_state == "down":
- ksft_pr(f"Link state of interface {self.cfg.ifname} is DOWN")
- return False
- else:
- return True
-
- def reset_interface(self, local: bool = True, remote: bool = True) -> bool:
- ksft_pr("Resetting interfaces in local and remote")
- if remote:
- if self.verify_link_up():
- if self.partner_netif is not None:
- ifname = self.partner_netif
- link_up_cmd = f"ip link set up {ifname}"
- link_down_cmd = f"ip link set down {ifname}"
- reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}"
- try:
- cmd(reset_cmd, host=self.cfg.remote)
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while resetting remote: {e}")
- else:
- ksft_pr("Partner interface not available")
- if local:
- ifname = self.cfg.ifname
- link_up_cmd = f"ip link set up {ifname}"
- link_down_cmd = f"ip link set down {ifname}"
- reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}"
- try:
- cmd(reset_cmd)
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while resetting local: {e}")
- time.sleep(10)
- if self.verify_link_up() and self.get_ethtool_field("link-detected"):
- ksft_pr("Local and remote interfaces reset to original state")
- return True
- else:
- ksft_pr("Error occurred after resetting interfaces. Link is DOWN.")
- return False
-
- def set_speed_and_duplex(self, speed: str, duplex: str, autoneg: bool = True) -> bool:
- """Set the speed and duplex state for the interface"""
- autoneg_state = "on" if autoneg is True else "off"
- process = None
- try:
- process = ethtool(f"--change {self.cfg.ifname} speed {speed} duplex {duplex} autoneg {autoneg_state}")
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while setting speed/duplex: {e}")
- if process is None or process.ret != 0:
- return False
- else:
- ksft_pr(f"Speed: {speed} Mbps, Duplex: {duplex} set for Interface: {self.cfg.ifname}")
- return True
-
- def verify_speed_and_duplex(self, expected_speed: str, expected_duplex: str) -> bool:
- if not self.verify_link_up():
- return False
- """Verifying the speed and duplex state for the interface"""
- with open(f"/sys/class/net/{self.cfg.ifname}/speed", "r") as fp:
- actual_speed = fp.read().strip()
- with open(f"/sys/class/net/{self.cfg.ifname}/duplex", "r") as fp:
- actual_duplex = fp.read().strip()
-
- ksft_eq(actual_speed, expected_speed)
- ksft_eq(actual_duplex, expected_duplex)
- return True
-
- def set_autonegotiation_state(self, state: str, remote: bool = False) -> bool:
- common_link_modes = self.common_link_modes
- speeds, duplex_modes = self.get_speed_duplex_values(self.common_link_modes)
- speed = speeds[0]
- duplex = duplex_modes[0]
- if not speed or not duplex:
- ksft_pr("No speed or duplex modes found")
- return False
-
- speed_duplex_cmd = f"speed {speed} duplex {duplex}" if state == "off" else ""
- if remote:
- if not self.verify_link_up():
- return False
- """Set the autonegotiation state for the partner"""
- command = f"-s {self.partner_netif} {speed_duplex_cmd} autoneg {state}"
- partner_autoneg_change = None
- """Set autonegotiation state for interface in remote pc"""
- try:
- partner_autoneg_change = ethtool(command, host=self.cfg.remote)
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while changing auto-neg in remote: {e}")
- if partner_autoneg_change is None or partner_autoneg_change.ret != 0:
- ksft_pr(f"Not able to set autoneg parameter for interface {self.partner_netif}.")
- return False
- ksft_pr(f"Autoneg set as {state} for {self.partner_netif}")
- else:
- """Set the autonegotiation state for the interface"""
- try:
- process = ethtool(f"-s {self.cfg.ifname} {speed_duplex_cmd} autoneg {state}")
- if process.ret != 0:
- ksft_pr(f"Not able to set autoneg parameter for interface {self.cfg.ifname}")
- return False
- except Exception as e:
- ksft_pr(f"Unexpected error occurred while changing auto-neg in local: {e}")
- return False
- ksft_pr(f"Autoneg set as {state} for {self.cfg.ifname}")
- return True
-
- def check_autoneg_supported(self, remote: bool = False) -> bool:
- if not remote:
- local_autoneg = self.get_ethtool_field("supports-auto-negotiation")
- if local_autoneg is None:
- ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.cfg.ifname}")
- """Return autoneg status of the local interface"""
- return local_autoneg
- else:
- if not self.verify_link_up():
- raise KsftSkipEx("Link is DOWN")
- """Check remote auto-negotiation support status"""
- partner_autoneg = False
- if self.partner_netif is not None:
- partner_autoneg = self.get_ethtool_field("supports-auto-negotiation", remote=True)
- if partner_autoneg is None:
- ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.partner_netif}")
- return partner_autoneg
-
- def get_common_link_modes(self) -> set[str]:
- common_link_modes = []
- """Populate common link modes"""
- link_modes = self.get_ethtool_field("supported-link-modes")
- partner_link_modes = self.get_ethtool_field("link-partner-advertised-link-modes")
- if link_modes is None:
- raise KsftSkipEx(f"Link modes not available for {self.cfg.ifname}")
- if partner_link_modes is None:
- raise KsftSkipEx(f"Partner link modes not available for {self.cfg.ifname}")
- common_link_modes = set(link_modes) and set(partner_link_modes)
- return common_link_modes
-
- def get_speed_duplex_values(self, link_modes: list[str]) -> tuple[list[str], list[str]]:
- speed = []
- duplex = []
- """Check the link modes"""
- for data in link_modes:
- parts = data.split('/')
- speed_value = re.match(r'\d+', parts[0])
- if speed_value:
- speed.append(speed_value.group())
- else:
- ksft_pr(f"No speed value found for interface {self.ifname}")
- return None, None
- duplex.append(parts[1].lower())
- return speed, duplex
-
- def get_ethtool_field(self, field: str, remote: bool = False) -> Optional[str]:
- process = None
- if not remote:
- """Get the ethtool field value for the local interface"""
- try:
- process = ethtool(self.cfg.ifname, json=True)
- except Exception as e:
- ksft_pr("Required minimum ethtool version is 6.10")
- ksft_pr(f"Unexpected error occurred while getting ethtool field in local: {e}")
- return None
- else:
- if not self.verify_link_up():
- return None
- """Get the ethtool field value for the remote interface"""
- self.cfg.require_cmd("ethtool", remote=True)
- if self.partner_netif is None:
- ksft_pr(f"Partner interface name is unavailable.")
- return None
- try:
- process = ethtool(self.partner_netif, json=True, host=self.cfg.remote)
- except Exception as e:
- ksft_pr("Required minimum ethtool version is 6.10")
- ksft_pr(f"Unexpected error occurred while getting ethtool field in remote: {e}")
- return None
- json_data = process[0]
- """Check if the field exist in the json data"""
- if field not in json_data:
- raise KsftSkipEx(f'Field {field} does not exist in the output of interface {json_data["ifname"]}')
- return json_data[field]
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 2bf14ac2b8c6..02e4d3d7ded2 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -9,22 +9,31 @@
* ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201
*
* On client:
- * echo -n "hello\nworld" | nc -s <server IP> 5201 -p 5201
+ * echo -n "hello\nworld" | \
+ * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
*
- * Test data validation:
+ * Note this is compatible with regular netcat. i.e. the sender or receiver can
+ * be replaced with regular netcat to test the RX or TX path in isolation.
+ *
+ * Test data validation (devmem TCP on RX only):
*
* On server:
* ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7
*
* On client:
* yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
- * tr \\n \\0 | \
- * head -c 5G | \
+ * head -c 1G | \
* nc <server IP> 5201 -p 5201
*
+ * Test data validation (devmem TCP on RX and TX, validation happens on RX):
*
- * Note this is compatible with regular netcat. i.e. the sender or receiver can
- * be replaced with regular netcat to test the RX or TX path in isolation.
+ * On server:
+ * ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1
+ *
+ * On client:
+ * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \
+ * head -c 1M | \
+ * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
*/
#define _GNU_SOURCE
#define __EXPORTED_HEADERS__
@@ -40,15 +49,18 @@
#include <fcntl.h>
#include <malloc.h>
#include <error.h>
+#include <poll.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
+#include <sys/time.h>
#include <linux/memfd.h>
#include <linux/dma-buf.h>
+#include <linux/errqueue.h>
#include <linux/udmabuf.h>
#include <linux/types.h>
#include <linux/netlink.h>
@@ -70,6 +82,9 @@
#define MSG_SOCK_DEVMEM 0x2000000
#endif
+#define MAX_IOV 1024
+
+static size_t max_chunk;
static char *server_ip;
static char *client_ip;
static char *port;
@@ -79,6 +94,8 @@ static int num_queues = -1;
static char *ifname;
static unsigned int ifindex;
static unsigned int dmabuf_id;
+static uint32_t tx_dmabuf_id;
+static int waittime_ms = 500;
struct memory_buffer {
int fd;
@@ -92,6 +109,8 @@ struct memory_buffer {
struct memory_provider {
struct memory_buffer *(*alloc)(size_t size);
void (*free)(struct memory_buffer *ctx);
+ void (*memcpy_to_device)(struct memory_buffer *dst, size_t off,
+ void *src, int n);
void (*memcpy_from_device)(void *dst, struct memory_buffer *src,
size_t off, int n);
};
@@ -152,6 +171,20 @@ static void udmabuf_free(struct memory_buffer *ctx)
free(ctx);
}
+static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off,
+ void *src, int n)
+{
+ struct dma_buf_sync sync = {};
+
+ sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
+ ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+
+ memcpy(dst->buf_mem + off, src, n);
+
+ sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
+ ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
+}
+
static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
size_t off, int n)
{
@@ -169,6 +202,7 @@ static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
static struct memory_provider udmabuf_memory_provider = {
.alloc = udmabuf_alloc,
.free = udmabuf_free,
+ .memcpy_to_device = udmabuf_memcpy_to_device,
.memcpy_from_device = udmabuf_memcpy_from_device,
};
@@ -187,14 +221,16 @@ void validate_buffer(void *line, size_t size)
{
static unsigned char seed = 1;
unsigned char *ptr = line;
- int errors = 0;
+ unsigned char expected;
+ static int errors;
size_t i;
for (i = 0; i < size; i++) {
- if (ptr[i] != seed) {
+ expected = seed ? seed : '\n';
+ if (ptr[i] != expected) {
fprintf(stderr,
"Failed validation: expected=%u, actual=%u, index=%lu\n",
- seed, ptr[i], i);
+ expected, ptr[i], i);
errors++;
if (errors > 20)
error(1, 0, "validation failed.");
@@ -337,7 +373,8 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin)
server_addr = strrchr(server_addr, ':') + 1;
}
- return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2",
+ /* Try configure 5-tuple */
+ if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2",
ifname,
type,
client_ip ? "src-ip" : "",
@@ -345,7 +382,17 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin)
server_addr,
client_ip ? "src-port" : "",
client_ip ? port : "",
- port, start_queue);
+ port, start_queue))
+ /* If that fails, try configure 3-tuple */
+ if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2",
+ ifname,
+ type,
+ server_addr,
+ port, start_queue))
+ /* If that fails, return error */
+ return -1;
+
+ return 0;
}
static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
@@ -393,6 +440,49 @@ err_close:
return -1;
}
+static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+ struct ynl_sock **ys)
+{
+ struct netdev_bind_tx_req *req = NULL;
+ struct netdev_bind_tx_rsp *rsp = NULL;
+ struct ynl_error yerr;
+
+ *ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!*ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = netdev_bind_tx_req_alloc();
+ netdev_bind_tx_req_set_ifindex(req, ifindex);
+ netdev_bind_tx_req_set_fd(req, dmabuf_fd);
+
+ rsp = netdev_bind_tx(*ys, req);
+ if (!rsp) {
+ perror("netdev_bind_tx");
+ goto err_close;
+ }
+
+ if (!rsp->_present.id) {
+ perror("id not present");
+ goto err_close;
+ }
+
+ fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id);
+ tx_dmabuf_id = rsp->id;
+
+ netdev_bind_tx_req_free(req);
+ netdev_bind_tx_rsp_free(rsp);
+
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+ netdev_bind_tx_req_free(req);
+ ynl_sock_destroy(*ys);
+ return -1;
+}
+
static void enable_reuseaddr(int fd)
{
int opt = 1;
@@ -431,10 +521,25 @@ static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
return 0;
}
-int do_server(struct memory_buffer *mem)
+static struct netdev_queue_id *create_queues(void)
{
- char ctrl_data[sizeof(int) * 20000];
struct netdev_queue_id *queues;
+ size_t i = 0;
+
+ queues = calloc(num_queues, sizeof(*queues));
+ for (i = 0; i < num_queues; i++) {
+ queues[i]._present.type = 1;
+ queues[i]._present.id = 1;
+ queues[i].type = NETDEV_QUEUE_TYPE_RX;
+ queues[i].id = start_queue + i;
+ }
+
+ return queues;
+}
+
+static int do_server(struct memory_buffer *mem)
+{
+ char ctrl_data[sizeof(int) * 20000];
size_t non_page_aligned_frags = 0;
struct sockaddr_in6 client_addr;
struct sockaddr_in6 server_sin;
@@ -448,7 +553,6 @@ int do_server(struct memory_buffer *mem)
char buffer[256];
int socket_fd;
int client_fd;
- size_t i = 0;
int ret;
ret = parse_address(server_ip, atoi(port), &server_sin);
@@ -471,16 +575,7 @@ int do_server(struct memory_buffer *mem)
sleep(1);
- queues = malloc(sizeof(*queues) * num_queues);
-
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
- }
-
- if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
error(1, 0, "Failed to bind\n");
tmp_mem = malloc(mem->size);
@@ -545,7 +640,6 @@ int do_server(struct memory_buffer *mem)
goto cleanup;
}
- i++;
for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
if (cm->cmsg_level != SOL_SOCKET ||
(cm->cmsg_type != SCM_DEVMEM_DMABUF &&
@@ -630,10 +724,8 @@ cleanup:
void run_devmem_tests(void)
{
- struct netdev_queue_id *queues;
struct memory_buffer *mem;
struct ynl_sock *ys;
- size_t i = 0;
mem = provider->alloc(getpagesize() * NUM_PAGES);
@@ -641,38 +733,24 @@ void run_devmem_tests(void)
if (configure_rss())
error(1, 0, "rss error\n");
- queues = calloc(num_queues, sizeof(*queues));
-
if (configure_headersplit(1))
error(1, 0, "Failed to configure header split\n");
- if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (!bind_rx_queue(ifindex, mem->fd,
+ calloc(num_queues, sizeof(struct netdev_queue_id)),
+ num_queues, &ys))
error(1, 0, "Binding empty queues array should have failed\n");
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
- }
-
if (configure_headersplit(0))
error(1, 0, "Failed to configure header split\n");
- if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
error(1, 0, "Configure dmabuf with header split off should have failed\n");
if (configure_headersplit(1))
error(1, 0, "Failed to configure header split\n");
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
- }
-
- if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
error(1, 0, "Failed to bind\n");
/* Deactivating a bound queue should not be legal */
@@ -685,13 +763,216 @@ void run_devmem_tests(void)
provider->free(mem);
}
+static uint64_t gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL);
+}
+
+static int do_poll(int fd)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ ret = poll(&pfd, 1, waittime_ms);
+ if (ret == -1)
+ error(1, errno, "poll");
+
+ return ret && (pfd.revents & POLLERR);
+}
+
+static void wait_compl(int fd)
+{
+ int64_t tstop = gettimeofday_ms() + waittime_ms;
+ char control[CMSG_SPACE(100)] = {};
+ struct sock_extended_err *serr;
+ struct msghdr msg = {};
+ struct cmsghdr *cm;
+ __u32 hi, lo;
+ int ret;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ while (gettimeofday_ms() < tstop) {
+ if (!do_poll(fd))
+ continue;
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ continue;
+ error(1, errno, "recvmsg(MSG_ERRQUEUE)");
+ return;
+ }
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, 0, "MSG_CTRUNC\n");
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if (cm->cmsg_level != SOL_IP &&
+ cm->cmsg_level != SOL_IPV6)
+ continue;
+ if (cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type != IP_RECVERR)
+ continue;
+ if (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type != IPV6_RECVERR)
+ continue;
+
+ serr = (void *)CMSG_DATA(cm);
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+ error(1, 0, "wrong origin %u", serr->ee_origin);
+ if (serr->ee_errno != 0)
+ error(1, 0, "wrong errno %d", serr->ee_errno);
+
+ hi = serr->ee_data;
+ lo = serr->ee_info;
+
+ fprintf(stderr, "tx complete [%d,%d]\n", lo, hi);
+ return;
+ }
+ }
+
+ error(1, 0, "did not receive tx completion");
+}
+
+static int do_client(struct memory_buffer *mem)
+{
+ char ctrl_data[CMSG_SPACE(sizeof(__u32))];
+ struct sockaddr_in6 server_sin;
+ struct sockaddr_in6 client_sin;
+ struct ynl_sock *ys = NULL;
+ struct iovec iov[MAX_IOV];
+ struct msghdr msg = {};
+ ssize_t line_size = 0;
+ struct cmsghdr *cmsg;
+ char *line = NULL;
+ unsigned long mid;
+ size_t len = 0;
+ int socket_fd;
+ __u32 ddmabuf;
+ int opt = 1;
+ int ret;
+
+ ret = parse_address(server_ip, atoi(port), &server_sin);
+ if (ret < 0)
+ error(1, 0, "parse server address");
+
+ socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (socket_fd < 0)
+ error(1, socket_fd, "create socket");
+
+ enable_reuseaddr(socket_fd);
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
+ strlen(ifname) + 1);
+ if (ret)
+ error(1, errno, "bindtodevice");
+
+ if (bind_tx_queue(ifindex, mem->fd, &ys))
+ error(1, 0, "Failed to bind\n");
+
+ if (client_ip) {
+ ret = parse_address(client_ip, atoi(port), &client_sin);
+ if (ret < 0)
+ error(1, 0, "parse client address");
+
+ ret = bind(socket_fd, &client_sin, sizeof(client_sin));
+ if (ret)
+ error(1, errno, "bind");
+ }
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
+ if (ret)
+ error(1, errno, "set sock opt");
+
+ fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
+ ntohs(server_sin.sin6_port), ifname);
+
+ ret = connect(socket_fd, &server_sin, sizeof(server_sin));
+ if (ret)
+ error(1, errno, "connect");
+
+ while (1) {
+ free(line);
+ line = NULL;
+ line_size = getline(&line, &len, stdin);
+
+ if (line_size < 0)
+ break;
+
+ if (max_chunk) {
+ msg.msg_iovlen =
+ (line_size + max_chunk - 1) / max_chunk;
+ if (msg.msg_iovlen > MAX_IOV)
+ error(1, 0,
+ "can't partition %zd bytes into maximum of %d chunks",
+ line_size, MAX_IOV);
+
+ for (int i = 0; i < msg.msg_iovlen; i++) {
+ iov[i].iov_base = (void *)(i * max_chunk);
+ iov[i].iov_len = max_chunk;
+ }
+
+ iov[msg.msg_iovlen - 1].iov_len =
+ line_size - (msg.msg_iovlen - 1) * max_chunk;
+ } else {
+ iov[0].iov_base = 0;
+ iov[0].iov_len = line_size;
+ msg.msg_iovlen = 1;
+ }
+
+ msg.msg_iov = iov;
+ provider->memcpy_to_device(mem, 0, line, line_size);
+
+ msg.msg_control = ctrl_data;
+ msg.msg_controllen = sizeof(ctrl_data);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+
+ ddmabuf = tx_dmabuf_id;
+
+ *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf;
+
+ ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
+ if (ret < 0)
+ error(1, errno, "Failed sendmsg");
+
+ fprintf(stderr, "sendmsg_ret=%d\n", ret);
+
+ if (ret != line_size)
+ error(1, errno, "Did not send all bytes %d vs %zd", ret,
+ line_size);
+
+ wait_compl(socket_fd);
+ }
+
+ fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);
+
+ free(line);
+ close(socket_fd);
+
+ if (ys)
+ ynl_sock_destroy(ys);
+
+ return 0;
+}
+
int main(int argc, char *argv[])
{
struct memory_buffer *mem;
int is_server = 0, opt;
int ret;
- while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) {
+ while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
switch (opt) {
case 'l':
is_server = 1;
@@ -717,6 +998,9 @@ int main(int argc, char *argv[])
case 'f':
ifname = optarg;
break;
+ case 'z':
+ max_chunk = atoi(optarg);
+ break;
case '?':
fprintf(stderr, "unknown option: %c\n", optopt);
break;
@@ -728,6 +1012,8 @@ int main(int argc, char *argv[])
ifindex = if_nametoindex(ifname);
+ fprintf(stderr, "using ifindex=%u\n", ifindex);
+
if (!server_ip && !client_ip) {
if (start_queue < 0 && num_queues < 0) {
num_queues = rxq_num(ifindex);
@@ -778,7 +1064,7 @@ int main(int argc, char *argv[])
error(1, 0, "Missing -p argument\n");
mem = provider->alloc(getpagesize() * NUM_PAGES);
- ret = is_server ? do_server(mem) : 1;
+ ret = is_server ? do_server(mem) : do_client(mem);
provider->free(mem);
return ret;
diff --git a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py b/tools/testing/selftests/drivers/net/hw/nic_link_layer.py
deleted file mode 100644
index efd921180532..000000000000
--- a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-#Introduction:
-#This file has basic link layer tests for generic NIC drivers.
-#The test comprises of auto-negotiation, speed and duplex checks.
-#
-#Setup:
-#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1)
-#
-# DUT PC Partner PC
-#┌───────────────────────┐ ┌──────────────────────────┐
-#│ │ │ │
-#│ │ │ │
-#│ ┌───────────┐ │ │
-#│ │DUT NIC │ Eth │ │
-#│ │Interface ─┼─────────────────────────┼─ any eth Interface │
-#│ └───────────┘ │ │
-#│ │ │ │
-#│ │ │ │
-#└───────────────────────┘ └──────────────────────────┘
-#
-#Configurations:
-#Required minimum ethtool version is 6.10 (supports json)
-#Default values:
-#time_delay = 8 #time taken to wait for transitions to happen, in seconds.
-
-import time
-import argparse
-from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_eq
-from lib.py import KsftFailEx, KsftSkipEx
-from lib.py import NetDrvEpEnv
-from lib.py import LinkConfig
-
-def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None:
- if link_config.partner_netif is None:
- KsftSkipEx("Partner interface is not available")
- if not link_config.check_autoneg_supported() or not link_config.check_autoneg_supported(remote=True):
- KsftSkipEx(f"Auto-negotiation not supported for interface {cfg.ifname} or {link_config.partner_netif}")
- if not link_config.verify_link_up():
- raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
-
-def verify_autonegotiation(cfg: object, expected_state: str, link_config: LinkConfig) -> None:
- if not link_config.verify_link_up():
- raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
- """Verifying the autonegotiation state in partner"""
- partner_autoneg_output = link_config.get_ethtool_field("auto-negotiation", remote=True)
- if partner_autoneg_output is None:
- KsftSkipEx(f"Auto-negotiation state not available for interface {link_config.partner_netif}")
- partner_autoneg_state = "on" if partner_autoneg_output is True else "off"
-
- ksft_eq(partner_autoneg_state, expected_state)
-
- """Verifying the autonegotiation state of local"""
- autoneg_output = link_config.get_ethtool_field("auto-negotiation")
- if autoneg_output is None:
- KsftSkipEx(f"Auto-negotiation state not available for interface {cfg.ifname}")
- actual_state = "on" if autoneg_output is True else "off"
-
- ksft_eq(actual_state, expected_state)
-
- """Verifying the link establishment"""
- link_available = link_config.get_ethtool_field("link-detected")
- if link_available is None:
- KsftSkipEx(f"Link status not available for interface {cfg.ifname}")
- if link_available != True:
- raise KsftSkipEx("Link not established at interface {cfg.ifname} after changing auto-negotiation")
-
-def test_autonegotiation(cfg: object, link_config: LinkConfig, time_delay: int) -> None:
- _pre_test_checks(cfg, link_config)
- for state in ["off", "on"]:
- if not link_config.set_autonegotiation_state(state, remote=True):
- raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {link_config.partner_netif}")
- if not link_config.set_autonegotiation_state(state):
- raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {cfg.ifname}")
- time.sleep(time_delay)
- verify_autonegotiation(cfg, state, link_config)
-
-def test_network_speed(cfg: object, link_config: LinkConfig, time_delay: int) -> None:
- _pre_test_checks(cfg, link_config)
- common_link_modes = link_config.common_link_modes
- if not common_link_modes:
- KsftSkipEx("No common link modes exist")
- speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes)
-
- if speeds and duplex_modes and len(speeds) == len(duplex_modes):
- for idx in range(len(speeds)):
- speed = speeds[idx]
- duplex = duplex_modes[idx]
- if not link_config.set_speed_and_duplex(speed, duplex):
- raise KsftFailEx(f"Unable to set speed and duplex parameters for {cfg.ifname}")
- time.sleep(time_delay)
- if not link_config.verify_speed_and_duplex(speed, duplex):
- raise KsftSkipEx(f"Error occurred while verifying speed and duplex states for interface {cfg.ifname}")
- else:
- if not speeds or not duplex_modes:
- KsftSkipEx(f"No supported speeds or duplex modes found for interface {cfg.ifname}")
- else:
- KsftSkipEx("Mismatch in the number of speeds and duplex modes")
-
-def main() -> None:
- parser = argparse.ArgumentParser(description="Run basic link layer tests for NIC driver")
- parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.')
- args = parser.parse_args()
- time_delay = args.time_delay
- with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
- link_config = LinkConfig(cfg)
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, time_delay,))
- link_config.reset_interface()
- ksft_exit()
-
-if __name__ == "__main__":
- main()
diff --git a/tools/testing/selftests/drivers/net/hw/nic_performance.py b/tools/testing/selftests/drivers/net/hw/nic_performance.py
deleted file mode 100644
index 201403b76ea3..000000000000
--- a/tools/testing/selftests/drivers/net/hw/nic_performance.py
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-#Introduction:
-#This file has basic performance test for generic NIC drivers.
-#The test comprises of throughput check for TCP and UDP streams.
-#
-#Setup:
-#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1)
-#
-# DUT PC Partner PC
-#┌───────────────────────┐ ┌──────────────────────────┐
-#│ │ │ │
-#│ │ │ │
-#│ ┌───────────┐ │ │
-#│ │DUT NIC │ Eth │ │
-#│ │Interface ─┼─────────────────────────┼─ any eth Interface │
-#│ └───────────┘ │ │
-#│ │ │ │
-#│ │ │ │
-#└───────────────────────┘ └──────────────────────────┘
-#
-#Configurations:
-#To prevent interruptions, Add ethtool, ip to the sudoers list in remote PC and get the ssh key from remote.
-#Required minimum ethtool version is 6.10
-#Change the below configuration based on your hw needs.
-# """Default values"""
-#time_delay = 8 #time taken to wait for transitions to happen, in seconds.
-#test_duration = 10 #performance test duration for the throughput check, in seconds.
-#send_throughput_threshold = 80 #percentage of send throughput required to pass the check
-#receive_throughput_threshold = 50 #percentage of receive throughput required to pass the check
-
-import time
-import json
-import argparse
-from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_true
-from lib.py import KsftFailEx, KsftSkipEx, GenerateTraffic
-from lib.py import NetDrvEpEnv, bkg, wait_port_listen
-from lib.py import cmd
-from lib.py import LinkConfig
-
-class TestConfig:
- def __init__(self, time_delay: int, test_duration: int, send_throughput_threshold: int, receive_throughput_threshold: int) -> None:
- self.time_delay = time_delay
- self.test_duration = test_duration
- self.send_throughput_threshold = send_throughput_threshold
- self.receive_throughput_threshold = receive_throughput_threshold
-
-def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None:
- if not link_config.verify_link_up():
- KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
- common_link_modes = link_config.common_link_modes
- if common_link_modes is None:
- KsftSkipEx("No common link modes found")
- if link_config.partner_netif == None:
- KsftSkipEx("Partner interface is not available")
- if link_config.check_autoneg_supported():
- KsftSkipEx("Auto-negotiation not supported by local")
- if link_config.check_autoneg_supported(remote=True):
- KsftSkipEx("Auto-negotiation not supported by remote")
- cfg.require_cmd("iperf3", remote=True)
-
-def check_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, protocol: str, traffic: GenerateTraffic) -> None:
- common_link_modes = link_config.common_link_modes
- speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes)
- """Test duration in seconds"""
- duration = test_config.test_duration
-
- ksft_pr(f"{protocol} test")
- test_type = "-u" if protocol == "UDP" else ""
-
- send_throughput = []
- receive_throughput = []
- for idx in range(0, len(speeds)):
- if link_config.set_speed_and_duplex(speeds[idx], duplex_modes[idx]) == False:
- raise KsftFailEx(f"Not able to set speed and duplex parameters for {cfg.ifname}")
- time.sleep(test_config.time_delay)
- if not link_config.verify_link_up():
- raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN")
-
- send_command=f"{test_type} -b 0 -t {duration} --json"
- receive_command=f"{test_type} -b 0 -t {duration} --reverse --json"
-
- send_result = traffic.run_remote_test(cfg, command=send_command)
- if send_result.ret != 0:
- raise KsftSkipEx("Error occurred during data transmit: {send_result.stdout}")
-
- send_output = send_result.stdout
- send_data = json.loads(send_output)
-
- """Convert throughput to Mbps"""
- send_throughput.append(round(send_data['end']['sum_sent']['bits_per_second'] / 1e6, 2))
- ksft_pr(f"{protocol}: Send throughput: {send_throughput[idx]} Mbps")
-
- receive_result = traffic.run_remote_test(cfg, command=receive_command)
- if receive_result.ret != 0:
- raise KsftSkipEx("Error occurred during data receive: {receive_result.stdout}")
-
- receive_output = receive_result.stdout
- receive_data = json.loads(receive_output)
-
- """Convert throughput to Mbps"""
- receive_throughput.append(round(receive_data['end']['sum_received']['bits_per_second'] / 1e6, 2))
- ksft_pr(f"{protocol}: Receive throughput: {receive_throughput[idx]} Mbps")
-
- """Check whether throughput is not below the threshold (default values set at start)"""
- for idx in range(0, len(speeds)):
- send_threshold = float(speeds[idx]) * float(test_config.send_throughput_threshold / 100)
- receive_threshold = float(speeds[idx]) * float(test_config.receive_throughput_threshold / 100)
- ksft_true(send_throughput[idx] >= send_threshold, f"{protocol}: Send throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex")
- ksft_true(receive_throughput[idx] >= receive_threshold, f"{protocol}: Receive throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex")
-
-def test_tcp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None:
- _pre_test_checks(cfg, link_config)
- check_throughput(cfg, link_config, test_config, 'TCP', traffic)
-
-def test_udp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None:
- _pre_test_checks(cfg, link_config)
- check_throughput(cfg, link_config, test_config, 'UDP', traffic)
-
-def main() -> None:
- parser = argparse.ArgumentParser(description="Run basic performance test for NIC driver")
- parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.')
- parser.add_argument('--test-duration', type=int, default=10, help='Performance test duration for the throughput check, in seconds. Default is 10 seconds.')
- parser.add_argument('--stt', type=int, default=80, help='Send throughput Threshold: Percentage of send throughput upon actual throughput required to pass the throughput check (in percentage). Default is 80.')
- parser.add_argument('--rtt', type=int, default=50, help='Receive throughput Threshold: Percentage of receive throughput upon actual throughput required to pass the throughput check (in percentage). Default is 50.')
- args=parser.parse_args()
- test_config = TestConfig(args.time_delay, args.test_duration, args.stt, args.rtt)
- with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
- traffic = GenerateTraffic(cfg)
- link_config = LinkConfig(cfg)
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, test_config, traffic, ))
- link_config.reset_interface()
- ksft_exit()
-
-if __name__ == "__main__":
- main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index 53bb08cc29ec..f439c434ba36 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -32,6 +32,11 @@ def test_rss_input_xfrm(cfg, ipver):
if multiprocessing.cpu_count() < 2:
raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
+ cfg.require_cmd("socat", remote=True)
+
+ if not hasattr(socket, "SO_INCOMING_CPU"):
+ raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
input_xfrm = cfg.ethnl.rss_get(
{'header': {'dev-name': cfg.ifname}}).get('input_xfrm')
diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
new file mode 100755
index 000000000000..d19d1d518208
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+# This is intended to be run on a virtio-net guest interface.
+# The test binds the XDP socket to the interface without setting
+# the fill ring to trigger delayed refill_work. This helps to
+# make it easier to reproduce the deadlock when XDP program,
+# XDP socket bind/unbind, rx ring resize race with refill_work on
+# the buggy kernel.
+#
+# The Qemu command to setup virtio-net
+# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no
+# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on
+
+from lib.py import ksft_exit, ksft_run
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetDrvEnv
+from lib.py import bkg, ip, cmd, ethtool
+import time
+
+def _get_rx_ring_entries(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True)
+ return output[0]["rx"]
+
+def setup_xsk(cfg, xdp_queue_id = 0) -> bkg:
+ # Probe for support
+ xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+ if xdp.ret == 255:
+ raise KsftSkipEx('AF_XDP unsupported')
+ elif xdp.ret > 0:
+ raise KsftFailEx('unable to create AF_XDP socket')
+
+ try:
+ return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \
+ '{xdp_queue_id} -z', ksft_wait=3)
+ except:
+ raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \
+ 'Please consider adding iommu_platform=on ' \
+ 'when setting up virtio-net-pci')
+
+def check_xdp_bind(cfg):
+ with setup_xsk(cfg):
+ ip(f"link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+ ip(f"link set dev %s xdp off" % cfg.ifname)
+
+def check_rx_resize(cfg):
+ with setup_xsk(cfg):
+ rx_ring = _get_rx_ring_entries(cfg)
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring // 2))
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring))
+
+def main():
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([check_xdp_bind, check_rx_resize],
+ args=(cfg, ))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index ad5ff645183a..3bccddf8cbc5 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -12,7 +12,7 @@ from .remote import Remote
class NetDrvEnvBase:
"""
- Base class for a NIC / host envirnoments
+ Base class for a NIC / host environments
Attributes:
test_dir: Path to the source directory of the test
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
index da5af2c680fa..d9c10613ae67 100644
--- a/tools/testing/selftests/drivers/net/lib/py/load.py
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -2,7 +2,7 @@
import time
-from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen, bkg
+from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
class GenerateTraffic:
def __init__(self, env, port=None):
@@ -23,24 +23,6 @@ class GenerateTraffic:
self.stop(verbose=True)
raise Exception("iperf3 traffic did not ramp up")
- def run_remote_test(self, env: object, port=None, command=None):
- if port is None:
- port = rand_port()
- try:
- server_cmd = f"iperf3 -s 1 -p {port} --one-off"
- with bkg(server_cmd, host=env.remote):
- #iperf3 opens TCP connection as default in server
- #-u to be specified in client command for UDP
- wait_port_listen(port, host=env.remote)
- except Exception as e:
- raise Exception(f"Unexpected error occurred while running server command: {e}")
- try:
- client_cmd = f"iperf3 -c {env.remote_addr} -p {port} {command}"
- proc = cmd(client_cmd)
- return proc
- except Exception as e:
- raise Exception(f"Unexpected error occurred while running client command: {e}")
-
def _wait_pkts(self, pkt_cnt=None, pps=None):
"""
Wait until we've seen pkt_cnt or until traffic ramps up to pps.
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index 3c96b022954d..29b01b8e2215 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -33,7 +33,6 @@ NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
# Used to create and delete namespaces
source "${LIBDIR}"/../../../../net/lib.sh
-source "${LIBDIR}"/../../../../net/net_helper.sh
# Create netdevsim interfaces
create_ifaces() {
diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py
new file mode 100755
index 000000000000..356bac46ba04
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, NetDrvEpEnv
+from lib.py import bkg, cmd, rand_port, NetNSEnter
+
+def test_napi_id(cfg) -> None:
+ port = rand_port()
+ listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}"
+
+ with bkg(listen_cmd, ksft_wait=3) as server:
+ cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True)
+
+ ksft_eq(0, server.ret)
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run([test_napi_id], args=(cfg,))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c
new file mode 100644
index 000000000000..eecd610c2109
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id_helper.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#include "../../net/lib/ksft.h"
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in address;
+ unsigned int napi_id;
+ unsigned int port;
+ socklen_t optlen;
+ char buf[1024];
+ int opt = 1;
+ int server;
+ int client;
+ int ret;
+
+ server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (server < 0) {
+ perror("socket creation failed");
+ if (errno == EAFNOSUPPORT)
+ return -1;
+ return 1;
+ }
+
+ port = atoi(argv[2]);
+
+ if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) {
+ perror("setsockopt");
+ return 1;
+ }
+
+ address.sin_family = AF_INET;
+ inet_pton(AF_INET, argv[1], &address.sin_addr);
+ address.sin_port = htons(port);
+
+ if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) {
+ perror("bind failed");
+ return 1;
+ }
+
+ if (listen(server, 1) < 0) {
+ perror("listen");
+ return 1;
+ }
+
+ ksft_ready();
+
+ client = accept(server, NULL, 0);
+ if (client < 0) {
+ perror("accept");
+ return 1;
+ }
+
+ optlen = sizeof(napi_id);
+ ret = getsockopt(client, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id,
+ &optlen);
+ if (ret != 0) {
+ perror("getsockopt");
+ return 1;
+ }
+
+ read(client, buf, 1024);
+
+ ksft_wait();
+
+ if (napi_id == 0) {
+ fprintf(stderr, "napi ID is 0\n");
+ return 1;
+ }
+
+ close(client);
+ close(server);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
index aed62d9e6c0a..1bb46ec435d4 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0-only
-source ../../../net/net_helper.sh
+source ../../../net/lib.sh
NSIM_DEV_1_ID=$((256 + RANDOM % 256))
NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index af8df2313a3b..e0f114612c1a 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -50,6 +50,16 @@ def _test_tcp(cfg) -> None:
cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
ksft_eq(nc.stdout.strip(), test_string)
+def _schedule_checksum_reset(cfg, netnl) -> None:
+ features = ethtool(f"-k {cfg.ifname}", json=True)
+ setting = ""
+ for side in ["tx", "rx"]:
+ f = features[0][side + "-checksumming"]
+ if not f["fixed"]:
+ setting += " " + side
+ setting += " " + ("on" if f["requested"] or f["active"] else "off")
+ defer(ethtool, f" -K {cfg.ifname} " + setting)
+
def _set_offload_checksum(cfg, netnl, on) -> None:
try:
ethtool(f" -K {cfg.ifname} rx {on} tx {on} ")
@@ -139,6 +149,7 @@ def set_interface_init(cfg) -> None:
def test_default_v4(cfg, netnl) -> None:
cfg.require_ipver("4")
+ _schedule_checksum_reset(cfg, netnl)
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
_test_tcp(cfg)
@@ -149,6 +160,7 @@ def test_default_v4(cfg, netnl) -> None:
def test_default_v6(cfg, netnl) -> None:
cfg.require_ipver("6")
+ _schedule_checksum_reset(cfg, netnl)
_set_offload_checksum(cfg, netnl, "off")
_test_v6(cfg)
_test_tcp(cfg)
@@ -157,6 +169,7 @@ def test_default_v6(cfg, netnl) -> None:
_test_tcp(cfg)
def test_xdp_generic_sb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
_set_xdp_generic_sb_on(cfg)
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
@@ -168,6 +181,7 @@ def test_xdp_generic_sb(cfg, netnl) -> None:
_test_tcp(cfg)
def test_xdp_generic_mb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
_set_xdp_generic_mb_on(cfg)
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
@@ -179,6 +193,7 @@ def test_xdp_generic_mb(cfg, netnl) -> None:
_test_tcp(cfg)
def test_xdp_native_sb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
_set_xdp_native_sb_on(cfg)
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
@@ -190,6 +205,7 @@ def test_xdp_native_sb(cfg, netnl) -> None:
_test_tcp(cfg)
def test_xdp_native_mb(cfg, netnl) -> None:
+ _schedule_checksum_reset(cfg, netnl)
_set_xdp_native_mb_on(cfg)
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 06abd3f233e1..236005290a33 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -26,13 +26,13 @@ def nl_get_queues(cfg, nl, qtype='rx'):
def check_xsk(cfg, nl, xdp_queue_id=0) -> None:
# Probe for support
- xdp = cmd(f'{cfg.test_dir / "xdp_helper"} - -', fail=False)
+ xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
if xdp.ret == 255:
raise KsftSkipEx('AF_XDP unsupported')
elif xdp.ret > 0:
raise KsftFailEx('unable to create AF_XDP socket')
- with bkg(f'{cfg.test_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
+ with bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
ksft_wait=3):
rx = tx = False
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
index 2d5a76d99181..eaf6938f100e 100644
--- a/tools/testing/selftests/drivers/net/team/Makefile
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for net selftests
-TEST_PROGS := dev_addr_lists.sh
+TEST_PROGS := dev_addr_lists.sh propagation.sh
TEST_INCLUDES := \
../bonding/lag_lib.sh \
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config
index b5e3a3aad4bf..636b3525b679 100644
--- a/tools/testing/selftests/drivers/net/team/config
+++ b/tools/testing/selftests/drivers/net/team/config
@@ -1,5 +1,6 @@
CONFIG_DUMMY=y
CONFIG_IPV6=y
CONFIG_MACVLAN=y
+CONFIG_NETDEVSIM=m
CONFIG_NET_TEAM=y
CONFIG_NET_TEAM_MODE_LOADBALANCE=y
diff --git a/tools/testing/selftests/drivers/net/team/propagation.sh b/tools/testing/selftests/drivers/net/team/propagation.sh
new file mode 100755
index 000000000000..4bea75b79878
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/propagation.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+NSIM_LRO_ID=$((256 + RANDOM % 256))
+NSIM_LRO_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_LRO_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+
+cleanup()
+{
+ set +e
+ ip link del dummyteam &>/dev/null
+ ip link del team0 &>/dev/null
+ echo $NSIM_LRO_ID > $NSIM_DEV_SYS_DEL
+ modprobe -r netdevsim
+}
+
+# Trigger LRO propagation to the lower.
+# https://lore.kernel.org/netdev/aBvOpkIoxcr9PfDg@mini-arch/
+team_lro()
+{
+ # using netdevsim because it supports NETIF_F_LRO
+ NSIM_LRO_NAME=$(find $NSIM_LRO_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_LRO_SYS/net -exec basename {} \;)
+
+ ip link add name team0 type team
+ ip link set $NSIM_LRO_NAME down
+ ip link set dev $NSIM_LRO_NAME master team0
+ ip link set team0 up
+ ethtool -K team0 large-receive-offload off
+
+ ip link del team0
+}
+
+# Trigger promisc propagation to the lower during IFLA_MASTER.
+# https://lore.kernel.org/netdev/20250506032328.3003050-1-sdf@fomichev.me/
+team_promisc()
+{
+ ip link add name dummyteam type dummy
+ ip link add name team0 type team
+ ip link set dummyteam down
+ ip link set team0 promisc on
+ ip link set dev dummyteam master team0
+ ip link set team0 up
+
+ ip link del team0
+ ip link del dummyteam
+}
+
+# Trigger promisc propagation to the lower via netif_change_flags (aka
+# ndo_change_rx_flags).
+# https://lore.kernel.org/netdev/20250514220319.3505158-1-stfomichev@gmail.com/
+team_change_flags()
+{
+ ip link add name dummyteam type dummy
+ ip link add name team0 type team
+ ip link set dummyteam down
+ ip link set dev dummyteam master team0
+ ip link set team0 up
+ ip link set team0 promisc on
+
+ # Make sure we can add more L2 addresses without any issues.
+ ip link add link team0 address 00:00:00:00:00:01 team0.1 type macvlan
+ ip link set team0.1 up
+
+ ip link del team0.1
+ ip link del team0
+ ip link del dummyteam
+}
+
+trap cleanup EXIT
+modprobe netdevsim || :
+echo $NSIM_LRO_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+team_lro
+team_promisc
+team_change_flags
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
index 828b66a10c63..7afa58e2bb20 100644
--- a/tools/testing/selftests/filesystems/.gitignore
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -2,3 +2,4 @@
dnotify_test
devpts_pts
file_stressor
+anon_inode_test
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index 66305fc34c60..b02326193fee 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := devpts_pts file_stressor
+TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test
TEST_GEN_PROGS_EXTENDED := dnotify_test
include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c
new file mode 100644
index 000000000000..e8e0ef1460d2
--- /dev/null
+++ b/tools/testing/selftests/filesystems/anon_inode_test.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include "../kselftest_harness.h"
+#include "overlayfs/wrappers.h"
+
+TEST(anon_inode_no_chown)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(fchown(fd_context, 1234, 5678), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_chmod)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(fchmod(fd_context, 0777), 0);
+ ASSERT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_exec)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_LT(execveat(fd_context, "", NULL, NULL, AT_EMPTY_PATH), 0);
+ ASSERT_EQ(errno, EACCES);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST(anon_inode_no_open)
+{
+ int fd_context;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_GE(dup2(fd_context, 500), 0);
+ ASSERT_EQ(close(fd_context), 0);
+ fd_context = 500;
+
+ ASSERT_LT(open("/proc/self/fd/500", 0), 0);
+ ASSERT_EQ(errno, ENXIO);
+
+ EXPECT_EQ(close(fd_context), 0);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore
index 82a4846cbc4b..124339ea7845 100644
--- a/tools/testing/selftests/filesystems/mount-notify/.gitignore
+++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
/*_test
+/*_test_ns
diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile
index 10be0227b5ae..836a4eb7be06 100644
--- a/tools/testing/selftests/filesystems/mount-notify/Makefile
+++ b/tools/testing/selftests/filesystems/mount-notify/Makefile
@@ -1,6 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
-TEST_GEN_PROGS := mount-notify_test
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+LDLIBS += -lcap
+
+TEST_GEN_PROGS := mount-notify_test mount-notify_test_ns
include ../../lib.mk
+
+$(OUTPUT)/mount-notify_test: ../utils.c
+$(OUTPUT)/mount-notify_test_ns: ../utils.c
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
index 59a71f22fb11..63ce708d93ed 100644
--- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -8,43 +8,21 @@
#include <string.h>
#include <sys/stat.h>
#include <sys/mount.h>
-#include <linux/fanotify.h>
#include <unistd.h>
-#include <sys/fanotify.h>
#include <sys/syscall.h>
#include "../../kselftest_harness.h"
#include "../statmount/statmount.h"
+#include "../utils.h"
-#ifndef FAN_MNT_ATTACH
-struct fanotify_event_info_mnt {
- struct fanotify_event_info_header hdr;
- __u64 mnt_id;
-};
-#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
-#endif
-
-#ifndef FAN_MNT_DETACH
-#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
-#endif
-
-#ifndef FAN_REPORT_MNT
-#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
+// Needed for linux/fanotify.h
+#ifndef __kernel_fsid_t
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
#endif
-#ifndef FAN_MARK_MNTNS
-#define FAN_MARK_MNTNS 0x00000110
-#endif
-
-static uint64_t get_mnt_id(struct __test_metadata *const _metadata,
- const char *path)
-{
- struct statx sx;
-
- ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0);
- ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE));
- return sx.stx_mnt_id;
-}
+#include <sys/fanotify.h>
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
@@ -94,7 +72,7 @@ FIXTURE_SETUP(fanotify)
ASSERT_EQ(mkdir("b", 0700), 0);
- self->root_id = get_mnt_id(_metadata, "/");
+ self->root_id = get_unique_mnt_id("/");
ASSERT_NE(self->root_id, 0);
for (i = 0; i < NUM_FAN_FDS; i++) {
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
new file mode 100644
index 000000000000..090a5ca65004
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
@@ -0,0 +1,557 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "../../kselftest_harness.h"
+#include "../../pidfd/pidfd.h"
+#include "../statmount/statmount.h"
+#include "../utils.h"
+
+// Needed for linux/fanotify.h
+#ifndef __kernel_fsid_t
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
+#endif
+
+#include <sys/fanotify.h>
+
+static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+
+static const int mark_types[] = {
+ FAN_MARK_FILESYSTEM,
+ FAN_MARK_MOUNT,
+ FAN_MARK_INODE
+};
+
+static const int mark_cmds[] = {
+ FAN_MARK_ADD,
+ FAN_MARK_REMOVE,
+ FAN_MARK_FLUSH
+};
+
+#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
+
+FIXTURE(fanotify) {
+ int fan_fd[NUM_FAN_FDS];
+ char buf[256];
+ unsigned int rem;
+ void *next;
+ char root_mntpoint[sizeof(root_mntpoint_templ)];
+ int orig_root;
+ int orig_ns_fd;
+ int ns_fd;
+ uint64_t root_id;
+};
+
+FIXTURE_SETUP(fanotify)
+{
+ int i, ret;
+
+ self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->orig_ns_fd, 0);
+
+ ret = setup_userns();
+ ASSERT_EQ(ret, 0);
+
+ self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->ns_fd, 0);
+
+ strcpy(self->root_mntpoint, root_mntpoint_templ);
+ ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
+
+ self->orig_root = open("/", O_PATH | O_CLOEXEC);
+ ASSERT_GE(self->orig_root, 0);
+
+ ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
+
+ ASSERT_EQ(chroot(self->root_mntpoint), 0);
+
+ ASSERT_EQ(chdir("/"), 0);
+
+ ASSERT_EQ(mkdir("a", 0700), 0);
+
+ ASSERT_EQ(mkdir("b", 0700), 0);
+
+ self->root_id = get_unique_mnt_id("/");
+ ASSERT_NE(self->root_id, 0);
+
+ for (i = 0; i < NUM_FAN_FDS; i++) {
+ int fan_fd = fanotify_init(FAN_REPORT_FID, 0);
+ // Verify that watching tmpfs mounted inside userns is allowed
+ ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i],
+ FAN_OPEN, AT_FDCWD, "/");
+ ASSERT_EQ(ret, 0);
+ // ...but watching entire orig root filesystem is not allowed
+ ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
+ FAN_OPEN, self->orig_root, ".");
+ ASSERT_NE(ret, 0);
+ close(fan_fd);
+
+ self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
+ 0);
+ ASSERT_GE(self->fan_fd[i], 0);
+ // Verify that watching mntns where group was created is allowed
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ // ...but watching orig mntns is not allowed
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->orig_ns_fd, NULL);
+ ASSERT_NE(ret, 0);
+ // On fd[0] we do an extra ADD that changes nothing.
+ // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+
+ self->rem = 0;
+}
+
+FIXTURE_TEARDOWN(fanotify)
+{
+ int i;
+
+ ASSERT_EQ(self->rem, 0);
+ for (i = 0; i < NUM_FAN_FDS; i++)
+ close(self->fan_fd[i]);
+
+ ASSERT_EQ(fchdir(self->orig_root), 0);
+
+ ASSERT_EQ(chroot("."), 0);
+
+ EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
+ EXPECT_EQ(chdir(self->root_mntpoint), 0);
+ EXPECT_EQ(chdir("/"), 0);
+ EXPECT_EQ(rmdir(self->root_mntpoint), 0);
+}
+
+static uint64_t expect_notify(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t *mask)
+{
+ struct fanotify_event_metadata *meta;
+ struct fanotify_event_info_mnt *mnt;
+ unsigned int thislen;
+
+ if (!self->rem) {
+ ssize_t len;
+ int i;
+
+ for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
+ len = read(self->fan_fd[i], self->buf,
+ sizeof(self->buf));
+ if (i > 0) {
+ // Groups 1,2 should get EAGAIN
+ ASSERT_EQ(len, -1);
+ ASSERT_EQ(errno, EAGAIN);
+ } else {
+ // Group 0 should get events
+ ASSERT_GT(len, 0);
+ }
+ }
+
+ self->rem = len;
+ self->next = (void *) self->buf;
+ }
+
+ meta = self->next;
+ ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));
+
+ thislen = meta->event_len;
+ self->rem -= thislen;
+ self->next += thislen;
+
+ *mask = meta->mask;
+ thislen -= sizeof(*meta);
+
+ mnt = ((void *) meta) + meta->event_len - thislen;
+
+ ASSERT_EQ(thislen, sizeof(*mnt));
+
+ return mnt->mnt_id;
+}
+
+static void expect_notify_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ unsigned int n, uint64_t mask[], uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify(_metadata, self, &mask[i]);
+}
+
+static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t expect_mask)
+{
+ uint64_t mntid, mask;
+
+ mntid = expect_notify(_metadata, self, &mask);
+ ASSERT_EQ(expect_mask, mask);
+
+ return mntid;
+}
+
+
+static void expect_notify_mask_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t mask, unsigned int n, uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify_mask(_metadata, self, mask);
+}
+
+static void verify_mount_ids(struct __test_metadata *const _metadata,
+ const uint64_t list1[], const uint64_t list2[],
+ size_t num)
+{
+ unsigned int i, j;
+
+ // Check that neither list has any duplicates
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (i != j) {
+ ASSERT_NE(list1[i], list1[j]);
+ ASSERT_NE(list2[i], list2[j]);
+ }
+ }
+ }
+ // Check that all list1 memebers can be found in list2. Together with
+ // the above it means that the list1 and list2 represent the same sets.
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (list1[i] == list2[j])
+ break;
+ }
+ ASSERT_NE(j, num);
+ }
+}
+
+static void check_mounted(struct __test_metadata *const _metadata,
+ const uint64_t mnts[], size_t num)
+{
+ ssize_t ret;
+ uint64_t *list;
+
+ list = malloc((num + 1) * sizeof(list[0]));
+ ASSERT_NE(list, NULL);
+
+ ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
+ ASSERT_EQ(ret, num);
+
+ verify_mount_ids(_metadata, mnts, list, num);
+
+ free(list);
+}
+
+static void setup_mount_tree(struct __test_metadata *const _metadata,
+ int log2_num)
+{
+ int ret, i;
+
+ ret = mount("", "/", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ for (i = 0; i < log2_num; i++) {
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+TEST_F(fanotify, bind)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, move)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+ uint64_t move_id;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
+ ASSERT_EQ(ret, 0);
+
+ move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ ASSERT_EQ(move_id, mnts[1]);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, propagate)
+{
+ const unsigned int log2_num = 4;
+ const unsigned int num = (1 << log2_num);
+ uint64_t mnts[num];
+
+ setup_mount_tree(_metadata, log2_num);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
+
+ mnts[0] = self->root_id;
+ check_mounted(_metadata, mnts, num);
+
+ // Cleanup
+ int ret;
+ uint64_t mnts2[num];
+ ret = umount2("/", MNT_DETACH);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/", NULL, MS_PRIVATE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts2[0] = self->root_id;
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
+ verify_mount_ids(_metadata, mnts, mnts2, num);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, fsmount)
+{
+ int ret, fs, mnt;
+ uint64_t mnts[2] = { self->root_id };
+
+ fs = fsopen("tmpfs", 0);
+ ASSERT_GE(fs, 0);
+
+ ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ mnt = fsmount(fs, 0, 0);
+ ASSERT_GE(mnt, 0);
+
+ close(fs);
+
+ ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
+ ASSERT_EQ(ret, 0);
+
+ close(mnt);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, reparent)
+{
+ uint64_t mnts[6] = { self->root_id };
+ uint64_t dmnts[3];
+ uint64_t masks[3];
+ unsigned int i;
+ int ret;
+
+ // Create setup with a[1] -> b[2] propagation
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/a", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/b", NULL, MS_SLAVE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ // Mount on a[3], which is propagated to b[4]
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
+
+ check_mounted(_metadata, mnts, 5);
+
+ // Mount on b[5], not propagated
+ ret = mount("/", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ check_mounted(_metadata, mnts, 6);
+
+ // Umount a[3], which is propagated to b[4], but not b[5]
+ // This will result in b[5] "falling" on b[2]
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_n(_metadata, self, 3, masks, dmnts);
+ verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
+
+ for (i = 0; i < 3; i++) {
+ if (dmnts[i] == mnts[5]) {
+ ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ } else {
+ ASSERT_EQ(masks[i], FAN_MNT_DETACH);
+ }
+ }
+
+ mnts[3] = mnts[5];
+ check_mounted(_metadata, mnts, 4);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
+ verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, rmdir)
+{
+ uint64_t mnts[3] = { self->root_id };
+ int ret;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ ret = chdir("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret == 0) {
+ chdir("/");
+ unshare(CLONE_NEWNS);
+ mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ umount2("/a", MNT_DETACH);
+ // This triggers a detach in the other namespace
+ rmdir("/a");
+ exit(0);
+ }
+ wait(NULL);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
+ check_mounted(_metadata, mnts, 1);
+
+ // Cleanup
+ ret = chdir("/");
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(fanotify, pivot_root)
+{
+ uint64_t mnts[3] = { self->root_id };
+ uint64_t mnts2[3];
+ int ret;
+
+ ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ ret = mkdir("/a/new", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mkdir("/a/old", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ check_mounted(_metadata, mnts, 3);
+
+ ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
+ verify_mount_ids(_metadata, mnts, mnts2, 2);
+ check_mounted(_metadata, mnts, 3);
+
+ // Cleanup
+ ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile
index 6c661232b3b5..d3ad4a77db9b 100644
--- a/tools/testing/selftests/filesystems/overlayfs/Makefile
+++ b/tools/testing/selftests/filesystems/overlayfs/Makefile
@@ -4,7 +4,7 @@ CFLAGS += -Wall
CFLAGS += $(KHDR_INCLUDES)
LDLIBS += -lcap
-LOCAL_HDRS += wrappers.h log.h
+LOCAL_HDRS += ../wrappers.h log.h
TEST_GEN_PROGS := dev_in_maps
TEST_GEN_PROGS += set_layers_via_fds
diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
index 3b796264223f..31db54b00e64 100644
--- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
+++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
@@ -17,7 +17,7 @@
#include "../../kselftest.h"
#include "log.h"
-#include "wrappers.h"
+#include "../wrappers.h"
static long get_file_dev_and_inode(void *addr, struct statx *stx)
{
diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
index 5074e64e74a8..dc0449fa628f 100644
--- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
+++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
@@ -16,7 +16,7 @@
#include "../../pidfd/pidfd.h"
#include "log.h"
#include "../utils.h"
-#include "wrappers.h"
+#include "../wrappers.h"
FIXTURE(set_layers_via_fds) {
int pidfd;
diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile
index 14ee91a41650..8e354fe99b44 100644
--- a/tools/testing/selftests/filesystems/statmount/Makefile
+++ b/tools/testing/selftests/filesystems/statmount/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+LDLIBS += -lcap
+
TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test
include ../../lib.mk
+
+$(OUTPUT)/statmount_test_ns: ../utils.c
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
index a7a5289ddae9..99e5ad082fb1 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount.h
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -7,6 +7,42 @@
#include <linux/mount.h>
#include <asm/unistd.h>
+#ifndef __NR_statmount
+ #if defined __alpha__
+ #define __NR_statmount 567
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_statmount 4457
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_statmount 6457
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_statmount 5457
+ #endif
+ #else
+ #define __NR_statmount 457
+ #endif
+#endif
+
+#ifndef __NR_listmount
+ #if defined __alpha__
+ #define __NR_listmount 568
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_listmount 4458
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_listmount 6458
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_listmount 5458
+ #endif
+ #else
+ #define __NR_listmount 458
+ #endif
+#endif
+
static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
struct statmount *buf, size_t bufsize,
unsigned int flags)
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
index 70cb0c8b21cf..605a3fa16bf7 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
@@ -14,6 +14,7 @@
#include <linux/stat.h>
#include "statmount.h"
+#include "../utils.h"
#include "../../kselftest.h"
#define NSID_PASS 0
@@ -78,87 +79,10 @@ static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id)
return NSID_PASS;
}
-static int get_mnt_id(const char *path, uint64_t *mnt_id)
-{
- struct statx sx;
- int ret;
-
- ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
- if (ret == -1) {
- ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
- strerror(errno));
- return NSID_ERROR;
- }
-
- if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
- ksft_print_msg("no unique mount ID available for %s\n", path);
- return NSID_ERROR;
- }
-
- *mnt_id = sx.stx_mnt_id;
- return NSID_PASS;
-}
-
-static int write_file(const char *path, const char *val)
-{
- int fd = open(path, O_WRONLY);
- size_t len = strlen(val);
- int ret;
-
- if (fd == -1) {
- ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
- return NSID_ERROR;
- }
-
- ret = write(fd, val, len);
- if (ret == -1) {
- ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
- return NSID_ERROR;
- }
- if (ret != len) {
- ksft_print_msg("short write to %s\n", path);
- return NSID_ERROR;
- }
-
- ret = close(fd);
- if (ret == -1) {
- ksft_print_msg("closing %s\n", path);
- return NSID_ERROR;
- }
-
- return NSID_PASS;
-}
-
static int setup_namespace(void)
{
- int ret;
- char buf[32];
- uid_t uid = getuid();
- gid_t gid = getgid();
-
- ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
- if (ret == -1)
- ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
- strerror(errno));
-
- sprintf(buf, "0 %d 1", uid);
- ret = write_file("/proc/self/uid_map", buf);
- if (ret != NSID_PASS)
- return ret;
- ret = write_file("/proc/self/setgroups", "deny");
- if (ret != NSID_PASS)
- return ret;
- sprintf(buf, "0 %d 1", gid);
- ret = write_file("/proc/self/gid_map", buf);
- if (ret != NSID_PASS)
- return ret;
-
- ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
- if (ret == -1) {
- ksft_print_msg("making mount tree private: %s\n",
- strerror(errno));
+ if (setup_userns() != 0)
return NSID_ERROR;
- }
return NSID_PASS;
}
@@ -174,9 +98,9 @@ static int _test_statmount_mnt_ns_id(void)
if (ret != NSID_PASS)
return ret;
- ret = get_mnt_id("/", &root_id);
- if (ret != NSID_PASS)
- return ret;
+ root_id = get_unique_mnt_id("/");
+ if (!root_id)
+ return NSID_ERROR;
ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
if (ret == -1) {
diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
index e553c89c5b19..c43a69dffd83 100644
--- a/tools/testing/selftests/filesystems/utils.c
+++ b/tools/testing/selftests/filesystems/utils.c
@@ -18,7 +18,10 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/xattr.h>
+#include <sys/mount.h>
+#include "../kselftest.h"
+#include "wrappers.h"
#include "utils.h"
#define MAX_USERNS_LEVEL 32
@@ -447,6 +450,71 @@ out_close:
return fret;
}
+static int write_file(const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+ int ret;
+
+ if (fd == -1) {
+ ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
+ return -1;
+ }
+
+ ret = write(fd, val, len);
+ if (ret == -1) {
+ ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
+ return -1;
+ }
+ if (ret != len) {
+ ksft_print_msg("short write to %s\n", path);
+ return -1;
+ }
+
+ ret = close(fd);
+ if (ret == -1) {
+ ksft_print_msg("closing %s\n", path);
+ return -1;
+ }
+
+ return 0;
+}
+
+int setup_userns(void)
+{
+ int ret;
+ char buf[32];
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
+ if (ret) {
+ ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
+ strerror(errno));
+ return ret;
+ }
+
+ sprintf(buf, "0 %d 1", uid);
+ ret = write_file("/proc/self/uid_map", buf);
+ if (ret)
+ return ret;
+ ret = write_file("/proc/self/setgroups", "deny");
+ if (ret)
+ return ret;
+ sprintf(buf, "0 %d 1", gid);
+ ret = write_file("/proc/self/gid_map", buf);
+ if (ret)
+ return ret;
+
+ ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ if (ret) {
+ ksft_print_msg("making mount tree private: %s\n", strerror(errno));
+ return ret;
+ }
+
+ return 0;
+}
+
/* caps_down - lower all effective caps */
int caps_down(void)
{
@@ -499,3 +567,23 @@ out:
cap_free(caps);
return fret;
}
+
+uint64_t get_unique_mnt_id(const char *path)
+{
+ struct statx sx;
+ int ret;
+
+ ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
+ if (ret == -1) {
+ ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
+ strerror(errno));
+ return 0;
+ }
+
+ if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
+ ksft_print_msg("no unique mount ID available for %s\n", path);
+ return 0;
+ }
+
+ return sx.stx_mnt_id;
+}
diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h
index 7f1df2a3e94c..70f7ccc607f4 100644
--- a/tools/testing/selftests/filesystems/utils.h
+++ b/tools/testing/selftests/filesystems/utils.h
@@ -27,6 +27,7 @@ extern int caps_down(void);
extern int cap_down(cap_value_t down);
extern bool switch_ids(uid_t uid, gid_t gid);
+extern int setup_userns(void);
static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
{
@@ -42,4 +43,6 @@ static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
return true;
}
+extern uint64_t get_unique_mnt_id(const char *path);
+
#endif /* __IDMAP_UTILS_H */
diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/wrappers.h
index c38bc48e0cfa..420ae4f908cf 100644
--- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h
+++ b/tools/testing/selftests/filesystems/wrappers.h
@@ -9,6 +9,10 @@
#include <linux/mount.h>
#include <sys/syscall.h>
+#ifndef STATX_MNT_ID_UNIQUE
+#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
+#endif
+
static inline int sys_fsopen(const char *fsname, unsigned int flags)
{
return syscall(__NR_fsopen, fsname, flags);
@@ -36,6 +40,28 @@ static inline int sys_mount(const char *src, const char *tgt, const char *fst,
#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
#endif
+#ifndef MOVE_MOUNT_T_EMPTY_PATH
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
+#endif
+
+#ifndef __NR_move_mount
+ #if defined __alpha__
+ #define __NR_move_mount 539
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_move_mount 4429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_move_mount 6429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_move_mount 5429
+ #endif
+ #else
+ #define __NR_move_mount 429
+ #endif
+#endif
+
static inline int sys_move_mount(int from_dfd, const char *from_pathname,
int to_dfd, const char *to_pathname,
unsigned int flags)
@@ -53,7 +79,25 @@ static inline int sys_move_mount(int from_dfd, const char *from_pathname,
#endif
#ifndef AT_RECURSIVE
-#define AT_RECURSIVE 0x8000
+#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+#endif
+
+#ifndef __NR_open_tree
+ #if defined __alpha__
+ #define __NR_open_tree 538
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_open_tree 4428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_open_tree 6428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_open_tree 5428
+ #endif
+ #else
+ #define __NR_open_tree 428
+ #endif
#endif
static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index 49d96bb16355..7c12263f8260 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -6,6 +6,6 @@ TEST_PROGS := ftracetest-ktap
TEST_FILES := test.d settings
EXTRA_CLEAN := $(OUTPUT)/logs/*
-TEST_GEN_PROGS = poll
+TEST_GEN_FILES := poll
include ../lib.mk
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index fbcbdb6963b3..7b24ae89594a 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -1,11 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
+futex_numa_mpol
+futex_priv_hash
+futex_requeue
futex_requeue_pi
futex_requeue_pi_mismatched_ops
futex_requeue_pi_signal_restart
+futex_wait
futex_wait_private_mapped_file
futex_wait_timeout
futex_wait_uninitialized_heap
futex_wait_wouldblock
-futex_wait
-futex_requeue
futex_waitv
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index f79f9bac7918..8cfb87f7f7c5 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
INCLUDES := -I../include -I../../ $(KHDR_INCLUDES)
CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES)
-LDLIBS := -lpthread -lrt
+LDLIBS := -lpthread -lrt -lnuma
LOCAL_HDRS := \
../include/futextest.h \
@@ -17,7 +17,10 @@ TEST_GEN_PROGS := \
futex_wait_private_mapped_file \
futex_wait \
futex_requeue \
- futex_waitv
+ futex_priv_hash \
+ futex_numa_mpol \
+ futex_waitv \
+ futex_numa
TEST_PROGS := run.sh
diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c
new file mode 100644
index 000000000000..f29e4d627e79
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_numa.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <pthread.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <time.h>
+#include <assert.h>
+#include "logging.h"
+#include "futextest.h"
+#include "futex2test.h"
+
+typedef u_int32_t u32;
+typedef int32_t s32;
+typedef u_int64_t u64;
+
+static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
+static int fnode = FUTEX_NO_NODE;
+
+/* fairly stupid test-and-set lock with a waiter flag */
+
+#define N_LOCK 0x0000001
+#define N_WAITERS 0x0001000
+
+struct futex_numa_32 {
+ union {
+ u64 full;
+ struct {
+ u32 val;
+ u32 node;
+ };
+ };
+};
+
+void futex_numa_32_lock(struct futex_numa_32 *lock)
+{
+ for (;;) {
+ struct futex_numa_32 new, old = {
+ .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
+ };
+
+ for (;;) {
+ new = old;
+ if (old.val == 0) {
+ /* no waiter, no lock -> first lock, set no-node */
+ new.node = fnode;
+ }
+ if (old.val & N_LOCK) {
+ /* contention, set waiter */
+ new.val |= N_WAITERS;
+ }
+ new.val |= N_LOCK;
+
+ /* nothing changed, ready to block */
+ if (old.full == new.full)
+ break;
+
+ /*
+ * Use u64 cmpxchg to set the futex value and node in a
+ * consistent manner.
+ */
+ if (__atomic_compare_exchange_n(&lock->full,
+ &old.full, new.full,
+ /* .weak */ false,
+ __ATOMIC_ACQUIRE,
+ __ATOMIC_RELAXED)) {
+
+ /* if we just set N_LOCK, we own it */
+ if (!(old.val & N_LOCK))
+ return;
+
+ /* go block */
+ break;
+ }
+ }
+
+ futex2_wait(lock, new.val, fflags, NULL, 0);
+ }
+}
+
+void futex_numa_32_unlock(struct futex_numa_32 *lock)
+{
+ u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
+ assert((s32)val >= 0);
+ if (val & N_WAITERS) {
+ int woken = futex2_wake(lock, 1, fflags);
+ assert(val == N_WAITERS);
+ if (!woken) {
+ __atomic_compare_exchange_n(&lock->val, &val, 0U,
+ false, __ATOMIC_RELAXED,
+ __ATOMIC_RELAXED);
+ }
+ }
+}
+
+static long nanos = 50000;
+
+struct thread_args {
+ pthread_t tid;
+ volatile int * done;
+ struct futex_numa_32 *lock;
+ int val;
+ int *val1, *val2;
+ int node;
+};
+
+static void *threadfn(void *_arg)
+{
+ struct thread_args *args = _arg;
+ struct timespec ts = {
+ .tv_nsec = nanos,
+ };
+ int node;
+
+ while (!*args->done) {
+
+ futex_numa_32_lock(args->lock);
+ args->val++;
+
+ assert(*args->val1 == *args->val2);
+ (*args->val1)++;
+ nanosleep(&ts, NULL);
+ (*args->val2)++;
+
+ node = args->lock->node;
+ futex_numa_32_unlock(args->lock);
+
+ if (node != args->node) {
+ args->node = node;
+ printf("node: %d\n", node);
+ }
+
+ nanosleep(&ts, NULL);
+ }
+
+ return NULL;
+}
+
+static void *contendfn(void *_arg)
+{
+ struct thread_args *args = _arg;
+
+ while (!*args->done) {
+ /*
+ * futex2_wait() will take hb-lock, verify *var == val and
+ * queue/abort. By knowingly setting val 'wrong' this will
+ * abort and thereby generate hb-lock contention.
+ */
+ futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0);
+ args->val++;
+ }
+
+ return NULL;
+}
+
+static volatile int done = 0;
+static struct futex_numa_32 lock = { .val = 0, };
+static int val1, val2;
+
+int main(int argc, char *argv[])
+{
+ struct thread_args *tas[512], *cas[512];
+ int c, t, threads = 2, contenders = 0;
+ int sleeps = 10;
+ int total = 0;
+
+ while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
+ switch (c) {
+ case 'c':
+ contenders = atoi(optarg);
+ break;
+ case 't':
+ threads = atoi(optarg);
+ break;
+ case 's':
+ sleeps = atoi(optarg);
+ break;
+ case 'n':
+ nanos = atoi(optarg);
+ break;
+ case 'N':
+ fflags |= FUTEX2_NUMA;
+ if (optarg)
+ fnode = atoi(optarg);
+ break;
+ default:
+ exit(1);
+ break;
+ }
+ }
+
+ for (t = 0; t < contenders; t++) {
+ struct thread_args *args = calloc(1, sizeof(*args));
+ if (!args) {
+ perror("thread_args");
+ exit(-1);
+ }
+
+ args->done = &done;
+ args->lock = &lock;
+ args->val1 = &val1;
+ args->val2 = &val2;
+ args->node = -1;
+
+ if (pthread_create(&args->tid, NULL, contendfn, args)) {
+ perror("pthread_create");
+ exit(-1);
+ }
+
+ cas[t] = args;
+ }
+
+ for (t = 0; t < threads; t++) {
+ struct thread_args *args = calloc(1, sizeof(*args));
+ if (!args) {
+ perror("thread_args");
+ exit(-1);
+ }
+
+ args->done = &done;
+ args->lock = &lock;
+ args->val1 = &val1;
+ args->val2 = &val2;
+ args->node = -1;
+
+ if (pthread_create(&args->tid, NULL, threadfn, args)) {
+ perror("pthread_create");
+ exit(-1);
+ }
+
+ tas[t] = args;
+ }
+
+ sleep(sleeps);
+
+ done = true;
+
+ for (t = 0; t < threads; t++) {
+ struct thread_args *args = tas[t];
+
+ pthread_join(args->tid, NULL);
+ total += args->val;
+// printf("tval: %d\n", args->val);
+ }
+ printf("total: %d\n", total);
+
+ if (contenders) {
+ total = 0;
+ for (t = 0; t < contenders; t++) {
+ struct thread_args *args = cas[t];
+
+ pthread_join(args->tid, NULL);
+ total += args->val;
+ // printf("tval: %d\n", args->val);
+ }
+ printf("contenders: %d\n", total);
+ }
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
new file mode 100644
index 000000000000..20a9d3ecf743
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <numa.h>
+#include <numaif.h>
+
+#include <linux/futex.h>
+#include <sys/mman.h>
+
+#include "logging.h"
+#include "futextest.h"
+#include "futex2test.h"
+
+#define MAX_THREADS 64
+
+static pthread_barrier_t barrier_main;
+static pthread_t threads[MAX_THREADS];
+
+struct thread_args {
+ void *futex_ptr;
+ unsigned int flags;
+ int result;
+};
+
+static struct thread_args thread_args[MAX_THREADS];
+
+#ifndef FUTEX_NO_NODE
+#define FUTEX_NO_NODE (-1)
+#endif
+
+#ifndef FUTEX2_MPOL
+#define FUTEX2_MPOL 0x08
+#endif
+
+static void *thread_lock_fn(void *arg)
+{
+ struct thread_args *args = arg;
+ int ret;
+
+ pthread_barrier_wait(&barrier_main);
+ ret = futex2_wait(args->futex_ptr, 0, args->flags, NULL, 0);
+ args->result = ret;
+ return NULL;
+}
+
+static void create_max_threads(void *futex_ptr)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ thread_args[i].futex_ptr = futex_ptr;
+ thread_args[i].flags = FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA;
+ thread_args[i].result = 0;
+ ret = pthread_create(&threads[i], NULL, thread_lock_fn, &thread_args[i]);
+ if (ret)
+ ksft_exit_fail_msg("pthread_create failed\n");
+ }
+}
+
+static void join_max_threads(void)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_join(threads[i], NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_join failed for thread %d\n", i);
+ }
+}
+
+static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flags)
+{
+ int to_wake, ret, i, need_exit = 0;
+
+ pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
+ create_max_threads(futex_ptr);
+ pthread_barrier_wait(&barrier_main);
+ to_wake = MAX_THREADS;
+
+ do {
+ ret = futex2_wake(futex_ptr, to_wake, futex_flags);
+ if (must_fail) {
+ if (ret < 0)
+ break;
+ ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n",
+ to_wake, futex_flags);
+ }
+ if (ret < 0) {
+ ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n",
+ to_wake, futex_flags);
+ }
+ if (!ret)
+ usleep(50);
+ to_wake -= ret;
+
+ } while (to_wake);
+ join_max_threads();
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ if (must_fail && thread_args[i].result != -1) {
+ ksft_print_msg("Thread %d should fail but succeeded (%d)\n",
+ i, thread_args[i].result);
+ need_exit = 1;
+ }
+ if (!must_fail && thread_args[i].result != 0) {
+ ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result);
+ need_exit = 1;
+ }
+ }
+ if (need_exit)
+ ksft_exit_fail_msg("Aborting due to earlier errors.\n");
+}
+
+static void test_futex(void *futex_ptr, int must_fail)
+{
+ __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA);
+}
+
+static void test_futex_mpol(void *futex_ptr, int must_fail)
+{
+ __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
+}
+
+static void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+ struct futex32_numa *futex_numa;
+ int mem_size, i;
+ void *futex_ptr;
+ char c;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ mem_size = sysconf(_SC_PAGE_SIZE);
+ futex_ptr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ if (futex_ptr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size);
+
+ futex_numa = futex_ptr;
+
+ ksft_print_msg("Regular test\n");
+ futex_numa->futex = 0;
+ futex_numa->numa = FUTEX_NO_NODE;
+ test_futex(futex_ptr, 0);
+
+ if (futex_numa->numa == FUTEX_NO_NODE)
+ ksft_exit_fail_msg("NUMA node is left uninitialized\n");
+
+ ksft_print_msg("Memory too small\n");
+ test_futex(futex_ptr + mem_size - 4, 1);
+
+ ksft_print_msg("Memory out of range\n");
+ test_futex(futex_ptr + mem_size, 1);
+
+ futex_numa->numa = FUTEX_NO_NODE;
+ mprotect(futex_ptr, mem_size, PROT_READ);
+ ksft_print_msg("Memory, RO\n");
+ test_futex(futex_ptr, 1);
+
+ mprotect(futex_ptr, mem_size, PROT_NONE);
+ ksft_print_msg("Memory, no access\n");
+ test_futex(futex_ptr, 1);
+
+ mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE);
+ ksft_print_msg("Memory back to RW\n");
+ test_futex(futex_ptr, 0);
+
+ /* MPOL test. Does not work as expected */
+ for (i = 0; i < 4; i++) {
+ unsigned long nodemask;
+ int ret;
+
+ nodemask = 1 << i;
+ ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask,
+ sizeof(nodemask) * 8, 0);
+ if (ret == 0) {
+ ksft_print_msg("Node %d test\n", i);
+ futex_numa->futex = 0;
+ futex_numa->numa = FUTEX_NO_NODE;
+
+ ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
+ if (ret < 0)
+ ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n");
+ if (0)
+ test_futex_mpol(futex_numa, 0);
+ if (futex_numa->numa != i) {
+ ksft_test_result_fail("Returned NUMA node is %d expected %d\n",
+ futex_numa->numa, i);
+ }
+ }
+ }
+ ksft_test_result_pass("NUMA MPOL tests passed\n");
+ ksft_finished();
+ return 0;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c
new file mode 100644
index 000000000000..2dca18fefedc
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/prctl.h>
+#include <sys/prctl.h>
+
+#include "logging.h"
+
+#define MAX_THREADS 64
+
+static pthread_barrier_t barrier_main;
+static pthread_mutex_t global_lock;
+static pthread_t threads[MAX_THREADS];
+static int counter;
+
+#ifndef PR_FUTEX_HASH
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define FH_FLAG_IMMUTABLE (1ULL << 0)
+# define PR_FUTEX_HASH_GET_SLOTS 2
+# define PR_FUTEX_HASH_GET_IMMUTABLE 3
+#endif
+
+static int futex_hash_slots_set(unsigned int slots, int flags)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags);
+}
+
+static int futex_hash_slots_get(void)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
+}
+
+static int futex_hash_immutable_get(void)
+{
+ return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE);
+}
+
+static void futex_hash_slots_set_verify(int slots)
+{
+ int ret;
+
+ ret = futex_hash_slots_set(slots, 0);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set slots to %d: %m\n", slots);
+ ksft_finished();
+ }
+ ret = futex_hash_slots_get();
+ if (ret != slots) {
+ ksft_test_result_fail("Set %d slots but PR_FUTEX_HASH_GET_SLOTS returns: %d, %m\n",
+ slots, ret);
+ ksft_finished();
+ }
+ ksft_test_result_pass("SET and GET slots %d passed\n", slots);
+}
+
+static void futex_hash_slots_set_must_fail(int slots, int flags)
+{
+ int ret;
+
+ ret = futex_hash_slots_set(slots, flags);
+ ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n",
+ slots, flags);
+}
+
+static void *thread_return_fn(void *arg)
+{
+ return NULL;
+}
+
+static void *thread_lock_fn(void *arg)
+{
+ pthread_barrier_wait(&barrier_main);
+
+ pthread_mutex_lock(&global_lock);
+ counter++;
+ usleep(20);
+ pthread_mutex_unlock(&global_lock);
+ return NULL;
+}
+
+static void create_max_threads(void *(*thread_fn)(void *))
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_create(&threads[i], NULL, thread_fn, NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_create failed: %m\n");
+ }
+}
+
+static void join_max_threads(void)
+{
+ int i, ret;
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ ret = pthread_join(threads[i], NULL);
+ if (ret)
+ ksft_exit_fail_msg("pthread_join failed for thread %d\n", i);
+ }
+}
+
+static void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -g Test global hash instead intead local immutable \n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n";
+static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n";
+
+int main(int argc, char *argv[])
+{
+ int futex_slots1, futex_slotsn, online_cpus;
+ pthread_mutexattr_t mutex_attr_pi;
+ int use_global_hash = 0;
+ int ret;
+ char c;
+
+ while ((c = getopt(argc, argv, "cghv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'g':
+ use_global_hash = 1;
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(22);
+
+ ret = pthread_mutexattr_init(&mutex_attr_pi);
+ ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT);
+ ret |= pthread_mutex_init(&global_lock, &mutex_attr_pi);
+ if (ret != 0) {
+ ksft_exit_fail_msg("Failed to initialize pthread mutex.\n");
+ }
+ /* First thread, expect to be 0, not yet initialized */
+ ret = futex_hash_slots_get();
+ if (ret != 0)
+ ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret);
+
+ ret = futex_hash_immutable_get();
+ if (ret != 0)
+ ksft_exit_fail_msg("futex_hash_immutable_get() failed: %d, %m\n", ret);
+
+ ksft_test_result_pass("Basic get slots and immutable status.\n");
+ ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_create() failed: %d, %m\n", ret);
+
+ ret = pthread_join(threads[0], NULL);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret);
+
+ /* First thread, has to initialiaze private hash */
+ futex_slots1 = futex_hash_slots_get();
+ if (futex_slots1 <= 0) {
+ ksft_print_msg("Current hash buckets: %d\n", futex_slots1);
+ ksft_exit_fail_msg(test_msg_auto_create);
+ }
+
+ ksft_test_result_pass(test_msg_auto_create);
+
+ online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_barrier_init failed: %m.\n");
+
+ ret = pthread_mutex_lock(&global_lock);
+ if (ret != 0)
+ ksft_exit_fail_msg("pthread_mutex_lock failed: %m.\n");
+
+ counter = 0;
+ create_max_threads(thread_lock_fn);
+ pthread_barrier_wait(&barrier_main);
+
+ /*
+ * The current default size of hash buckets is 16. The auto increase
+ * works only if more than 16 CPUs are available.
+ */
+ ksft_print_msg("Online CPUs: %d\n", online_cpus);
+ if (online_cpus > 16) {
+ futex_slotsn = futex_hash_slots_get();
+ if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) {
+ ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n",
+ futex_slots1, futex_slotsn);
+ ksft_exit_fail_msg(test_msg_auto_inc);
+ }
+ ksft_test_result_pass(test_msg_auto_inc);
+ } else {
+ ksft_test_result_skip(test_msg_auto_inc);
+ }
+ ret = pthread_mutex_unlock(&global_lock);
+
+ /* Once the user changes it, it has to be what is set */
+ futex_hash_slots_set_verify(2);
+ futex_hash_slots_set_verify(4);
+ futex_hash_slots_set_verify(8);
+ futex_hash_slots_set_verify(32);
+ futex_hash_slots_set_verify(16);
+
+ ret = futex_hash_slots_set(15, 0);
+ ksft_test_result(ret < 0, "Use 15 slots\n");
+
+ futex_hash_slots_set_verify(2);
+ join_max_threads();
+ ksft_test_result(counter == MAX_THREADS, "Created of waited for %d of %d threads\n",
+ counter, MAX_THREADS);
+ counter = 0;
+ /* Once the user set something, auto reisze must be disabled */
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
+
+ create_max_threads(thread_lock_fn);
+ join_max_threads();
+
+ ret = futex_hash_slots_get();
+ ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n",
+ ret);
+
+ futex_hash_slots_set_must_fail(1 << 29, 0);
+
+ /*
+ * Once the private hash has been made immutable or global hash has been requested,
+ * then this requested can not be undone.
+ */
+ if (use_global_hash) {
+ ret = futex_hash_slots_set(0, 0);
+ ksft_test_result(ret == 0, "Global hash request\n");
+ } else {
+ ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE);
+ ksft_test_result(ret == 0, "Immutable resize to 4\n");
+ }
+ if (ret != 0)
+ goto out;
+
+ futex_hash_slots_set_must_fail(4, 0);
+ futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE);
+ futex_hash_slots_set_must_fail(8, 0);
+ futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE);
+ futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE);
+ futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE);
+
+ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
+ if (ret != 0) {
+ ksft_exit_fail_msg("pthread_barrier_init failed: %m\n");
+ return 1;
+ }
+ create_max_threads(thread_lock_fn);
+ join_max_threads();
+
+ ret = futex_hash_slots_get();
+ if (use_global_hash) {
+ ksft_test_result(ret == 0, "Continue to use global hash\n");
+ } else {
+ ksft_test_result(ret == 4, "Continue to use the 4 hash buckets\n");
+ }
+
+ ret = futex_hash_immutable_get();
+ ksft_test_result(ret == 1, "Hash reports to be immutable\n");
+
+out:
+ ksft_finished();
+ return 0;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 5ccd599da6c3..81739849f299 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -82,3 +82,10 @@ echo
echo
./futex_waitv $COLOR
+
+echo
+./futex_priv_hash $COLOR
+./futex_priv_hash -g $COLOR
+
+echo
+./futex_numa_mpol $COLOR
diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
index 9d305520e849..ea79662405bc 100644
--- a/tools/testing/selftests/futex/include/futex2test.h
+++ b/tools/testing/selftests/futex/include/futex2test.h
@@ -8,6 +8,53 @@
#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
+#ifndef __NR_futex_waitv
+#define __NR_futex_waitv 449
+struct futex_waitv {
+ __u64 val;
+ __u64 uaddr;
+ __u32 flags;
+ __u32 __reserved;
+};
+#endif
+
+#ifndef __NR_futex_wake
+#define __NR_futex_wake 454
+#endif
+
+#ifndef __NR_futex_wait
+#define __NR_futex_wait 455
+#endif
+
+#ifndef FUTEX2_SIZE_U32
+#define FUTEX2_SIZE_U32 0x02
+#endif
+
+#ifndef FUTEX2_NUMA
+#define FUTEX2_NUMA 0x04
+#endif
+
+#ifndef FUTEX2_MPOL
+#define FUTEX2_MPOL 0x08
+#endif
+
+#ifndef FUTEX2_PRIVATE
+#define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG
+#endif
+
+#ifndef FUTEX2_NO_NODE
+#define FUTEX_NO_NODE (-1)
+#endif
+
+#ifndef FUTEX_32
+#define FUTEX_32 FUTEX2_SIZE_U32
+#endif
+
+struct futex32_numa {
+ futex_t futex;
+ futex_t numa;
+};
+
/**
* futex_waitv - Wait at multiple futexes, wake on any
* @waiters: Array of waiters
@@ -20,3 +67,26 @@ static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned lon
{
return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
}
+
+/*
+ * futex_wait() - block on uaddr with optional timeout
+ * @val: Expected value
+ * @flags: FUTEX2 flags
+ * @timeout: Relative timeout
+ * @clockid: Clock id for the timeout
+ */
+static inline int futex2_wait(void *uaddr, long val, unsigned int flags,
+ struct timespec *timeout, clockid_t clockid)
+{
+ return syscall(__NR_futex_wait, uaddr, val, ~0U, flags, timeout, clockid);
+}
+
+/*
+ * futex2_wake() - Wake a number of futexes
+ * @nr: Number of threads to wake at most
+ * @flags: FUTEX2 flags
+ */
+static inline int futex2_wake(void *uaddr, int nr, unsigned int flags)
+{
+ return syscall(__NR_futex_wake, uaddr, ~0U, nr, flags);
+}
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index e0884390447d..7bfe315f7001 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := gpio-mockup.sh gpio-sim.sh
+TEST_PROGS := gpio-mockup.sh gpio-sim.sh gpio-aggregator.sh
TEST_FILES := gpio-mockup-sysfs.sh
TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name
CFLAGS += -O2 -g -Wall $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config
index 409a8532facc..1287abeaac7e 100644
--- a/tools/testing/selftests/gpio/config
+++ b/tools/testing/selftests/gpio/config
@@ -2,3 +2,4 @@ CONFIG_GPIOLIB=y
CONFIG_GPIO_CDEV=y
CONFIG_GPIO_MOCKUP=m
CONFIG_GPIO_SIM=m
+CONFIG_GPIO_AGGREGATOR=m
diff --git a/tools/testing/selftests/gpio/gpio-aggregator.sh b/tools/testing/selftests/gpio/gpio-aggregator.sh
new file mode 100755
index 000000000000..9b6f80ad9f8a
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-aggregator.sh
@@ -0,0 +1,727 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 Bartosz Golaszewski <brgl@bgdev.pl>
+# Copyright (C) 2025 Koichiro Den <koichiro.den@canonical.com>
+
+BASE_DIR=$(dirname "$0")
+CONFIGFS_SIM_DIR="/sys/kernel/config/gpio-sim"
+CONFIGFS_AGG_DIR="/sys/kernel/config/gpio-aggregator"
+SYSFS_AGG_DIR="/sys/bus/platform/drivers/gpio-aggregator"
+MODULE="gpio-aggregator"
+
+fail() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test FAIL"
+ exit 1
+}
+
+skip() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test SKIP"
+ exit 4
+}
+
+# gpio-sim
+sim_enable_chip() {
+ local CHIP=$1
+
+ echo 1 > "$CONFIGFS_SIM_DIR/$CHIP/live" || fail "Unable to enable the chip"
+}
+
+sim_disable_chip() {
+ local CHIP=$1
+
+ echo 0 > "$CONFIGFS_SIM_DIR/$CHIP/live" || fail "Unable to disable the chip"
+}
+
+sim_configfs_cleanup() {
+ local NOCHECK=${1:-0}
+
+ for CHIP_DIR in "$CONFIGFS_SIM_DIR"/*; do
+ [ -d "$CHIP_DIR" ] || continue
+ echo 0 > "$CHIP_DIR/live"
+ find "$CHIP_DIR" -depth -type d -exec rmdir {} \;
+ done
+ [ "$NOCHECK" -eq 1 ] && return;
+ remaining=$(find "$CONFIGFS_SIM_DIR" -mindepth 1 -type d 2> /dev/null)
+ if [ -n "$remaining" ]; then
+ fail "Directories remain in $CONFIGFS_SIM_DIR: $remaining"
+ fi
+}
+
+sim_get_chip_label() {
+ local CHIP=$1
+ local BANK=$2
+ local CHIP_NAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name" 2> /dev/null) || \
+ fail "Unable to read the chip name from configfs"
+
+ $BASE_DIR/gpio-chip-info "/dev/$CHIP_NAME" label || \
+ fail "Unable to read the chip label from the character device"
+}
+
+# gpio-aggregator
+agg_create_chip() {
+ local CHIP=$1
+
+ mkdir "$CONFIGFS_AGG_DIR/$CHIP"
+}
+
+agg_remove_chip() {
+ local CHIP=$1
+
+ find "$CONFIGFS_AGG_DIR/$CHIP/" -depth -type d -exec rmdir {} \; || \
+ fail "Unable to remove $CONFIGFS_AGG_DIR/$CHIP"
+}
+
+agg_create_line() {
+ local CHIP=$1
+ local LINE=$2
+
+ mkdir "$CONFIGFS_AGG_DIR/$CHIP/$LINE"
+}
+
+agg_remove_line() {
+ local CHIP=$1
+ local LINE=$2
+
+ rmdir "$CONFIGFS_AGG_DIR/$CHIP/$LINE"
+}
+
+agg_set_key() {
+ local CHIP=$1
+ local LINE=$2
+ local KEY=$3
+
+ echo "$KEY" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/key" || fail "Unable to set the lookup key"
+}
+
+agg_set_offset() {
+ local CHIP=$1
+ local LINE=$2
+ local OFFSET=$3
+
+ echo "$OFFSET" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/offset" || \
+ fail "Unable to set the lookup offset"
+}
+
+agg_set_line_name() {
+ local CHIP=$1
+ local LINE=$2
+ local NAME=$3
+
+ echo "$NAME" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/name" || fail "Unable to set the line name"
+}
+
+agg_enable_chip() {
+ local CHIP=$1
+
+ echo 1 > "$CONFIGFS_AGG_DIR/$CHIP/live" || fail "Unable to enable the chip"
+}
+
+agg_disable_chip() {
+ local CHIP=$1
+
+ echo 0 > "$CONFIGFS_AGG_DIR/$CHIP/live" || fail "Unable to disable the chip"
+}
+
+agg_configfs_cleanup() {
+ local NOCHECK=${1:-0}
+
+ for CHIP_DIR in "$CONFIGFS_AGG_DIR"/*; do
+ [ -d "$CHIP_DIR" ] || continue
+ echo 0 > "$CHIP_DIR/live" 2> /dev/null
+ find "$CHIP_DIR" -depth -type d -exec rmdir {} \;
+ done
+ [ "$NOCHECK" -eq 1 ] && return;
+ remaining=$(find "$CONFIGFS_AGG_DIR" -mindepth 1 -type d 2> /dev/null)
+ if [ -n "$remaining" ]; then
+ fail "Directories remain in $CONFIGFS_AGG_DIR: $remaining"
+ fi
+}
+
+agg_configfs_dev_name() {
+ local CHIP=$1
+
+ cat "$CONFIGFS_AGG_DIR/$CHIP/dev_name" 2> /dev/null || \
+ fail "Unable to read the device name from configfs"
+}
+
+agg_configfs_chip_name() {
+ local CHIP=$1
+ local DEV_NAME=$(agg_configfs_dev_name "$CHIP")
+ local CHIP_LIST=$(find "/sys/devices/platform/$DEV_NAME" \
+ -maxdepth 1 -type d -name "gpiochip[0-9]*" 2> /dev/null)
+ local CHIP_COUNT=$(echo "$CHIP_LIST" | wc -l)
+
+ if [ -z "$CHIP_LIST" ]; then
+ fail "No gpiochip in /sys/devices/platform/$DEV_NAME/"
+ elif [ "$CHIP_COUNT" -ne 1 ]; then
+ fail "Multiple gpiochips unexpectedly found: $CHIP_LIST"
+ fi
+ basename "$CHIP_LIST"
+}
+
+agg_get_chip_num_lines() {
+ local CHIP=$1
+ local N_DIR=$(ls -d $CONFIGFS_AGG_DIR/$CHIP/line[0-9]* 2> /dev/null | wc -l)
+ local N_LINES
+
+ if [ "$(cat $CONFIGFS_AGG_DIR/$CHIP/live)" = 0 ]; then
+ echo "$N_DIR"
+ else
+ N_LINES=$(
+ $BASE_DIR/gpio-chip-info \
+ "/dev/$(agg_configfs_chip_name "$CHIP")" num-lines
+ ) || fail "Unable to read the number of lines from the character device"
+ if [ $N_DIR != $N_LINES ]; then
+ fail "Discrepancy between two sources for the number of lines"
+ fi
+ echo "$N_LINES"
+ fi
+}
+
+agg_get_chip_label() {
+ local CHIP=$1
+
+ $BASE_DIR/gpio-chip-info "/dev/$(agg_configfs_chip_name "$CHIP")" label || \
+ fail "Unable to read the chip label from the character device"
+}
+
+agg_get_line_name() {
+ local CHIP=$1
+ local OFFSET=$2
+ local NAME_CONFIGFS=$(cat "$CONFIGFS_AGG_DIR/$CHIP/line${OFFSET}/name")
+ local NAME_CDEV
+
+ if [ "$(cat "$CONFIGFS_AGG_DIR/$CHIP/live")" = 0 ]; then
+ echo "$NAME_CONFIGFS"
+ else
+ NAME_CDEV=$(
+ $BASE_DIR/gpio-line-name \
+ "/dev/$(agg_configfs_chip_name "$CHIP")" "$OFFSET"
+ ) || fail "Unable to read the line name from the character device"
+ if [ "$NAME_CONFIGFS" != "$NAME_CDEV" ]; then
+ fail "Discrepancy between two sources for the name of line"
+ fi
+ echo "$NAME_CDEV"
+ fi
+}
+
+
+# Load the modules. This will pull in configfs if needed too.
+modprobe gpio-sim || skip "unable to load the gpio-sim module"
+modprobe gpio-aggregator || skip "unable to load the gpio-aggregator module"
+
+# Make sure configfs is mounted at /sys/kernel/config. Wait a bit if needed.
+for IDX in $(seq 5); do
+ if [ "$IDX" -eq "5" ]; then
+ skip "configfs not mounted at /sys/kernel/config"
+ fi
+
+ mountpoint -q /sys/kernel/config && break
+ sleep 0.1
+done
+
+# If the module was already loaded: remove all previous chips
+agg_configfs_cleanup
+sim_configfs_cleanup
+
+trap "exit 1" SIGTERM SIGINT
+trap "agg_configfs_cleanup 1; sim_configfs_cleanup 1" EXIT
+
+# Use gpio-sim chips as the test backend
+for CHIP in $(seq -f "chip%g" 0 1); do
+ mkdir $CONFIGFS_SIM_DIR/$CHIP
+ for BANK in $(seq -f "bank%g" 0 1); do
+ mkdir -p "$CONFIGFS_SIM_DIR/$CHIP/$BANK"
+ echo "${CHIP}_${BANK}" > "$CONFIGFS_SIM_DIR/$CHIP/$BANK/label" || \
+ fail "unable to set the chip label"
+ echo 16 > "$CONFIGFS_SIM_DIR/$CHIP/$BANK/num_lines" || \
+ fail "unable to set the number of lines"
+ for IDX in $(seq 0 15); do
+ LINE_NAME="${CHIP}${BANK}_${IDX}"
+ LINE_DIR="$CONFIGFS_SIM_DIR/$CHIP/$BANK/line$IDX"
+ mkdir -p $LINE_DIR
+ echo "$LINE_NAME" > "$LINE_DIR/name" || fail "unable to set the line name"
+ done
+ done
+ sim_enable_chip "$CHIP"
+done
+
+echo "1. GPIO aggregator creation/deletion"
+
+echo "1.1. Creation/deletion via configfs"
+
+echo "1.1.1. Minimum creation/deletion"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+agg_set_offset agg0 line0 5
+agg_set_line_name agg0 line0 test0
+agg_enable_chip agg0
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label agg0)" = "$(agg_configfs_dev_name agg0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines agg0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name agg0 0)" = "test0" || fail "line name is unset"
+agg_disable_chip agg0
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.2. Complex creation/deletion"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_create_line agg0 line1
+agg_create_line agg0 line2
+agg_create_line agg0 line3
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+agg_set_key agg0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key agg0 line2 "$(sim_get_chip_label chip1 bank0)"
+agg_set_key agg0 line3 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset agg0 line0 1
+agg_set_offset agg0 line1 3
+agg_set_offset agg0 line2 5
+agg_set_offset agg0 line3 7
+agg_set_line_name agg0 line0 test0
+agg_set_line_name agg0 line1 test1
+agg_set_line_name agg0 line2 test2
+agg_set_line_name agg0 line3 test3
+agg_enable_chip agg0
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label agg0)" = "$(agg_configfs_dev_name agg0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines agg0)" = "4" || fail "number of lines is not 1"
+test "$(agg_get_line_name agg0 0)" = "test0" || fail "line name is unset"
+test "$(agg_get_line_name agg0 1)" = "test1" || fail "line name is unset"
+test "$(agg_get_line_name agg0 2)" = "test2" || fail "line name is unset"
+test "$(agg_get_line_name agg0 3)" = "test3" || fail "line name is unset"
+agg_disable_chip agg0
+agg_remove_line agg0 line0
+agg_remove_line agg0 line1
+agg_remove_line agg0 line2
+agg_remove_line agg0 line3
+agg_remove_chip agg0
+
+echo "1.1.3. Can't instantiate a chip without any line"
+agg_create_chip agg0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+agg_remove_chip agg0
+
+echo "1.1.4. Can't instantiate a chip with invalid configuration"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "chipX_bankX"
+agg_set_offset agg0 line0 99
+agg_set_line_name agg0 line0 test0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.5. Can't instantiate a chip asynchronously via deferred probe"
+agg_create_chip agg0
+agg_create_line agg0 line0
+agg_set_key agg0 line0 "chip0_bank0"
+agg_set_offset agg0 line0 5
+agg_set_line_name agg0 line0 test0
+sim_disable_chip chip0
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive"
+sim_enable_chip chip0
+sleep 1
+test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || \
+ fail "chip unexpectedly transitioned to 'live' state"
+agg_remove_line agg0 line0
+agg_remove_chip agg0
+
+echo "1.1.6. Can't instantiate a chip with _sysfs prefix"
+mkdir "$CONFIGFS_AGG_DIR/_sysfs" 2> /dev/null && fail "chip _sysfs unexpectedly created"
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.foo" 2> /dev/null && fail "chip _sysfs.foo unexpectedly created"
+
+echo "1.2. Creation/deletion via sysfs"
+
+echo "1.2.1. Minimum creation/deletion"
+echo "chip0_bank0 0" > "$SYSFS_AGG_DIR/new_device"
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name is unset"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.2. Complex creation/deletion"
+echo "chip0bank0_0 chip1_bank1 10-11" > "$SYSFS_AGG_DIR/new_device"
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "3" || fail "number of lines is not 3"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 1)" = "" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 2)" = "" || fail "line name is unset"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.3. Asynchronous creation with deferred probe"
+sim_disable_chip chip0
+echo 'chip0_bank0 0' > $SYSFS_AGG_DIR/new_device
+sleep 1
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 0 || fail "chip unexpectedly alive"
+sim_enable_chip chip0
+sleep 1
+CHIPNAME=$(agg_configfs_chip_name _sysfs.0)
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly remains dead"
+test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \
+ fail "label is inconsistent"
+test "$(agg_get_chip_num_lines _sysfs.0)" = "1" || fail "number of lines is not 1"
+test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name unexpectedly set"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains"
+test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains"
+
+echo "1.2.4. Can't instantiate a chip with invalid configuration"
+echo "xyz 0" > "$SYSFS_AGG_DIR/new_device"
+test "$(cat $CONFIGFS_AGG_DIR/_sysfs.0/live)" = 0 || fail "chip unexpectedly alive"
+echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+
+echo "2. GPIO aggregator configuration"
+
+echo "2.1. Configuring aggregators instantiated via configfs"
+setup_2_1() {
+ agg_create_chip agg0
+ agg_create_line agg0 line0
+ agg_create_line agg0 line1
+ agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)"
+ agg_set_key agg0 line1 "$(sim_get_chip_label chip1 bank0)"
+ agg_set_offset agg0 line0 1
+ agg_set_offset agg0 line1 3
+ agg_set_line_name agg0 line0 test0
+ agg_set_line_name agg0 line1 test1
+ agg_enable_chip agg0
+}
+teardown_2_1() {
+ agg_configfs_cleanup
+}
+
+echo "2.1.1. While offline"
+
+echo "2.1.1.1. Line can be added/removed"
+setup_2_1
+agg_disable_chip agg0
+agg_create_line agg0 line2
+agg_set_key agg0 line2 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line2 5
+agg_enable_chip agg0
+test "$(agg_get_chip_num_lines agg0)" = "3" || fail "number of lines is not 1"
+teardown_2_1
+
+echo "2.1.1.2. Line key can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key agg0 line1 "$(sim_get_chip_label chip1 bank1)"
+agg_enable_chip agg0
+teardown_2_1
+
+echo "2.1.1.3. Line name can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_line_name agg0 line0 new0
+agg_set_line_name agg0 line1 new1
+agg_enable_chip agg0
+test "$(agg_get_line_name agg0 0)" = "new0" || fail "line name is unset"
+test "$(agg_get_line_name agg0 1)" = "new1" || fail "line name is unset"
+teardown_2_1
+
+echo "2.1.1.4. Line offset can be modified"
+setup_2_1
+agg_disable_chip agg0
+agg_set_offset agg0 line0 5
+agg_set_offset agg0 line1 7
+agg_enable_chip agg0
+teardown_2_1
+
+echo "2.1.1.5. Can re-enable a chip after valid reconfiguration"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset agg0 line0 15
+agg_set_key agg0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line0 14
+agg_create_line agg0 line2
+agg_set_key agg0 line2 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset agg0 line2 13
+agg_enable_chip agg0
+test "$(agg_get_chip_num_lines agg0)" = "3" || fail "number of lines is not 1"
+teardown_2_1
+
+echo "2.1.1.7. Can't re-enable a chip with invalid reconfiguration"
+setup_2_1
+agg_disable_chip agg0
+agg_set_key agg0 line0 invalidkey
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_1
+setup_2_1
+agg_disable_chip agg0
+agg_set_offset agg0 line0 99
+echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_1
+
+echo "2.1.2. While online"
+
+echo "2.1.2.1. Can't add/remove line"
+setup_2_1
+mkdir "$CONFIGFS_AGG_DIR/agg0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/agg0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_1
+
+echo "2.1.2.2. Can't modify line key"
+setup_2_1
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/agg0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_1
+
+echo "2.1.2.3. Can't modify line name"
+setup_2_1
+echo "new0" > "$CONFIGFS_AGG_DIR/agg0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_1
+
+echo "2.1.2.4. Can't modify line offset"
+setup_2_1
+echo "5" > "$CONFIGFS_AGG_DIR/agg0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_1
+
+echo "2.2. Configuring aggregators instantiated via sysfs"
+setup_2_2() {
+ echo "chip0_bank0 1 chip1_bank0 3" > "$SYSFS_AGG_DIR/new_device"
+}
+teardown_2_2() {
+ echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device"
+}
+
+echo "2.2.1. While online"
+
+echo "2.2.1.1. Can toggle live"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.1.2. Can't add/remove line"
+setup_2_2
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+
+echo "2.2.1.3. Can't modify line key"
+setup_2_2
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_2
+
+echo "2.2.1.4. Can't modify line name"
+setup_2_2
+echo "new0" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_2
+
+echo "2.2.1.5. Can't modify line offset"
+setup_2_2
+echo "5" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_2
+
+echo "2.2.2. While waiting for deferred probe"
+
+echo "2.2.2.1. Can't add/remove line despite live = 0"
+sim_disable_chip chip0
+setup_2_2
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.2. Can't modify line key"
+sim_disable_chip chip0
+setup_2_2
+echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/key" 2> /dev/null && \
+ fail "lookup key unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.3. Can't modify line name"
+sim_disable_chip chip0
+setup_2_2
+echo "new0" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/name" 2> /dev/null && \
+ fail "name unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.4. Can't modify line offset"
+sim_disable_chip chip0
+setup_2_2
+echo 5 > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/offset" 2> /dev/null && \
+ fail "offset unexpectedly updated"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.2.5. Can't toggle live"
+sim_disable_chip chip0
+setup_2_2
+test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 0 || fail "chip unexpectedly alive"
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+sim_enable_chip chip0
+
+echo "2.2.3. While offline"
+
+echo "2.2.3.1. Can't add/remove line despite live = 0"
+setup_2_2
+agg_disable_chip _sysfs.0
+mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added"
+rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed"
+teardown_2_2
+
+echo "2.2.3.2. Line key can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 "$(sim_get_chip_label chip0 bank1)"
+agg_set_key _sysfs.0 line1 "$(sim_get_chip_label chip1 bank1)"
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.3. Line name can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_line_name _sysfs.0 line0 new0
+agg_set_line_name _sysfs.0 line1 new1
+agg_enable_chip _sysfs.0
+test "$(agg_get_line_name _sysfs.0 0)" = "new0" || fail "line name is unset"
+test "$(agg_get_line_name _sysfs.0 1)" = "new1" || fail "line name is unset"
+teardown_2_2
+
+echo "2.2.3.4. Line offset can be modified"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_offset _sysfs.0 line0 5
+agg_set_offset _sysfs.0 line1 7
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.5. Can re-enable a chip with valid reconfiguration"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 "$(sim_get_chip_label chip1 bank1)"
+agg_set_offset _sysfs.0 line0 15
+agg_set_key _sysfs.0 line1 "$(sim_get_chip_label chip0 bank1)"
+agg_set_offset _sysfs.0 line0 14
+agg_enable_chip _sysfs.0
+teardown_2_2
+
+echo "2.2.3.6. Can't re-enable a chip with invalid reconfiguration"
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_key _sysfs.0 line0 invalidkey
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+setup_2_2
+agg_disable_chip _sysfs.0
+agg_set_offset _sysfs.0 line0 99
+echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled"
+teardown_2_2
+
+echo "3. Module unload"
+
+echo "3.1. Can't unload module if there is at least one device created via configfs"
+agg_create_chip agg0
+modprobe -r gpio-aggregator 2> /dev/null
+test -d /sys/module/gpio_aggregator || fail "module unexpectedly unloaded"
+agg_remove_chip agg0
+
+echo "3.2. Can unload module if there is no device created via configfs"
+echo "chip0_bank0 1 chip1_bank0 3" > "$SYSFS_AGG_DIR/new_device"
+modprobe -r gpio-aggregator 2> /dev/null
+test -d /sys/module/gpio_aggregator && fail "module unexpectedly remains to be loaded"
+modprobe gpio-aggregator 2> /dev/null
+
+echo "4. GPIO forwarder functional"
+SETTINGS="chip0:bank0:2 chip0:bank1:4 chip1:bank0:6 chip1:bank1:8"
+setup_4() {
+ local OFFSET=0
+ agg_create_chip agg0
+ for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ agg_create_line agg0 "line${OFFSET}"
+ agg_set_key agg0 "line${OFFSET}" "$(sim_get_chip_label "$CHIP" "$BANK")"
+ agg_set_offset agg0 "line${OFFSET}" "$LINE"
+ OFFSET=$(expr $OFFSET + 1)
+ done
+ agg_enable_chip agg0
+}
+teardown_4() {
+ agg_configfs_cleanup
+}
+
+echo "4.1. Forwarding set values"
+setup_4
+OFFSET=0
+for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ DEVNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/dev_name")
+ CHIPNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name")
+ VAL_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio${LINE}/value"
+ test $(cat $VAL_PATH) = "0" || fail "incorrect value read from sysfs"
+ $BASE_DIR/gpio-mockup-cdev -s 1 "/dev/$(agg_configfs_chip_name agg0)" "$OFFSET" &
+ mock_pid=$!
+ sleep 0.1 # FIXME Any better way?
+ test "$(cat $VAL_PATH)" = "1" || fail "incorrect value read from sysfs"
+ kill "$mock_pid"
+ OFFSET=$(expr $OFFSET + 1)
+done
+teardown_4
+
+echo "4.2. Forwarding set config"
+setup_4
+OFFSET=0
+for SETTING in $SETTINGS; do
+ CHIP=$(echo "$SETTING" | cut -d: -f1)
+ BANK=$(echo "$SETTING" | cut -d: -f2)
+ LINE=$(echo "$SETTING" | cut -d: -f3)
+ DEVNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/dev_name")
+ CHIPNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name")
+ VAL_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio${LINE}/value"
+ $BASE_DIR/gpio-mockup-cdev -b pull-up "/dev/$(agg_configfs_chip_name agg0)" "$OFFSET"
+ test $(cat "$VAL_PATH") = "1" || fail "incorrect value read from sysfs"
+ OFFSET=$(expr $OFFSET + 1)
+done
+teardown_4
+
+echo "5. Race condition verification"
+
+echo "5.1. Stress test of new_device/delete_device and module load/unload"
+for _ in $(seq 1000); do
+ {
+ echo "dummy 0" > "$SYSFS_AGG_DIR/new_device"
+ cat "$CONFIGFS_AGG_DIR/_sysfs.0/dev_name" > "$SYSFS_AGG_DIR/delete_device"
+ } 2> /dev/null
+done &
+writer_pid=$!
+while kill -0 "$writer_pid" 2> /dev/null; do
+ {
+ modprobe gpio-aggregator
+ modprobe -r gpio-aggregator
+ } 2> /dev/null
+done
+
+echo "GPIO $MODULE test PASS"
diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile
index 67fe7a46cb62..e3000ccb9a5d 100644
--- a/tools/testing/selftests/kexec/Makefile
+++ b/tools/testing/selftests/kexec/Makefile
@@ -8,6 +8,13 @@ ifeq ($(ARCH_PROCESSED),$(filter $(ARCH_PROCESSED),x86 ppc64le))
TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh
TEST_FILES := kexec_common_lib.sh
+include ../../../scripts/Makefile.arch
+
+ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86)
+TEST_PROGS += test_kexec_jump.sh
+test_kexec_jump.sh: $(OUTPUT)/test_kexec_jump
+endif
+
include ../lib.mk
endif
diff --git a/tools/testing/selftests/kexec/test_kexec_jump.c b/tools/testing/selftests/kexec/test_kexec_jump.c
new file mode 100644
index 000000000000..fbce287866f5
--- /dev/null
+++ b/tools/testing/selftests/kexec/test_kexec_jump.c
@@ -0,0 +1,72 @@
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <sys/reboot.h>
+#include <sys/syscall.h>
+
+asm(
+ " .code64\n"
+ " .data\n"
+ "purgatory_start:\n"
+
+ // Trigger kexec debug exception handling
+ " int3\n"
+
+ // Set load address for next time
+ " leaq purgatory_start_b(%rip), %r11\n"
+ " movq %r11, 8(%rsp)\n"
+
+ // Back to Linux
+ " ret\n"
+
+ // Same again
+ "purgatory_start_b:\n"
+
+ // Trigger kexec debug exception handling
+ " int3\n"
+
+ // Set load address for next time
+ " leaq purgatory_start(%rip), %r11\n"
+ " movq %r11, 8(%rsp)\n"
+
+ // Back to Linux
+ " ret\n"
+
+ "purgatory_end:\n"
+ ".previous"
+);
+extern char purgatory_start[], purgatory_end[];
+
+int main (void)
+{
+ struct kexec_segment segment = {};
+ int ret;
+
+ segment.buf = purgatory_start;
+ segment.bufsz = purgatory_end - purgatory_start;
+ segment.mem = (void *)0x400000;
+ segment.memsz = 0x1000;
+ ret = syscall(__NR_kexec_load, 0x400000, 1, &segment, KEXEC_PRESERVE_CONTEXT);
+ if (ret) {
+ perror("kexec_load");
+ exit(1);
+ }
+
+ ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+ if (ret) {
+ perror("kexec reboot");
+ exit(1);
+ }
+
+ ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+ if (ret) {
+ perror("kexec reboot");
+ exit(1);
+ }
+ printf("Success\n");
+ return 0;
+}
+
diff --git a/tools/testing/selftests/kexec/test_kexec_jump.sh b/tools/testing/selftests/kexec/test_kexec_jump.sh
new file mode 100755
index 000000000000..6ae977054ba2
--- /dev/null
+++ b/tools/testing/selftests/kexec/test_kexec_jump.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Prevent loading a kernel image via the kexec_load syscall when
+# signatures are required. (Dependent on CONFIG_IMA_ARCH_POLICY.)
+
+TEST="$0"
+. ./kexec_common_lib.sh
+
+# kexec requires root privileges
+require_root_privileges
+
+# get the kernel config
+get_kconfig
+
+kconfig_enabled "CONFIG_KEXEC_JUMP=y" "kexec_jump is enabled"
+if [ $? -eq 0 ]; then
+ log_skip "kexec_jump is not enabled"
+fi
+
+kconfig_enabled "CONFIG_IMA_APPRAISE=y" "IMA enabled"
+ima_appraise=$?
+
+kconfig_enabled "CONFIG_IMA_ARCH_POLICY=y" \
+ "IMA architecture specific policy enabled"
+arch_policy=$?
+
+get_secureboot_mode
+secureboot=$?
+
+if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ]; then
+ log_skip "Secure boot and CONFIG_IMA_ARCH_POLICY are enabled"
+fi
+
+./test_kexec_jump
+if [ $? -eq 0 ]; then
+ log_pass "kexec_jump succeeded"
+else
+ # The more likely failure mode if anything went wrong is that the
+ # kernel just crashes. But if we get back here, sure, whine anyway.
+ log_fail "kexec_jump failed"
+fi
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 666c9fde76da..2925e47db995 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -56,6 +56,8 @@
#include <asm/types.h>
#include <ctype.h>
#include <errno.h>
+#include <linux/unistd.h>
+#include <poll.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -65,7 +67,6 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
-#include <setjmp.h>
#include "kselftest.h"
@@ -172,14 +173,11 @@
#define __TEST_IMPL(test_name, _signal) \
static void test_name(struct __test_metadata *_metadata); \
- static inline void wrapper_##test_name( \
+ static void wrapper_##test_name( \
struct __test_metadata *_metadata, \
- struct __fixture_variant_metadata *variant) \
+ struct __fixture_variant_metadata __attribute__((unused)) *variant) \
{ \
- _metadata->setup_completed = true; \
- if (setjmp(_metadata->env) == 0) \
- test_name(_metadata); \
- __test_check_assert(_metadata); \
+ test_name(_metadata); \
} \
static struct __test_metadata _##test_name##_object = \
{ .name = #test_name, \
@@ -258,7 +256,7 @@
* A bare "return;" statement may be used to return early.
*/
#define FIXTURE_SETUP(fixture_name) \
- void fixture_name##_setup( \
+ static void fixture_name##_setup( \
struct __test_metadata __attribute__((unused)) *_metadata, \
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
const FIXTURE_VARIANT(fixture_name) \
@@ -307,7 +305,7 @@
__FIXTURE_TEARDOWN(fixture_name)
#define __FIXTURE_TEARDOWN(fixture_name) \
- void fixture_name##_teardown( \
+ static void fixture_name##_teardown( \
struct __test_metadata __attribute__((unused)) *_metadata, \
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
const FIXTURE_VARIANT(fixture_name) \
@@ -401,7 +399,7 @@
struct __test_metadata *_metadata, \
FIXTURE_DATA(fixture_name) *self, \
const FIXTURE_VARIANT(fixture_name) *variant); \
- static inline void wrapper_##fixture_name##_##test_name( \
+ static void wrapper_##fixture_name##_##test_name( \
struct __test_metadata *_metadata, \
struct __fixture_variant_metadata *variant) \
{ \
@@ -410,9 +408,9 @@
pid_t child = 1; \
int status = 0; \
/* Makes sure there is only one teardown, even when child forks again. */ \
- bool *teardown = mmap(NULL, sizeof(*teardown), \
+ _metadata->no_teardown = mmap(NULL, sizeof(*_metadata->no_teardown), \
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
- *teardown = false; \
+ *_metadata->no_teardown = true; \
if (sizeof(*self) > 0) { \
if (fixture_name##_teardown_parent) { \
self = mmap(NULL, sizeof(*self), PROT_READ | PROT_WRITE, \
@@ -422,31 +420,26 @@
self = &self_private; \
} \
} \
- if (setjmp(_metadata->env) == 0) { \
- /* _metadata and potentially self are shared with all forks. */ \
- child = fork(); \
- if (child == 0) { \
- fixture_name##_setup(_metadata, self, variant->data); \
- /* Let setup failure terminate early. */ \
- if (_metadata->exit_code) \
- _exit(0); \
- _metadata->setup_completed = true; \
- fixture_name##_##test_name(_metadata, self, variant->data); \
- } else if (child < 0 || child != waitpid(child, &status, 0)) { \
- ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
- _metadata->exit_code = KSFT_FAIL; \
- } \
- } \
+ _metadata->variant = variant->data; \
+ _metadata->self = self; \
+ /* _metadata and potentially self are shared with all forks. */ \
+ child = fork(); \
if (child == 0) { \
- if (_metadata->setup_completed && !fixture_name##_teardown_parent && \
- __sync_bool_compare_and_swap(teardown, false, true)) \
- fixture_name##_teardown(_metadata, self, variant->data); \
+ fixture_name##_setup(_metadata, self, variant->data); \
+ /* Let setup failure terminate early. */ \
+ if (_metadata->exit_code) \
+ _exit(0); \
+ *_metadata->no_teardown = false; \
+ fixture_name##_##test_name(_metadata, self, variant->data); \
+ _metadata->teardown_fn(false, _metadata, self, variant->data); \
_exit(0); \
+ } else if (child < 0 || child != waitpid(child, &status, 0)) { \
+ ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
+ _metadata->exit_code = KSFT_FAIL; \
} \
- if (_metadata->setup_completed && fixture_name##_teardown_parent && \
- __sync_bool_compare_and_swap(teardown, false, true)) \
- fixture_name##_teardown(_metadata, self, variant->data); \
- munmap(teardown, sizeof(*teardown)); \
+ _metadata->teardown_fn(true, _metadata, self, variant->data); \
+ munmap(_metadata->no_teardown, sizeof(*_metadata->no_teardown)); \
+ _metadata->no_teardown = NULL; \
if (self && fixture_name##_teardown_parent) \
munmap(self, sizeof(*self)); \
if (WIFEXITED(status)) { \
@@ -456,7 +449,14 @@
/* Forward signal to __wait_for_test(). */ \
kill(getpid(), WTERMSIG(status)); \
} \
- __test_check_assert(_metadata); \
+ } \
+ static void wrapper_##fixture_name##_##test_name##_teardown( \
+ bool in_parent, struct __test_metadata *_metadata, \
+ void *self, const void *variant) \
+ { \
+ if (fixture_name##_teardown_parent == in_parent && \
+ !__atomic_test_and_set(_metadata->no_teardown, __ATOMIC_RELAXED)) \
+ fixture_name##_teardown(_metadata, self, variant); \
} \
static struct __test_metadata *_##fixture_name##_##test_name##_object; \
static void __attribute__((constructor)) \
@@ -467,6 +467,7 @@
object->name = #test_name; \
object->fn = &wrapper_##fixture_name##_##test_name; \
object->fixture = &_##fixture_name##_fixture_object; \
+ object->teardown_fn = &wrapper_##fixture_name##_##test_name##_teardown; \
object->termsig = signal; \
object->timeout = tmout; \
_##fixture_name##_##test_name##_object = object; \
@@ -910,14 +911,16 @@ struct __test_metadata {
struct __fixture_variant_metadata *);
pid_t pid; /* pid of test when being run */
struct __fixture_metadata *fixture;
+ void (*teardown_fn)(bool in_parent, struct __test_metadata *_metadata,
+ void *self, const void *variant);
int termsig;
int exit_code;
int trigger; /* extra handler after the evaluation */
int timeout; /* seconds to wait for test timeout */
- bool timed_out; /* did this test timeout instead of exiting? */
bool aborted; /* stopped test due to failed ASSERT */
- bool setup_completed; /* did setup finish? */
- jmp_buf env; /* for exiting out of test early */
+ bool *no_teardown; /* fixture needs teardown */
+ void *self;
+ const void *variant;
struct __test_results *results;
struct __test_metadata *prev, *next;
};
@@ -951,88 +954,60 @@ static inline int __bail(int for_realz, struct __test_metadata *t)
{
/* if this is ASSERT, return immediately. */
if (for_realz) {
- t->aborted = true;
- longjmp(t->env, 1);
+ if (t->teardown_fn)
+ t->teardown_fn(false, t, t->self, t->variant);
+ abort();
}
/* otherwise, end the for loop and continue. */
return 0;
}
-static inline void __test_check_assert(struct __test_metadata *t)
-{
- if (t->aborted)
- abort();
-}
-
-struct __test_metadata *__active_test;
-static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
-{
- struct __test_metadata *t = __active_test;
-
- /* Sanity check handler execution environment. */
- if (!t) {
- fprintf(TH_LOG_STREAM,
- "# no active test in SIGALRM handler!?\n");
- abort();
- }
- if (sig != SIGALRM || sig != info->si_signo) {
- fprintf(TH_LOG_STREAM,
- "# %s: SIGALRM handler caught signal %d!?\n",
- t->name, sig != SIGALRM ? sig : info->si_signo);
- abort();
- }
-
- t->timed_out = true;
- // signal process group
- kill(-(t->pid), SIGKILL);
-}
-
-void __wait_for_test(struct __test_metadata *t)
+static void __wait_for_test(struct __test_metadata *t)
{
- struct sigaction action = {
- .sa_sigaction = __timeout_handler,
- .sa_flags = SA_SIGINFO,
- };
- struct sigaction saved_action;
/*
* Sets status so that WIFEXITED(status) returns true and
* WEXITSTATUS(status) returns KSFT_FAIL. This safe default value
* should never be evaluated because of the waitpid(2) check and
- * SIGALRM handling.
+ * timeout handling.
*/
int status = KSFT_FAIL << 8;
- int child;
+ struct pollfd poll_child;
+ int ret, child, childfd;
+ bool timed_out = false;
- if (sigaction(SIGALRM, &action, &saved_action)) {
+ childfd = syscall(__NR_pidfd_open, t->pid, 0);
+ if (childfd == -1) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: unable to install SIGALRM handler\n",
+ "# %s: unable to open pidfd\n",
t->name);
return;
}
- __active_test = t;
- t->timed_out = false;
- alarm(t->timeout);
- child = waitpid(t->pid, &status, 0);
- if (child == -1 && errno != EINTR) {
+
+ poll_child.fd = childfd;
+ poll_child.events = POLLIN;
+ ret = poll(&poll_child, 1, t->timeout * 1000);
+ if (ret == -1) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: Failed to wait for PID %d (errno: %d)\n",
- t->name, t->pid, errno);
+ "# %s: unable to wait on child pidfd\n",
+ t->name);
return;
+ } else if (ret == 0) {
+ timed_out = true;
+ /* signal process group */
+ kill(-(t->pid), SIGKILL);
}
-
- alarm(0);
- if (sigaction(SIGALRM, &saved_action, NULL)) {
+ child = waitpid(t->pid, &status, WNOHANG);
+ if (child == -1 && errno != EINTR) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "# %s: unable to uninstall SIGALRM handler\n",
- t->name);
+ "# %s: Failed to wait for PID %d (errno: %d)\n",
+ t->name, t->pid, errno);
return;
}
- __active_test = NULL;
- if (t->timed_out) {
+ if (timed_out) {
t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test terminated by timeout\n", t->name);
@@ -1205,9 +1180,9 @@ static bool test_enabled(int argc, char **argv,
return !has_positive;
}
-void __run_test(struct __fixture_metadata *f,
- struct __fixture_variant_metadata *variant,
- struct __test_metadata *t)
+static void __run_test(struct __fixture_metadata *f,
+ struct __fixture_variant_metadata *variant,
+ struct __test_metadata *t)
{
struct __test_xfail *xfail;
char test_name[1024];
@@ -1218,8 +1193,7 @@ void __run_test(struct __fixture_metadata *f,
t->exit_code = KSFT_PASS;
t->trigger = 0;
t->aborted = false;
- t->setup_completed = false;
- memset(t->env, 0, sizeof(t->env));
+ t->no_teardown = NULL;
memset(t->results->reason, 0, sizeof(t->results->reason));
snprintf(test_name, sizeof(test_name), "%s%s%s.%s",
diff --git a/tools/testing/selftests/kselftest_harness/.gitignore b/tools/testing/selftests/kselftest_harness/.gitignore
new file mode 100644
index 000000000000..e4e476a333c9
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/.gitignore
@@ -0,0 +1,2 @@
+/harness-selftest
+/harness-selftest.seen
diff --git a/tools/testing/selftests/kselftest_harness/Makefile b/tools/testing/selftests/kselftest_harness/Makefile
new file mode 100644
index 000000000000..0617535a6ce4
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS_EXTENDED := harness-selftest
+TEST_PROGS := harness-selftest.sh
+EXTRA_CLEAN := harness-selftest.seen
+
+include ../lib.mk
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.c b/tools/testing/selftests/kselftest_harness/harness-selftest.c
new file mode 100644
index 000000000000..b555493bdb4d
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+
+#include <sys/resource.h>
+#include <sys/prctl.h>
+
+/* Avoid any inconsistencies */
+#define TH_LOG_STREAM stdout
+
+#include "../kselftest_harness.h"
+
+static void test_helper(struct __test_metadata *_metadata)
+{
+ ASSERT_EQ(0, 0);
+}
+
+TEST(standalone_pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ EXPECT_EQ(0, 0);
+ test_helper(_metadata);
+ TH_LOG("after");
+}
+
+TEST(standalone_fail) {
+ TH_LOG("before");
+ EXPECT_EQ(0, 0);
+ EXPECT_EQ(0, 1);
+ ASSERT_EQ(0, 1);
+ TH_LOG("after");
+}
+
+TEST_SIGNAL(signal_pass, SIGUSR1) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+ kill(getpid(), SIGUSR1);
+}
+
+TEST_SIGNAL(signal_fail, SIGUSR1) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 1);
+ TH_LOG("after");
+ kill(getpid(), SIGUSR1);
+}
+
+FIXTURE(fixture) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+}
+
+FIXTURE_TEARDOWN(fixture) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ test_helper(_metadata);
+ standalone_pass(_metadata);
+ TH_LOG("after");
+}
+
+TEST_F(fixture, fail) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 1);
+ fixture_pass(_metadata, self, variant);
+ TH_LOG("after");
+}
+
+TEST_F_TIMEOUT(fixture, timeout, 1) {
+ TH_LOG("before");
+ sleep(2);
+ TH_LOG("after");
+}
+
+FIXTURE(fixture_parent) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture_parent) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+}
+
+FIXTURE_TEARDOWN_PARENT(fixture_parent) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture_parent, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+}
+
+FIXTURE(fixture_setup_failure) {
+ pid_t testpid;
+};
+
+FIXTURE_SETUP(fixture_setup_failure) {
+ TH_LOG("setup");
+ self->testpid = getpid();
+ ASSERT_EQ(0, 1);
+}
+
+FIXTURE_TEARDOWN(fixture_setup_failure) {
+ TH_LOG("teardown same-process=%d", self->testpid == getpid());
+}
+
+TEST_F(fixture_setup_failure, pass) {
+ TH_LOG("before");
+ ASSERT_EQ(0, 0);
+ TH_LOG("after");
+}
+
+int main(int argc, char **argv)
+{
+ /*
+ * The harness uses abort() to signal assertion failures, which triggers coredumps.
+ * This may be useful to debug real failures but not for this selftest, disable them.
+ */
+ struct rlimit rlimit = {
+ .rlim_cur = 0,
+ .rlim_max = 0,
+ };
+
+ prctl(PR_SET_DUMPABLE, 0, 0, 0, 0);
+ setrlimit(RLIMIT_CORE, &rlimit);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.expected b/tools/testing/selftests/kselftest_harness/harness-selftest.expected
new file mode 100644
index 000000000000..97e1418c1c7e
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.expected
@@ -0,0 +1,64 @@
+TAP version 13
+1..9
+# Starting 9 tests from 4 test cases.
+# RUN global.standalone_pass ...
+# harness-selftest.c:19:standalone_pass:before
+# harness-selftest.c:23:standalone_pass:after
+# OK global.standalone_pass
+ok 1 global.standalone_pass
+# RUN global.standalone_fail ...
+# harness-selftest.c:27:standalone_fail:before
+# harness-selftest.c:29:standalone_fail:Expected 0 (0) == 1 (1)
+# harness-selftest.c:30:standalone_fail:Expected 0 (0) == 1 (1)
+# standalone_fail: Test terminated by assertion
+# FAIL global.standalone_fail
+not ok 2 global.standalone_fail
+# RUN global.signal_pass ...
+# harness-selftest.c:35:signal_pass:before
+# harness-selftest.c:37:signal_pass:after
+# OK global.signal_pass
+ok 3 global.signal_pass
+# RUN global.signal_fail ...
+# harness-selftest.c:42:signal_fail:before
+# harness-selftest.c:43:signal_fail:Expected 0 (0) == 1 (1)
+# signal_fail: Test terminated by assertion
+# FAIL global.signal_fail
+not ok 4 global.signal_fail
+# RUN fixture.pass ...
+# harness-selftest.c:53:pass:setup
+# harness-selftest.c:62:pass:before
+# harness-selftest.c:19:pass:before
+# harness-selftest.c:23:pass:after
+# harness-selftest.c:66:pass:after
+# harness-selftest.c:58:pass:teardown same-process=1
+# OK fixture.pass
+ok 5 fixture.pass
+# RUN fixture.fail ...
+# harness-selftest.c:53:fail:setup
+# harness-selftest.c:70:fail:before
+# harness-selftest.c:71:fail:Expected 0 (0) == 1 (1)
+# harness-selftest.c:58:fail:teardown same-process=1
+# fail: Test terminated by assertion
+# FAIL fixture.fail
+not ok 6 fixture.fail
+# RUN fixture.timeout ...
+# harness-selftest.c:53:timeout:setup
+# harness-selftest.c:77:timeout:before
+# timeout: Test terminated by timeout
+# FAIL fixture.timeout
+not ok 7 fixture.timeout
+# RUN fixture_parent.pass ...
+# harness-selftest.c:87:pass:setup
+# harness-selftest.c:96:pass:before
+# harness-selftest.c:98:pass:after
+# harness-selftest.c:92:pass:teardown same-process=0
+# OK fixture_parent.pass
+ok 8 fixture_parent.pass
+# RUN fixture_setup_failure.pass ...
+# harness-selftest.c:106:pass:setup
+# harness-selftest.c:108:pass:Expected 0 (0) == 1 (1)
+# pass: Test terminated by assertion
+# FAIL fixture_setup_failure.pass
+not ok 9 fixture_setup_failure.pass
+# FAILED: 4 / 9 tests passed.
+# Totals: pass:4 fail:5 xfail:0 xpass:0 skip:0 error:0
diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.sh b/tools/testing/selftests/kselftest_harness/harness-selftest.sh
new file mode 100755
index 000000000000..fe72d16370fe
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness/harness-selftest.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Selftest for kselftest_harness.h
+#
+
+set -e
+
+DIR="$(dirname $(readlink -f "$0"))"
+
+"$DIR"/harness-selftest > harness-selftest.seen || true
+
+diff -u "$DIR"/harness-selftest.expected harness-selftest.seen
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 20af35a91d6f..d9fffe06d3ea 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -3,7 +3,7 @@ top_srcdir = ../../../..
include $(top_srcdir)/scripts/subarch.include
ARCH ?= $(SUBARCH)
-ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64))
+ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch))
# Top-level selftests allows ARCH=x86_64 :-(
ifeq ($(ARCH),x86_64)
ARCH := x86
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index f62b0a5aba35..3e786080473d 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -47,6 +47,10 @@ LIBKVM_riscv += lib/riscv/handlers.S
LIBKVM_riscv += lib/riscv/processor.c
LIBKVM_riscv += lib/riscv/ucall.c
+LIBKVM_loongarch += lib/loongarch/processor.c
+LIBKVM_loongarch += lib/loongarch/ucall.c
+LIBKVM_loongarch += lib/loongarch/exception.S
+
# Non-compiled test targets
TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
@@ -147,6 +151,7 @@ TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/host_sve
TEST_GEN_PROGS_arm64 += arm64/hypercalls
TEST_GEN_PROGS_arm64 += arm64/mmio_abort
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
@@ -190,6 +195,19 @@ TEST_GEN_PROGS_riscv += coalesced_io_test
TEST_GEN_PROGS_riscv += get-reg-list
TEST_GEN_PROGS_riscv += steal_time
+TEST_GEN_PROGS_loongarch += coalesced_io_test
+TEST_GEN_PROGS_loongarch += demand_paging_test
+TEST_GEN_PROGS_loongarch += dirty_log_perf_test
+TEST_GEN_PROGS_loongarch += dirty_log_test
+TEST_GEN_PROGS_loongarch += guest_print_test
+TEST_GEN_PROGS_loongarch += hardware_disable_test
+TEST_GEN_PROGS_loongarch += kvm_binary_stats_test
+TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus
+TEST_GEN_PROGS_loongarch += kvm_page_table_test
+TEST_GEN_PROGS_loongarch += memslot_modification_stress_test
+TEST_GEN_PROGS_loongarch += memslot_perf_test
+TEST_GEN_PROGS_loongarch += set_memory_region_test
+
SPLIT_TESTS += arch_timer
SPLIT_TESTS += get-reg-list
diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c
new file mode 100644
index 000000000000..3826772fd470
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/host_sve.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls.
+ *
+ * Copyright 2025 Arm, Ltd
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/auxv.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "ucall_common.h"
+
+static void guest_code(void)
+{
+ for (int i = 0; i < 10; i++) {
+ GUEST_UCALL_NONE();
+ }
+
+ GUEST_DONE();
+}
+
+void handle_sigill(int sig, siginfo_t *info, void *ctx)
+{
+ ucontext_t *uctx = ctx;
+
+ printf(" < host signal %d >\n", sig);
+
+ /*
+ * Skip the UDF
+ */
+ uctx->uc_mcontext.pc += 4;
+}
+
+void register_sigill_handler(void)
+{
+ struct sigaction sa = {
+ .sa_sigaction = handle_sigill,
+ .sa_flags = SA_SIGINFO,
+ };
+ sigaction(SIGILL, &sa, NULL);
+}
+
+static void do_sve_roundtrip(void)
+{
+ unsigned long before, after;
+
+ /*
+ * Set all bits in a predicate register, force a save/restore via a
+ * SIGILL (which handle_sigill() will recover from), then report
+ * whether the value has changed.
+ */
+ asm volatile(
+ " .arch_extension sve\n"
+ " ptrue p0.B\n"
+ " cntp %[before], p0, p0.B\n"
+ " udf #0\n"
+ " cntp %[after], p0, p0.B\n"
+ : [before] "=r" (before),
+ [after] "=r" (after)
+ :
+ : "p0"
+ );
+
+ if (before != after) {
+ TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n",
+ before, after);
+ } else {
+ printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n",
+ before, after);
+ }
+}
+
+static void test_run(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ register_sigill_handler();
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ do_sve_roundtrip();
+
+ while (!guest_done) {
+
+ printf("Running VCPU...\n");
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_NONE:
+ do_sve_roundtrip();
+ do_sve_roundtrip();
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ /*
+ * This is testing the host environment, we don't care about
+ * guest SVE support.
+ */
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+ printf("SVE not supported\n");
+ return KSFT_SKIP;
+ }
+
+ test_run();
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 57708de2075d..8f422bfdfcb9 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -15,6 +15,8 @@
#include "test_util.h"
#include <linux/bitfield.h>
+bool have_cap_arm_mte;
+
enum ftr_type {
FTR_EXACT, /* Use a predefined safe value */
FTR_LOWER_SAFE, /* Smaller value is safe */
@@ -543,6 +545,70 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
}
+#define MTE_IDREG_TEST 1
+static void test_user_set_mte_reg(struct kvm_vcpu *vcpu)
+{
+ uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ uint64_t val;
+ uint64_t mte;
+ uint64_t mte_frac;
+ int idx, err;
+
+ if (!have_cap_arm_mte) {
+ ksft_test_result_skip("MTE capability not supported, nothing to test\n");
+ return;
+ }
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
+ if ((masks[idx] & ID_AA64PFR1_EL1_MTE_frac_MASK) == ID_AA64PFR1_EL1_MTE_frac_MASK) {
+ ksft_test_result_skip("ID_AA64PFR1_EL1.MTE_frac is officially writable, nothing to test\n");
+ return;
+ }
+
+ /*
+ * When MTE is supported but MTE_ASYMM is not (ID_AA64PFR1_EL1.MTE == 2)
+ * ID_AA64PFR1_EL1.MTE_frac == 0xF indicates MTE_ASYNC is unsupported
+ * and MTE_frac == 0 indicates it is supported.
+ *
+ * As MTE_frac was previously unconditionally read as 0, check
+ * that the set to 0 succeeds but does not change MTE_frac
+ * from unsupported (0xF) to supported (0).
+ *
+ */
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+
+ mte = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), val);
+ mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val);
+ if (mte != ID_AA64PFR1_EL1_MTE_MTE2 ||
+ mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) {
+ ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n");
+ return;
+ }
+
+ /* Try to set MTE_frac=0. */
+ val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_frac_MASK, 0);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err) {
+ ksft_test_result_fail("ID_AA64PFR1_EL1.MTE_frac=0 was not accepted\n");
+ return;
+ }
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val);
+ if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI)
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n");
+}
+
static void test_guest_reg_read(struct kvm_vcpu *vcpu)
{
bool done = false;
@@ -673,6 +739,14 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
ksft_test_result_pass("%s\n", __func__);
}
+void kvm_arch_vm_post_create(struct kvm_vm *vm)
+{
+ if (vm_check_cap(vm, KVM_CAP_ARM_MTE)) {
+ vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0);
+ have_cap_arm_mte = true;
+ }
+}
+
int main(void)
{
struct kvm_vcpu *vcpu;
@@ -701,7 +775,7 @@ int main(void)
ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 +
- MPAM_IDREG_TEST;
+ MPAM_IDREG_TEST + MTE_IDREG_TEST;
ksft_set_plan(test_cnt);
@@ -709,6 +783,7 @@ int main(void)
test_vcpu_ftr_id_regs(vcpu);
test_vcpu_non_ftr_id_regs(vcpu);
test_user_set_mpam_reg(vcpu);
+ test_user_set_mte_reg(vcpu);
test_guest_reg_read(vcpu);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 373912464fb4..93013564428b 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -177,6 +177,7 @@ enum vm_guest_mode {
VM_MODE_P36V48_4K,
VM_MODE_P36V48_16K,
VM_MODE_P36V48_64K,
+ VM_MODE_P47V47_16K,
VM_MODE_P36V47_16K,
NUM_VM_MODES,
};
@@ -232,6 +233,11 @@ extern enum vm_guest_mode vm_mode_default;
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
+#elif defined(__loongarch__)
+#define VM_MODE_DEFAULT VM_MODE_P47V47_16K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
#endif
#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT)
diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
new file mode 100644
index 000000000000..e43a57d99b56
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h
new file mode 100644
index 000000000000..6427a3275e6a
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/processor.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#ifndef __ASSEMBLER__
+#include "ucall_common.h"
+
+#else
+/* general registers */
+#define zero $r0
+#define ra $r1
+#define tp $r2
+#define sp $r3
+#define a0 $r4
+#define a1 $r5
+#define a2 $r6
+#define a3 $r7
+#define a4 $r8
+#define a5 $r9
+#define a6 $r10
+#define a7 $r11
+#define t0 $r12
+#define t1 $r13
+#define t2 $r14
+#define t3 $r15
+#define t4 $r16
+#define t5 $r17
+#define t6 $r18
+#define t7 $r19
+#define t8 $r20
+#define u0 $r21
+#define fp $r22
+#define s0 $r23
+#define s1 $r24
+#define s2 $r25
+#define s3 $r26
+#define s4 $r27
+#define s5 $r28
+#define s6 $r29
+#define s7 $r30
+#define s8 $r31
+#endif
+
+/*
+ * LoongArch page table entry definition
+ * Original header file arch/loongarch/include/asm/loongarch.h
+ */
+#define _PAGE_VALID_SHIFT 0
+#define _PAGE_DIRTY_SHIFT 1
+#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */
+#define PLV_KERN 0
+#define PLV_USER 3
+#define PLV_MASK 0x3
+#define _CACHE_SHIFT 4 /* 4~5, two bits */
+#define _PAGE_PRESENT_SHIFT 7
+#define _PAGE_WRITE_SHIFT 8
+
+#define _PAGE_VALID BIT_ULL(_PAGE_VALID_SHIFT)
+#define _PAGE_PRESENT BIT_ULL(_PAGE_PRESENT_SHIFT)
+#define _PAGE_WRITE BIT_ULL(_PAGE_WRITE_SHIFT)
+#define _PAGE_DIRTY BIT_ULL(_PAGE_DIRTY_SHIFT)
+#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT)
+#define __READABLE (_PAGE_VALID)
+#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE)
+/* Coherent Cached */
+#define _CACHE_CC BIT_ULL(_CACHE_SHIFT)
+#define PS_4K 0x0000000c
+#define PS_16K 0x0000000e
+#define PS_64K 0x00000010
+#define PS_DEFAULT_SIZE PS_16K
+
+/* LoongArch Basic CSR registers */
+#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */
+#define CSR_CRMD_PG_SHIFT 4
+#define CSR_CRMD_PG BIT_ULL(CSR_CRMD_PG_SHIFT)
+#define CSR_CRMD_IE_SHIFT 2
+#define CSR_CRMD_IE BIT_ULL(CSR_CRMD_IE_SHIFT)
+#define CSR_CRMD_PLV_SHIFT 0
+#define CSR_CRMD_PLV_WIDTH 2
+#define CSR_CRMD_PLV (0x3UL << CSR_CRMD_PLV_SHIFT)
+#define PLV_MASK 0x3
+#define LOONGARCH_CSR_PRMD 0x1
+#define LOONGARCH_CSR_EUEN 0x2
+#define LOONGARCH_CSR_ECFG 0x4
+#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */
+#define LOONGARCH_CSR_ERA 0x6 /* ERA */
+#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */
+#define LOONGARCH_CSR_EENTRY 0xc
+#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize */
+#define CSR_TLBIDX_PS_SHIFT 24
+#define CSR_TLBIDX_PS_WIDTH 6
+#define CSR_TLBIDX_PS (0x3fUL << CSR_TLBIDX_PS_SHIFT)
+#define CSR_TLBIDX_SIZEM 0x3f000000
+#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT
+#define LOONGARCH_CSR_ASID 0x18 /* ASID */
+#define LOONGARCH_CSR_PGDL 0x19
+#define LOONGARCH_CSR_PGDH 0x1a
+/* Page table base */
+#define LOONGARCH_CSR_PGD 0x1b
+#define LOONGARCH_CSR_PWCTL0 0x1c
+#define LOONGARCH_CSR_PWCTL1 0x1d
+#define LOONGARCH_CSR_STLBPGSIZE 0x1e
+#define LOONGARCH_CSR_CPUID 0x20
+#define LOONGARCH_CSR_KS0 0x30
+#define LOONGARCH_CSR_KS1 0x31
+#define LOONGARCH_CSR_TMID 0x40
+#define LOONGARCH_CSR_TCFG 0x41
+/* TLB refill exception entry */
+#define LOONGARCH_CSR_TLBRENTRY 0x88
+#define LOONGARCH_CSR_TLBRSAVE 0x8b
+#define LOONGARCH_CSR_TLBREHI 0x8e
+#define CSR_TLBREHI_PS_SHIFT 0
+#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT)
+
+#define EXREGS_GPRS (32)
+
+#ifndef __ASSEMBLER__
+void handle_tlb_refill(void);
+void handle_exception(void);
+
+struct ex_regs {
+ unsigned long regs[EXREGS_GPRS];
+ unsigned long pc;
+ unsigned long estat;
+ unsigned long badv;
+};
+
+#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc)
+#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat)
+#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv)
+#define EXREGS_SIZE sizeof(struct ex_regs)
+
+#else
+#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8)
+#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8)
+#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8)
+#define EXREGS_SIZE ((EXREGS_GPRS + 3) * 8)
+#endif
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h
new file mode 100644
index 000000000000..4ec801f37f00
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index 5f389166338c..162f303d9daa 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -11,6 +11,19 @@
#include <asm/csr.h>
#include "kvm_util.h"
+#define INSN_OPCODE_MASK 0x007c
+#define INSN_OPCODE_SHIFT 2
+#define INSN_OPCODE_SYSTEM 28
+
+#define INSN_MASK_FUNCT3 0x7000
+#define INSN_SHIFT_FUNCT3 12
+
+#define INSN_CSR_MASK 0xfff00000
+#define INSN_CSR_SHIFT 20
+
+#define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3)
+#define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT)
+
static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
uint64_t idx, uint64_t size)
{
@@ -60,7 +73,8 @@ static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext)
return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext));
}
-struct ex_regs {
+struct pt_regs {
+ unsigned long epc;
unsigned long ra;
unsigned long sp;
unsigned long gp;
@@ -92,16 +106,19 @@ struct ex_regs {
unsigned long t4;
unsigned long t5;
unsigned long t6;
- unsigned long epc;
+ /* Supervisor/Machine CSRs */
unsigned long status;
+ unsigned long badaddr;
unsigned long cause;
+ /* a0 value before the syscall */
+ unsigned long orig_a0;
};
#define NR_VECTORS 2
#define NR_EXCEPTIONS 32
#define EC_MASK (NR_EXCEPTIONS - 1)
-typedef void(*exception_handler_fn)(struct ex_regs *);
+typedef void(*exception_handler_fn)(struct pt_regs *);
void vm_init_vector_tables(struct kvm_vm *vm);
void vcpu_init_vector_tables(struct kvm_vcpu *vcpu);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 815bc45dd8dc..5649cf2f40e8 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -222,6 +222,7 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
[VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
[VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
+ [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages",
[VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
@@ -248,6 +249,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
[VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
[VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
[VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
+ [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 },
[VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
@@ -319,6 +321,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
case VM_MODE_P36V48_16K:
vm->pgtable_levels = 4;
break;
+ case VM_MODE_P47V47_16K:
case VM_MODE_P36V47_16K:
vm->pgtable_levels = 3;
break;
diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S
new file mode 100644
index 000000000000..88bfa505c6f5
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "processor.h"
+
+/* address of refill exception should be 4K aligned */
+.balign 4096
+.global handle_tlb_refill
+handle_tlb_refill:
+ csrwr t0, LOONGARCH_CSR_TLBRSAVE
+ csrrd t0, LOONGARCH_CSR_PGD
+ lddir t0, t0, 3
+ lddir t0, t0, 1
+ ldpte t0, 0
+ ldpte t0, 1
+ tlbfill
+ csrrd t0, LOONGARCH_CSR_TLBRSAVE
+ ertn
+
+ /*
+ * save and restore all gprs except base register,
+ * and default value of base register is sp ($r3).
+ */
+.macro save_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ st.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+.macro restore_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ ld.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+/* address of general exception should be 4K aligned */
+.balign 4096
+.global handle_exception
+handle_exception:
+ csrwr sp, LOONGARCH_CSR_KS0
+ csrrd sp, LOONGARCH_CSR_KS1
+ addi.d sp, sp, -EXREGS_SIZE
+
+ save_gprs sp
+ /* save sp register to stack */
+ csrrd t0, LOONGARCH_CSR_KS0
+ st.d t0, sp, 3 * 8
+
+ csrrd t0, LOONGARCH_CSR_ERA
+ st.d t0, sp, PC_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_ESTAT
+ st.d t0, sp, ESTAT_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_BADV
+ st.d t0, sp, BADV_OFFSET_EXREGS
+
+ or a0, sp, zero
+ bl route_exception
+ restore_gprs sp
+ csrrd sp, LOONGARCH_CSR_KS0
+ ertn
diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c
new file mode 100644
index 000000000000..0ac1abcb71cb
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <linux/compiler.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000
+#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000
+
+static vm_paddr_t invalid_pgtable[4];
+
+static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ unsigned int shift;
+ uint64_t mask;
+
+ shift = level * (vm->page_shift - 3) + vm->page_shift;
+ mask = (1UL << (vm->page_shift - 3)) - 1;
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ return entry & ~((0x1UL << vm->page_shift) - 1);
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return 1 << (vm->page_shift - 3);
+}
+
+static void virt_set_pgtable(struct kvm_vm *vm, vm_paddr_t table, vm_paddr_t child)
+{
+ uint64_t *ptep;
+ int i, ptrs_per_pte;
+
+ ptep = addr_gpa2hva(vm, table);
+ ptrs_per_pte = 1 << (vm->page_shift - 3);
+ for (i = 0; i < ptrs_per_pte; i++)
+ WRITE_ONCE(*(ptep + i), child);
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ int i;
+ vm_paddr_t child, table;
+
+ if (vm->pgd_created)
+ return;
+
+ child = table = 0;
+ for (i = 0; i < vm->pgtable_levels; i++) {
+ invalid_pgtable[i] = child;
+ table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN,
+ vm->memslots[MEM_REGION_PT]);
+ TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i);
+ virt_set_pgtable(vm, table, child);
+ child = table;
+ }
+ vm->pgd = table;
+ vm->pgd_created = true;
+}
+
+static int virt_pte_none(uint64_t *ptep, int level)
+{
+ return *ptep == invalid_pgtable[level];
+}
+
+static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc)
+{
+ int level;
+ uint64_t *ptep;
+ vm_paddr_t child;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ child = vm->pgd;
+ level = vm->pgtable_levels - 1;
+ while (level > 0) {
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ if (virt_pte_none(ptep, level)) {
+ if (alloc) {
+ child = vm_alloc_page_table(vm);
+ virt_set_pgtable(vm, child, invalid_pgtable[level - 1]);
+ WRITE_ONCE(*ptep, child);
+ } else
+ goto unmapped_gva;
+
+ } else
+ child = pte_addr(vm, *ptep);
+ level--;
+ }
+
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ return ptep;
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+ exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+
+ ptep = virt_populate_pte(vm, gva, 0);
+ TEST_ASSERT(*ptep != 0, "Virtual address vaddr: 0x%lx not mapped\n", gva);
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint32_t prot_bits;
+ uint64_t *ptep;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ "vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ "paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ "paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = virt_populate_pte(vm, vaddr, 1);
+ prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER;
+ WRITE_ONCE(*ptep, paddr | prot_bits);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+ uint64_t pte, *ptep;
+ static const char * const type[] = { "pte", "pmd", "pud", "pgd"};
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (virt_pte_none(ptep, level))
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n",
+ indent, "", type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--);
+ }
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level;
+
+ if (!vm->pgd_created)
+ return;
+
+ level = vm->pgtable_levels - 1;
+ pte_dump(stream, vm, indent, vm->pgd, level);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+ return;
+
+ TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)",
+ uc.args[0], uc.args[1], uc.args[2]);
+}
+
+void route_exception(struct ex_regs *regs)
+{
+ unsigned long pc, estat, badv;
+
+ pc = regs->pc;
+ badv = regs->badv;
+ estat = regs->estat;
+ ucall(UCALL_UNHANDLED, 3, pc, estat, badv);
+ while (1) ;
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ int i;
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ "num: %u\n", num);
+
+ vcpu_regs_get(vcpu, &regs);
+
+ va_start(ap, num);
+ for (i = 0; i < num; i++)
+ regs.gpr[i + 4] = va_arg(ap, uint64_t);
+ va_end(ap);
+
+ vcpu_regs_set(vcpu, &regs);
+}
+
+static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ uint64_t csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_get_reg(vcpu, csrid, addr);
+}
+
+static void loongarch_set_csr(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ uint64_t csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_set_reg(vcpu, csrid, val);
+}
+
+static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ int width;
+ unsigned long val;
+ struct kvm_vm *vm = vcpu->vm;
+
+ switch (vm->mode) {
+ case VM_MODE_P36V47_16K:
+ case VM_MODE_P47V47_16K:
+ break;
+
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ /* user mode and page enable mode */
+ val = PLV_USER | CSR_CRMD_PG;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1);
+
+ val = 0;
+ width = vm->page_shift - 3;
+
+ switch (vm->pgtable_levels) {
+ case 4:
+ /* pud page shift and width */
+ val = (vm->page_shift + width * 2) << 20 | (width << 25);
+ /* fall throuth */
+ case 3:
+ /* pmd page shift and width */
+ val |= (vm->page_shift + width) << 10 | (width << 15);
+ /* pte page shift and width */
+ val |= vm->page_shift | width << 5;
+ break;
+ default:
+ TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels);
+ }
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val);
+
+ /* PGD page shift and width */
+ val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd);
+
+ /*
+ * Refill exception runs on real mode
+ * Entry address should be physical address
+ */
+ val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val);
+
+ /*
+ * General exception runs on page-enabled mode
+ * Entry address should be virtual address
+ */
+ val = (unsigned long)handle_exception;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val);
+ val &= ~CSR_TLBIDX_SIZEM;
+ val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE);
+
+ /* LOONGARCH_CSR_KS1 is used for exception stack */
+ val = __vm_vaddr_alloc(vm, vm->page_size,
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+ TEST_ASSERT(val != 0, "No memory for exception stack");
+ val = val + vm->page_size;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val);
+ val &= ~CSR_TLBREHI_PS;
+ val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ size_t stack_size;
+ uint64_t stack_vaddr;
+ struct kvm_regs regs;
+ struct kvm_vcpu *vcpu;
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ stack_size = vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+ TEST_ASSERT(stack_vaddr != 0, "No memory for vm stack");
+
+ loongarch_vcpu_setup(vcpu);
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vcpu, &regs);
+ regs.gpr[3] = stack_vaddr + stack_size;
+ vcpu_regs_set(vcpu, &regs);
+
+ return vcpu;
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ struct kvm_regs regs;
+
+ /* Setup guest PC register */
+ vcpu_regs_get(vcpu, &regs);
+ regs.pc = (uint64_t)guest_code;
+ vcpu_regs_set(vcpu, &regs);
+}
diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
new file mode 100644
index 000000000000..fc6cbb50573f
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ */
+#include "kvm_util.h"
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+vm_vaddr_t *ucall_exit_mmio_addr;
+
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+ vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+
+ virt_map(vm, mmio_gva, mmio_gpa, 1);
+
+ vm->ucall_mmio_addr = mmio_gpa;
+
+ write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_MMIO &&
+ run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
+ "Unexpected ucall exit mmio address access");
+
+ return (void *)(*((uint64_t *)run->mmio.data));
+ }
+
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S
index aa0abd3f35bb..b787b982e922 100644
--- a/tools/testing/selftests/kvm/lib/riscv/handlers.S
+++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S
@@ -10,85 +10,88 @@
#include <asm/csr.h>
.macro save_context
- addi sp, sp, (-8*34)
- sd x1, 0(sp)
- sd x2, 8(sp)
- sd x3, 16(sp)
- sd x4, 24(sp)
- sd x5, 32(sp)
- sd x6, 40(sp)
- sd x7, 48(sp)
- sd x8, 56(sp)
- sd x9, 64(sp)
- sd x10, 72(sp)
- sd x11, 80(sp)
- sd x12, 88(sp)
- sd x13, 96(sp)
- sd x14, 104(sp)
- sd x15, 112(sp)
- sd x16, 120(sp)
- sd x17, 128(sp)
- sd x18, 136(sp)
- sd x19, 144(sp)
- sd x20, 152(sp)
- sd x21, 160(sp)
- sd x22, 168(sp)
- sd x23, 176(sp)
- sd x24, 184(sp)
- sd x25, 192(sp)
- sd x26, 200(sp)
- sd x27, 208(sp)
- sd x28, 216(sp)
- sd x29, 224(sp)
- sd x30, 232(sp)
- sd x31, 240(sp)
+ addi sp, sp, (-8*36)
+ sd x1, 8(sp)
+ sd x2, 16(sp)
+ sd x3, 24(sp)
+ sd x4, 32(sp)
+ sd x5, 40(sp)
+ sd x6, 48(sp)
+ sd x7, 56(sp)
+ sd x8, 64(sp)
+ sd x9, 72(sp)
+ sd x10, 80(sp)
+ sd x11, 88(sp)
+ sd x12, 96(sp)
+ sd x13, 104(sp)
+ sd x14, 112(sp)
+ sd x15, 120(sp)
+ sd x16, 128(sp)
+ sd x17, 136(sp)
+ sd x18, 144(sp)
+ sd x19, 152(sp)
+ sd x20, 160(sp)
+ sd x21, 168(sp)
+ sd x22, 176(sp)
+ sd x23, 184(sp)
+ sd x24, 192(sp)
+ sd x25, 200(sp)
+ sd x26, 208(sp)
+ sd x27, 216(sp)
+ sd x28, 224(sp)
+ sd x29, 232(sp)
+ sd x30, 240(sp)
+ sd x31, 248(sp)
csrr s0, CSR_SEPC
csrr s1, CSR_SSTATUS
- csrr s2, CSR_SCAUSE
- sd s0, 248(sp)
+ csrr s2, CSR_STVAL
+ csrr s3, CSR_SCAUSE
+ sd s0, 0(sp)
sd s1, 256(sp)
sd s2, 264(sp)
+ sd s3, 272(sp)
.endm
.macro restore_context
+ ld s3, 272(sp)
ld s2, 264(sp)
ld s1, 256(sp)
- ld s0, 248(sp)
- csrw CSR_SCAUSE, s2
+ ld s0, 0(sp)
+ csrw CSR_SCAUSE, s3
csrw CSR_SSTATUS, s1
csrw CSR_SEPC, s0
- ld x31, 240(sp)
- ld x30, 232(sp)
- ld x29, 224(sp)
- ld x28, 216(sp)
- ld x27, 208(sp)
- ld x26, 200(sp)
- ld x25, 192(sp)
- ld x24, 184(sp)
- ld x23, 176(sp)
- ld x22, 168(sp)
- ld x21, 160(sp)
- ld x20, 152(sp)
- ld x19, 144(sp)
- ld x18, 136(sp)
- ld x17, 128(sp)
- ld x16, 120(sp)
- ld x15, 112(sp)
- ld x14, 104(sp)
- ld x13, 96(sp)
- ld x12, 88(sp)
- ld x11, 80(sp)
- ld x10, 72(sp)
- ld x9, 64(sp)
- ld x8, 56(sp)
- ld x7, 48(sp)
- ld x6, 40(sp)
- ld x5, 32(sp)
- ld x4, 24(sp)
- ld x3, 16(sp)
- ld x2, 8(sp)
- ld x1, 0(sp)
- addi sp, sp, (8*34)
+ ld x31, 248(sp)
+ ld x30, 240(sp)
+ ld x29, 232(sp)
+ ld x28, 224(sp)
+ ld x27, 216(sp)
+ ld x26, 208(sp)
+ ld x25, 200(sp)
+ ld x24, 192(sp)
+ ld x23, 184(sp)
+ ld x22, 176(sp)
+ ld x21, 168(sp)
+ ld x20, 160(sp)
+ ld x19, 152(sp)
+ ld x18, 144(sp)
+ ld x17, 136(sp)
+ ld x16, 128(sp)
+ ld x15, 120(sp)
+ ld x14, 112(sp)
+ ld x13, 104(sp)
+ ld x12, 96(sp)
+ ld x11, 88(sp)
+ ld x10, 80(sp)
+ ld x9, 72(sp)
+ ld x8, 64(sp)
+ ld x7, 56(sp)
+ ld x6, 48(sp)
+ ld x5, 40(sp)
+ ld x4, 32(sp)
+ ld x3, 24(sp)
+ ld x2, 16(sp)
+ ld x1, 8(sp)
+ addi sp, sp, (8*36)
.endm
.balign 4
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index dd663bcf0cc0..2eac7d4b59e9 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -402,7 +402,7 @@ struct handlers {
exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
};
-void route_exception(struct ex_regs *regs)
+void route_exception(struct pt_regs *regs)
{
struct handlers *handlers = (struct handlers *)exception_handlers;
int vector = 0, ec;
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index 9e370800a6a2..f962fefc48fa 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -15,7 +15,7 @@
static int timer_irq = IRQ_S_TIMER;
-static void guest_irq_handler(struct ex_regs *regs)
+static void guest_irq_handler(struct pt_regs *regs)
{
uint64_t xcnt, xcnt_diff_us, cmp;
unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG;
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
index cfed6c727bfc..739d17befb5a 100644
--- a/tools/testing/selftests/kvm/riscv/ebreak_test.c
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -27,7 +27,7 @@ static void guest_code(void)
GUEST_DONE();
}
-static void guest_breakpoint_handler(struct ex_regs *regs)
+static void guest_breakpoint_handler(struct pt_regs *regs)
{
WRITE_ONCE(sw_bp_addr, regs->epc);
regs->epc += 4;
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 569f2d67c9b8..a0b7dabb5040 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -17,6 +17,15 @@ enum {
VCPU_FEATURE_SBI_EXT,
};
+enum {
+ KVM_RISC_V_REG_OFFSET_VSTART = 0,
+ KVM_RISC_V_REG_OFFSET_VL,
+ KVM_RISC_V_REG_OFFSET_VTYPE,
+ KVM_RISC_V_REG_OFFSET_VCSR,
+ KVM_RISC_V_REG_OFFSET_VLENB,
+ KVM_RISC_V_REG_OFFSET_MAX,
+};
+
static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX];
bool filter_reg(__u64 reg)
@@ -143,6 +152,38 @@ bool check_reject_set(int err)
return err == EINVAL;
}
+static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s,
+ uint64_t feature)
+{
+ unsigned long vlenb_reg = 0;
+ int rc;
+ u64 reg, size;
+
+ /* Enable V extension so that we can get the vlenb register */
+ rc = __vcpu_set_reg(vcpu, feature, 1);
+ if (rc)
+ return rc;
+
+ vlenb_reg = vcpu_get_reg(vcpu, s->regs[KVM_RISC_V_REG_OFFSET_VLENB]);
+ if (!vlenb_reg) {
+ TEST_FAIL("Can't compute vector register size from zero vlenb\n");
+ return -EPERM;
+ }
+
+ size = __builtin_ctzl(vlenb_reg);
+ size <<= KVM_REG_SIZE_SHIFT;
+
+ for (int i = 0; i < 32; i++) {
+ reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | KVM_REG_RISCV_VECTOR_REG(i);
+ s->regs[KVM_RISC_V_REG_OFFSET_MAX + i] = reg;
+ }
+
+ /* We should assert if disabling failed here while enabling succeeded before */
+ vcpu_set_reg(vcpu, feature, 0);
+
+ return 0;
+}
+
void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
{
unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
@@ -172,6 +213,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
if (!s->feature)
continue;
+ if (s->feature == KVM_RISCV_ISA_EXT_V) {
+ feature = RISCV_ISA_EXT_REG(s->feature);
+ rc = override_vector_reg_size(vcpu, s, feature);
+ if (rc)
+ goto skip;
+ }
+
switch (s->feature_type) {
case VCPU_FEATURE_ISA_EXT:
feature = RISCV_ISA_EXT_REG(s->feature);
@@ -186,6 +234,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
/* Try to enable the desired extension */
__vcpu_set_reg(vcpu, feature, 1);
+skip:
/* Double check whether the desired extension was enabled */
__TEST_REQUIRE(__vcpu_has_ext(vcpu, feature),
"%s not available, skipping tests", s->name);
@@ -410,6 +459,35 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id)
return strdup_printf("%lld /* UNKNOWN */", reg_off);
}
+static const char *vector_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_v_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_VECTOR);
+ int reg_index = 0;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_VECTOR);
+
+ if (reg_off >= KVM_REG_RISCV_VECTOR_REG(0))
+ reg_index = reg_off - KVM_REG_RISCV_VECTOR_REG(0);
+ switch (reg_off) {
+ case KVM_REG_RISCV_VECTOR_REG(0) ...
+ KVM_REG_RISCV_VECTOR_REG(31):
+ return strdup_printf("KVM_REG_RISCV_VECTOR_REG(%d)", reg_index);
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vstart)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vl)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vtype)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vcsr)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
#define KVM_ISA_EXT_ARR(ext) \
[KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext
@@ -639,6 +717,9 @@ void print_reg(const char *prefix, __u64 id)
case KVM_REG_SIZE_U128:
reg_size = "KVM_REG_SIZE_U128";
break;
+ case KVM_REG_SIZE_U256:
+ reg_size = "KVM_REG_SIZE_U256";
+ break;
default:
printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n",
(id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK);
@@ -670,6 +751,10 @@ void print_reg(const char *prefix, __u64 id)
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n",
reg_size, fp_d_id_to_str(prefix, id));
break;
+ case KVM_REG_RISCV_VECTOR:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_VECTOR | %s,\n",
+ reg_size, vector_id_to_str(prefix, id));
+ break;
case KVM_REG_RISCV_ISA_EXT:
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
reg_size, isa_ext_id_to_str(prefix, id));
@@ -874,6 +959,48 @@ static __u64 fp_d_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D,
};
+/* Define a default vector registers with length. This will be overwritten at runtime */
+static __u64 vector_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vlenb),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(0),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(1),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(2),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(3),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(4),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(5),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(6),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(7),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(8),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(9),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(10),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(11),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(12),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(13),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(14),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(15),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(16),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(17),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(18),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(19),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(20),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(21),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(22),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(23),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(24),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(25),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(26),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(27),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(28),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(29),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(30),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(31),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V,
+};
+
#define SUBLIST_BASE \
{"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \
.skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),}
@@ -898,6 +1025,9 @@ static __u64 fp_d_regs[] = {
{"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \
.regs_n = ARRAY_SIZE(fp_d_regs),}
+#define SUBLIST_V \
+ {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),}
+
#define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \
static __u64 regs_##ext[] = { \
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \
@@ -966,6 +1096,7 @@ KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
+KVM_ISA_EXT_SUBLIST_CONFIG(v, V);
KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM);
KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
@@ -1040,6 +1171,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_fp_f,
&config_fp_d,
&config_h,
+ &config_v,
&config_smnpm,
&config_smstateen,
&config_sscofpmf,
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index 03406de4989d..924a335d2262 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -73,7 +73,6 @@ unsigned long pmu_csr_read_num(int csr_num)
switch (csr_num) {
switchcase_csr_read_32(CSR_CYCLE, ret)
- switchcase_csr_read_32(CSR_CYCLEH, ret)
default :
break;
}
@@ -128,17 +127,36 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags)
"Unable to stop counter %ld error %ld\n", counter, ret.error);
}
-static void guest_illegal_exception_handler(struct ex_regs *regs)
+static void guest_illegal_exception_handler(struct pt_regs *regs)
{
+ unsigned long insn;
+ int opcode, csr_num, funct3;
+
__GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL,
"Unexpected exception handler %lx\n", regs->cause);
+ insn = regs->badaddr;
+ opcode = (insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT;
+ __GUEST_ASSERT(opcode == INSN_OPCODE_SYSTEM,
+ "Unexpected instruction with opcode 0x%x insn 0x%lx\n", opcode, insn);
+
+ csr_num = GET_CSR_NUM(insn);
+ funct3 = GET_RM(insn);
+ /* Validate if it is a CSR read/write operation */
+ __GUEST_ASSERT(funct3 <= 7 && (funct3 != 0 && funct3 != 4),
+ "Unexpected system opcode with funct3 0x%x csr_num 0x%x\n",
+ funct3, csr_num);
+
+ /* Validate if it is a HPMCOUNTER CSR operation */
+ __GUEST_ASSERT((csr_num >= CSR_CYCLE && csr_num <= CSR_HPMCOUNTER31),
+ "Unexpected csr_num 0x%x\n", csr_num);
+
illegal_handler_invoked = true;
/* skip the trapping instruction */
regs->epc += 4;
}
-static void guest_irq_handler(struct ex_regs *regs)
+static void guest_irq_handler(struct pt_regs *regs)
{
unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index bc440d5aba57..ce3ac0fd6dfb 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -350,7 +350,7 @@ static void test_invalid_memory_region_flags(void)
struct kvm_vm *vm;
int r, i;
-#if defined __aarch64__ || defined __riscv || defined __x86_64__
+#if defined __aarch64__ || defined __riscv || defined __x86_64__ || defined __loongarch__
supported_flags |= KVM_MEM_READONLY;
#endif
diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile
index 0c0d7b1234c1..4d4f810cdf2c 100644
--- a/tools/testing/selftests/mount_setattr/Makefile
+++ b/tools/testing/selftests/mount_setattr/Makefile
@@ -2,6 +2,8 @@
# Makefile for mount selftests.
CFLAGS = -g $(KHDR_INCLUDES) -Wall -O2 -pthread
+LOCAL_HDRS += ../filesystems/wrappers.h
+
TEST_GEN_PROGS := mount_setattr_test
include ../lib.mk
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 48a000cabc97..8b378c91debf 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -20,7 +20,7 @@
#include <stdarg.h>
#include <linux/mount.h>
-#include "../filesystems/overlayfs/wrappers.h"
+#include "../filesystems/wrappers.h"
#include "../kselftest_harness.h"
#ifndef CLONE_NEWNS
@@ -107,46 +107,6 @@
#endif
#endif
-#ifndef __NR_open_tree
- #if defined __alpha__
- #define __NR_open_tree 538
- #elif defined _MIPS_SIM
- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
- #define __NR_open_tree 4428
- #endif
- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
- #define __NR_open_tree 6428
- #endif
- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
- #define __NR_open_tree 5428
- #endif
- #elif defined __ia64__
- #define __NR_open_tree (428 + 1024)
- #else
- #define __NR_open_tree 428
- #endif
-#endif
-
-#ifndef __NR_move_mount
- #if defined __alpha__
- #define __NR_move_mount 539
- #elif defined _MIPS_SIM
- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
- #define __NR_move_mount 4429
- #endif
- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
- #define __NR_move_mount 6429
- #endif
- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
- #define __NR_move_mount 5429
- #endif
- #elif defined __ia64__
- #define __NR_move_mount (428 + 1024)
- #else
- #define __NR_move_mount 429
- #endif
-#endif
-
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
@@ -161,23 +121,6 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag
return syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
}
-#ifndef OPEN_TREE_CLONE
-#define OPEN_TREE_CLONE 1
-#endif
-
-#ifndef OPEN_TREE_CLOEXEC
-#define OPEN_TREE_CLOEXEC O_CLOEXEC
-#endif
-
-#ifndef AT_RECURSIVE
-#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
-#endif
-
-static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
-{
- return syscall(__NR_open_tree, dfd, filename, flags);
-}
-
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
ssize_t ret;
@@ -1076,7 +1019,7 @@ FIXTURE_SETUP(mount_setattr_idmapped)
ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
ASSERT_GE(img_fd, 0);
- ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
+ ASSERT_EQ(ftruncate(img_fd, 2147483648 /* 2 GB */), 0);
ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
ASSERT_EQ(close(img_fd), 0);
diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c
index 1562aa7d60b0..6dec59d64083 100644
--- a/tools/testing/selftests/nci/nci_dev.c
+++ b/tools/testing/selftests/nci/nci_dev.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2021 Samsung Electrnoics
+ * Copyright (C) 2021 Samsung Electronics
* Bongsu Jeon <bongsu.jeon@samsung.com>
*
* Test code for nci
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 70a38f485d4d..ea84b88bcb30 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -115,7 +115,7 @@ YNL_GEN_FILES := busy_poller netlink-dumps
TEST_GEN_FILES += $(YNL_GEN_FILES)
TEST_FILES := settings
-TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+TEST_FILES += in_netns.sh lib.sh setup_loopback.sh setup_veth.sh
TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
index d66336256580..8b015f16c03d 100644
--- a/tools/testing/selftests/net/af_unix/scm_rights.c
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -23,6 +23,7 @@ FIXTURE_VARIANT(scm_rights)
int type;
int flags;
bool test_listener;
+ bool disabled;
};
FIXTURE_VARIANT_ADD(scm_rights, dgram)
@@ -31,6 +32,16 @@ FIXTURE_VARIANT_ADD(scm_rights, dgram)
.type = SOCK_DGRAM,
.flags = 0,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, dgram_disabled)
+{
+ .name = "UNIX ",
+ .type = SOCK_DGRAM,
+ .flags = 0,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream)
@@ -39,6 +50,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream)
.type = SOCK_STREAM,
.flags = 0,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
@@ -47,6 +68,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
.type = SOCK_STREAM,
.flags = MSG_OOB,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_oob_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
@@ -55,6 +86,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
.type = SOCK_STREAM,
.flags = 0,
.test_listener = true,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = true,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
@@ -63,6 +104,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
.type = SOCK_STREAM,
.flags = MSG_OOB,
.test_listener = true,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = true,
+ .disabled = true,
};
static int count_sockets(struct __test_metadata *_metadata,
@@ -105,6 +156,9 @@ FIXTURE_SETUP(scm_rights)
ret = unshare(CLONE_NEWNET);
ASSERT_EQ(0, ret);
+ if (variant->disabled)
+ return;
+
ret = count_sockets(_metadata, variant);
ASSERT_EQ(0, ret);
}
@@ -113,6 +167,9 @@ FIXTURE_TEARDOWN(scm_rights)
{
int ret;
+ if (variant->disabled)
+ return;
+
sleep(1);
ret = count_sockets(_metadata, variant);
@@ -121,6 +178,7 @@ FIXTURE_TEARDOWN(scm_rights)
static void create_listeners(struct __test_metadata *_metadata,
FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
int n)
{
struct sockaddr_un addr = {
@@ -140,6 +198,12 @@ static void create_listeners(struct __test_metadata *_metadata,
ret = listen(self->fd[i], -1);
ASSERT_EQ(0, ret);
+ if (variant->disabled) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+ &(int){0}, sizeof(int));
+ ASSERT_EQ(0, ret);
+ }
+
addrlen = sizeof(addr);
ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen);
ASSERT_EQ(0, ret);
@@ -164,6 +228,12 @@ static void create_socketpairs(struct __test_metadata *_metadata,
for (i = 0; i < n * 2; i += 2) {
ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i);
ASSERT_EQ(0, ret);
+
+ if (variant->disabled) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+ &(int){0}, sizeof(int));
+ ASSERT_EQ(0, ret);
+ }
}
}
@@ -175,7 +245,7 @@ static void __create_sockets(struct __test_metadata *_metadata,
ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0]));
if (variant->test_listener)
- create_listeners(_metadata, self, n);
+ create_listeners(_metadata, self, variant, n);
else
create_socketpairs(_metadata, self, variant, n);
}
@@ -230,7 +300,13 @@ void __send_fd(struct __test_metadata *_metadata,
int ret;
ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
- ASSERT_EQ(MSGLEN, ret);
+
+ if (variant->disabled) {
+ ASSERT_EQ(-1, ret);
+ ASSERT_EQ(-EPERM, -errno);
+ } else {
+ ASSERT_EQ(MSGLEN, ret);
+ }
}
#define create_sockets(n) \
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index f366cadbc5e8..4046131e7888 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -106,26 +106,16 @@
# | |
# +-----------------------------------------------------------------------+
+. ./lib.sh
+
ERR=4 # Return 4 by default, which is the SKIP code for kselftest
PING6="ping"
PAUSE_ON_FAIL="no"
-readonly NS0=$(mktemp -u ns0-XXXXXXXX)
-readonly NS1=$(mktemp -u ns1-XXXXXXXX)
-readonly NS2=$(mktemp -u ns2-XXXXXXXX)
-readonly NS3=$(mktemp -u ns3-XXXXXXXX)
-
# Exit the script after having removed the network namespaces it created
-#
-# Parameters:
-#
-# * The list of network namespaces to delete before exiting.
-#
exit_cleanup()
{
- for ns in "$@"; do
- ip netns delete "${ns}" 2>/dev/null || true
- done
+ cleanup_all_ns
if [ "${ERR}" -eq 4 ]; then
echo "Error: Setting up the testing environment failed." >&2
@@ -140,17 +130,7 @@ exit_cleanup()
# namespaces created by this script are deleted.
create_namespaces()
{
- ip netns add "${NS0}" || exit_cleanup
- ip netns add "${NS1}" || exit_cleanup "${NS0}"
- ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
- ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
-}
-
-# The trap function handler
-#
-exit_cleanup_all()
-{
- exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+ setup_ns NS0 NS1 NS2 NS3 || exit_cleanup
}
# Configure a network interface using a host route
@@ -188,10 +168,6 @@ iface_config()
#
setup_underlay()
{
- for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
- ip -netns "${ns}" link set dev lo up
- done;
-
ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
@@ -234,14 +210,6 @@ setup_overlay_ipv4()
ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
-
- # The intermediate namespaces don't have routes for the reverse path,
- # as it will be handled by tc. So we need to ensure that rp_filter is
- # not going to block the traffic.
- ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
- ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
}
setup_overlay_ipv6()
@@ -521,13 +489,10 @@ done
check_features
-# Create namespaces before setting up the exit trap.
-# Otherwise, exit_cleanup_all() could delete namespaces that were not created
-# by this script.
-create_namespaces
-
set -e
-trap exit_cleanup_all EXIT
+trap exit_cleanup EXIT
+
+create_namespaces
setup_underlay
setup_overlay_ipv4
diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh
index 7db292ec4884..7d2d40812074 100755
--- a/tools/testing/selftests/net/busy_poll_test.sh
+++ b/tools/testing/selftests/net/busy_poll_test.sh
@@ -1,6 +1,6 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source net_helper.sh
+source lib.sh
NSIM_SV_ID=$((256 + RANDOM % 256))
NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
diff --git a/tools/testing/selftests/net/can/.gitignore b/tools/testing/selftests/net/can/.gitignore
new file mode 100644
index 000000000000..764a53fc837f
--- /dev/null
+++ b/tools/testing/selftests/net/can/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+test_raw_filter
diff --git a/tools/testing/selftests/net/can/Makefile b/tools/testing/selftests/net/can/Makefile
new file mode 100644
index 000000000000..5b82e60a03e7
--- /dev/null
+++ b/tools/testing/selftests/net/can/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+
+TEST_PROGS := test_raw_filter.sh
+
+TEST_GEN_FILES := test_raw_filter
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/can/test_raw_filter.c b/tools/testing/selftests/net/can/test_raw_filter.c
new file mode 100644
index 000000000000..4101c36390fd
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+/*
+ * Copyright (c) 2011 Volkswagen Group Electronic Research
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <net/if.h>
+#include <linux/if.h>
+
+#include <linux/can.h>
+#include <linux/can/raw.h>
+
+#include "../../kselftest_harness.h"
+
+#define ID 0x123
+
+char CANIF[IFNAMSIZ];
+
+static int send_can_frames(int sock, int testcase)
+{
+ struct can_frame frame;
+
+ frame.can_dlc = 1;
+ frame.data[0] = testcase;
+
+ frame.can_id = ID;
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_RTR_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_EFF_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_EFF_FLAG | CAN_RTR_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ return 0;
+
+write_err:
+ perror("write");
+ return 1;
+}
+
+FIXTURE(can_filters) {
+ int sock;
+};
+
+FIXTURE_SETUP(can_filters)
+{
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+ int recv_own_msgs = 1;
+ int s, ret;
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+ ASSERT_GE(s, 0)
+ TH_LOG("failed to create CAN_RAW socket: %d", errno);
+
+ strncpy(ifr.ifr_name, CANIF, sizeof(ifr.ifr_name));
+ ret = ioctl(s, SIOCGIFINDEX, &ifr);
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed SIOCGIFINDEX: %d", errno);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
+ &recv_own_msgs, sizeof(recv_own_msgs));
+
+ ret = bind(s, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(ret, 0)
+ TH_LOG("failed bind socket: %d", errno);
+
+ self->sock = s;
+}
+
+FIXTURE_TEARDOWN(can_filters)
+{
+ close(self->sock);
+}
+
+FIXTURE_VARIANT(can_filters) {
+ int testcase;
+ canid_t id;
+ canid_t mask;
+ int exp_num_rx;
+ canid_t exp_flags[];
+};
+
+/* Receive all frames when filtering for the ID in standard frame format */
+FIXTURE_VARIANT_ADD(can_filters, base) {
+ .testcase = 1,
+ .id = ID,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore EFF flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_eff) {
+ .testcase = 2,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore RTR flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_rtr) {
+ .testcase = 3,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore EFF and RTR flags in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_effrtr) {
+ .testcase = 4,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF frames when expecting no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff) {
+ .testcase = 5,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_eff) {
+ .testcase = 6,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF frames when expecting no EFF flag, ignoring RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_rtr) {
+ .testcase = 7,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag,
+ * ignoring RTR flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_effrtr) {
+ .testcase = 8,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive no remote frames when filtering for no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr) {
+ .testcase = 9,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive no remote frames when filtering for no RTR flag, ignoring EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_eff) {
+ .testcase = 10,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive only remote frames when filter includes RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_rtr) {
+ .testcase = 11,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only remote frames when filter includes RTR flag, ignoring EFF
+ * flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_effrtr) {
+ .testcase = 12,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF data frame when filtering for no flags */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr) {
+ .testcase = 13,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ 0,
+ },
+};
+
+/* Receive only EFF data frame when filtering for EFF but no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_eff) {
+ .testcase = 14,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive only SFF remote frame when filtering for RTR but no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_rtr) {
+ .testcase = 15,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF remote frame when filtering for EFF and RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_effrtr) {
+ .testcase = 16,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF data frame when filtering for no EFF flag and no RTR flag
+ * but based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff) {
+ .testcase = 17,
+ .id = ID,
+ .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ 0,
+ },
+};
+
+/* Receive only EFF data frame when filtering for EFF flag and no RTR flag but
+ * based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff_eff) {
+ .testcase = 18,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ },
+};
+
+/* This test verifies that the raw CAN filters work, by checking if only frames
+ * with the expected set of flags are received. For each test case, the given
+ * filter (id and mask) is added and four CAN frames are sent with every
+ * combination of set/unset EFF/RTR flags.
+ */
+TEST_F(can_filters, test_filter)
+{
+ struct can_filter rfilter;
+ int ret;
+
+ rfilter.can_id = variant->id;
+ rfilter.can_mask = variant->mask;
+ setsockopt(self->sock, SOL_CAN_RAW, CAN_RAW_FILTER,
+ &rfilter, sizeof(rfilter));
+
+ TH_LOG("filters: can_id = 0x%08X can_mask = 0x%08X",
+ rfilter.can_id, rfilter.can_mask);
+
+ ret = send_can_frames(self->sock, variant->testcase);
+ ASSERT_EQ(ret, 0)
+ TH_LOG("failed to send CAN frames");
+
+ for (int i = 0; i <= variant->exp_num_rx; i++) {
+ struct can_frame frame;
+ struct timeval tv = {
+ .tv_sec = 0,
+ .tv_usec = 50000, /* 50ms timeout */
+ };
+ fd_set rdfs;
+
+ FD_ZERO(&rdfs);
+ FD_SET(self->sock, &rdfs);
+
+ ret = select(self->sock + 1, &rdfs, NULL, NULL, &tv);
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed select for frame %d, err: %d)", i, errno);
+
+ ret = FD_ISSET(self->sock, &rdfs);
+ if (i == variant->exp_num_rx) {
+ ASSERT_EQ(ret, 0)
+ TH_LOG("too many frames received");
+ } else {
+ ASSERT_NE(ret, 0)
+ TH_LOG("too few frames received");
+
+ ret = read(self->sock, &frame, sizeof(frame));
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed to read frame %d, err: %d", i, errno);
+
+ TH_LOG("rx: can_id = 0x%08X rx = %d", frame.can_id, i);
+
+ ASSERT_EQ(ID, frame.can_id & CAN_SFF_MASK)
+ TH_LOG("received wrong can_id");
+ ASSERT_EQ(variant->testcase, frame.data[0])
+ TH_LOG("received wrong test case");
+
+ ASSERT_EQ(frame.can_id & ~CAN_ERR_MASK,
+ variant->exp_flags[i])
+ TH_LOG("received unexpected flags");
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ char *ifname = getenv("CANIF");
+
+ if (!ifname) {
+ printf("CANIF environment variable must contain the test interface\n");
+ return KSFT_FAIL;
+ }
+
+ strncpy(CANIF, ifname, sizeof(CANIF) - 1);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/net/can/test_raw_filter.sh b/tools/testing/selftests/net/can/test_raw_filter.sh
new file mode 100755
index 000000000000..276d6c06ac95
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ test_raw_filter
+"
+
+net_dir=$(dirname $0)/..
+source $net_dir/lib.sh
+
+export CANIF=${CANIF:-"vcan0"}
+BITRATE=${BITRATE:-500000}
+
+setup()
+{
+ if [[ $CANIF == vcan* ]]; then
+ ip link add name $CANIF type vcan || exit $ksft_skip
+ else
+ ip link set dev $CANIF type can bitrate $BITRATE || exit $ksft_skip
+ fi
+ ip link set dev $CANIF up
+ pwd
+}
+
+cleanup()
+{
+ ip link set dev $CANIF down
+ if [[ $CANIF == vcan* ]]; then
+ ip link delete $CANIF
+ fi
+}
+
+test_raw_filter()
+{
+ ./test_raw_filter
+ check_err $?
+ log_test "test_raw_filter"
+}
+
+trap cleanup EXIT
+setup
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 130d532b7e67..3cfef5153823 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -33,7 +33,6 @@ CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_SIT=y
-CONFIG_IP_DCCP=m
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP_NF_IPTABLES=m
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index c7cea556b416..5fbdd2a0b537 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -516,10 +516,7 @@ fib_rule4_test()
fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
"oif redirect to table" "oif no redirect to table"
- # Enable forwarding and disable rp_filter as all the addresses are in
- # the same subnet and egress device == ingress device.
ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
- ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
match="from $SRC_IP iif $DEV"
getnomatch="from $SRC_IP iif lo"
fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 3ea6f886a210..a94b73a53f72 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -11,7 +11,7 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
- ipv4_mpath_list ipv6_mpath_list"
+ ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -1085,6 +1085,35 @@ route_setup()
set +e
}
+forwarding_cleanup()
+{
+ cleanup_ns $ns3
+
+ route_cleanup
+}
+
+# extend route_setup with an ns3 reachable through ns2 over both devices
+forwarding_setup()
+{
+ forwarding_cleanup
+
+ route_setup
+
+ setup_ns ns3
+
+ ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2
+ ip -netns $ns3 link set veth5 up
+ ip -netns $ns2 link set veth6 up
+
+ ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24
+ ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24
+ ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2
+
+ ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad
+ ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad
+ ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2
+}
+
# assumption is that basic add of a single path route works
# otherwise just adding an address on an interface is broken
ipv6_rt_add()
@@ -2531,9 +2560,6 @@ ipv4_mpath_list_test()
run_cmd "ip -n $ns2 route add 203.0.113.0/24
nexthop via 172.16.201.2 nexthop via 172.16.202.2"
run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
set +e
local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
@@ -2600,6 +2626,93 @@ ipv6_mpath_list_test()
route_cleanup
}
+tc_set_flower_counter__saddr_syn() {
+ tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2"
+}
+
+ip_mpath_balance_dep_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "jq command not found. Skipping test"
+ return 1
+ fi
+}
+
+ip_mpath_balance() {
+ local -r ipver=$1
+ local -r daddr=$2
+ local -r num_conn=20
+
+ for i in $(seq 1 $num_conn); do
+ ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null &
+ sleep 0.02
+ echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000
+ done
+
+ local -r syn0="$(tc_get_flower_counter $ns1 veth1)"
+ local -r syn1="$(tc_get_flower_counter $ns1 veth3)"
+ local -r syns=$((syn0+syn1))
+
+ [ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)"
+
+ [[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]]
+}
+
+ipv4_mpath_balance_test()
+{
+ echo
+ echo "IPv4 multipath load balance test"
+
+ ip_mpath_balance_dep_check || return 1
+ forwarding_setup
+
+ $IP route add 172.16.105.1 \
+ nexthop via 172.16.101.2 \
+ nexthop via 172.16.103.2
+
+ ip netns exec $ns1 \
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+
+ tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1
+ tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1
+
+ ip_mpath_balance -4 172.16.105.1
+
+ log_test $? 0 "IPv4 multipath loadbalance"
+
+ forwarding_cleanup
+}
+
+ipv6_mpath_balance_test()
+{
+ echo
+ echo "IPv6 multipath load balance test"
+
+ ip_mpath_balance_dep_check || return 1
+ forwarding_setup
+
+ $IP route add 2001:db8:105::1\
+ nexthop via 2001:db8:101::2 \
+ nexthop via 2001:db8:103::2
+
+ ip netns exec $ns1 \
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+ tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1
+ tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1
+
+ ip_mpath_balance -6 "[2001:db8:105::1]"
+
+ log_test $? 0 "IPv6 multipath loadbalance"
+
+ forwarding_cleanup
+}
+
################################################################################
# usage
@@ -2683,6 +2796,8 @@ do
fib6_gc_test|ipv6_gc) fib6_gc_test;;
ipv4_mpath_list) ipv4_mpath_list_test;;
ipv6_mpath_list) ipv6_mpath_list_test;;
+ ipv4_mpath_balance) ipv4_mpath_balance_test;;
+ ipv6_mpath_balance) ipv6_mpath_balance_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index e6a3e04fd83f..d4e7dd659354 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,10 +1,24 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
- v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
- v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
- v3exc_timeout_test v3star_ex_auto_add_test"
+ALL_TESTS="
+ v2reportleave_test
+ v3include_test
+ v3inc_allow_test
+ v3inc_is_include_test
+ v3inc_is_exclude_test
+ v3inc_to_exclude_test
+ v3exc_allow_test
+ v3exc_is_include_test
+ v3exc_is_exclude_test
+ v3exc_to_exclude_test
+ v3inc_block_test
+ v3exc_block_test
+ v3exc_timeout_test
+ v3star_ex_auto_add_test
+ v2per_vlan_snooping_port_stp_test
+ v2per_vlan_snooping_vlan_stp_test
+"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="239.10.10.10"
@@ -554,6 +568,64 @@ v3star_ex_auto_add_test()
v3cleanup $swp2 $TEST_GROUP
}
+v2per_vlan_snooping_stp_test()
+{
+ local is_port=$1
+
+ local msg="port"
+ [[ $is_port -ne 1 ]] && msg="vlan"
+
+ ip link set br0 up type bridge vlan_filtering 1 \
+ mcast_igmp_version 2 \
+ mcast_snooping 1 \
+ mcast_vlan_snooping 1 \
+ mcast_querier 1 \
+ mcast_stats_enabled 1
+ bridge vlan global set vid 1 dev br0 \
+ mcast_snooping 1 \
+ mcast_querier 1 \
+ mcast_query_interval 100 \
+ mcast_startup_query_count 0
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+ sleep 5
+ local tx_s=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+ sleep 5
+ local tx_e=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+ RET=0
+ local tx=$(expr $tx_e - $tx_s)
+ test $tx -gt 0
+ check_err $? "No IGMP queries after STP state becomes forwarding"
+ log_test "per vlan snooping with $msg stp state change"
+
+ # restore settings
+ bridge vlan global set vid 1 dev br0 \
+ mcast_querier 0 \
+ mcast_query_interval 12500 \
+ mcast_startup_query_count 2
+ ip link set br0 up type bridge vlan_filtering 0 \
+ mcast_vlan_snooping 0 \
+ mcast_stats_enabled 0
+}
+
+v2per_vlan_snooping_port_stp_test()
+{
+ v2per_vlan_snooping_stp_test 1
+}
+
+v2per_vlan_snooping_vlan_stp_test()
+{
+ v2per_vlan_snooping_stp_test 0
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index f84ab2e65754..4cacef5a813a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,10 +1,23 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
- mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
- mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
- mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
+ALL_TESTS="
+ mldv2include_test
+ mldv2inc_allow_test
+ mldv2inc_is_include_test
+ mldv2inc_is_exclude_test
+ mldv2inc_to_exclude_test
+ mldv2exc_allow_test
+ mldv2exc_is_include_test
+ mldv2exc_is_exclude_test
+ mldv2exc_to_exclude_test
+ mldv2inc_block_test
+ mldv2exc_block_test
+ mldv2exc_timeout_test
+ mldv2star_ex_auto_add_test
+ mldv2per_vlan_snooping_port_stp_test
+ mldv2per_vlan_snooping_vlan_stp_test
+"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="ff02::cc"
@@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test()
mldv2cleanup $swp2
}
+mldv2per_vlan_snooping_stp_test()
+{
+ local is_port=$1
+
+ local msg="port"
+ [[ $is_port -ne 1 ]] && msg="vlan"
+
+ ip link set br0 up type bridge vlan_filtering 1 \
+ mcast_mld_version 2 \
+ mcast_snooping 1 \
+ mcast_vlan_snooping 1 \
+ mcast_querier 1 \
+ mcast_stats_enabled 1
+ bridge vlan global set vid 1 dev br0 \
+ mcast_mld_version 2 \
+ mcast_snooping 1 \
+ mcast_querier 1 \
+ mcast_query_interval 100 \
+ mcast_startup_query_count 0
+
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+ sleep 5
+ local tx_s=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+ sleep 5
+ local tx_e=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+
+ RET=0
+ local tx=$(expr $tx_e - $tx_s)
+ test $tx -gt 0
+ check_err $? "No MLD queries after STP state becomes forwarding"
+ log_test "per vlan snooping with $msg stp state change"
+
+ # restore settings
+ bridge vlan global set vid 1 dev br0 \
+ mcast_querier 0 \
+ mcast_query_interval 12500 \
+ mcast_startup_query_count 2 \
+ mcast_mld_version 1
+ ip link set br0 up type bridge vlan_filtering 0 \
+ mcast_vlan_snooping 0 \
+ mcast_stats_enabled 0
+}
+
+mldv2per_vlan_snooping_port_stp_test()
+{
+ mldv2per_vlan_snooping_stp_test 1
+}
+
+mldv2per_vlan_snooping_vlan_stp_test()
+{
+ mldv2per_vlan_snooping_stp_test 0
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index 8d7a1a004b7c..18fd69d8d937 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -1,6 +1,7 @@
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_BRIDGE_IGMP_SNOOPING=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_VRF=m
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index d6f0e449c029..b13c89a99ecb 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -178,8 +178,6 @@ setup()
else
ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
- ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
- ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
index a6b2b1f9c641..c6866e42f95c 100755
--- a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
+++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
@@ -69,7 +69,6 @@
# which can affect the conditions needed to trigger a soft lockup.
source lib.sh
-source net_helper.sh
TEST_DURATION=300
ROUTING_TABLE_REFRESH_PERIOD=0.01
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 701905eeff66..006fdadcc4b9 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -217,6 +217,8 @@ setup_ns()
return $ksft_skip
fi
ip -n "${!ns_name}" link set lo up
+ ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ns_list+=("${!ns_name}")
done
NS_LIST+=("${ns_list[@]}")
@@ -270,6 +272,30 @@ tc_rule_handle_stats_get()
.options.actions[0].stats$selector"
}
+# attach a qdisc with two children match/no-match and a flower filter to match
+tc_set_flower_counter() {
+ local -r ns=$1
+ local -r ipver=$2
+ local -r dev=$3
+ local -r flower_expr=$4
+
+ tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \
+ priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+ tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo
+ tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo
+
+ tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \
+ flower $flower_expr classid 1:2
+}
+
+tc_get_flower_counter() {
+ local -r ns=$1
+ local -r dev=$2
+
+ tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets
+}
+
ret_set_ksft_status()
{
local ksft_status=$1; shift
@@ -569,3 +595,24 @@ bridge_vlan_add()
bridge vlan add "$@"
defer bridge vlan del "$@"
}
+
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+ local protocol="${3}"
+ local pattern
+ local i
+
+ pattern=":$(printf "%04X" "${port}") "
+
+ # for tcp protocol additionally check the socket state
+ [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
+ for i in $(seq 10); do
+ if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+ /proc/net/"${protocol}"* | grep -q "${pattern}"; then
+ break
+ fi
+ sleep 0.1
+ done
+}
diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore
index 1ebc6187f421..bbc97d6bf556 100644
--- a/tools/testing/selftests/net/lib/.gitignore
+++ b/tools/testing/selftests/net/lib/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
csum
+xdp_helper
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index c22623b9a2a5..88c4bc461459 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -10,6 +10,7 @@ TEST_FILES += ../../../../net/ynl
TEST_GEN_FILES += csum
TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+TEST_GEN_FILES += xdp_helper
TEST_INCLUDES := $(wildcard py/*.py sh/*.sh)
diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h
new file mode 100644
index 000000000000..17dc34a612c6
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(__NET_KSFT_H__)
+#define __NET_KSFT_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static inline void ksft_ready(void)
+{
+ const char msg[7] = "ready\n";
+ char *env_str;
+ int fd;
+
+ env_str = getenv("KSFT_READY_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ fd = STDOUT_FILENO;
+ }
+
+ write(fd, msg, sizeof(msg));
+ if (fd != STDOUT_FILENO)
+ close(fd);
+}
+
+static inline void ksft_wait(void)
+{
+ char *env_str;
+ char byte;
+ int fd;
+
+ env_str = getenv("KSFT_WAIT_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ /* Not running in KSFT env, wait for input from STDIN instead */
+ fd = STDIN_FILENO;
+ }
+
+ read(fd, &byte, sizeof(byte));
+ if (fd != STDIN_FILENO)
+ close(fd);
+}
+
+#endif
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 3cfad0fd4570..61287c203b6e 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -3,6 +3,7 @@
import builtins
import functools
import inspect
+import signal
import sys
import time
import traceback
@@ -26,6 +27,10 @@ class KsftXfailEx(Exception):
pass
+class KsftTerminate(KeyboardInterrupt):
+ pass
+
+
def ksft_pr(*objs, **kwargs):
print("#", *objs, **kwargs)
@@ -193,6 +198,17 @@ def ksft_setup(env):
return env
+def _ksft_intr(signum, frame):
+ # ksft runner.sh sends 2 SIGTERMs in a row on a timeout
+ # if we don't ignore the second one it will stop us from handling cleanup
+ global term_cnt
+ term_cnt += 1
+ if term_cnt == 1:
+ raise KsftTerminate()
+ else:
+ ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...")
+
+
def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases = cases or []
@@ -205,6 +221,10 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases.append(value)
break
+ global term_cnt
+ term_cnt = 0
+ prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr)
+
totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
print("TAP version 13")
@@ -233,7 +253,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
for line in tb.strip().split('\n'):
ksft_pr("Exception|", line)
if stop:
- ksft_pr("Stopping tests due to KeyboardInterrupt.")
+ ksft_pr(f"Stopping tests due to {type(e).__name__}.")
KSFT_RESULT = False
cnt_key = 'fail'
@@ -248,6 +268,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
if stop:
break
+ signal.signal(signal.SIGTERM, prev_sigterm)
+
print(
f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
)
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 8986c584cb37..6329ae805abf 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -39,12 +39,12 @@ class EthtoolFamily(YnlFamily):
class RtnlFamily(YnlFamily):
def __init__(self, recv_size=0):
- super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
+ super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(),
schema='', recv_size=recv_size)
class RtnlAddrFamily(YnlFamily):
def __init__(self, recv_size=0):
- super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(),
+ super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(),
schema='', recv_size=recv_size)
class NetdevFamily(YnlFamily):
diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c
index aeed25914104..eb025a9f35b1 100644
--- a/tools/testing/selftests/drivers/net/xdp_helper.c
+++ b/tools/testing/selftests/net/lib/xdp_helper.c
@@ -11,55 +11,16 @@
#include <net/if.h>
#include <inttypes.h>
+#include "ksft.h"
+
#define UMEM_SZ (1U << 16)
#define NUM_DESC (UMEM_SZ / 2048)
-/* Move this to a common header when reused! */
-static void ksft_ready(void)
-{
- const char msg[7] = "ready\n";
- char *env_str;
- int fd;
-
- env_str = getenv("KSFT_READY_FD");
- if (env_str) {
- fd = atoi(env_str);
- if (!fd) {
- fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
- env_str);
- return;
- }
- } else {
- fd = STDOUT_FILENO;
- }
-
- write(fd, msg, sizeof(msg));
- if (fd != STDOUT_FILENO)
- close(fd);
-}
-static void ksft_wait(void)
+static void print_usage(const char *bin)
{
- char *env_str;
- char byte;
- int fd;
-
- env_str = getenv("KSFT_WAIT_FD");
- if (env_str) {
- fd = atoi(env_str);
- if (!fd) {
- fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
- env_str);
- return;
- }
- } else {
- /* Not running in KSFT env, wait for input from STDIN instead */
- fd = STDIN_FILENO;
- }
-
- read(fd, &byte, sizeof(byte));
- if (fd != STDIN_FILENO)
- close(fd);
+ fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n"
+ "where:\n\t-z: force zerocopy mode", bin);
}
/* this is a simple helper program that creates an XDP socket and does the
@@ -77,12 +38,13 @@ int main(int argc, char **argv)
struct sockaddr_xdp sxdp = { 0 };
int num_desc = NUM_DESC;
void *umem_area;
+ int retry = 0;
int ifindex;
int sock_fd;
int queue;
- if (argc != 3) {
- fprintf(stderr, "Usage: %s ifindex queue_id\n", argv[0]);
+ if (argc != 3 && argc != 4) {
+ print_usage(argv[0]);
return 1;
}
@@ -132,11 +94,29 @@ int main(int argc, char **argv)
sxdp.sxdp_queue_id = queue;
sxdp.sxdp_flags = 0;
- if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) {
- munmap(umem_area, UMEM_SZ);
- perror("bind failed");
- close(sock_fd);
- return 1;
+ if (argc > 3) {
+ if (!strcmp(argv[3], "-z")) {
+ sxdp.sxdp_flags = XDP_ZEROCOPY;
+ } else {
+ print_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ while (1) {
+ if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0)
+ break;
+
+ if (errno == EBUSY && retry < 3) {
+ retry++;
+ sleep(1);
+ continue;
+ } else {
+ perror("bind failed");
+ munmap(umem_area, UMEM_SZ);
+ close(sock_fd);
+ return 1;
+ }
}
ksft_ready();
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 340e1a777e16..e47788bfa671 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -11,7 +11,7 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag
TEST_FILES := mptcp_lib.sh settings
-TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) ../net_helper.sh
+TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh)
EXTRA_CLEAN := *.pcap
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index e7a75341f0f3..7a3cb4c09e45 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -225,6 +225,37 @@ chk_dump_one()
fi
}
+chk_dump_subflow()
+{
+ local inet_diag_token
+ local subflow_line
+ local ss_output
+ local ss_token
+ local msg
+
+ ss_output=$(ss -tniN $ns)
+
+ subflow_line=$(echo "$ss_output" | \
+ grep -m1 -Eo '[0-9.]+:[0-9].+ +[0-9.]+:[0-9.]+')
+
+ ss_token=$(echo "$ss_output" | grep -m1 -Eo 'token:[^ ]+')
+
+ inet_diag_token=$(ip netns exec $ns ./mptcp_diag -s "$subflow_line" | \
+ grep -Eo 'token:[^ ]+')
+
+ msg="....chk dump_subflow"
+
+ mptcp_lib_print_title "$msg"
+ if [ -n "$ss_token" ] && [ "$ss_token" = "$inet_diag_token" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "expected $ss_token found $inet_diag_token"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
msk_info_get_value()
{
local port="${1}"
@@ -316,6 +347,7 @@ chk_msk_fallback_nr 0 "....chk no fallback"
chk_msk_inuse 2
chk_msk_cestab 2
chk_dump_one
+chk_dump_subflow
flush_pids
chk_msk_inuse 0 "2->0"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index c83a8b47bbdf..ac1349c4b9e5 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -180,13 +180,26 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
+again:
int err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ /* glibc starts to support MPTCP since v2.42.
+ * For older versions, use IPPROTO_TCP to resolve,
+ * and use TCP/MPTCP to create socket.
+ * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82
+ */
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -292,7 +305,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -356,7 +369,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
int infd, struct wstate *winfo)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
index 284286c524cf..e084796e804d 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_diag.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -8,6 +8,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>
+#include <arpa/inet.h>
#include <unistd.h>
#include <stdlib.h>
@@ -19,6 +20,15 @@
#define IPPROTO_MPTCP 262
#endif
+#define parse_rtattr_nested(tb, max, rta) \
+ (parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \
+ NLA_F_NESTED))
+
+struct params {
+ __u32 target_token;
+ char subflow_addrs[1024];
+};
+
struct mptcp_info {
__u8 mptcpi_subflows;
__u8 mptcpi_add_addr_signal;
@@ -46,6 +56,37 @@ struct mptcp_info {
__u32 mptcpi_last_ack_recv;
};
+enum {
+ MPTCP_SUBFLOW_ATTR_UNSPEC,
+ MPTCP_SUBFLOW_ATTR_TOKEN_REM,
+ MPTCP_SUBFLOW_ATTR_TOKEN_LOC,
+ MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ,
+ MPTCP_SUBFLOW_ATTR_MAP_SEQ,
+ MPTCP_SUBFLOW_ATTR_MAP_SFSEQ,
+ MPTCP_SUBFLOW_ATTR_SSN_OFFSET,
+ MPTCP_SUBFLOW_ATTR_MAP_DATALEN,
+ MPTCP_SUBFLOW_ATTR_FLAGS,
+ MPTCP_SUBFLOW_ATTR_ID_REM,
+ MPTCP_SUBFLOW_ATTR_ID_LOC,
+ MPTCP_SUBFLOW_ATTR_PAD,
+
+ __MPTCP_SUBFLOW_ATTR_MAX
+};
+
+#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1)
+
+#define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0)
+#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1)
+#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2)
+#define MPTCP_SUBFLOW_FLAG_JOIN_LOC _BITUL(3)
+#define MPTCP_SUBFLOW_FLAG_BKUP_REM _BITUL(4)
+#define MPTCP_SUBFLOW_FLAG_BKUP_LOC _BITUL(5)
+#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED _BITUL(6)
+#define MPTCP_SUBFLOW_FLAG_CONNECTED _BITUL(7)
+#define MPTCP_SUBFLOW_FLAG_MAPVALID _BITUL(8)
+
+#define rta_getattr(type, value) (*(type *)RTA_DATA(value))
+
static void die_perror(const char *msg)
{
perror(msg);
@@ -54,11 +95,13 @@ static void die_perror(const char *msg)
static void die_usage(int r)
{
- fprintf(stderr, "Usage: mptcp_diag -t\n");
+ fprintf(stderr, "Usage:\n"
+ "mptcp_diag -t <token>\n"
+ "mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n");
exit(r);
}
-static void send_query(int fd, __u32 token)
+static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto)
{
struct sockaddr_nl nladdr = {
.nl_family = AF_NETLINK
@@ -72,31 +115,26 @@ static void send_query(int fd, __u32 token)
.nlmsg_type = SOCK_DIAG_BY_FAMILY,
.nlmsg_flags = NLM_F_REQUEST
},
- .r = {
- .sdiag_family = AF_INET,
- /* Real proto is set via INET_DIAG_REQ_PROTOCOL */
- .sdiag_protocol = IPPROTO_TCP,
- .id.idiag_cookie[0] = token,
- }
+ .r = *r
};
struct rtattr rta_proto;
struct iovec iov[6];
- int iovlen = 1;
- __u32 proto;
-
- req.r.idiag_ext |= (1 << (INET_DIAG_INFO - 1));
- proto = IPPROTO_MPTCP;
- rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
- rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+ int iovlen = 0;
- iov[0] = (struct iovec) {
+ iov[iovlen++] = (struct iovec) {
.iov_base = &req,
.iov_len = sizeof(req)
};
- iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
- iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto)};
- req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
- iovlen += 2;
+
+ if (proto == IPPROTO_MPTCP) {
+ rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
+ rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+
+ iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
+ iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)};
+ req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
+ }
+
struct msghdr msg = {
.msg_name = &nladdr,
.msg_namelen = sizeof(nladdr),
@@ -160,7 +198,67 @@ static void print_info_msg(struct mptcp_info *info)
printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked);
}
-static void parse_nlmsg(struct nlmsghdr *nlh)
+/*
+ * 'print_subflow_info' is from 'mptcp_subflow_info'
+ * which is a function in 'misc/ss.c' of iproute2.
+ */
+static void print_subflow_info(struct rtattr *tb[])
+{
+ u_int32_t flags = 0;
+
+ printf("It's a mptcp subflow, the subflow info:\n");
+ if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) {
+ char caps[32 + 1] = { 0 }, *cap = &caps[0];
+
+ flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]);
+
+ if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM)
+ *cap++ = 'M';
+ if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC)
+ *cap++ = 'm';
+ if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM)
+ *cap++ = 'J';
+ if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC)
+ *cap++ = 'j';
+ if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM)
+ *cap++ = 'B';
+ if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC)
+ *cap++ = 'b';
+ if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED)
+ *cap++ = 'e';
+ if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED)
+ *cap++ = 'c';
+ if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID)
+ *cap++ = 'v';
+
+ if (flags)
+ printf(" flags:%s", caps);
+ }
+ if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] &&
+ tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] &&
+ tb[MPTCP_SUBFLOW_ATTR_ID_REM] &&
+ tb[MPTCP_SUBFLOW_ATTR_ID_LOC])
+ printf(" token:%04x(id:%u)/%04x(id:%u)",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]),
+ rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]),
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]),
+ rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])
+ printf(" seq:%llu",
+ rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])
+ printf(" sfseq:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]));
+ if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])
+ printf(" ssnoff:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])
+ printf(" maplen:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]));
+ printf("\n");
+}
+
+static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto)
{
struct inet_diag_msg *r = NLMSG_DATA(nlh);
struct rtattr *tb[INET_DIAG_MAX + 1];
@@ -169,7 +267,7 @@ static void parse_nlmsg(struct nlmsghdr *nlh)
nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)),
NLA_F_NESTED);
- if (tb[INET_DIAG_INFO]) {
+ if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) {
int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
struct mptcp_info *info;
@@ -183,11 +281,28 @@ static void parse_nlmsg(struct nlmsghdr *nlh)
}
print_info_msg(info);
}
+ if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) {
+ struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 };
+
+ parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX,
+ tb[INET_DIAG_ULP_INFO]);
+
+ if (ulpinfo[INET_ULP_INFO_MPTCP]) {
+ struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 };
+
+ parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX,
+ ulpinfo[INET_ULP_INFO_MPTCP]);
+ print_subflow_info(sfinfo);
+ } else {
+ printf("It's a normal TCP!\n");
+ }
+ }
}
-static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
+static void recv_nlmsg(int fd, __u32 proto)
{
char rcv_buff[8192];
+ struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff;
struct sockaddr_nl rcv_nladdr = {
.nl_family = AF_NETLINK
};
@@ -204,7 +319,6 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
int len;
len = recvmsg(fd, &rcv_msg, 0);
- nlh = (struct nlmsghdr *)rcv_buff;
while (NLMSG_OK(nlh, len)) {
if (nlh->nlmsg_type == NLMSG_DONE) {
@@ -218,40 +332,84 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
-(err->error), strerror(-(err->error)));
break;
}
- parse_nlmsg(nlh);
+ parse_nlmsg(nlh, proto);
nlh = NLMSG_NEXT(nlh, len);
}
}
static void get_mptcpinfo(__u32 token)
{
- struct nlmsghdr *nlh = NULL;
+ struct inet_diag_req_v2 r = {
+ .sdiag_family = AF_INET,
+ /* Real proto is set via INET_DIAG_REQ_PROTOCOL */
+ .sdiag_protocol = IPPROTO_TCP,
+ .idiag_ext = 1 << (INET_DIAG_INFO - 1),
+ .id.idiag_cookie[0] = token,
+ };
+ __u32 proto = IPPROTO_MPTCP;
int fd;
fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
if (fd < 0)
die_perror("Netlink socket");
- send_query(fd, token);
- recv_nlmsg(fd, nlh);
+ send_query(fd, &r, proto);
+ recv_nlmsg(fd, proto);
close(fd);
}
-static void parse_opts(int argc, char **argv, __u32 *target_token)
+static void get_subflow_info(char *subflow_addrs)
+{
+ struct inet_diag_req_v2 r = {
+ .sdiag_family = AF_INET,
+ .sdiag_protocol = IPPROTO_TCP,
+ .idiag_ext = 1 << (INET_DIAG_INFO - 1),
+ .id.idiag_cookie[0] = INET_DIAG_NOCOOKIE,
+ .id.idiag_cookie[1] = INET_DIAG_NOCOOKIE,
+ };
+ char saddr[64], daddr[64];
+ int sport, dport;
+ int ret;
+ int fd;
+
+ ret = sscanf(subflow_addrs, "%[^:]:%d %[^:]:%d", saddr, &sport, daddr, &dport);
+ if (ret != 4)
+ die_perror("IP PORT Pairs has style problems!");
+
+ printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ if (fd < 0)
+ die_perror("Netlink socket");
+
+ r.id.idiag_sport = htons(sport);
+ r.id.idiag_dport = htons(dport);
+
+ inet_pton(AF_INET, saddr, &r.id.idiag_src);
+ inet_pton(AF_INET, daddr, &r.id.idiag_dst);
+ send_query(fd, &r, IPPROTO_TCP);
+ recv_nlmsg(fd, IPPROTO_TCP);
+}
+
+static void parse_opts(int argc, char **argv, struct params *p)
{
int c;
if (argc < 2)
die_usage(1);
- while ((c = getopt(argc, argv, "ht:")) != -1) {
+ while ((c = getopt(argc, argv, "ht:s:")) != -1) {
switch (c) {
case 'h':
die_usage(0);
break;
case 't':
- sscanf(optarg, "%x", target_token);
+ sscanf(optarg, "%x", &p->target_token);
+ break;
+ case 's':
+ strncpy(p->subflow_addrs, optarg,
+ sizeof(p->subflow_addrs) - 1);
break;
default:
die_usage(1);
@@ -262,10 +420,15 @@ static void parse_opts(int argc, char **argv, __u32 *target_token)
int main(int argc, char *argv[])
{
- __u32 target_token;
+ struct params p = { 0 };
+
+ parse_opts(argc, argv, &p);
+
+ if (p.target_token)
+ get_mptcpinfo(p.target_token);
- parse_opts(argc, argv, &target_token);
- get_mptcpinfo(target_token);
+ if (p.subflow_addrs[0] != '\0')
+ get_subflow_info(p.subflow_addrs);
return 0;
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 218aac467321..3cf1e2a612ce 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -72,13 +72,21 @@ static const char *getxinfo_strerr(int err)
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
+again:
int err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -91,7 +99,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -136,7 +144,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index befa66f5a366..b8af65373b3a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -62,6 +62,7 @@ unset sflags
unset fastclose
unset fullmesh
unset speed
+unset join_syn_rej
unset join_csum_ns1
unset join_csum_ns2
unset join_fail_nr
@@ -1403,6 +1404,7 @@ chk_join_nr()
local syn_nr=$1
local syn_ack_nr=$2
local ack_nr=$3
+ local syn_rej=${join_syn_rej:-0}
local csum_ns1=${join_csum_ns1:-0}
local csum_ns2=${join_csum_ns2:-0}
local fail_nr=${join_fail_nr:-0}
@@ -1468,6 +1470,15 @@ chk_join_nr()
fail_test "got $count JOIN[s] ack HMAC failure expected 0"
fi
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$syn_rej" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn rejected"
+ fail_test "got $count JOIN[s] syn rejected expected $syn_rej"
+ fi
+
print_results "join Rx" ${rc}
join_syn_tx="${join_syn_tx:-${syn_nr}}" \
@@ -1963,7 +1974,8 @@ subflows_tests()
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# subflow
@@ -1992,7 +2004,8 @@ subflows_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 2 1
+ join_syn_rej=1 \
+ chk_join_nr 2 2 1
fi
# single subflow, dev
@@ -3061,7 +3074,8 @@ syncookies_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 1 1
+ join_syn_rej=1 \
+ chk_join_nr 2 1 1
fi
# test signal address with cookies
@@ -3545,7 +3559,8 @@ userspace_tests()
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# userspace pm type does not send join
@@ -3568,7 +3583,8 @@ userspace_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
chk_prio_nr 0 0 0 0
fi
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 051e289d7967..09cd24b2ae46 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
. "$(dirname "${0}")/../lib.sh"
-. "$(dirname "${0}")/../net_helper.sh"
readonly KSFT_PASS=0
readonly KSFT_FAIL=1
@@ -331,12 +330,15 @@ mptcp_lib_result_print_all_tap() {
# get the value of keyword $1 in the line marked by keyword $2
mptcp_lib_get_info_value() {
- grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+ grep "${2}" 2>/dev/null |
+ sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+ # the ';q' at the end limits to the first matched entry.
}
# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
mptcp_lib_evts_get_info() {
- grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+ grep "${4:-}" "${2}" 2>/dev/null |
+ mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
}
# $1: PID
@@ -476,8 +478,6 @@ mptcp_lib_ns_init() {
local netns
for netns in "${@}"; do
ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
- ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
done
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index 926b0be87c99..9934a68df237 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -159,13 +159,21 @@ static const char *getxinfo_strerr(int err)
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
+again:
int err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -178,7 +186,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -223,7 +231,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
deleted file mode 100644
index 6596fe03c77f..000000000000
--- a/tools/testing/selftests/net/net_helper.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Helper functions
-
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
- local protocol="${3}"
- local pattern
- local i
-
- pattern=":$(printf "%04X" "${port}") "
-
- # for tcp protocol additionally check the socket state
- [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
- for i in $(seq 10); do
- if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
- /proc/net/"${protocol}"* | grep -q "${pattern}"; then
- break
- fi
- sleep 0.1
- done
-}
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index ffe161fac8b5..e9b2f553588d 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -12,6 +12,7 @@ TEST_PROGS += conntrack_dump_flush.sh
TEST_PROGS += conntrack_icmp_related.sh
TEST_PROGS += conntrack_ipip_mtu.sh
TEST_PROGS += conntrack_tcp_unreplied.sh
+TEST_PROGS += conntrack_resize.sh
TEST_PROGS += conntrack_sctp_collision.sh
TEST_PROGS += conntrack_vrf.sh
TEST_PROGS += conntrack_reverse_clash.sh
@@ -23,6 +24,7 @@ TEST_PROGS += nft_concat_range.sh
TEST_PROGS += nft_conntrack_helper.sh
TEST_PROGS += nft_fib.sh
TEST_PROGS += nft_flowtable.sh
+TEST_PROGS += nft_interface_stress.sh
TEST_PROGS += nft_meta.sh
TEST_PROGS += nft_nat.sh
TEST_PROGS += nft_nat_zones.sh
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
index 1559ba275105..011de8763094 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -60,9 +60,6 @@ bcast_ping()
done
}
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0
-
if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then
echo "SKIP: Can't create veth device"
exit $ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
index 2549b6590693..ea76f2bc2f59 100755
--- a/tools/testing/selftests/net/netfilter/bridge_brouter.sh
+++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
@@ -22,8 +22,6 @@ trap cleanup EXIT
setup_ns nsbr ns1 ns2
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0
if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then
echo "SKIP: Can't create veth device"
exit $ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 43d8b500d391..363646f4fefe 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -46,6 +46,7 @@ CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_MARK=y
diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
new file mode 100755
index 000000000000..9e033e80219e
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
@@ -0,0 +1,427 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft tool"
+
+init_net_max=0
+ct_buckets=0
+tmpfile=""
+tmpfile_proc=""
+tmpfile_uniq=""
+ret=0
+
+insert_count=2000
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400
+
+modprobe -q nf_conntrack
+if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then
+ echo "SKIP: conntrack sysctls not available"
+ exit $KSFT_SKIP
+fi
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1
+ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1
+
+cleanup() {
+ cleanup_all_ns
+
+ rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq"
+
+ # restore original sysctl setting
+ sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+ sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets
+}
+trap cleanup EXIT
+
+check_max_alias()
+{
+ local expected="$1"
+ # old name, expected to alias to the first, i.e. changing one
+ # changes the other as well.
+ local lv=$(sysctl -n net.nf_conntrack_max)
+
+ if [ $expected -ne "$lv" ];then
+ echo "nf_conntrack_max sysctls should have identical values"
+ exit 1
+ fi
+}
+
+insert_ctnetlink() {
+ local ns="$1"
+ local count="$2"
+ local i=0
+ local bulk=16
+
+ while [ $i -lt $count ] ;do
+ ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \
+ if ! conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+ -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+ --protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \
+ return;\
+ fi & \
+ done ; wait" 2>/dev/null
+
+ i=$((i+bulk))
+ done
+}
+
+check_ctcount() {
+ local ns="$1"
+ local count="$2"
+ local msg="$3"
+
+ local now=$(ip netns exec "$ns" conntrack -C)
+
+ if [ $now -ne "$count" ] ;then
+ echo "expected $count entries in $ns, not $now: $msg"
+ exit 1
+ fi
+
+ echo "PASS: got $count connections: $msg"
+}
+
+ctresize() {
+ local duration="$1"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM
+ now=$(date +%s)
+ done
+}
+
+do_rsleep() {
+ local limit="$1"
+ local r=$RANDOM
+
+ r=$((r%limit))
+ sleep "$r"
+}
+
+ct_flush_once() {
+ local ns="$1"
+
+ ip netns exec "$ns" conntrack -F 2>/dev/null
+}
+
+ctflush() {
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ do_rsleep "$duration"
+
+ while [ $now -lt $end ]; do
+ ct_flush_once "$ns"
+ do_rsleep "$duration"
+ now=$(date +%s)
+ done
+}
+
+ctflood()
+{
+ local ns="$1"
+ local duration="$2"
+ local msg="$3"
+ local now=$(date +%s)
+ local end=$((now + duration))
+ local j=0
+ local k=0
+
+ while [ $now -lt $end ]; do
+ j=$((j%256))
+ k=$((k%256))
+
+ ip netns exec "$ns" bash -c \
+ "j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1
+
+ j=$((j+1))
+
+ if [ $j -eq 256 ];then
+ k=$((k+1))
+ fi
+
+ now=$(date +%s)
+ done
+
+ wait
+}
+
+# dump to /dev/null. We don't want dumps to cause infinite loops
+# or use-after-free even when conntrack table is altered while dumps
+# are in progress.
+ct_nulldump()
+{
+ local ns="$1"
+
+ ip netns exec "$ns" conntrack -L > /dev/null 2>&1 &
+
+ # Don't require /proc support in conntrack
+ if [ -r /proc/self/net/nf_conntrack ] ; then
+ ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null &
+ fi
+
+ wait
+}
+
+check_taint()
+{
+ local tainted_then="$1"
+ local msg="$2"
+
+ local tainted_now=0
+
+ if [ "$tainted_then" -ne 0 ];then
+ return
+ fi
+
+ read tainted_now < /proc/sys/kernel/tainted
+
+ if [ "$tainted_now" -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "TAINT: $msg"
+ dmesg
+ exit 1
+ fi
+}
+
+insert_flood()
+{
+ local n="$1"
+ local r=0
+
+ r=$((RANDOM%$insert_count))
+
+ ctflood "$n" "$timeout" "floodresize" &
+ insert_ctnetlink "$n" "$r" &
+ ctflush "$n" "$timeout" &
+ ct_nulldump "$n" &
+
+ wait
+}
+
+test_floodresize_all()
+{
+ local timeout=20
+ local n=""
+ local tainted_then=""
+
+ read tainted_then < /proc/sys/kernel/tainted
+
+ for n in "$nsclient1" "$nsclient2";do
+ insert_flood "$n" &
+ done
+
+ # resize table constantly while flood/insert/dump/flushs
+ # are happening in parallel.
+ ctresize "$timeout"
+
+ # wait for subshells to complete, everything is limited
+ # by $timeout.
+ wait
+
+ check_taint "$tainted_then" "resize+flood"
+}
+
+check_dump()
+{
+ local ns="$1"
+ local protoname="$2"
+ local c=0
+ local proto=0
+ local proc=0
+ local unique=""
+ local lret=0
+
+ # NOTE: assumes timeouts are large enough to not have
+ # expirations in all following tests.
+ l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l)
+ c=$(ip netns exec "$ns" conntrack -C)
+
+ if [ "$c" -eq 0 ]; then
+ echo "FAIL: conntrack count for $ns is 0"
+ lret=1
+ fi
+
+ if [ "$c" -ne "$l" ]; then
+ echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l"
+ lret=1
+ fi
+
+ # check the dump we retrieved is free of duplicated entries.
+ unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$unique" ]; then
+ echo "FAIL: listing contained redundant entries for $ns: $l != $unique"
+ diff -u "$tmpfile" "$tmpfile_uniq"
+ lret=1
+ fi
+
+ # we either inserted icmp or only udp, hence, --proto should return same entry count as without filter.
+ proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$proto" ]; then
+ echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto"
+ diff -u "$tmpfile" "$tmpfile_uniq"
+ lret=1
+ fi
+
+ if [ -r /proc/self/net/nf_conntrack ] ; then
+ proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l")
+
+ if [ "$l" -ne "$proc" ]; then
+ echo "FAIL: proc inconsistency for $ns: $l != $proc"
+ lret=1
+ fi
+
+ proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$proc" ]; then
+ echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc"
+ diff -u "$tmpfile_proc" "$tmpfile_uniq"
+ lret=1
+ fi
+ fi
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+ else
+ echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+ ret=1
+ fi
+}
+
+test_dump_all()
+{
+ local timeout=3
+ local tainted_then=""
+
+ read tainted_then < /proc/sys/kernel/tainted
+
+ ct_flush_once "$nsclient1"
+ ct_flush_once "$nsclient2"
+
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600
+
+ ctflood "$nsclient1" $timeout "dumpall" &
+ insert_ctnetlink "$nsclient2" $insert_count
+
+ wait
+
+ check_dump "$nsclient1" "icmp"
+ check_dump "$nsclient2" "udp"
+
+ check_taint "$tainted_then" "test parallel conntrack dumps"
+}
+
+check_sysctl_immutable()
+{
+ local ns="$1"
+ local name="$2"
+ local failhard="$3"
+ local o=0
+ local n=0
+
+ o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+ n=$((o+1))
+
+ # return value isn't reliable, need to read it back
+ ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null
+
+ n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+
+ [ -z "$n" ] && return 1
+
+ if [ $o -ne $n ]; then
+ if [ $failhard -gt 0 ] ;then
+ echo "FAIL: net.$name should not be changeable from namespace (now $n)"
+ ret=1
+ fi
+ return 0
+ fi
+
+ return 1
+}
+
+test_conntrack_max_limit()
+{
+ sysctl -q net.netfilter.nf_conntrack_max=100
+ insert_ctnetlink "$nsclient1" 101
+
+ # check netns is clamped by init_net, i.e., either netns follows
+ # init_net value, or a higher pernet limit (compared to init_net) is ignored.
+ check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound"
+
+ sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+}
+
+test_conntrack_disable()
+{
+ local timeout=2
+
+ # disable conntrack pickups
+ ip netns exec "$nsclient1" nft flush table ip test_ct
+
+ ct_flush_once "$nsclient1"
+ ct_flush_once "$nsclient2"
+
+ ctflood "$nsclient1" "$timeout" "conntrack disable"
+ ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1
+
+ # Disabled, should not have picked up any connection.
+ check_ctcount "$nsclient1" 0 "conntrack disabled"
+
+ # This one is still active, expect 1 connection.
+ check_ctcount "$nsclient2" 1 "conntrack enabled"
+}
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max)
+
+check_max_alias $init_net_max
+
+sysctl -q net.netfilter.nf_conntrack_max="262000"
+check_max_alias 262000
+
+setup_ns nsclient1 nsclient2
+
+# check this only works from init_net
+for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max net.nf_conntrack_max;do
+ check_sysctl_immutable "$nsclient1" "net.$n" 1
+done
+
+# won't work on older kernels. If it works, check that the netns obeys the limit
+if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then
+ # subtest: if pernet is changeable, check that reducing it in pernet
+ # limits the pernet entries. Inverse, pernet clamped by a lower init_net
+ # setting, is already checked by "test_conntrack_max_limit" test.
+
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1
+ insert_ctnetlink "$nsclient1" 2
+ check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound"
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+fi
+
+for n in "$nsclient1" "$nsclient2";do
+# enable conntrack in both namespaces
+ip netns exec "$n" nft -f - <<EOF
+table ip test_ct {
+ chain input {
+ type filter hook input priority 0
+ ct state new counter
+ }
+}
+EOF
+done
+
+tmpfile=$(mktemp)
+tmpfile_proc=$(mktemp)
+tmpfile_uniq=$(mktemp)
+test_conntrack_max_limit
+test_dump_all
+test_floodresize_all
+test_conntrack_disable
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
index e95ecb37c2b1..207b79932d91 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -32,7 +32,6 @@ source lib.sh
IP0=172.30.30.1
IP1=172.30.30.2
-DUMMYNET=10.9.9
PFXL=30
ret=0
@@ -52,11 +51,6 @@ trap cleanup EXIT
setup_ns ns0 ns1
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1
-
if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
echo "SKIP: Could not add veth device"
exit $ksft_skip
@@ -67,18 +61,13 @@ if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
exit $ksft_skip
fi
-ip -net "$ns0" link add dummy0 type dummy
-
ip -net "$ns0" li set veth0 master tvrf
-ip -net "$ns0" li set dummy0 master tvrf
ip -net "$ns0" li set tvrf up
ip -net "$ns0" li set veth0 up
-ip -net "$ns0" li set dummy0 up
ip -net "$ns1" li set veth0 up
ip -net "$ns0" addr add $IP0/$PFXL dev veth0
ip -net "$ns1" addr add $IP1/$PFXL dev veth0
-ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0
listener_ready()
{
@@ -219,35 +208,9 @@ EOF
fi
}
-test_fib()
-{
-ip netns exec "$ns0" nft -f - <<EOF
-flush ruleset
-table ip t {
- counter fibcount { }
-
- chain prerouting {
- type filter hook prerouting priority 0;
- meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack
- }
-}
-EOF
- ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0
- ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null
-
- if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then
- echo "PASS: fib lookup returned exepected output interface"
- else
- echo "FAIL: fib lookup did not return exepected output interface"
- ret=1
- return
- fi
-}
-
test_ct_zone_in
test_masquerade_vrf "default"
test_masquerade_vrf "pfifo"
test_masquerade_veth
-test_fib
exit $ret
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
index d3edb16cd4b3..6af2ea3ad6b8 100755
--- a/tools/testing/selftests/net/netfilter/ipvs.sh
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -129,9 +129,6 @@ test_dr() {
# avoid incorrect arp response
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
- # avoid reverse route lookup
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
@@ -167,9 +164,6 @@ test_tun() {
ip netns exec "${ns2}" ip link set tunl0 up
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 1f5979c1510c..efea93cf23d4 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -15,10 +15,12 @@ source lib.sh
# Available test groups:
# - reported_issues: check for issues that were reported in the past
# - correctness: check that packets match given entries, and only those
+# - correctness_large: same but with additional non-matching entries
# - concurrency: attempt races between insertion, deletion and lookup
# - timeout: check that packets match entries until they expire
# - performance: estimate matching rate, compare with rbtree and hash baselines
-TESTS="reported_issues correctness concurrency timeout"
+TESTS="reported_issues correctness correctness_large concurrency timeout"
+
[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
# Set types, defined by TYPE_ variables below
@@ -1257,9 +1259,7 @@ send_nomatch() {
# - add ranged element, check that packets match it
# - check that packets outside range don't match it
# - remove some elements, check that packets don't match anymore
-test_correctness() {
- setup veth send_"${proto}" set || return ${ksft_skip}
-
+test_correctness_main() {
range_size=1
for i in $(seq "${start}" $((start + count))); do
end=$((start + range_size))
@@ -1293,6 +1293,163 @@ test_correctness() {
done
}
+test_correctness() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ test_correctness_main
+}
+
+# Repeat the correctness tests, but add extra non-matching entries.
+# This exercises the more compact '4 bit group' representation that
+# gets picked when the default 8-bit representation exceed
+# NFT_PIPAPO_LT_SIZE_HIGH bytes of memory.
+# See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust().
+#
+# The format() helper is way too slow when generating lots of
+# entries so its not used here.
+test_correctness_large() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+ # number of dummy (filler) entries to add.
+ local dcount=16385
+
+ (
+ echo -n "add element inet filter test { "
+
+ case "$type_spec" in
+ "ether_addr . ipv4_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_mac $((1000000 + i))
+ printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "inet_proto . ipv6_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . " $((RANDOM%256))
+ format_addr6 $((1000000 + i))
+ done
+ ;;
+ "inet_service . inet_proto")
+ # smaller key sizes, need more entries to hit the
+ # 4-bit threshold.
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ local proto=$((RANDOM%256))
+
+ # Test uses UDP to match, as it also fails when matching
+ # an entry that doesn't exist, so skip 'udp' entries
+ # to not trigger a wrong failure.
+ [ $proto -eq 17 ] && proto=18
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . %i " $(((i%65534) + 1)) $((proto))
+ done
+ ;;
+ "inet_service . ipv4_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "ipv4_addr . ether_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ format_mac $((1000000 + i))
+ done
+ ;;
+ "ipv4_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+ done
+ ;;
+ "ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr")
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+ format_mac $((1000000 + i))
+ printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . inet_proto")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . inet_proto . ipv4_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . ipv4_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256))
+ done
+ ;;
+ "ipv6_addr . ether_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . "
+ format_mac $((1000000 + i))
+ done
+ ;;
+ "ipv6_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1))"
+ done
+ ;;
+ "ipv6_addr . inet_service . ether_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_mac $((i + 1000000))
+ done
+ ;;
+ "ipv6_addr . inet_service . ether_addr . inet_proto")
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_mac $((i + 1000000))
+ echo -n " . $((RANDOM%256))"
+ done
+ ;;
+ "ipv6_addr . inet_service . ipv6_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_addr6 $((i + 2123456))
+ echo -n " . $((RANDOM%256))"
+ done
+ ;;
+ *)
+ "Unhandled $type_spec"
+ return 1
+ esac
+ echo -n "}"
+
+ ) | nft -f - || return 1
+
+ test_correctness_main
+}
+
# Concurrency test template:
# - add all the elements
# - start a thread for each physical thread that:
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
index ce1451c275fd..9929a9ffef65 100755
--- a/tools/testing/selftests/net/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -3,6 +3,10 @@
# This tests the fib expression.
#
# Kselftest framework requirement - SKIP code is 4.
+#
+# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99
+# dead:1::99 dead:1::1 dead:2::1 dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
source lib.sh
@@ -45,6 +49,19 @@ table inet filter {
EOF
}
+load_input_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain input {
+ type filter hook input priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
load_pbr_ruleset() {
local netns=$1
@@ -59,6 +76,89 @@ table inet filter {
EOF
}
+load_type_ruleset() {
+ local netns=$1
+
+ for family in ip ip6;do
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table $family filter {
+ chain type_match_in {
+ fib daddr type local counter comment "daddr configured on other iface"
+ fib daddr . iif type local counter comment "daddr configured on iif"
+ fib daddr type unicast counter comment "daddr not local"
+ fib daddr . iif type unicast counter comment "daddr not configured on iif"
+ }
+
+ chain type_match_out {
+ fib daddr type unicast counter
+ fib daddr . oif type unicast counter
+ fib daddr type local counter
+ fib daddr . oif type local counter
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain input {
+ type filter hook input priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain forward {
+ type filter hook forward priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain output {
+ type filter hook output priority 0;
+ icmp type echo-request counter jump type_match_out
+ icmpv6 type echo-request counter jump type_match_out
+ }
+
+ chain postrouting {
+ type filter hook postrouting priority 0;
+ icmp type echo-request counter jump type_match_out
+ icmpv6 type echo-request counter jump type_match_out
+ }
+}
+EOF
+done
+}
+
+reload_type_ruleset() {
+ ip netns exec "$1" nft flush table ip filter
+ ip netns exec "$1" nft flush table ip6 filter
+ load_type_ruleset "$1"
+}
+
+check_fib_type_counter_family() {
+ local family="$1"
+ local want="$2"
+ local ns="$3"
+ local chain="$4"
+ local what="$5"
+ local errmsg="$6"
+
+ if ! ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep "$what" | grep -q "packets $want";then
+ echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2
+ ip netns exec "$ns" nft list chain "$family" filter "$chain"
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_type_counter() {
+ check_fib_type_counter_family "ip" "$@" || return 1
+ check_fib_type_counter_family "ip6" "$@" || return 1
+}
+
load_ruleset_count() {
local netns=$1
@@ -77,6 +177,7 @@ check_drops() {
if dmesg | grep -q ' nft_rpfilter: ';then
dmesg | grep ' nft_rpfilter: '
echo "FAIL: rpfilter did drop packets"
+ ret=1
return 1
fi
@@ -151,19 +252,506 @@ test_ping() {
return 0
}
+test_ping_unreachable() {
+ local daddr4=$1
+ local daddr6=$2
+
+ if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr4" > /dev/null; then
+ echo "FAIL: ${ns1} could reach $daddr4" 1>&2
+ return 1
+ fi
+
+ if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr6" > /dev/null; then
+ echo "FAIL: ${ns1} could reach $daddr6" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+test_fib_type() {
+ local notice="$1"
+ local errmsg="addr-on-if"
+ local lret=0
+
+ if ! load_type_ruleset "$nsrouter";then
+ echo "SKIP: Could not load fib type ruleset"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return
+ fi
+
+ # makes router receive packet for addresses configured on incoming
+ # interface.
+ test_ping 10.0.1.1 dead:1::1 || return 1
+
+ # expectation: triggers all 'local' in prerouting/input.
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1
+
+ reload_type_ruleset "$nsrouter"
+ # makes router receive packet for address configured on a different (but local)
+ # interface.
+ test_ping 10.0.2.1 dead:2::1 || return 1
+
+ # expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'.
+ errmsg="addr-on-host"
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+ reload_type_ruleset "$nsrouter"
+ test_ping 10.0.2.99 dead:2::99 || return 1
+ errmsg="addr-on-otherhost"
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: fib expression address types match ($notice)"
+ else
+ echo "FAIL: fib expression address types match ($notice)"
+ ret=1
+ fi
+}
+
+test_fib_vrf_dev_add_dummy()
+{
+ if ! ip -net "$nsrouter" link add dummy0 type dummy ;then
+ echo "SKIP: VRF tests: dummy device type not supported"
+ return 1
+ fi
+
+ if ! ip -net "$nsrouter" link add tvrf type vrf table 9876;then
+ echo "SKIP: VRF tests: vrf device type not supported"
+ return 1
+ fi
+
+ ip -net "$nsrouter" link set dummy0 master tvrf
+ ip -net "$nsrouter" link set dummy0 up
+ ip -net "$nsrouter" link set tvrf up
+}
+
+load_ruleset_vrf()
+{
+# Due to the many different possible combinations using named counters
+# or one-rule-per-expected-result is complex.
+#
+# Instead, add dynamic sets for the fib modes
+# (fib address type, fib output interface lookup .. ),
+# and then add the obtained fib results to them.
+#
+# The test is successful if the sets contain the expected results
+# and no unexpected extra entries existed.
+ip netns exec "$nsrouter" nft -f - <<EOF
+flush ruleset
+table inet t {
+ set fibif4 {
+ typeof meta iif . ip daddr . fib daddr oif
+ flags dynamic
+ counter
+ }
+
+ set fibif4iif {
+ typeof meta iif . ip daddr . fib daddr . iif oif
+ flags dynamic
+ counter
+ }
+
+ set fibif6 {
+ typeof meta iif . ip6 daddr . fib daddr oif
+ flags dynamic
+ counter
+ }
+
+ set fibif6iif {
+ typeof meta iif . ip6 daddr . fib daddr . iif oif
+ flags dynamic
+ counter
+ }
+
+ set fibtype4 {
+ typeof meta iif . ip daddr . fib daddr type
+ flags dynamic
+ counter
+ }
+
+ set fibtype4iif {
+ typeof meta iif . ip daddr . fib daddr . iif type
+ flags dynamic
+ counter
+ }
+
+ set fibtype6 {
+ typeof meta iif . ip6 daddr . fib daddr type
+ flags dynamic
+ counter
+ }
+
+ set fibtype6iif {
+ typeof meta iif . ip6 daddr . fib daddr . iif type
+ flags dynamic
+ counter
+ }
+
+ chain fib_test {
+ meta nfproto ipv4 jump {
+ add @fibif4 { meta iif . ip daddr . fib daddr oif }
+ add @fibif4iif { meta iif . ip daddr . fib daddr . iif oif }
+ add @fibtype4 { meta iif . ip daddr . fib daddr type }
+ add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type }
+
+ add @fibif4 { meta iif . ip saddr . fib saddr oif }
+ add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif }
+ }
+
+ meta nfproto ipv6 jump {
+ add @fibif6 { meta iif . ip6 daddr . fib daddr oif }
+ add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif }
+ add @fibtype6 { meta iif . ip6 daddr . fib daddr type }
+ add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type }
+
+ add @fibif6 { meta iif . ip6 saddr . fib saddr oif }
+ add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif }
+ }
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority 0;
+ icmp type echo-request counter jump fib_test
+
+ # neighbour discovery to be ignored.
+ icmpv6 type echo-request counter jump fib_test
+ }
+}
+EOF
+
+if [ $? -ne 0 ] ;then
+ echo "SKIP: Could not load ruleset for fib vrf test"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return 1
+fi
+}
+
+check_type()
+{
+ local setname="$1"
+ local iifname="$2"
+ local addr="$3"
+ local type="$4"
+ local count="$5"
+
+ [ -z "$count" ] && count=1
+
+ if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then
+ echo "FAIL: did not find $iifname . $addr . $type in $setname"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ return 1
+ fi
+
+ # delete the entry, this allows to check if anything unexpected appeared
+ # at the end of the test run: all dynamic sets should be empty by then.
+ if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then
+ echo "FAIL: can't delete $iifname . $addr . $type in $setname"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+check_local()
+{
+ check_type $@ "local" 1
+}
+
+check_unicast()
+{
+ check_type $@ "unicast" 1
+}
+
+check_rpf()
+{
+ check_type $@
+}
+
+check_fib_vrf_sets_empty()
+{
+ local setname=""
+ local lret=0
+
+ # A non-empty set means that we have seen unexpected packets OR
+ # that a fib lookup provided unexpected results.
+ for setname in "fibif4" "fibif4iif" "fibif6" "fibif6iif" \
+ "fibtype4" "fibtype4iif" "fibtype6" "fibtype6iif";do
+ if ip netns exec "$nsrouter" nft list set inet t "$setname" | grep -q elements;then
+ echo "FAIL: $setname not empty"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ lret=1
+ fi
+ done
+
+ return $lret
+}
+
+check_fib_vrf_type()
+{
+ local msg="$1"
+
+ local addr
+ # the incoming interface is always veth0. As its not linked to a VRF,
+ # the 'tvrf' device should NOT show up anywhere.
+ local ifname="veth0"
+ local lret=0
+
+ # local_veth0, local_veth1
+ for addr in "10.0.1.1" "10.0.2.1"; do
+ check_local fibtype4 "$ifname" "$addr" || lret=1
+ check_type fibif4 "$ifname" "$addr" "0" || lret=1
+ done
+ for addr in "dead:1::1" "dead:2::1";do
+ check_local fibtype6 "$ifname" "$addr" || lret=1
+ check_type fibif6 "$ifname" "$addr" "0" || lret=1
+ done
+
+ # when restricted to the incoming interface, 10.0.1.1 should
+ # be 'local', but 10.0.2.1 unicast.
+ check_local fibtype4iif "$ifname" "10.0.1.1" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1
+
+ # same for the ipv6 addresses.
+ check_local fibtype6iif "$ifname" "dead:1::1" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1
+
+ # None of these addresses should find a valid route when restricting
+ # to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are
+ # reachable via 'lo'.
+ for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do
+ check_type fibif4iif "$ifname" "$addr" "0" || lret=1
+ done
+
+ # expect default route (veth1), dummy0 is part of VRF but iif isn't.
+ for addr in "10.9.9.1" "10.9.9.2";do
+ check_unicast fibtype4 "$ifname" "$addr" || lret=1
+ check_unicast fibtype4iif "$ifname" "$addr" || lret=1
+ check_type fibif4 "$ifname" "$addr" "veth1" || lret=1
+ done
+ for addr in "dead:9::1" "dead:9::2";do
+ check_unicast fibtype6 "$ifname" "$addr" || lret=1
+ check_unicast fibtype6iif "$ifname" "$addr" || lret=1
+ check_type fibif6 "$ifname" "$addr" "veth1" || lret=1
+ done
+
+ # same for the IPv6 equivalent addresses.
+ for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do
+ check_type fibif6iif "$ifname" "$addr" "0" || lret=1
+ done
+
+ check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+ check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+ check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+ check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+ check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+ check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+ check_rpf fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+ check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 5 || lret=1
+ check_rpf fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1
+
+ check_fib_vrf_sets_empty || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "FAIL: $msg"
+ ret=1
+ fi
+}
+
+check_fib_veth_vrf_type()
+{
+ local msg="$1"
+
+ local addr
+ local ifname
+ local setname
+ local lret=0
+
+ # as veth0 is now part of tvrf interface, packets will be seen
+ # twice, once with iif veth0, then with iif tvrf.
+
+ for ifname in "veth0" "tvrf"; do
+ for addr in "10.0.1.1" "10.9.9.1"; do
+ check_local fibtype4 "$ifname" "$addr" || lret=1
+ # addr local, but nft_fib doesn't return routes with RTN_LOCAL.
+ check_type fibif4 "$ifname" "$addr" 0 || lret=1
+ check_type fibif4iif "$ifname" "$addr" 0 || lret=1
+ done
+
+ for addr in "dead:1::1" "dead:9::1"; do
+ check_local fibtype6 "$ifname" "$addr" || lret=1
+ # same, address is local but no route is returned for lo.
+ check_type fibif6 "$ifname" "$addr" 0 || lret=1
+ check_type fibif6iif "$ifname" "$addr" 0 || lret=1
+ done
+
+ for t in fibtype4 fibtype4iif; do
+ check_unicast "$t" "$ifname" 10.9.9.2 || lret=1
+ done
+ for t in fibtype6 fibtype6iif; do
+ check_unicast "$t" "$ifname" dead:9::2 || lret=1
+ done
+
+ check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1
+
+ check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+
+ check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+ check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+ check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+ check_type fibif4 "$ifname" "10.9.9.2" "dummy0" || lret=1
+ check_type fibif6 "$ifname" "dead:9::2" "dummy0" || lret=1
+
+ # restricted to iif -- MUST NOT provide result, its != $ifname.
+ check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+ check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+ check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1
+ check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1
+ check_rpf fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1
+ check_rpf fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1
+ done
+
+ check_local fibtype4iif "veth0" "10.0.1.1" || lret=1
+ check_local fibtype6iif "veth0" "dead:1::1" || lret=1
+
+ check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1
+ check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1
+
+ # 10.9.9.2 should not provide a result for iif veth, but
+ # should when iif is tvrf.
+ # This is because its reachable via dummy0 which is part of
+ # tvrf. iif veth0 MUST conceal the dummy0 result (i.e. return oif 0).
+ check_type fibif4iif "veth0" "10.9.9.2" 0 || lret=1
+ check_type fibif6iif "veth0" "dead:9::2" 0 || lret=1
+
+ check_type fibif4iif "tvrf" "10.9.9.2" "tvrf" || lret=1
+ check_type fibif6iif "tvrf" "dead:9::2" "tvrf" || lret=1
+
+ check_fib_vrf_sets_empty || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "FAIL: $msg"
+ ret=1
+ fi
+}
+
+# Extends nsrouter config by adding dummy0+vrf.
+#
+# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99
+# dead:1::99 dead:1::1 dead:2::1 dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
+# [dummy0]
+# 10.9.9.1
+# dead:9::1
+# [tvrf]
+test_fib_vrf()
+{
+ local cntname=""
+
+ if ! test_fib_vrf_dev_add_dummy; then
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return
+ fi
+
+ ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0
+ ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad
+
+ ip -net "$nsrouter" route add default via 10.0.2.99
+ ip -net "$nsrouter" route add default via dead:2::99
+
+ load_ruleset_vrf || return
+
+ # no echo reply for these addresses: The dummy interface is part of tvrf,
+ # but veth0 (incoming interface) isn't linked to it.
+ test_ping_unreachable "10.9.9.1" "dead:9::1" &
+ test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+ # expect replies from these.
+ test_ping "10.0.1.1" "dead:1::1"
+ test_ping "10.0.2.1" "dead:2::1"
+ test_ping "10.0.2.99" "dead:2::99"
+
+ wait
+
+ check_fib_vrf_type "fib expression address types match (iif not in vrf)"
+
+ # second round: this time, make veth0 (rx interface) part of the vrf.
+ # 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2
+ # becomes unreachable.
+ ip -net "$nsrouter" link set veth0 master tvrf
+ ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ # this reload should not be needed, but in case
+ # there is some error (missing or unexpected entry) this will prevent them
+ # from leaking into round 2.
+ load_ruleset_vrf || return
+
+ test_ping "10.0.1.1" "dead:1::1"
+ test_ping "10.9.9.1" "dead:9::1"
+
+ # ns2 should no longer be reachable (veth1 not in vrf)
+ test_ping_unreachable "10.0.2.99" "dead:2::99" &
+
+ # vrf via dummy0, but host doesn't exist
+ test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+ wait
+
+ check_fib_veth_vrf_type "fib expression address types match (iif in vrf)"
+}
+
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
test_ping 10.0.2.1 dead:2::1 || exit 1
-check_drops || exit 1
+check_drops
test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not cause unwanted packet drops"
+
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1
+check_drops
+
+test_ping 10.0.1.99 dead:1::99
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not discard loopback packets"
+
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1 || exit 1
check_drops || exit 1
-echo "PASS: fib expression did not cause unwanted packet drops"
+test_ping 10.0.1.99 dead:1::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not discard loopback packets"
ip netns exec "$nsrouter" nft flush table inet filter
@@ -213,7 +801,7 @@ ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
# ... pbr ruleset for the router, check iif+oif.
if ! load_pbr_ruleset "$nsrouter";then
echo "SKIP: Could not load fib forward ruleset"
- exit $ksft_skip
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
fi
ip -net "$nsrouter" rule add from all table 128
@@ -224,11 +812,36 @@ ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
# drop main ipv4 table
ip -net "$nsrouter" -4 rule delete table main
-if ! test_ping 10.0.2.99 dead:2::99;then
- ip -net "$nsrouter" nft list ruleset
- echo "FAIL: fib mismatch in pbr setup"
- exit 1
+if test_ping 10.0.2.99 dead:2::99;then
+ echo "PASS: fib expression forward check with policy based routing"
+else
+ echo "FAIL: fib expression forward check with policy based routing"
+ ret=1
fi
-echo "PASS: fib expression forward check with policy based routing"
-exit 0
+test_fib_type "policy routing"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+# Un-do policy routing changes
+ip -net "$nsrouter" rule del from all table 128
+ip -net "$nsrouter" rule del from all iif veth0 table 129
+
+ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1
+
+ip -net "$ns1" -4 route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" -4 route add default via 10.0.1.1
+ip -net "$ns1" -6 route add default via dead:1::1
+
+ip -net "$nsrouter" -4 rule add from all table main priority 32766
+
+test_fib_type "default table"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+test_fib_vrf
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
new file mode 100755
index 000000000000..5ff7be9daeee
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
@@ -0,0 +1,154 @@
+#!/bin/bash -e
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+# Torture nftables' netdevice notifier callbacks and related code by frequent
+# renaming of interfaces which netdev-family chains and flowtables hook into.
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3 tool"
+
+# how many seconds to torture the kernel?
+# default to 80% of max run time but don't exceed 48s
+TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
+[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48
+
+trap "cleanup_all_ns" EXIT
+
+setup_ns nsc nsr nss
+
+ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr
+ip -net $nsc addr add 10.0.0.1/24 dev cr0
+ip -net $nsc link set cr0 up
+ip -net $nsc route add default via 10.0.0.2
+
+ip -net $nss link add sr0 type veth peer name rs0 netns $nsr
+ip -net $nss addr add 10.1.0.1/24 dev sr0
+ip -net $nss link set sr0 up
+ip -net $nss route add default via 10.1.0.2
+
+ip -net $nsr addr add 10.0.0.2/24 dev rc0
+ip -net $nsr link set rc0 up
+ip -net $nsr addr add 10.1.0.2/24 dev rs0
+ip -net $nsr link set rs0 up
+ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1
+ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1
+
+{
+ echo "table netdev t {"
+ for ((i = 0; i < 10; i++)); do
+ cat <<-EOF
+ chain chain_rc$i {
+ type filter hook ingress device rc$i priority 0
+ counter
+ }
+ chain chain_rs$i {
+ type filter hook ingress device rs$i priority 0
+ counter
+ }
+ EOF
+ done
+ echo "}"
+ echo "table ip t {"
+ for ((i = 0; i < 10; i++)); do
+ cat <<-EOF
+ flowtable ft_${i} {
+ hook ingress priority 0
+ devices = { rc$i, rs$i }
+ }
+ EOF
+ done
+ echo "chain c {"
+ echo "type filter hook forward priority 0"
+ for ((i = 0; i < 10; i++)); do
+ echo -n "iifname rc$i oifname rs$i "
+ echo "ip protocol tcp counter flow add @ft_${i}"
+ done
+ echo "counter"
+ echo "}"
+ echo "}"
+} | ip netns exec $nsr nft -f - || {
+ echo "SKIP: Could not load nft ruleset"
+ exit $ksft_skip
+}
+
+for ((o=0, n=1; ; o=n, n++, n %= 10)); do
+ ip -net $nsr link set rc$o name rc$n
+ ip -net $nsr link set rs$o name rs$n
+done &
+rename_loop_pid=$!
+
+while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done &
+nft_list_pid=$!
+
+ip netns exec $nsr nft monitor >/dev/null &
+nft_monitor_pid=$!
+
+ip netns exec $nss iperf3 --server --daemon -1
+summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p'
+rate=$(ip netns exec $nsc iperf3 \
+ --format k -c 10.1.0.1 --time $TEST_RUNTIME \
+ --length 56 --parallel 10 -i 0 | sed -n "$summary_expr")
+
+kill $nft_list_pid
+kill $nft_monitor_pid
+kill $rename_loop_pid
+wait
+
+wildcard_prep() {
+ ip netns exec $nsr nft -f - <<EOF
+table ip t {
+ flowtable ft_wild {
+ hook ingress priority 0
+ devices = { wild* }
+ }
+}
+EOF
+}
+
+if ! wildcard_prep; then
+ echo "SKIP wildcard tests: not supported by host's nft?"
+else
+ for ((i = 0; i < 100; i++)); do
+ ip -net $nsr link add wild$i type dummy &
+ done
+ wait
+ for ((i = 80; i < 100; i++)); do
+ ip -net $nsr link del wild$i &
+ done
+ for ((i = 0; i < 80; i++)); do
+ ip -net $nsr link del wild$i &
+ done
+ wait
+ for ((i = 0; i < 100; i += 10)); do
+ (
+ for ((j = 0; j < 10; j++)); do
+ ip -net $nsr link add wild$((i + j)) type dummy
+ done
+ for ((j = 0; j < 10; j++)); do
+ ip -net $nsr link del wild$((i + j))
+ done
+ ) &
+ done
+ wait
+fi
+
+[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || {
+ echo "FAIL: Kernel is tainted!"
+ exit $ksft_fail
+}
+
+[[ $rate -gt 0 ]] || {
+ echo "FAIL: Zero throughput in iperf3"
+ exit $ksft_fail
+}
+
+[[ -f /sys/kernel/debug/kmemleak && \
+ -n $(</sys/kernel/debug/kmemleak) ]] && {
+ echo "FAIL: non-empty kmemleak report"
+ exit $ksft_fail
+}
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
index 3b81d88bdde3..9f200f80253a 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
@@ -88,7 +88,6 @@ for i in $(seq 1 "$maxclients");do
echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
echo netns exec "$gw" ip link set "veth$i" up
echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2
- echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0
# clients have same IP addresses.
echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
@@ -178,7 +177,6 @@ fi
ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
# useful for debugging: allows to use 'ping' from clients to gateway.
ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 784d1b46912b..6136ceec45e0 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -10,6 +10,8 @@ source lib.sh
ret=0
timeout=5
+SCTP_TEST_TIMEOUT=60
+
cleanup()
{
ip netns pids "$ns1" | xargs kill 2>/dev/null
@@ -40,7 +42,7 @@ TMPFILE3=$(mktemp)
TMPINPUT=$(mktemp)
COUNT=200
-[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=$((COUNT/8))
dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"
if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
@@ -275,9 +277,11 @@ test_tcp_forward()
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2
+ local tthen=$(date +%s)
+
ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
- wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+ wait_and_check_retval "$rpid" "tcp and nfqueue in forward chain" "$tthen"
kill "$nfqpid"
}
@@ -288,13 +292,14 @@ test_tcp_localhost()
ip netns exec "$nsrouter" ./nf_queue -q 3 &
local nfqpid=$!
+ local tthen=$(date +%s)
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
- wait "$rpid" && echo "PASS: tcp via loopback"
+ wait_and_check_retval "$rpid" "tcp via loopback" "$tthen"
kill "$nfqpid"
}
@@ -417,6 +422,23 @@ check_output_files()
fi
}
+wait_and_check_retval()
+{
+ local rpid="$1"
+ local msg="$2"
+ local tthen="$3"
+ local tnow=$(date +%s)
+
+ if wait "$rpid";then
+ echo -n "PASS: "
+ else
+ echo -n "FAIL: "
+ ret=1
+ fi
+
+ printf "%s (duration: %ds)\n" "$msg" $((tnow-tthen))
+}
+
test_sctp_forward()
{
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
@@ -428,13 +450,14 @@ table inet sctpq {
}
}
EOF
- timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+ timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
local rpid=$!
busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
local nfqpid=$!
+ local tthen=$(date +%s)
ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
@@ -443,7 +466,7 @@ EOF
exit 1
fi
- wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain"
+ wait_and_check_retval "$rpid" "sctp and nfqueue in forward chain" "$tthen"
kill "$nfqpid"
check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
@@ -462,13 +485,14 @@ EOF
# reduce test file size, software segmentation causes sk wmem increase.
dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"
- timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+ timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
local rpid=$!
busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
ip netns exec "$ns1" ./nf_queue -q 11 &
local nfqpid=$!
+ local tthen=$(date +%s)
ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
@@ -478,7 +502,7 @@ EOF
fi
# must wait before checking completeness of output file.
- wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO"
+ wait_and_check_retval "$rpid" "sctp and nfqueue in output chain with GSO" "$tthen"
kill "$nfqpid"
check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
index 86ec4e68594d..24ad41d526d9 100755
--- a/tools/testing/selftests/net/netfilter/rpath.sh
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -1,8 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# return code to signal skipped test
-ksft_skip=4
+source lib.sh
# search for legacy iptables (it uses the xtables extensions
if iptables-legacy --version >/dev/null 2>&1; then
@@ -32,17 +31,10 @@ if [ -z "$iptables$ip6tables$nft" ]; then
exit $ksft_skip
fi
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-trap "ip netns del $ns1; ip netns del $ns2" EXIT
-
-# create two netns, disable rp_filter in ns2 and
-# keep IPv6 address when moving into VRF
-ip netns add "$ns1"
-ip netns add "$ns2"
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+trap cleanup_all_ns EXIT
+
+# create two netns, keep IPv6 address when moving into VRF
+setup_ns ns1 ns2
ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
# a standard connection between the netns, should not trigger rp filter
diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore
new file mode 100644
index 000000000000..ee44c081ca7c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+ovpn-cli
diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile
new file mode 100644
index 000000000000..e0926d76b4c8
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES)
+VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS = -I/usr/include/libnl3
+endif
+CFLAGS += $(VAR_CFLAGS)
+
+
+LDLIBS = -lmbedtls -lmbedcrypto
+VAR_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS = -lnl-genl-3 -lnl-3
+endif
+LDLIBS += $(VAR_LDLIBS)
+
+
+TEST_FILES = common.sh
+
+TEST_PROGS = test.sh \
+ test-large-mtu.sh \
+ test-chachapoly.sh \
+ test-tcp.sh \
+ test-float.sh \
+ test-close-socket.sh \
+ test-close-socket-tcp.sh
+
+TEST_GEN_FILES := ovpn-cli
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh
new file mode 100644
index 000000000000..88869c675d03
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/common.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt}
+TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt}
+OVPN_CLI=${OVPN_CLI:-./ovpn-cli}
+ALG=${ALG:-aes}
+PROTO=${PROTO:-UDP}
+FLOAT=${FLOAT:-0}
+
+LAN_IP="11.11.11.11"
+
+create_ns() {
+ ip netns add peer${1}
+}
+
+setup_ns() {
+ MODE="P2P"
+
+ if [ ${1} -eq 0 ]; then
+ MODE="MP"
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p}
+
+ ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p}
+ ip -n peer0 addr add fd00:0:0:${p}::1/64 dev veth${p}
+ ip -n peer0 link set veth${p} up
+
+ ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} addr add fd00:0:0:${p}::2/64 dev veth${p}
+ ip -n peer${p} link set veth${p} up
+ done
+ fi
+
+ ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE
+ ip -n peer${1} addr add ${2} dev tun${1}
+ # add a secondary IP to peer 1, to test a LAN behind a client
+ if [ ${1} -eq 1 -a -n "${LAN_IP}" ]; then
+ ip -n peer${1} addr add ${LAN_IP} dev tun${1}
+ ip -n peer0 route add ${LAN_IP} via $(echo ${2} |sed -e s'!/.*!!') dev tun0
+ fi
+ if [ -n "${3}" ]; then
+ ip -n peer${1} link set mtu ${3} dev tun${1}
+ fi
+ ip -n peer${1} link set tun${1} up
+}
+
+add_peer() {
+ if [ "${PROTO}" == "UDP" ]; then
+ if [ ${1} -eq 0 ]; then
+ ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE}
+
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \
+ data64.key
+ done
+ else
+ RADDR=$(awk "NR == ${1} {print \$2}" ${UDP_PEERS_FILE})
+ RPORT=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE})
+ LPORT=$(awk "NR == ${1} {print \$5}" ${UDP_PEERS_FILE})
+ ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} ${LPORT} \
+ ${RADDR} ${RPORT}
+ ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \
+ data64.key
+ fi
+ else
+ if [ ${1} -eq 0 ]; then
+ (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${TCP_PEERS_FILE} && {
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \
+ ${ALG} 0 data64.key
+ done
+ }) &
+ sleep 5
+ else
+ ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \
+ data64.key
+ fi
+ fi
+}
+
+cleanup() {
+ # some ovpn-cli processes sleep in background so they need manual poking
+ killall $(basename ${OVPN_CLI}) 2>/dev/null || true
+
+ # netns peer0 is deleted without erasing ifaces first
+ for p in $(seq 1 10); do
+ ip -n peer${p} link set tun${p} down 2>/dev/null || true
+ ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true
+ done
+ for p in $(seq 1 10); do
+ ip -n peer0 link del veth${p} 2>/dev/null || true
+ done
+ for p in $(seq 0 10); do
+ ip netns del peer${p} 2>/dev/null || true
+ done
+}
+
+if [ "${PROTO}" == "UDP" ]; then
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')}
+else
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')}
+fi
+
+
diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config
new file mode 100644
index 000000000000..71946ba9fa17
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/config
@@ -0,0 +1,10 @@
+CONFIG_NET=y
+CONFIG_INET=y
+CONFIG_STREAM_PARSER=y
+CONFIG_NET_UDP_TUNNEL=y
+CONFIG_DST_CACHE=y
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_CHACHA20POLY1305=y
+CONFIG_OVPN=m
diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key
new file mode 100644
index 000000000000..a99e88c4e290
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/data64.key
@@ -0,0 +1,5 @@
+jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B
+ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9
+uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6
+KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE
+BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w==
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
new file mode 100644
index 000000000000..de9c26f98b2e
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -0,0 +1,2383 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel accelerator
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <time.h>
+
+#include <linux/ovpn.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#include <netlink/socket.h>
+#include <netlink/netlink.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <mbedtls/base64.h>
+#include <mbedtls/error.h>
+
+#include <sys/socket.h>
+
+/* defines to make checkpatch happy */
+#define strscpy strncpy
+#define __always_unused __attribute__((__unused__))
+
+/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we
+ * have to explicitly do it to prevent the kernel from failing upon
+ * parsing of the message
+ */
+#define nla_nest_start(_msg, _type) \
+ nla_nest_start(_msg, (_type) | NLA_F_NESTED)
+
+/* libnl < 3.11.0 does not implement nla_get_uint() */
+uint64_t ovpn_nla_get_uint(struct nlattr *attr)
+{
+ if (nla_len(attr) == sizeof(uint32_t))
+ return nla_get_u32(attr);
+ else
+ return nla_get_u64(attr);
+}
+
+typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg);
+
+enum ovpn_key_direction {
+ KEY_DIR_IN = 0,
+ KEY_DIR_OUT,
+};
+
+#define KEY_LEN (256 / 8)
+#define NONCE_LEN 8
+
+#define PEER_ID_UNDEF 0x00FFFFFF
+#define MAX_PEERS 10
+
+struct nl_ctx {
+ struct nl_sock *nl_sock;
+ struct nl_msg *nl_msg;
+ struct nl_cb *nl_cb;
+
+ int ovpn_dco_id;
+};
+
+enum ovpn_cmd {
+ CMD_INVALID,
+ CMD_NEW_IFACE,
+ CMD_DEL_IFACE,
+ CMD_LISTEN,
+ CMD_CONNECT,
+ CMD_NEW_PEER,
+ CMD_NEW_MULTI_PEER,
+ CMD_SET_PEER,
+ CMD_DEL_PEER,
+ CMD_GET_PEER,
+ CMD_NEW_KEY,
+ CMD_DEL_KEY,
+ CMD_GET_KEY,
+ CMD_SWAP_KEYS,
+ CMD_LISTEN_MCAST,
+};
+
+struct ovpn_ctx {
+ enum ovpn_cmd cmd;
+
+ __u8 key_enc[KEY_LEN];
+ __u8 key_dec[KEY_LEN];
+ __u8 nonce[NONCE_LEN];
+
+ enum ovpn_cipher_alg cipher;
+
+ sa_family_t sa_family;
+
+ unsigned long peer_id;
+ unsigned long lport;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } remote;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } peer_ip;
+
+ bool peer_ip_set;
+
+ unsigned int ifindex;
+ char ifname[IFNAMSIZ];
+ enum ovpn_mode mode;
+ bool mode_set;
+
+ int socket;
+ int cli_sockets[MAX_PEERS];
+
+ __u32 keepalive_interval;
+ __u32 keepalive_timeout;
+
+ enum ovpn_key_direction key_dir;
+ enum ovpn_key_slot key_slot;
+ int key_id;
+
+ const char *peers_file;
+};
+
+static int ovpn_nl_recvmsgs(struct nl_ctx *ctx)
+{
+ int ret;
+
+ ret = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb);
+
+ switch (ret) {
+ case -NLE_INTR:
+ fprintf(stderr,
+ "netlink received interrupt due to signal - ignoring\n");
+ break;
+ case -NLE_NOMEM:
+ fprintf(stderr, "netlink out of memory error\n");
+ break;
+ case -NLE_AGAIN:
+ fprintf(stderr,
+ "netlink reports blocking read - aborting wait\n");
+ break;
+ default:
+ if (ret)
+ fprintf(stderr, "netlink reports error (%d): %s\n",
+ ret, nl_geterror(-ret));
+ break;
+ }
+
+ return ret;
+}
+
+static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd,
+ int flags)
+{
+ struct nl_ctx *ctx;
+ int err, ret;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ ctx->nl_sock = nl_socket_alloc();
+ if (!ctx->nl_sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192);
+
+ ret = genl_connect(ctx->nl_sock);
+ if (ret) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_sock;
+ }
+
+ /* enable Extended ACK for detailed error reporting */
+ err = 1;
+ setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK,
+ &err, sizeof(err));
+
+ ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME);
+ if (ctx->ovpn_dco_id < 0) {
+ fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n",
+ ctx->ovpn_dco_id);
+ goto err_free;
+ }
+
+ ctx->nl_msg = nlmsg_alloc();
+ if (!ctx->nl_msg) {
+ fprintf(stderr, "cannot allocate netlink message\n");
+ goto err_sock;
+ }
+
+ ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!ctx->nl_cb) {
+ fprintf(stderr, "failed to allocate netlink callback\n");
+ goto err_msg;
+ }
+
+ nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb);
+
+ genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd, 0);
+
+ if (ovpn->ifindex > 0)
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex);
+
+ return ctx;
+nla_put_failure:
+err_msg:
+ nlmsg_free(ctx->nl_msg);
+err_sock:
+ nl_socket_free(ctx->nl_sock);
+err_free:
+ free(ctx);
+ return NULL;
+}
+
+static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd)
+{
+ return nl_ctx_alloc_flags(ovpn, cmd, 0);
+}
+
+static void nl_ctx_free(struct nl_ctx *ctx)
+{
+ if (!ctx)
+ return;
+
+ nl_socket_free(ctx->nl_sock);
+ nlmsg_free(ctx->nl_msg);
+ nl_cb_put(ctx->nl_cb);
+ free(ctx);
+}
+
+static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1;
+ struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1];
+ int len = nlh->nlmsg_len;
+ struct nlattr *attrs;
+ int *ret = arg;
+ int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh);
+
+ *ret = err->error;
+
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return NL_STOP;
+
+ if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ ack_len += err->msg.nlmsg_len - sizeof(*nlh);
+
+ if (len <= ack_len)
+ return NL_STOP;
+
+ attrs = (void *)((uint8_t *)nlh + ack_len);
+ len -= ack_len;
+
+ nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL);
+ if (tb_msg[NLMSGERR_ATTR_MSG]) {
+ len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]),
+ nla_len(tb_msg[NLMSGERR_ATTR_MSG]));
+ fprintf(stderr, "kernel error: %*s\n", len,
+ (char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) {
+ fprintf(stderr, "missing required nesting type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) {
+ fprintf(stderr, "missing required attribute type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE]));
+ }
+
+ return NL_STOP;
+}
+
+static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_SKIP;
+}
+
+static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_STOP;
+}
+
+static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb)
+{
+ int status = 1;
+
+ nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish,
+ &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status);
+
+ if (cb)
+ nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx);
+
+ nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg);
+
+ while (status == 1)
+ ovpn_nl_recvmsgs(ctx);
+
+ if (status < 0)
+ fprintf(stderr, "failed to send netlink message: %s (%d)\n",
+ strerror(-status), status);
+
+ return status;
+}
+
+static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx)
+{
+ int idx_enc, idx_dec, ret = -1;
+ unsigned char *ckey = NULL;
+ __u8 *bkey = NULL;
+ size_t olen = 0;
+ long ckey_len;
+ FILE *fp;
+
+ fp = fopen(file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open: %s\n", file);
+ return -1;
+ }
+
+ /* get file size */
+ fseek(fp, 0L, SEEK_END);
+ ckey_len = ftell(fp);
+ rewind(fp);
+
+ /* if the file is longer, let's just read a portion */
+ if (ckey_len > 256)
+ ckey_len = 256;
+
+ ckey = malloc(ckey_len);
+ if (!ckey)
+ goto err;
+
+ ret = fread(ckey, 1, ckey_len, fp);
+ if (ret != ckey_len) {
+ fprintf(stderr,
+ "couldn't read enough data from key file: %dbytes read\n",
+ ret);
+ goto err;
+ }
+
+ olen = 0;
+ ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len);
+ if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ bkey = malloc(olen);
+ if (!bkey) {
+ fprintf(stderr, "cannot allocate binary key buffer\n");
+ goto err;
+ }
+
+ ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len);
+ if (ret) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ if (olen < 2 * KEY_LEN + NONCE_LEN) {
+ fprintf(stderr,
+ "not enough data in key file, found %zdB but needs %dB\n",
+ olen, 2 * KEY_LEN + NONCE_LEN);
+ goto err;
+ }
+
+ switch (ctx->key_dir) {
+ case KEY_DIR_IN:
+ idx_enc = 0;
+ idx_dec = 1;
+ break;
+ case KEY_DIR_OUT:
+ idx_enc = 1;
+ idx_dec = 0;
+ break;
+ default:
+ goto err;
+ }
+
+ memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN);
+ memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN);
+ memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN);
+
+ ret = 0;
+
+err:
+ fclose(fp);
+ free(bkey);
+ free(ckey);
+
+ return ret;
+}
+
+static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx)
+{
+ if (strcmp(cipher, "aes") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ else if (strcmp(cipher, "chachapoly") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305;
+ else if (strcmp(cipher, "none") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_NONE;
+ else
+ return -ENOTSUP;
+
+ return 0;
+}
+
+static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx)
+{
+ int in_dir;
+
+ in_dir = strtoll(dir, NULL, 10);
+ switch (in_dir) {
+ case KEY_DIR_IN:
+ case KEY_DIR_OUT:
+ ctx->key_dir = in_dir;
+ break;
+ default:
+ fprintf(stderr,
+ "invalid key direction provided. Can be 0 or 1 only\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto)
+{
+ struct sockaddr_storage local_sock = { 0 };
+ struct sockaddr_in6 *in6;
+ struct sockaddr_in *in;
+ int ret, s, sock_type;
+ size_t sock_len;
+
+ if (proto == IPPROTO_UDP)
+ sock_type = SOCK_DGRAM;
+ else if (proto == IPPROTO_TCP)
+ sock_type = SOCK_STREAM;
+ else
+ return -EINVAL;
+
+ s = socket(family, sock_type, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (family) {
+ case AF_INET:
+ in = (struct sockaddr_in *)&local_sock;
+ in->sin_family = family;
+ in->sin_port = htons(ctx->lport);
+ in->sin_addr.s_addr = htonl(INADDR_ANY);
+ sock_len = sizeof(*in);
+ break;
+ case AF_INET6:
+ in6 = (struct sockaddr_in6 *)&local_sock;
+ in6->sin6_family = family;
+ in6->sin6_port = htons(ctx->lport);
+ in6->sin6_addr = in6addr_any;
+ sock_len = sizeof(*in6);
+ break;
+ default:
+ return -1;
+ }
+
+ int opt = 1;
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEADDR");
+ return ret;
+ }
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEPORT");
+ return ret;
+ }
+
+ if (family == AF_INET6) {
+ opt = 0;
+ if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt,
+ sizeof(opt))) {
+ perror("failed to set IPV6_V6ONLY");
+ return -1;
+ }
+ }
+
+ ret = bind(s, (struct sockaddr *)&local_sock, sock_len);
+ if (ret < 0) {
+ perror("cannot bind socket");
+ goto err_socket;
+ }
+
+ ctx->socket = s;
+ ctx->sa_family = family;
+ return 0;
+
+err_socket:
+ close(s);
+ return -1;
+}
+
+static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ return ovpn_socket(ctx, family, IPPROTO_UDP);
+}
+
+static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ int ret;
+
+ ret = ovpn_socket(ctx, family, IPPROTO_TCP);
+ if (ret < 0)
+ return ret;
+
+ ret = listen(ctx->socket, 10);
+ if (ret < 0) {
+ perror("listen");
+ close(ctx->socket);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_accept(struct ovpn_ctx *ctx)
+{
+ socklen_t socklen;
+ int ret;
+
+ socklen = sizeof(ctx->remote);
+ ret = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen);
+ if (ret < 0) {
+ perror("accept");
+ goto err;
+ }
+
+ fprintf(stderr, "Connection received!\n");
+
+ switch (socklen) {
+ case sizeof(struct sockaddr_in):
+ case sizeof(struct sockaddr_in6):
+ break;
+ default:
+ fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n");
+ close(ret);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ return ret;
+err:
+ close(ctx->socket);
+ return ret;
+}
+
+static int ovpn_connect(struct ovpn_ctx *ovpn)
+{
+ socklen_t socklen;
+ int s, ret;
+
+ s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ socklen = sizeof(struct sockaddr_in);
+ break;
+ case AF_INET6:
+ socklen = sizeof(struct sockaddr_in6);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen);
+ if (ret < 0) {
+ perror("connect");
+ goto err;
+ }
+
+ fprintf(stderr, "connected\n");
+
+ ovpn->socket = s;
+
+ return 0;
+err:
+ close(s);
+ return ret;
+}
+
+static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket);
+
+ if (!is_tcp) {
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4,
+ ovpn->remote.in4.sin_addr.s_addr);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in4.sin_port);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6,
+ sizeof(ovpn->remote.in6.sin6_addr),
+ &ovpn->remote.in6.sin6_addr);
+ NLA_PUT_U32(ctx->nl_msg,
+ OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID,
+ ovpn->remote.in6.sin6_scope_id);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in6.sin6_port);
+ break;
+ default:
+ fprintf(stderr,
+ "Invalid family for remote socket address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ if (ovpn->peer_ip_set) {
+ switch (ovpn->peer_ip.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4,
+ ovpn->peer_ip.in4.sin_addr.s_addr);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6,
+ sizeof(struct in6_addr),
+ &ovpn->peer_ip.in6.sin6_addr);
+ break;
+ default:
+ fprintf(stderr, "Invalid family for peer address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_set_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL,
+ ovpn->keepalive_interval);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT,
+ ovpn->keepalive_timeout);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *pattrs[OVPN_A_PEER_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ __u16 rport = 0, lport = 0;
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_PEER]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]),
+ nla_len(attrs[OVPN_A_PEER]), NULL);
+
+ if (pattrs[OVPN_A_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_ID]));
+
+ if (pattrs[OVPN_A_PEER_SOCKET_NETNSID])
+ fprintf(stderr, "\tsocket NetNS ID: %d\n",
+ nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID]));
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV4]) {
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv4: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV6]) {
+ char buf[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv6: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_LOCAL_PORT])
+ lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_PORT])
+ rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6];
+ char buf[INET6_ADDRSTRLEN];
+ int scope_id = -1;
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) {
+ void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID];
+
+ scope_id = nla_get_u32(p);
+ }
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport,
+ scope_id);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6];
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4];
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) {
+ void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4];
+
+ inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) {
+ void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL];
+
+ fprintf(stderr, "\tKeepalive interval: %u sec\n",
+ nla_get_u32(p));
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])
+ fprintf(stderr, "\tKeepalive timeout: %u sec\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_BYTES])
+ fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_BYTES])
+ fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS])
+ fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS])
+ fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_BYTES])
+ fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_BYTES])
+ fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS])
+ fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS])
+ fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS]));
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_peer(struct ovpn_ctx *ovpn)
+{
+ int flags = 0, ret = -1;
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ flags = NLM_F_DUMP;
+
+ ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (ovpn->peer_id != PEER_ID_UNDEF) {
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+ }
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_new_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf, *key_dir;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *kattrs[OVPN_A_KEYCONF_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_KEYCONF]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(kattrs, OVPN_A_KEYCONF_MAX, nla_data(attrs[OVPN_A_KEYCONF]),
+ nla_len(attrs[OVPN_A_KEYCONF]), NULL);
+
+ if (kattrs[OVPN_A_KEYCONF_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_PEER_ID]));
+ if (kattrs[OVPN_A_KEYCONF_SLOT]) {
+ fprintf(stderr, "\t- Slot: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])) {
+ case OVPN_KEY_SLOT_PRIMARY:
+ fprintf(stderr, "primary\n");
+ break;
+ case OVPN_KEY_SLOT_SECONDARY:
+ fprintf(stderr, "secondary\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT]));
+ break;
+ }
+ }
+ if (kattrs[OVPN_A_KEYCONF_KEY_ID])
+ fprintf(stderr, "\t- Key ID: %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_KEY_ID]));
+ if (kattrs[OVPN_A_KEYCONF_CIPHER_ALG]) {
+ fprintf(stderr, "\t- Cipher: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])) {
+ case OVPN_CIPHER_ALG_NONE:
+ fprintf(stderr, "none\n");
+ break;
+ case OVPN_CIPHER_ALG_AES_GCM:
+ fprintf(stderr, "aes-gcm\n");
+ break;
+ case OVPN_CIPHER_ALG_CHACHA20_POLY1305:
+ fprintf(stderr, "chacha20poly1305\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG]));
+ break;
+ }
+ }
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_key);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_swap_keys(struct ovpn_ctx *ovpn)
+{
+ struct nl_ctx *ctx;
+ struct nlattr *kc;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP);
+ if (!ctx)
+ return -ENOMEM;
+
+ kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, kc);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+/* Helper function used to easily add attributes to a rtnl message */
+static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type,
+ const void *data, int alen)
+{
+ int len = RTA_LENGTH(alen);
+ struct rtattr *rta;
+
+ if ((int)(NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len)) > maxlen) {
+ fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -EMSGSIZE;
+ }
+
+ rta = nlmsg_tail(n);
+ rta->rta_type = type;
+ rta->rta_len = len;
+
+ if (!data)
+ memset(RTA_DATA(rta), 0, alen);
+ else
+ memcpy(RTA_DATA(rta), data, alen);
+
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+
+ return 0;
+}
+
+static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size,
+ int attr)
+{
+ struct rtattr *nest = nlmsg_tail(msg);
+
+ if (ovpn_addattr(msg, max_size, attr, NULL, 0) < 0)
+ return NULL;
+
+ return nest;
+}
+
+static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest)
+{
+ nest->rta_len = (uint8_t *)nlmsg_tail(msg) - (uint8_t *)nest;
+}
+
+#define RT_SNDBUF_SIZE (1024 * 2)
+#define RT_RCVBUF_SIZE (1024 * 4)
+
+/* Open RTNL socket */
+static int ovpn_rt_socket(void)
+{
+ int sndbuf = RT_SNDBUF_SIZE, rcvbuf = RT_RCVBUF_SIZE, fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd < 0) {
+ fprintf(stderr, "%s: cannot open netlink socket\n", __func__);
+ return fd;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf,
+ sizeof(sndbuf)) < 0) {
+ fprintf(stderr, "%s: SO_SNDBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf,
+ sizeof(rcvbuf)) < 0) {
+ fprintf(stderr, "%s: SO_RCVBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+/* Bind socket to Netlink subsystem */
+static int ovpn_rt_bind(int fd, uint32_t groups)
+{
+ struct sockaddr_nl local = { 0 };
+ socklen_t addr_len;
+
+ local.nl_family = AF_NETLINK;
+ local.nl_groups = groups;
+
+ if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
+ fprintf(stderr, "%s: cannot bind netlink socket: %d\n",
+ __func__, errno);
+ return -errno;
+ }
+
+ addr_len = sizeof(local);
+ if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) {
+ fprintf(stderr, "%s: cannot getsockname: %d\n", __func__,
+ errno);
+ return -errno;
+ }
+
+ if (addr_len != sizeof(local)) {
+ fprintf(stderr, "%s: wrong address length %d\n", __func__,
+ addr_len);
+ return -EINVAL;
+ }
+
+ if (local.nl_family != AF_NETLINK) {
+ fprintf(stderr, "%s: wrong address family %d\n", __func__,
+ local.nl_family);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg);
+
+/* Send Netlink message and run callback on reply (if specified) */
+static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer,
+ unsigned int groups, ovpn_parse_reply_cb cb,
+ void *arg_cb)
+{
+ int len, rem_len, fd, ret, rcv_len;
+ struct sockaddr_nl nladdr = { 0 };
+ struct nlmsgerr *err;
+ struct nlmsghdr *h;
+ char buf[1024 * 16];
+ struct iovec iov = {
+ .iov_base = payload,
+ .iov_len = payload->nlmsg_len,
+ };
+ struct msghdr nlmsg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ nladdr.nl_family = AF_NETLINK;
+ nladdr.nl_pid = peer;
+ nladdr.nl_groups = groups;
+
+ payload->nlmsg_seq = time(NULL);
+
+ /* no need to send reply */
+ if (!cb)
+ payload->nlmsg_flags |= NLM_F_ACK;
+
+ fd = ovpn_rt_socket();
+ if (fd < 0) {
+ fprintf(stderr, "%s: can't open rtnl socket\n", __func__);
+ return -errno;
+ }
+
+ ret = ovpn_rt_bind(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: can't bind rtnl socket\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ ret = sendmsg(fd, &nlmsg, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ /* prepare buffer to store RTNL replies */
+ memset(buf, 0, sizeof(buf));
+ iov.iov_base = buf;
+
+ while (1) {
+ /*
+ * iov_len is modified by recvmsg(), therefore has to be initialized before
+ * using it again
+ */
+ iov.iov_len = sizeof(buf);
+ rcv_len = recvmsg(fd, &nlmsg, 0);
+ if (rcv_len < 0) {
+ if (errno == EINTR || errno == EAGAIN) {
+ fprintf(stderr, "%s: interrupted call\n",
+ __func__);
+ continue;
+ }
+ fprintf(stderr, "%s: rtnl: error on recvmsg()\n",
+ __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ if (rcv_len == 0) {
+ fprintf(stderr,
+ "%s: rtnl: socket reached unexpected EOF\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (nlmsg.msg_namelen != sizeof(nladdr)) {
+ fprintf(stderr,
+ "%s: sender address length: %u (expected %zu)\n",
+ __func__, nlmsg.msg_namelen, sizeof(nladdr));
+ ret = -EIO;
+ goto out;
+ }
+
+ h = (struct nlmsghdr *)buf;
+ while (rcv_len >= (int)sizeof(*h)) {
+ len = h->nlmsg_len;
+ rem_len = len - sizeof(*h);
+
+ if (rem_len < 0 || len > rcv_len) {
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: truncated message\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+ fprintf(stderr, "%s: malformed message: len=%d\n",
+ __func__, len);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_DONE) {
+ ret = 0;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ err = (struct nlmsgerr *)NLMSG_DATA(h);
+ if (rem_len < (int)sizeof(struct nlmsgerr)) {
+ fprintf(stderr, "%s: ERROR truncated\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (err->error) {
+ fprintf(stderr, "%s: (%d) %s\n",
+ __func__, err->error,
+ strerror(-err->error));
+ ret = err->error;
+ goto out;
+ }
+
+ ret = 0;
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0)
+ ret = r;
+ }
+ goto out;
+ }
+
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0) {
+ ret = r;
+ goto out;
+ }
+ } else {
+ fprintf(stderr, "%s: RTNL: unexpected reply\n",
+ __func__);
+ }
+
+ rcv_len -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((uint8_t *)h +
+ NLMSG_ALIGN(len));
+ }
+
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: message truncated\n", __func__);
+ continue;
+ }
+
+ if (rcv_len) {
+ fprintf(stderr, "%s: rtnl: %d not parsed bytes\n",
+ __func__, rcv_len);
+ ret = -1;
+ goto out;
+ }
+ }
+out:
+ close(fd);
+
+ return ret;
+}
+
+struct ovpn_link_req {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[256];
+};
+
+static int ovpn_new_iface(struct ovpn_ctx *ovpn)
+{
+ struct rtattr *linkinfo, *data;
+ struct ovpn_link_req req = { 0 };
+ int ret = -1;
+
+ fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname,
+ ovpn->mode);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname,
+ strlen(ovpn->ifname) + 1) < 0)
+ goto err;
+
+ linkinfo = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO);
+ if (!linkinfo)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME,
+ strlen(OVPN_FAMILY_NAME) + 1) < 0)
+ goto err;
+
+ if (ovpn->mode_set) {
+ data = ovpn_nest_start(&req.n, sizeof(req), IFLA_INFO_DATA);
+ if (!data)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE,
+ &ovpn->mode, sizeof(uint8_t)) < 0)
+ goto err;
+
+ ovpn_nest_end(&req.n, data);
+ }
+
+ ovpn_nest_end(&req.n, linkinfo);
+
+ req.i.ifi_family = AF_PACKET;
+
+ ret = ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+err:
+ return ret;
+}
+
+static int ovpn_del_iface(struct ovpn_ctx *ovpn)
+{
+ struct ovpn_link_req req = { 0 };
+
+ fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname,
+ ovpn->ifindex);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST;
+ req.n.nlmsg_type = RTM_DELLINK;
+
+ req.i.ifi_family = AF_PACKET;
+ req.i.ifi_index = ovpn->ifindex;
+
+ return ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+}
+
+static int nl_seq_check(struct nl_msg (*msg)__always_unused,
+ void (*arg)__always_unused)
+{
+ return NL_OK;
+}
+
+struct mcast_handler_args {
+ const char *group;
+ int id;
+};
+
+static int mcast_family_handler(struct nl_msg *msg, void *arg)
+{
+ struct mcast_handler_args *grp = arg;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *mcgrp;
+ int rem_mcgrp;
+
+ nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!tb[CTRL_ATTR_MCAST_GROUPS])
+ return NL_SKIP;
+
+ nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) {
+ struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1];
+
+ nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX,
+ nla_data(mcgrp), nla_len(mcgrp), NULL);
+
+ if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] ||
+ !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID])
+ continue;
+ if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]),
+ grp->group, nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME])))
+ continue;
+ grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]);
+ break;
+ }
+
+ return NL_SKIP;
+}
+
+static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ int *ret = arg;
+
+ *ret = err->error;
+ return NL_STOP;
+}
+
+static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg)
+{
+ int *ret = arg;
+
+ *ret = 0;
+ return NL_STOP;
+}
+
+static int ovpn_handle_msg(struct nl_msg *msg, void *arg)
+{
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ struct nlmsghdr *nlh = nlmsg_hdr(msg);
+ char ifname[IF_NAMESIZE];
+ int *ret = arg;
+ __u32 ifindex;
+
+ fprintf(stderr, "received message from ovpn-dco\n");
+
+ *ret = -1;
+
+ if (!genlmsg_valid_hdr(nlh, 0)) {
+ fprintf(stderr, "invalid header\n");
+ return NL_STOP;
+ }
+
+ if (nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL)) {
+ fprintf(stderr, "received bogus data from ovpn-dco\n");
+ return NL_STOP;
+ }
+
+ if (!attrs[OVPN_A_IFINDEX]) {
+ fprintf(stderr, "no ifindex in this message\n");
+ return NL_STOP;
+ }
+
+ ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]);
+ if (!if_indextoname(ifindex, ifname)) {
+ fprintf(stderr, "cannot resolve ifname for ifindex: %u\n",
+ ifindex);
+ return NL_STOP;
+ }
+
+ switch (gnlh->cmd) {
+ case OVPN_CMD_PEER_DEL_NTF:
+ fprintf(stdout, "received CMD_PEER_DEL_NTF\n");
+ break;
+ case OVPN_CMD_KEY_SWAP_NTF:
+ fprintf(stdout, "received CMD_KEY_SWAP_NTF\n");
+ break;
+ default:
+ fprintf(stderr, "received unknown command: %d\n", gnlh->cmd);
+ return NL_STOP;
+ }
+
+ *ret = 0;
+ return NL_OK;
+}
+
+static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family,
+ const char *group)
+{
+ struct nl_msg *msg;
+ struct nl_cb *cb;
+ int ret, ctrlid;
+ struct mcast_handler_args grp = {
+ .group = group,
+ .id = -ENOENT,
+ };
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return -ENOMEM;
+
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!cb) {
+ ret = -ENOMEM;
+ goto out_fail_cb;
+ }
+
+ ctrlid = genl_ctrl_resolve(sock, "nlctrl");
+
+ genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0);
+
+ ret = -ENOBUFS;
+ NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+ ret = nl_send_auto_complete(sock, msg);
+ if (ret < 0)
+ goto nla_put_failure;
+
+ ret = 1;
+
+ nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret);
+ nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp);
+
+ while (ret > 0)
+ nl_recvmsgs(sock, cb);
+
+ if (ret == 0)
+ ret = grp.id;
+ nla_put_failure:
+ nl_cb_put(cb);
+ out_fail_cb:
+ nlmsg_free(msg);
+ return ret;
+}
+
+static int ovpn_listen_mcast(void)
+{
+ struct nl_sock *sock;
+ struct nl_cb *cb;
+ int mcid, ret;
+
+ sock = nl_socket_alloc();
+ if (!sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(sock, 8192, 8192);
+
+ ret = genl_connect(sock);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_free;
+ }
+
+ mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS);
+ if (mcid < 0) {
+ fprintf(stderr, "cannot get mcast group: %s\n",
+ nl_geterror(mcid));
+ goto err_free;
+ }
+
+ ret = nl_socket_add_membership(sock, mcid);
+ if (ret) {
+ fprintf(stderr, "failed to join mcast group: %d\n", ret);
+ goto err_free;
+ }
+
+ ret = 1;
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret);
+ nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret);
+
+ while (ret == 1) {
+ int err = nl_recvmsgs(sock, cb);
+
+ if (err < 0) {
+ fprintf(stderr,
+ "cannot receive netlink message: (%d) %s\n",
+ err, nl_geterror(-err));
+ ret = -1;
+ break;
+ }
+ }
+
+ nl_cb_put(cb);
+err_free:
+ nl_socket_free(sock);
+ return ret;
+}
+
+static void usage(const char *cmd)
+{
+ fprintf(stderr,
+ "Usage %s <command> <iface> [arguments..]\n",
+ cmd);
+ fprintf(stderr, "where <command> can be one of the following\n\n");
+
+ fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tmode:\n");
+ fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n");
+ fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n");
+
+ fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+
+ fprintf(stderr,
+ "* listen <iface> <lport> <peers_file> [ipv6]: listen for incoming peer TCP connections\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: TCP port to listen to\n");
+ fprintf(stderr,
+ "\tpeers_file: file containing one peer per line: Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <vpnaddr>\n");
+ fprintf(stderr,
+ "\tipv6: whether the socket should listen to the IPv6 wildcard address\n");
+
+ fprintf(stderr,
+ "* connect <iface> <peer_id> <raddr> <rport> [key_file]: start connecting peer of TCP-based VPN session\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n");
+ fprintf(stderr, "\traddr: peer IP address to connect to\n");
+ fprintf(stderr, "\trport: peer TCP port to connect to\n");
+ fprintf(stderr,
+ "\tkey_file: file containing the symmetric key for encryption\n");
+
+ fprintf(stderr,
+ "* new_peer <iface> <peer_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID to be used in data packets to/from this peer\n");
+ fprintf(stderr, "\traddr: peer IP address\n");
+ fprintf(stderr, "\trport: peer UDP port\n");
+ fprintf(stderr, "\tvpnaddr: peer VPN IP\n");
+
+ fprintf(stderr,
+ "* new_multi_peer <iface> <lport> <peers_file>: add multiple peers as listed in the file\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr,
+ "\tpeers_file: text file containing one peer per line. Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <raddr> <rport> <vpnaddr>\n");
+
+ fprintf(stderr,
+ "* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr,
+ "\tkeepalive_interval: interval for sending ping messages\n");
+ fprintf(stderr,
+ "\tkeepalive_timeout: time after which a peer is timed out\n");
+
+ fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n");
+
+ fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to query. All peers are returned if omitted\n");
+
+ fprintf(stderr,
+ "* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to configure the key for\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+ fprintf(stderr, "\tkey_id: an ID from 0 to 7\n");
+ fprintf(stderr,
+ "\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n");
+ fprintf(stderr,
+ "\tkey_dir: key direction, must 0 on one host and 1 on the other\n");
+ fprintf(stderr, "\tkey_file: file containing the pre-shared key\n");
+
+ fprintf(stderr,
+ "* del_key <iface> <peer_id> [slot]: erase existing data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n");
+
+ fprintf(stderr,
+ "* get_key <iface> <peer_id> <slot>: retrieve non sensible key data\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+
+ fprintf(stderr,
+ "* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+
+ fprintf(stderr,
+ "* listen_mcast: listen to ovpn netlink multicast messages\n");
+}
+
+static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host,
+ const char *service, const char *vpnip)
+{
+ int ret;
+ struct addrinfo *result;
+ struct addrinfo hints = {
+ .ai_family = ovpn->sa_family,
+ .ai_socktype = SOCK_DGRAM,
+ .ai_protocol = IPPROTO_UDP
+ };
+
+ if (host) {
+ ret = getaddrinfo(host, service, &hints, &result);
+ if (ret) {
+ fprintf(stderr, "getaddrinfo on remote error: %s\n",
+ gai_strerror(ret));
+ return -1;
+ }
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen);
+ }
+
+ if (vpnip) {
+ ret = getaddrinfo(vpnip, NULL, &hints, &result);
+ if (ret) {
+ fprintf(stderr, "getaddrinfo on vpnip error: %s\n",
+ gai_strerror(ret));
+ return -1;
+ }
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen);
+ ovpn->sa_family = result->ai_family;
+
+ ovpn->peer_ip_set = true;
+ }
+
+ ret = 0;
+out:
+ freeaddrinfo(result);
+ return ret;
+}
+
+static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id,
+ const char *raddr, const char *rport,
+ const char *vpnip)
+{
+ ovpn->peer_id = strtoul(peer_id, NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ return ovpn_parse_remote(ovpn, raddr, rport, vpnip);
+}
+
+static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn)
+{
+ int slot = strtoul(arg, NULL, 10);
+
+ if (errno == ERANGE || slot < 1 || slot > 2) {
+ fprintf(stderr, "key slot out of range\n");
+ return -1;
+ }
+
+ switch (slot) {
+ case 1:
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ break;
+ case 2:
+ ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY;
+ break;
+ }
+
+ return 0;
+}
+
+static int ovpn_send_tcp_data(int socket)
+{
+ uint16_t len = htons(1000);
+ uint8_t buf[1002];
+ int ret;
+
+ memcpy(buf, &len, sizeof(len));
+ memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len));
+
+ ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+ fprintf(stdout, "Sent %u bytes over TCP socket\n", ret);
+
+ return ret > 0 ? 0 : ret;
+}
+
+static int ovpn_recv_tcp_data(int socket)
+{
+ uint8_t buf[1002];
+ uint16_t len;
+ int ret;
+
+ ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+ if (ret < 2) {
+ fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret);
+ return ret;
+ }
+
+ memcpy(&len, buf, sizeof(len));
+ len = ntohs(len);
+
+ fprintf(stdout, ">>>> Received %u bytes over TCP socket, header: %u\n",
+ ret, len);
+
+ return 0;
+}
+
+static enum ovpn_cmd ovpn_parse_cmd(const char *cmd)
+{
+ if (!strcmp(cmd, "new_iface"))
+ return CMD_NEW_IFACE;
+
+ if (!strcmp(cmd, "del_iface"))
+ return CMD_DEL_IFACE;
+
+ if (!strcmp(cmd, "listen"))
+ return CMD_LISTEN;
+
+ if (!strcmp(cmd, "connect"))
+ return CMD_CONNECT;
+
+ if (!strcmp(cmd, "new_peer"))
+ return CMD_NEW_PEER;
+
+ if (!strcmp(cmd, "new_multi_peer"))
+ return CMD_NEW_MULTI_PEER;
+
+ if (!strcmp(cmd, "set_peer"))
+ return CMD_SET_PEER;
+
+ if (!strcmp(cmd, "del_peer"))
+ return CMD_DEL_PEER;
+
+ if (!strcmp(cmd, "get_peer"))
+ return CMD_GET_PEER;
+
+ if (!strcmp(cmd, "new_key"))
+ return CMD_NEW_KEY;
+
+ if (!strcmp(cmd, "del_key"))
+ return CMD_DEL_KEY;
+
+ if (!strcmp(cmd, "get_key"))
+ return CMD_GET_KEY;
+
+ if (!strcmp(cmd, "swap_keys"))
+ return CMD_SWAP_KEYS;
+
+ if (!strcmp(cmd, "listen_mcast"))
+ return CMD_LISTEN_MCAST;
+
+ return CMD_INVALID;
+}
+
+/* Send process to background and waits for signal.
+ *
+ * This helper is called at the end of commands
+ * creating sockets, so that the latter stay alive
+ * along with the process that created them.
+ *
+ * A signal is expected to be delivered in order to
+ * terminate the waiting processes
+ */
+static void ovpn_waitbg(void)
+{
+ daemon(1, 1);
+ pause();
+}
+
+static int ovpn_run_cmd(struct ovpn_ctx *ovpn)
+{
+ char peer_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128], lport[10];
+ char raddr[128], rport[10];
+ int n, ret;
+ FILE *fp;
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ ret = ovpn_new_iface(ovpn);
+ break;
+ case CMD_DEL_IFACE:
+ ret = ovpn_del_iface(ovpn);
+ break;
+ case CMD_LISTEN:
+ ret = ovpn_listen(ovpn, ovpn->sa_family);
+ if (ret < 0) {
+ fprintf(stderr, "cannot listen on TCP socket\n");
+ return ret;
+ }
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
+ int num_peers = 0;
+
+ while ((n = fscanf(fp, "%s %s\n", peer_id, vpnip)) == 2) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ if (num_peers == MAX_PEERS) {
+ fprintf(stderr, "max peers reached!\n");
+ return -E2BIG;
+ }
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.sa_family = ovpn->sa_family;
+
+ peer_ctx.socket = ovpn_accept(ovpn);
+ if (peer_ctx.socket < 0) {
+ fprintf(stderr, "cannot accept connection!\n");
+ return -1;
+ }
+
+ /* store peer sockets to test TCP I/O */
+ ovpn->cli_sockets[num_peers] = peer_ctx.socket;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL,
+ NULL, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, true);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s\n",
+ peer_id, vpnip);
+ return ret;
+ }
+ num_peers++;
+ }
+
+ for (int i = 0; i < num_peers; i++) {
+ ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]);
+ if (ret < 0)
+ break;
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_CONNECT:
+ ret = ovpn_connect(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect TCP socket\n");
+ return ret;
+ }
+
+ ret = ovpn_new_peer(ovpn, true);
+ if (ret < 0) {
+ fprintf(stderr, "cannot add peer to VPN\n");
+ close(ovpn->socket);
+ return ret;
+ }
+
+ if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) {
+ ret = ovpn_new_key(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot set key\n");
+ return ret;
+ }
+ }
+
+ ret = ovpn_send_tcp_data(ovpn->socket);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ ret = ovpn_new_peer(ovpn, false);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_MULTI_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
+ while ((n = fscanf(fp, "%s %s %s %s %s %s\n", peer_id, laddr,
+ lport, raddr, rport, vpnip)) == 6) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.socket = ovpn->socket;
+ peer_ctx.sa_family = AF_UNSPEC;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr,
+ rport, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, false);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s %s %s\n",
+ peer_id, raddr, rport, vpnip);
+ return ret;
+ }
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_SET_PEER:
+ ret = ovpn_set_peer(ovpn);
+ break;
+ case CMD_DEL_PEER:
+ ret = ovpn_del_peer(ovpn);
+ break;
+ case CMD_GET_PEER:
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ fprintf(stderr, "List of peers connected to: %s\n",
+ ovpn->ifname);
+
+ ret = ovpn_get_peer(ovpn);
+ break;
+ case CMD_NEW_KEY:
+ ret = ovpn_new_key(ovpn);
+ break;
+ case CMD_DEL_KEY:
+ ret = ovpn_del_key(ovpn);
+ break;
+ case CMD_GET_KEY:
+ ret = ovpn_get_key(ovpn);
+ break;
+ case CMD_SWAP_KEYS:
+ ret = ovpn_swap_keys(ovpn);
+ break;
+ case CMD_LISTEN_MCAST:
+ ret = ovpn_listen_mcast();
+ break;
+ case CMD_INVALID:
+ break;
+ }
+
+ return ret;
+}
+
+static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[])
+{
+ int ret;
+
+ /* no args required for LISTEN_MCAST */
+ if (ovpn->cmd == CMD_LISTEN_MCAST)
+ return 0;
+
+ /* all commands need an ifname */
+ if (argc < 3)
+ return -EINVAL;
+
+ strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1);
+ ovpn->ifname[IFNAMSIZ - 1] = '\0';
+
+ /* all commands, except NEW_IFNAME, needs an ifindex */
+ if (ovpn->cmd != CMD_NEW_IFACE) {
+ ovpn->ifindex = if_nametoindex(ovpn->ifname);
+ if (!ovpn->ifindex) {
+ fprintf(stderr, "cannot find interface: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ }
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ if (argc < 4)
+ break;
+
+ if (!strcmp(argv[3], "P2P")) {
+ ovpn->mode = OVPN_MODE_P2P;
+ } else if (!strcmp(argv[3], "MP")) {
+ ovpn->mode = OVPN_MODE_MP;
+ } else {
+ fprintf(stderr, "Cannot parse iface mode: %s\n",
+ argv[3]);
+ return -1;
+ }
+ ovpn->mode_set = true;
+ break;
+ case CMD_DEL_IFACE:
+ break;
+ case CMD_LISTEN:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+
+ if (argc > 5 && !strcmp(argv[5], "ipv6"))
+ ovpn->sa_family = AF_INET6;
+ break;
+ case CMD_CONNECT:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->sa_family = AF_INET;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5],
+ NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Cannot parse remote peer data\n");
+ return -1;
+ }
+
+ if (argc > 6) {
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ ovpn->key_id = 0;
+ ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ ovpn->key_dir = KEY_DIR_OUT;
+
+ ret = ovpn_parse_key(argv[6], ovpn);
+ if (ret)
+ return -1;
+ }
+ break;
+ case CMD_NEW_PEER:
+ if (argc < 7)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ const char *vpnip = (argc > 7) ? argv[7] : NULL;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6],
+ vpnip);
+ if (ret < 0)
+ return -1;
+ break;
+ case CMD_NEW_MULTI_PEER:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+ break;
+ case CMD_SET_PEER:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_interval = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+ "keepalive interval value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+ "keepalive interval value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_DEL_PEER:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_GET_PEER:
+ ovpn->peer_id = PEER_ID_UNDEF;
+ if (argc > 3) {
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ }
+ break;
+ case CMD_NEW_KEY:
+ if (argc < 9)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return -1;
+
+ ovpn->key_id = strtoul(argv[5], NULL, 10);
+ if (errno == ERANGE || ovpn->key_id > 2) {
+ fprintf(stderr, "key ID out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_cipher(argv[6], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key_direction(argv[7], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key(argv[8], ovpn);
+ if (ret)
+ return -1;
+ break;
+ case CMD_DEL_KEY:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return ret;
+ break;
+ case CMD_GET_KEY:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return ret;
+ break;
+ case CMD_SWAP_KEYS:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_LISTEN_MCAST:
+ break;
+ case CMD_INVALID:
+ break;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct ovpn_ctx ovpn;
+ int ret;
+
+ if (argc < 2) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ memset(&ovpn, 0, sizeof(ovpn));
+ ovpn.sa_family = AF_UNSPEC;
+ ovpn.cipher = OVPN_CIPHER_ALG_NONE;
+
+ ovpn.cmd = ovpn_parse_cmd(argv[1]);
+ if (ovpn.cmd == CMD_INVALID) {
+ fprintf(stderr, "Error: unknown command.\n\n");
+ usage(argv[0]);
+ return -1;
+ }
+
+ ret = ovpn_parse_cmd_args(&ovpn, argc, argv);
+ if (ret < 0) {
+ fprintf(stderr, "Error: invalid arguments.\n\n");
+ if (ret == -EINVAL)
+ usage(argv[0]);
+ return ret;
+ }
+
+ ret = ovpn_run_cmd(&ovpn);
+ if (ret)
+ fprintf(stderr, "Cannot execute command: %s (%d)\n",
+ strerror(-ret), ret);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt
new file mode 100644
index 000000000000..d753eebe8716
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt
@@ -0,0 +1,5 @@
+1 5.5.5.2
+2 5.5.5.3
+3 5.5.5.4
+4 5.5.5.5
+5 5.5.5.6
diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
new file mode 100755
index 000000000000..32504079a2b8
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+ALG="chachapoly"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
new file mode 100755
index 000000000000..093d44772ffd
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test-close-socket.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh
new file mode 100755
index 000000000000..5e48a8b67928
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+done
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh
new file mode 100755
index 000000000000..ba5d725e18b0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-float.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+FLOAT="1"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh
new file mode 100755
index 000000000000..ba3f1f315a34
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh
new file mode 100755
index 000000000000..e8acdc303307
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+ ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1))
+done
+
+# ping LAN behind client 1
+ip netns exec peer0 ping -qfc 500 -w 3 ${LAN_IP}
+
+if [ "$FLOAT" == "1" ]; then
+ # make clients float..
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p}
+ done
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1
+ done
+fi
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+echo "Adding secondary key and then swap:"
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p}
+done
+
+sleep 1
+
+echo "Querying all peers:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0
+ip netns exec peer1 ${OVPN_CLI} get_peer tun1
+
+echo "Querying peer 1:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1
+
+echo "Querying non-existent peer 10:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true
+
+echo "Deleting peer 1:"
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1
+ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1
+
+echo "Querying keys:"
+for p in $(seq 2 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2
+done
+
+echo "Deleting peer while sending traffic:"
+(ip netns exec peer2 ping -qf -w 4 5.5.5.1)&
+sleep 2
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2
+# following command fails in TCP mode
+# (both ends get conn reset when one peer disconnects)
+ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true
+
+echo "Deleting keys:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2
+done
+
+echo "Setting timeout to 3s MP:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0
+done
+# wait for peers to timeout
+sleep 5
+
+echo "Setting timeout to 3s P2P:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3
+done
+sleep 5
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt
new file mode 100644
index 000000000000..e9773ddf875c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/udp_peers.txt
@@ -0,0 +1,6 @@
+1 10.10.1.1 1 10.10.1.2 1 5.5.5.2
+2 10.10.2.1 1 10.10.2.2 1 5.5.5.3
+3 10.10.3.1 1 10.10.3.2 1 5.5.5.4
+4 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5
+5 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6
+6 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 66be7699c72c..88e914c4eef9 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -205,7 +205,6 @@
# Check that PMTU exceptions are created for both paths.
source lib.sh
-source net_helper.sh
PAUSE_ON_FAIL=no
VERBOSE=0
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
index b8475cb29be7..1c43401a1c80 100644
--- a/tools/testing/selftests/net/reuseport_addr_any.c
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -9,7 +9,6 @@
#include <arpa/inet.h>
#include <errno.h>
#include <error.h>
-#include <linux/dccp.h>
#include <linux/in.h>
#include <linux/unistd.h>
#include <stdbool.h>
@@ -21,10 +20,6 @@
#include <sys/socket.h>
#include <unistd.h>
-#ifndef SOL_DCCP
-#define SOL_DCCP 269
-#endif
-
static const char *IP4_ADDR = "127.0.0.1";
static const char *IP6_ADDR = "::1";
static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
@@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count,
if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
error(1, errno, "tcp: failed to listen on receive port");
- else if (proto == SOCK_DCCP) {
- if (setsockopt(rcv_fds[i], SOL_DCCP,
- DCCP_SOCKOPT_SERVICE,
- &(int) {htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
- if (listen(rcv_fds[i], 10))
- error(1, errno, "dccp: failed to listen on receive port");
- }
}
}
@@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto)
if (fd < 0)
error(1, errno, "failed to create send socket");
- if (proto == SOCK_DCCP &&
- setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
- &(int){htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
if (bind(fd, saddr, sz))
error(1, errno, "failed to bind send socket");
@@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto)
if (i < 0)
error(1, errno, "epoll_wait failed");
- if (proto == SOCK_STREAM || proto == SOCK_DCCP) {
+ if (proto == SOCK_STREAM) {
fd = accept(ev.data.fd, NULL, NULL);
if (fd < 0)
error(1, errno, "failed to accept");
@@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto,
static void test_proto(int proto, const char *proto_str)
{
- if (proto == SOCK_DCCP) {
- int test_fd;
-
- test_fd = socket(AF_INET, proto, 0);
- if (test_fd < 0) {
- if (errno == ESOCKTNOSUPPORT) {
- fprintf(stderr, "DCCP not supported: skipping DCCP tests\n");
- return;
- } else
- error(1, errno, "failed to create a DCCP socket");
- }
- close(test_fd);
- }
-
fprintf(stderr, "%s IPv4 ... ", proto_str);
run_one_test(AF_INET, AF_INET, proto, IP4_ADDR);
@@ -271,7 +238,6 @@ int main(void)
{
test_proto(SOCK_DGRAM, "UDP");
test_proto(SOCK_STREAM, "TCP");
- test_proto(SOCK_DCCP, "DCCP");
fprintf(stderr, "SUCCESS\n");
return 0;
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
index 02d617040793..a5e959a080bb 100755
--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -285,11 +285,6 @@ setup_hs()
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index 79fb81e63c59..a649dba3cb77 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -250,11 +250,6 @@ setup_hs()
eval local rtname=\${rt_${rid}}
local rtveth=veth-t${tid}
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh
index 50563443a4ad..318487eda671 100755
--- a/tools/testing/selftests/net/srv6_end_flavors_test.sh
+++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh
@@ -399,7 +399,7 @@ __get_srv6_rtcfg_id()
# Given the description of a router <id:op> as an input, the function returns
# the <op> token which represents the operation (e.g. End behavior with or
-# withouth flavors) configured for the node.
+# without flavors) configured for the node.
# Note that when the operation represents an End behavior with a list of
# flavors, the output is the ordered version of that list.
@@ -480,7 +480,7 @@ setup_rt_local_sids()
# all SIDs start with a common locator. Routes and SRv6 Endpoint
- # behavior instaces are grouped together in the 'localsid' table.
+ # behavior instances are grouped together in the 'localsid' table.
ip -netns "${nsname}" -6 rule \
add to "${LOCATOR_SERVICE}::/16" \
lookup "${LOCALSID_TABLE_ID}" prio 999
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
index 87e414cc417c..ba730655a7bf 100755
--- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -245,10 +245,8 @@
# that adopted in the use cases already examined (of course, it is necessary to
# consider the different SIDs/C-SIDs).
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -376,32 +374,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -410,8 +394,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -420,28 +403,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
@@ -462,10 +429,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -497,7 +464,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -518,9 +485,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -596,7 +560,7 @@ setup_rt_local_sids()
local lcnode_func_prefix
local lcblock_prefix
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -668,8 +632,8 @@ __setup_l3vpn()
local rtsrc_nsname
local rtdst_nsname
- rtsrc_nsname="$(get_rtname "${src}")"
- rtdst_nsname="$(get_rtname "${dst}")"
+ eval rtsrc_nsname=\${$(get_rtname "${src}")}
+ eval rtdst_nsname=\${$(get_rtname "${dst}")}
container="${LCBLOCK_ADDR}"
@@ -744,8 +708,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -791,11 +755,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -880,7 +839,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -903,7 +862,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -915,7 +874,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1025,7 +984,7 @@ rt_x_nextcsid_end_behavior_test()
local nsname
local ret
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
__nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}"
ret="$?"
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
index c79cb8ede17f..4b86040c58c6 100755
--- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -287,10 +287,8 @@
# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46)
# and sends it to the host hs-1.
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -418,32 +416,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -452,15 +436,12 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
+ setup_ns "${nsname}"
- __create_namespace "${nsname}"
-
+ eval nsname=\${$(get_rtname "${rtid}")}
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -470,29 +451,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
# the selftest is considered as "skipped".
@@ -512,10 +476,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -547,7 +511,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -631,7 +595,7 @@ set_end_x_nextcsid()
local rt="$1"
local adj="$2"
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
net_prefix="$(get_network_prefix "${rt}" "${adj}")"
lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
@@ -650,7 +614,7 @@ set_underlay_sids_reachability()
local rt="$1"
local rt_neighs="$2"
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -685,7 +649,7 @@ setup_rt_local_sids()
local lcnode_func_prefix
local lcblock_prefix
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
set_underlay_sids_reachability "${rt}" "${rt_neighs}"
@@ -728,8 +692,8 @@ __setup_l3vpn()
local rtsrc_nsname
local rtdst_nsname
- rtsrc_nsname="$(get_rtname "${src}")"
- rtdst_nsname="$(get_rtname "${dst}")"
+ eval rtsrc_nsname=\${$(get_rtname "${src}")}
+ eval rtdst_nsname=\${$(get_rtname "${dst}")}
container="${LCBLOCK_ADDR}"
@@ -804,8 +768,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -851,11 +815,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -947,7 +906,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -970,7 +929,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -982,7 +941,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1093,7 +1052,7 @@ rt_x_nextcsid_end_x_behavior_test()
local nsname
local ret
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
__nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}"
ret="$?"
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
index 28a775654b92..3efce1718c5f 100755
--- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -166,10 +166,8 @@
# hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d)
#
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
@@ -248,32 +246,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -282,8 +266,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -292,29 +275,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
# the selftest is considered as "skipped".
@@ -334,10 +300,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -369,7 +335,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -387,9 +353,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -403,7 +366,7 @@ setup_rt_local_sids()
local nsname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -469,7 +432,7 @@ __setup_rt_policy()
local policy=''
local n
- nsname="$(get_rtname "${encap_rt}")"
+ eval nsname=\${$(get_rtname "${encap_rt}")}
for n in ${end_rts}; do
policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -516,8 +479,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -555,11 +518,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -656,7 +614,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -679,7 +637,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -691,7 +649,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
index cb4177d41b21..cabc70538ffe 100755
--- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -116,10 +116,8 @@
# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b)
#
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly RT2HS_DEVNAME="veth-hs"
readonly HS_VETH_NAME="veth0"
@@ -199,32 +197,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -233,8 +217,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -243,28 +226,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
@@ -285,10 +252,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -320,7 +287,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -341,9 +308,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -357,7 +321,7 @@ setup_rt_local_sids()
local nsname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -407,7 +371,7 @@ __setup_rt_policy()
local policy=''
local n
- nsname="$(get_rtname "${encap_rt}")"
+ eval nsname=\${$(get_rtname "${encap_rt}")}
for n in ${end_rts}; do
policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -446,7 +410,7 @@ setup_decap()
local rt="$1"
local nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
# Local End.DX2 behavior
ip -netns "${nsname}" -6 route \
@@ -463,8 +427,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -486,11 +450,6 @@ setup_hs()
add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
-
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
}
# set an auto-generated mac address
@@ -508,7 +467,7 @@ set_mac_address()
local ifname="$4"
local nsname
- nsname=$(get_nodename "${nodename}")
+ eval nsname=\${${nodename}}
ip -netns "${nsname}" link set dev "${ifname}" down
@@ -532,7 +491,7 @@ set_host_l2peer()
local hssrc_name
local ipaddr
- hssrc_name="$(get_hsname "${hssrc}")"
+ eval hssrc_name=\${$(get_hsname "${hssrc}")}
if [ "${proto}" -eq 6 ]; then
ipaddr="${ipprefix}::${hsdst}"
@@ -562,7 +521,7 @@ setup_l2vpn()
local rtdst="${hsdst}"
# set fixed mac for source node and the neigh MAC address
- set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
+ set_mac_address "hs_${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6
set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4
@@ -570,7 +529,7 @@ setup_l2vpn()
# to the mac address of the remote peer (L2 VPN destination host).
# Otherwise, traffic coming from the source host is dropped at the
# ingress router.
- set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
+ set_mac_address "rt_${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
# set the SRv6 Policies at the ingress router
setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
@@ -647,7 +606,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -670,7 +629,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -682,7 +641,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
index 02b986c9c247..9067197c9055 100755
--- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -51,7 +51,9 @@ ret=0
# All tests in this script. Can be overridden with -t option.
TESTS="
neigh_suppress_arp
+ neigh_suppress_uc_arp
neigh_suppress_ns
+ neigh_suppress_uc_ns
neigh_vlan_suppress_arp
neigh_vlan_suppress_ns
"
@@ -388,6 +390,52 @@ neigh_suppress_arp()
neigh_suppress_arp_common $vid $sip $tip
}
+neigh_suppress_uc_arp_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local tip=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast ARP, per-port ARP suppression - VLAN $vid"
+ echo "-----------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_arp()
+{
+ local vid=10
+ local sip=192.0.2.1
+ local tip=192.0.2.2
+
+ neigh_suppress_uc_arp_common $vid $sip $tip
+
+ vid=20
+ sip=192.0.2.17
+ tip=192.0.2.18
+ neigh_suppress_uc_arp_common $vid $sip $tip
+}
+
neigh_suppress_ns_common()
{
local vid=$1; shift
@@ -494,6 +542,78 @@ neigh_suppress_ns()
neigh_suppress_ns_common $vid $saddr $daddr $maddr
}
+icmpv6_header_get()
+{
+ local csum=$1; shift
+ local tip=$1; shift
+ local type
+ local p
+
+ # Type 135 (Neighbor Solicitation), hex format
+ type="87"
+ p=$(:
+ )"$type:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"$csum:"$( : ICMPv6.checksum
+ )"00:00:00:00:"$( : Reserved
+ )"$tip:"$( : Target Address
+ )
+ echo $p
+}
+
+neigh_suppress_uc_ns_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local full_dip=$1; shift
+ local csum=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast NS, per-port NS suppression - VLAN $vid"
+ echo "---------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_ns()
+{
+ local vid=10
+ local saddr=2001:db8:1::1
+ local daddr=2001:db8:1::2
+ local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02
+ local csum="ef:79"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+
+ vid=20
+ saddr=2001:db8:2::1
+ daddr=2001:db8:2::2
+ full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02
+ csum="ef:76"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+}
+
neigh_vlan_suppress_arp()
{
local vid1=10
@@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then
exit $ksft_skip
fi
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
bridge link help 2>&1 | grep -q "neigh_vlan_suppress"
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support"
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index d5ffd8c9172e..1dc337c709f8 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,7 +3,7 @@
#
# Run a series of udpgro functional tests.
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 815fad8c53a8..54fa4821bc5e 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,7 +3,7 @@
#
# Run a series of udpgro benchmarks
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 5f3d1a110d11..9a2cfec1153e 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,7 +3,7 @@
#
# Run a series of udpgro benchmarks
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index f22f6c66997e..a39fdc4aa2ff 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source net_helper.sh
+source lib.sh
BPF_FILE="lib/xdp_dummy.bpf.o"
readonly BASE="ns-$(mktemp -u XXXXXX)"
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 58bcbbd029bc..94176ffe4646 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -56,6 +56,8 @@ ARCH_mips32be = mips
ARCH_riscv32 = riscv
ARCH_riscv64 = riscv
ARCH_s390x = s390
+ARCH_sparc32 = sparc
+ARCH_sparc64 = sparc
ARCH := $(or $(ARCH_$(XARCH)),$(XARCH))
# kernel image names by architecture
@@ -76,6 +78,9 @@ IMAGE_riscv64 = arch/riscv/boot/Image
IMAGE_s390x = arch/s390/boot/bzImage
IMAGE_s390 = arch/s390/boot/bzImage
IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi
+IMAGE_sparc32 = arch/sparc/boot/image
+IMAGE_sparc64 = arch/sparc/boot/image
+IMAGE_m68k = vmlinux
IMAGE = $(objtree)/$(IMAGE_$(XARCH))
IMAGE_NAME = $(notdir $(IMAGE))
@@ -97,9 +102,15 @@ DEFCONFIG_riscv64 = defconfig
DEFCONFIG_s390x = defconfig
DEFCONFIG_s390 = defconfig compat.config
DEFCONFIG_loongarch = defconfig
+DEFCONFIG_sparc32 = sparc32_defconfig
+DEFCONFIG_sparc64 = sparc64_defconfig
+DEFCONFIG_m68k = virt_defconfig
DEFCONFIG = $(DEFCONFIG_$(XARCH))
+EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD
EXTRACONFIG = $(EXTRACONFIG_$(XARCH))
+EXTRACONFIG_arm = -e CONFIG_NAMESPACES
+EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES
# optional tests to run (default = all)
TEST =
@@ -122,6 +133,9 @@ QEMU_ARCH_riscv64 = riscv64
QEMU_ARCH_s390x = s390x
QEMU_ARCH_s390 = s390x
QEMU_ARCH_loongarch = loongarch64
+QEMU_ARCH_sparc32 = sparc
+QEMU_ARCH_sparc64 = sparc64
+QEMU_ARCH_m68k = m68k
QEMU_ARCH = $(QEMU_ARCH_$(XARCH))
QEMU_ARCH_USER_ppc64le = ppc64le
@@ -152,6 +166,9 @@ QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_T
QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
# OUTPUT is only set when run from the main makefile, otherwise
@@ -174,10 +191,15 @@ CFLAGS_s390x = -m64
CFLAGS_s390 = -m31
CFLAGS_mips32le = -EL -mabi=32 -fPIC
CFLAGS_mips32be = -EB -mabi=32
+CFLAGS_sparc32 = $(call cc-option,-m32)
+ifeq ($(origin XARCH),command line)
+CFLAGS_XARCH = $(CFLAGS_$(XARCH))
+endif
CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
+CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all)
CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
$(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \
- $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA)
+ $(CFLAGS_XARCH) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_SANITIZER) $(CFLAGS_EXTRA)
LDFLAGS :=
LIBGCC := -lgcc
@@ -232,11 +254,11 @@ all: run
sysroot: sysroot/$(ARCH)/include
-sysroot/$(ARCH)/include: | defconfig
+sysroot/$(ARCH)/include:
$(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot
$(QUIET_MKDIR)mkdir -p sysroot
$(Q)$(MAKE) -C $(srctree) outputmakefile
- $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone
+ $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check
$(Q)mv sysroot/sysroot sysroot/$(ARCH)
ifneq ($(NOLIBC_SYSROOT),0)
diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
index a7ca8325863f..0636d1b6e808 100644
--- a/tools/testing/selftests/nolibc/nolibc-test-linkage.c
+++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
@@ -2,9 +2,7 @@
#include "nolibc-test-linkage.h"
-#ifndef NOLIBC
#include <errno.h>
-#endif
void *linkage_test_errno_addr(void)
{
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 5884a891c491..dbe13000fb1a 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -9,24 +9,22 @@
* $(CC) -nostdlib -I/path/to/nolibc/sysroot => _NOLIBC_* guards are present
* $(CC) with default libc => NOLIBC* never defined
*/
-#ifndef NOLIBC
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#ifndef _NOLIBC_STDIO_H
-/* standard libcs need more includes */
#include <sys/auxv.h>
-#include <sys/io.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/prctl.h>
+#include <sys/random.h>
#include <sys/reboot.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/sysmacros.h>
#include <sys/time.h>
+#include <sys/timerfd.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <dirent.h>
@@ -38,10 +36,10 @@
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
+#include <time.h>
#include <unistd.h>
#include <limits.h>
-#endif
-#endif
+#include <ctype.h>
#pragma GCC diagnostic ignored "-Wmissing-prototypes"
@@ -807,6 +805,26 @@ static int test_dirent(void)
return 0;
}
+int test_getrandom(void)
+{
+ uint64_t rng = 0;
+ ssize_t ret;
+
+ ret = getrandom(&rng, sizeof(rng), GRND_NONBLOCK);
+ if (ret == -1 && errno == EAGAIN)
+ return 0; /* No entropy available yet */
+
+ if (ret != sizeof(rng))
+ return ret;
+
+ if (!rng) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ return 0;
+}
+
int test_getpagesize(void)
{
int x = getpagesize();
@@ -836,6 +854,29 @@ int test_getpagesize(void)
return !c;
}
+int test_file_stream(void)
+{
+ FILE *f;
+ int r;
+
+ f = fopen("/dev/null", "r");
+ if (!f)
+ return -1;
+
+ errno = 0;
+ r = fwrite("foo", 1, 3, f);
+ if (r != 0 || errno != EBADF) {
+ fclose(f);
+ return -1;
+ }
+
+ r = fclose(f);
+ if (r == EOF)
+ return -1;
+
+ return 0;
+}
+
int test_fork(void)
{
int status;
@@ -883,6 +924,102 @@ int test_stat_timestamps(void)
return 0;
}
+int test_timer(void)
+{
+ struct itimerspec timerspec;
+ struct sigevent evp;
+ timer_t timer;
+ int ret;
+
+ evp.sigev_notify = SIGEV_NONE;
+
+ ret = timer_create(CLOCK_MONOTONIC, &evp, &timer);
+ if (ret)
+ return ret;
+
+ timerspec = (struct itimerspec) {
+ .it_value.tv_sec = 1000000,
+ };
+ ret = timer_settime(timer, 0, &timerspec, NULL);
+ if (ret)
+ goto err;
+
+ timerspec = (struct itimerspec) {
+ .it_value.tv_sec = -1,
+ .it_value.tv_nsec = -1,
+ .it_interval.tv_sec = -1,
+ .it_interval.tv_nsec = -1,
+ };
+ ret = timer_gettime(timer, &timerspec);
+ if (ret)
+ goto err;
+
+ errno = EINVAL;
+ ret = -1;
+
+ if (timerspec.it_interval.tv_sec || timerspec.it_interval.tv_nsec)
+ goto err;
+
+ if (timerspec.it_value.tv_sec > 1000000)
+ goto err;
+
+ ret = timer_delete(timer);
+ if (ret)
+ return ret;
+
+ return 0;
+
+err:
+ timer_delete(timer);
+ return ret;
+}
+
+int test_timerfd(void)
+{
+ struct itimerspec timerspec;
+ int timer, ret;
+
+ timer = timerfd_create(CLOCK_MONOTONIC, 0);
+ if (timer == -1)
+ return -1;
+
+ timerspec = (struct itimerspec) {
+ .it_value.tv_sec = 1000000,
+ };
+ ret = timerfd_settime(timer, 0, &timerspec, NULL);
+ if (ret)
+ goto err;
+
+ timerspec = (struct itimerspec) {
+ .it_value.tv_sec = -1,
+ .it_value.tv_nsec = -1,
+ .it_interval.tv_sec = -1,
+ .it_interval.tv_nsec = -1,
+ };
+ ret = timerfd_gettime(timer, &timerspec);
+ if (ret)
+ goto err;
+
+ errno = EINVAL;
+ ret = -1;
+
+ if (timerspec.it_interval.tv_sec || timerspec.it_interval.tv_nsec)
+ goto err;
+
+ if (timerspec.it_value.tv_sec > 1000000)
+ goto err;
+
+ ret = close(timer);
+ if (ret)
+ return ret;
+
+ return 0;
+
+err:
+ close(timer);
+ return ret;
+}
+
int test_uname(void)
{
struct utsname buf;
@@ -926,7 +1063,7 @@ int test_mmap_munmap(void)
{
int ret, fd, i, page_size;
void *mem;
- size_t file_size, length;
+ size_t file_size, length, mem_length;
off_t offset, pa_offset;
struct stat stat_buf;
const char * const files[] = {
@@ -966,14 +1103,22 @@ int test_mmap_munmap(void)
offset = 0;
length = file_size - offset;
pa_offset = offset & ~(page_size - 1);
+ mem_length = length + offset - pa_offset;
- mem = mmap(NULL, length + offset - pa_offset, PROT_READ, MAP_SHARED, fd, pa_offset);
+ mem = mmap(NULL, mem_length, PROT_READ, MAP_SHARED, fd, pa_offset);
if (mem == MAP_FAILED) {
ret = 1;
goto end;
}
- ret = munmap(mem, length + offset - pa_offset);
+ mem = mremap(mem, mem_length, mem_length * 2, MREMAP_MAYMOVE, 0);
+ if (mem == MAP_FAILED) {
+ munmap(mem, mem_length);
+ ret = 1;
+ goto end;
+ }
+
+ ret = munmap(mem, mem_length * 2);
end:
close(fd);
@@ -1045,6 +1190,72 @@ int test_openat(void)
return 0;
}
+int test_namespace(void)
+{
+ int original_ns, new_ns, ret;
+ ino_t original_ns_ino;
+ struct stat stat_buf;
+
+ original_ns = open("/proc/self/ns/uts", O_RDONLY);
+ if (original_ns == -1)
+ return -1;
+
+ ret = fstat(original_ns, &stat_buf);
+ if (ret)
+ goto out;
+
+ original_ns_ino = stat_buf.st_ino;
+
+ ret = unshare(CLONE_NEWUTS);
+ if (ret)
+ goto out;
+
+ new_ns = open("/proc/self/ns/uts", O_RDONLY);
+ if (new_ns == -1) {
+ ret = new_ns;
+ goto out;
+ }
+
+ ret = fstat(new_ns, &stat_buf);
+ close(new_ns);
+ if (ret)
+ goto out;
+
+ if (stat_buf.st_ino == original_ns_ino) {
+ errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ ret = setns(original_ns, CLONE_NEWUTS);
+ if (ret)
+ goto out;
+
+ new_ns = open("/proc/self/ns/uts", O_RDONLY);
+ if (new_ns == -1) {
+ ret = new_ns;
+ goto out;
+ }
+
+ ret = fstat(new_ns, &stat_buf);
+ if (ret)
+ goto out;
+
+ close(new_ns);
+
+ if (stat_buf.st_ino != original_ns_ino) {
+ errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ close(original_ns);
+ return ret;
+}
+
/* Run syscall tests between IDs <min> and <max>.
* Return 0 on success, non-zero on failure.
*/
@@ -1052,6 +1263,7 @@ int run_syscall(int min, int max)
{
struct timeval tv;
struct timezone tz;
+ struct timespec ts;
struct stat stat_buf;
int euid0;
int proc;
@@ -1083,6 +1295,11 @@ int run_syscall(int min, int max)
* test numbers.
*/
switch (test + __LINE__ + 1) {
+ CASE_TEST(access); EXPECT_SYSZR(proc, access("/proc/self", R_OK)); break;
+ CASE_TEST(access_bad); EXPECT_SYSER(proc, access("/proc/self", W_OK), -1, EPERM); break;
+ CASE_TEST(clock_getres); EXPECT_SYSZR(1, clock_getres(CLOCK_MONOTONIC, &ts)); break;
+ CASE_TEST(clock_gettime); EXPECT_SYSZR(1, clock_gettime(CLOCK_MONOTONIC, &ts)); break;
+ CASE_TEST(clock_settime); EXPECT_SYSER(1, clock_settime(CLOCK_MONOTONIC, &ts), -1, EINVAL); break;
CASE_TEST(getpid); EXPECT_SYSNE(1, getpid(), -1); break;
CASE_TEST(getppid); EXPECT_SYSNE(1, getppid(), -1); break;
CASE_TEST(gettid); EXPECT_SYSNE(has_gettid, gettid(), -1); break;
@@ -1112,10 +1329,12 @@ int run_syscall(int min, int max)
CASE_TEST(dup3_0); tmp = dup3(0, 100, 0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break;
CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break;
CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break;
+ CASE_TEST(file_stream); EXPECT_SYSZR(1, test_file_stream()); break;
CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break;
CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break;
CASE_TEST(directories); EXPECT_SYSZR(proc, test_dirent()); break;
+ CASE_TEST(getrandom); EXPECT_SYSZR(1, test_getrandom()); break;
CASE_TEST(gettimeofday_tv); EXPECT_SYSZR(1, gettimeofday(&tv, NULL)); break;
CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break;
CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break;
@@ -1149,6 +1368,8 @@ int run_syscall(int min, int max)
CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break;
CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break;
CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break;
+ CASE_TEST(timer); EXPECT_SYSZR(1, test_timer()); break;
+ CASE_TEST(timerfd); EXPECT_SYSZR(1, test_timerfd()); break;
CASE_TEST(uname); EXPECT_SYSZR(proc, test_uname()); break;
CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break;
CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break;
@@ -1160,6 +1381,7 @@ int run_syscall(int min, int max)
CASE_TEST(write_zero); EXPECT_SYSZR(1, write(1, &tmp, 0)); break;
CASE_TEST(syscall_noargs); EXPECT_SYSEQ(1, syscall(__NR_getpid), getpid()); break;
CASE_TEST(syscall_args); EXPECT_SYSER(1, syscall(__NR_statx, 0, NULL, 0, 0, NULL), -1, EFAULT); break;
+ CASE_TEST(namespace); EXPECT_SYSZR(euid0 && proc, test_namespace()); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
@@ -1168,6 +1390,17 @@ int run_syscall(int min, int max)
return ret;
}
+int test_difftime(void)
+{
+ if (difftime(200., 100.) != 100.)
+ return 1;
+
+ if (difftime(100., 200.) != -100.)
+ return 1;
+
+ return 0;
+}
+
int run_stdlib(int min, int max)
{
int test;
@@ -1211,6 +1444,9 @@ int run_stdlib(int min, int max)
CASE_TEST(strlcpy_2); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 2), buf, 3, "b"); break;
CASE_TEST(strlcpy_3); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 3), buf, 3, "ba"); break;
CASE_TEST(strlcpy_4); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 4), buf, 3, "bar"); break;
+ CASE_TEST(strstr_foobar_foo); EXPECT_STREQ(1, strstr("foobar", "foo"), "foobar"); break;
+ CASE_TEST(strstr_foobar_bar); EXPECT_STREQ(1, strstr("foobar", "bar"), "bar"); break;
+ CASE_TEST(strstr_foobar_baz); EXPECT_PTREQ(1, strstr("foobar", "baz"), NULL); break;
CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break;
CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break;
CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break;
@@ -1281,6 +1517,13 @@ int run_stdlib(int min, int max)
CASE_TEST(strerror_EINVAL); EXPECT_STREQ(is_nolibc, strerror(EINVAL), "errno=22"); break;
CASE_TEST(strerror_int_max); EXPECT_STREQ(is_nolibc, strerror(INT_MAX), "errno=2147483647"); break;
CASE_TEST(strerror_int_min); EXPECT_STREQ(is_nolibc, strerror(INT_MIN), "errno=-2147483648"); break;
+ CASE_TEST(tolower); EXPECT_EQ(1, tolower('A'), 'a'); break;
+ CASE_TEST(tolower_noop); EXPECT_EQ(1, tolower('a'), 'a'); break;
+ CASE_TEST(toupper); EXPECT_EQ(1, toupper('a'), 'A'); break;
+ CASE_TEST(toupper_noop); EXPECT_EQ(1, toupper('A'), 'A'); break;
+ CASE_TEST(abs); EXPECT_EQ(1, abs(-10), 10); break;
+ CASE_TEST(abs_noop); EXPECT_EQ(1, abs(10), 10); break;
+ CASE_TEST(difftime); EXPECT_ZR(1, test_difftime()); break;
case __LINE__:
return ret; /* must be last */
@@ -1295,27 +1538,15 @@ int run_stdlib(int min, int max)
static int expect_vfprintf(int llen, int c, const char *expected, const char *fmt, ...)
{
- int ret, pipefd[2];
- ssize_t w, r;
char buf[100];
- FILE *memfile;
va_list args;
+ ssize_t w;
+ int ret;
- ret = pipe(pipefd);
- if (ret == -1) {
- llen += printf(" pipe() != %s", strerror(errno));
- result(llen, FAIL);
- return 1;
- }
-
- memfile = fdopen(pipefd[1], "w");
- if (!memfile) {
- result(llen, FAIL);
- return 1;
- }
va_start(args, fmt);
- w = vfprintf(memfile, fmt, args);
+ /* Only allow writing 21 bytes, to test truncation */
+ w = vsnprintf(buf, 21, fmt, args);
va_end(args);
if (w != c) {
@@ -1324,17 +1555,6 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm
return 1;
}
- fclose(memfile);
-
- r = read(pipefd[0], buf, sizeof(buf) - 1);
-
- if (r != w) {
- llen += printf(" written(%d) != read(%d)", (int)w, (int)r);
- result(llen, FAIL);
- return 1;
- }
-
- buf[r] = '\0';
llen += printf(" \"%s\" = \"%s\"", expected, buf);
ret = strncmp(expected, buf, c);
@@ -1409,7 +1629,24 @@ static int test_scanf(void)
return 0;
}
-static int run_vfprintf(int min, int max)
+int test_strerror(void)
+{
+ char buf[100];
+ ssize_t ret;
+
+ memset(buf, 'A', sizeof(buf));
+
+ errno = EINVAL;
+ ret = snprintf(buf, sizeof(buf), "%m");
+ if (is_nolibc) {
+ if (ret < 6 || memcmp(buf, "errno=", 6))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int run_printf(int min, int max)
{
int test;
int ret = 0;
@@ -1430,7 +1667,14 @@ static int run_vfprintf(int min, int max)
CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break;
CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break;
CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break;
+ CASE_TEST(uintmax_t); EXPECT_VFPRINTF(20, "18446744073709551615", "%ju", 0xffffffffffffffffULL); break;
+ CASE_TEST(intmax_t); EXPECT_VFPRINTF(20, "-9223372036854775807", "%jd", 0x8000000000000001LL); break;
+ CASE_TEST(truncation); EXPECT_VFPRINTF(25, "01234567890123456789", "%s", "0123456789012345678901234"); break;
+ CASE_TEST(string_width); EXPECT_VFPRINTF(10, " 1", "%10s", "1"); break;
+ CASE_TEST(number_width); EXPECT_VFPRINTF(10, " 1", "%10d", 1); break;
+ CASE_TEST(width_trunc); EXPECT_VFPRINTF(25, " ", "%25d", 1); break;
CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break;
+ CASE_TEST(strerror); EXPECT_ZR(1, test_strerror()); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
@@ -1439,6 +1683,7 @@ static int run_vfprintf(int min, int max)
return ret;
}
+__attribute__((no_sanitize("undefined")))
static int smash_stack(void)
{
char buf[100];
@@ -1455,8 +1700,7 @@ static int run_protection(int min __attribute__((unused)),
int max __attribute__((unused)))
{
pid_t pid;
- int llen = 0, ret;
- siginfo_t siginfo = {};
+ int llen = 0, status;
struct rlimit rlimit = { 0, 0 };
llen += printf("0 -fstackprotector ");
@@ -1494,11 +1738,10 @@ static int run_protection(int min __attribute__((unused)),
return 1;
default:
- ret = waitid(P_PID, pid, &siginfo, WEXITED);
+ pid = waitpid(pid, &status, 0);
- if (ret != 0 || siginfo.si_signo != SIGCHLD ||
- siginfo.si_code != CLD_KILLED || siginfo.si_status != SIGABRT) {
- llen += printf("waitid()");
+ if (pid == -1 || !WIFSIGNALED(status) || WTERMSIG(status) != SIGABRT) {
+ llen += printf("waitpid()");
result(llen, FAIL);
return 1;
}
@@ -1570,7 +1813,7 @@ static const struct test test_names[] = {
{ .name = "startup", .func = run_startup },
{ .name = "syscall", .func = run_syscall },
{ .name = "stdlib", .func = run_stdlib },
- { .name = "vfprintf", .func = run_vfprintf },
+ { .name = "printf", .func = run_printf },
{ .name = "protection", .func = run_protection },
{ 0 }
};
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
index 0299a0912d40..8277599e6441 100755
--- a/tools/testing/selftests/nolibc/run-tests.sh
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -25,6 +25,8 @@ all_archs=(
riscv32 riscv64
s390x s390
loongarch
+ sparc32 sparc64
+ m68k
)
archs="${all_archs[@]}"
@@ -111,6 +113,7 @@ crosstool_arch() {
loongarch) echo loongarch64;;
mips*) echo mips;;
s390*) echo s390;;
+ sparc*) echo sparc64;;
*) echo "$1";;
esac
}
@@ -184,6 +187,10 @@ test_arch() {
echo "Unsupported configuration"
return
fi
+ if [ "$arch" = "m68k" ] && [ "$llvm" = "1" ]; then
+ echo "Unsupported configuration"
+ return
+ fi
mkdir -p "$build_dir"
swallow_output "${MAKE[@]}" defconfig
diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c
index 49dc1e831174..e03fe1b9bba2 100644
--- a/tools/testing/selftests/perf_events/watermark_signal.c
+++ b/tools/testing/selftests/perf_events/watermark_signal.c
@@ -75,7 +75,7 @@ TEST(watermark_signal)
if (waitpid(child, &child_status, WSTOPPED) != child ||
!(WIFSTOPPED(child_status) && WSTOPSIG(child_status) == SIGSTOP)) {
fprintf(stderr,
- "failed to sycnhronize with child errno=%d status=%x\n",
+ "failed to synchronize with child errno=%d status=%x\n",
errno,
child_status);
goto cleanup;
diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c
index 51c414faabb0..96f274f0582b 100644
--- a/tools/testing/selftests/pid_namespace/pid_max.c
+++ b/tools/testing/selftests/pid_namespace/pid_max.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
+#include <sys/mount.h>
#include <sys/wait.h>
#include "../kselftest_harness.h"
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index 55bcf81a2b9a..efd74063126e 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -131,6 +131,26 @@
#define PIDFD_INFO_EXIT (1UL << 3) /* Always returned if available, even if not requested */
#endif
+#ifndef PIDFD_INFO_COREDUMP
+#define PIDFD_INFO_COREDUMP (1UL << 4)
+#endif
+
+#ifndef PIDFD_COREDUMPED
+#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */
+#endif
+
+#ifndef PIDFD_COREDUMP_SKIP
+#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */
+#endif
+
+#ifndef PIDFD_COREDUMP_USER
+#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */
+#endif
+
+#ifndef PIDFD_COREDUMP_ROOT
+#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */
+#endif
+
#ifndef PIDFD_THREAD
#define PIDFD_THREAD O_EXCL
#endif
@@ -150,6 +170,8 @@ struct pidfd_info {
__u32 fsuid;
__u32 fsgid;
__s32 exit_code;
+ __u32 coredump_mask;
+ __u32 __spare1;
};
/*
diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c
index 7822dd080258..c094aeb1c620 100644
--- a/tools/testing/selftests/pidfd/pidfd_bind_mount.c
+++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c
@@ -15,79 +15,7 @@
#include "pidfd.h"
#include "../kselftest_harness.h"
-
-#ifndef __NR_open_tree
- #if defined __alpha__
- #define __NR_open_tree 538
- #elif defined _MIPS_SIM
- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
- #define __NR_open_tree 4428
- #endif
- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
- #define __NR_open_tree 6428
- #endif
- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
- #define __NR_open_tree 5428
- #endif
- #elif defined __ia64__
- #define __NR_open_tree (428 + 1024)
- #else
- #define __NR_open_tree 428
- #endif
-#endif
-
-#ifndef __NR_move_mount
- #if defined __alpha__
- #define __NR_move_mount 539
- #elif defined _MIPS_SIM
- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
- #define __NR_move_mount 4429
- #endif
- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
- #define __NR_move_mount 6429
- #endif
- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
- #define __NR_move_mount 5429
- #endif
- #elif defined __ia64__
- #define __NR_move_mount (428 + 1024)
- #else
- #define __NR_move_mount 429
- #endif
-#endif
-
-#ifndef MOVE_MOUNT_F_EMPTY_PATH
-#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
-#endif
-
-#ifndef MOVE_MOUNT_F_EMPTY_PATH
-#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
-#endif
-
-static inline int sys_move_mount(int from_dfd, const char *from_pathname,
- int to_dfd, const char *to_pathname,
- unsigned int flags)
-{
- return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
- to_pathname, flags);
-}
-
-#ifndef OPEN_TREE_CLONE
-#define OPEN_TREE_CLONE 1
-#endif
-
-#ifndef OPEN_TREE_CLOEXEC
-#define OPEN_TREE_CLOEXEC O_CLOEXEC
-#endif
-
-#ifndef AT_RECURSIVE
-#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
-#endif
-
-static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
-{
- return syscall(__NR_open_tree, dfd, filename, flags);
-}
+#include "../filesystems/wrappers.h"
FIXTURE(pidfd_bind_mount) {
char template[PATH_MAX];
diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c
index 1758a1b0457b..a0eb6e81eaa2 100644
--- a/tools/testing/selftests/pidfd/pidfd_info_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_info_test.c
@@ -299,6 +299,7 @@ TEST_F(pidfd_info, thread_group)
/* Opening a thread as a thread-group leader must fail. */
pidfd_thread = sys_pidfd_open(pid_thread, 0);
ASSERT_LT(pidfd_thread, 0);
+ ASSERT_EQ(errno, ENOENT);
/* Opening a thread as a PIDFD_THREAD must succeed. */
pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
@@ -362,9 +363,9 @@ TEST_F(pidfd_info, thread_group)
ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
- /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
- ASSERT_TRUE(WIFEXITED(info.exit_code));
- ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+ /* Even though the thread-group exited successfully it will still report the group exit code. */
+ ASSERT_TRUE(WIFSIGNALED(info.exit_code));
+ ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
/*
* Retrieve exit information for the thread-group leader via the
@@ -375,9 +376,9 @@ TEST_F(pidfd_info, thread_group)
ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_CREDS));
ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_EXIT));
- /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
- ASSERT_TRUE(WIFEXITED(info2.exit_code));
- ASSERT_EQ(WEXITSTATUS(info2.exit_code), 0);
+ /* Even though the thread-group exited successfully it will still report the group exit code. */
+ ASSERT_TRUE(WIFSIGNALED(info2.exit_code));
+ ASSERT_EQ(WTERMSIG(info2.exit_code), SIGKILL);
/* Retrieve exit information for the thread. */
info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
index aad51e7c0183..991fb11306eb 100755
--- a/tools/testing/selftests/rcutorture/bin/console-badness.sh
+++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh
@@ -10,7 +10,7 @@
#
# Authors: Paul E. McKenney <paulmck@kernel.org>
-grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
+grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Call trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
grep -v 'ODEBUG: ' |
grep -v 'This means that this is a DEBUG kernel and it is' |
grep -v 'Warning: unable to open an initial console' |
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index ad79784e552d..957800c9ffba 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -73,7 +73,7 @@ config_override_param "$config_dir/CFcommon.$(uname -m)" KcList \
cp $T/KcList $resdir/ConfigFragment
base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
-if test "$base_resdir" != "$resdir" && test -f $base_resdir/bzImage && test -f $base_resdir/vmlinux
+if test "$base_resdir" != "$resdir" && (test -f $base_resdir/bzImage || test -f $base_resdir/Image) && test -f $base_resdir/vmlinux
then
# Rerunning previous test, so use that test's kernel.
QEMU="`identify_qemu $base_resdir/vmlinux`"
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index b07c11cf6929..21e6ba3615f6 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -148,7 +148,7 @@ then
summary="$summary KCSAN: $n_kcsan"
fi
fi
- n_calltrace=`grep -c 'Call Trace:' $file`
+ n_calltrace=`grep -Ec 'Call Trace:|Call trace:' $file`
if test "$n_calltrace" -ne 0
then
summary="$summary Call Traces: $n_calltrace"
diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
index 2db12c5cad9c..208be7d09a61 100755
--- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
+++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
@@ -39,8 +39,9 @@ do
shift
done
-err=
nerrs=0
+
+# Test lockdep's handling of deadlocks.
for d in 0 1
do
for t in 0 1 2
@@ -52,6 +53,12 @@ do
tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
ret=$?
mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val"
+ if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config
+ then
+ echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario"
+ nerrs=$((nerrs+1))
+ err=1
+ fi
if test "$d" -ne 0 && test "$ret" -eq 0
then
err=1
@@ -71,6 +78,39 @@ do
done
done
done
+
+# Test lockdep-enabled testing of mixed SRCU readers.
+for val in 0x1 0xf
+do
+ err=
+ tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.reader_flavor=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
+ ret=$?
+ mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val"
+ if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config
+ then
+ echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario"
+ nerrs=$((nerrs+1))
+ err=1
+ fi
+ if test "$val" -eq 0xf && test "$ret" -eq 0
+ then
+ err=1
+ echo -n Unexpected success for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ fi
+ if test "$val" -eq 0x1 && test "$ret" -ne 0
+ then
+ err=1
+ echo -n Unexpected failure for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ fi
+ if test -n "$err"
+ then
+ grep "rcu_torture_init_srcu_lockdep: test_srcu_lockdep = " "$RCUTORTURE/res/$ds/$val/SRCU-P/console.log" | sed -e 's/^.*rcu_torture_init_srcu_lockdep://' >> "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ cat "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ nerrs=$((nerrs+1))
+ fi
+done
+
+# Set up exit code.
if test "$nerrs" -ne 0
then
exit 1
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 0447c4a00cc4..e03fdaca89b3 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -51,12 +51,15 @@ do_scftorture=yes
do_rcuscale=yes
do_refscale=yes
do_kvfree=yes
+do_normal=yes
+explicit_normal=no
do_kasan=yes
do_kcsan=no
do_clocksourcewd=yes
do_rt=yes
do_rcutasksflavors=yes
do_srcu_lockdep=yes
+do_rcu_rust=no
# doyesno - Helper function for yes/no arguments
function doyesno () {
@@ -87,6 +90,7 @@ usage () {
echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture"
echo " --do-refscale / --do-no-refscale / --no-refscale"
echo " --do-rt / --do-no-rt / --no-rt"
+ echo " --do-rcu-rust / --do-no-rcu-rust / --no-rcu-rust"
echo " --do-scftorture / --do-no-scftorture / --no-scftorture"
echo " --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep"
echo " --duration [ <minutes> | <hours>h | <days>d ]"
@@ -128,6 +132,8 @@ do
do_refscale=yes
do_rt=yes
do_kvfree=yes
+ do_normal=yes
+ explicit_normal=no
do_kasan=yes
do_kcsan=yes
do_clocksourcewd=yes
@@ -161,11 +167,17 @@ do
do_refscale=no
do_rt=no
do_kvfree=no
+ do_normal=no
+ explicit_normal=no
do_kasan=no
do_kcsan=no
do_clocksourcewd=no
do_srcu_lockdep=no
;;
+ --do-normal|--do-no-normal|--no-normal)
+ do_normal=`doyesno "$1" --do-normal`
+ explicit_normal=yes
+ ;;
--do-rcuscale|--do-no-rcuscale|--no-rcuscale)
do_rcuscale=`doyesno "$1" --do-rcuscale`
;;
@@ -181,6 +193,9 @@ do
--do-rt|--do-no-rt|--no-rt)
do_rt=`doyesno "$1" --do-rt`
;;
+ --do-rcu-rust|--do-no-rcu-rust|--no-rcu-rust)
+ do_rcu_rust=`doyesno "$1" --do-rcu-rust`
+ ;;
--do-scftorture|--do-no-scftorture|--no-scftorture)
do_scftorture=`doyesno "$1" --do-scftorture`
;;
@@ -242,6 +257,17 @@ trap 'rm -rf $T' 0 2
echo " --- " $scriptname $args | tee -a $T/log
echo " --- Results directory: " $ds | tee -a $T/log
+if test "$do_normal" = "no" && test "$do_kasan" = "no" && test "$do_kcsan" = "no"
+then
+ # Match old scripts so that "--do-none --do-rcutorture" does
+ # normal rcutorture testing, but no KASAN or KCSAN testing.
+ if test $explicit_normal = yes
+ then
+ echo " --- Everything disabled, so explicit --do-normal overridden" | tee -a $T/log
+ fi
+ do_normal=yes
+fi
+
# Calculate rcutorture defaults and apportion time
if test -z "$configs_rcutorture"
then
@@ -332,9 +358,12 @@ function torture_set {
local kcsan_kmake_tag=
local flavor=$1
shift
- curflavor=$flavor
- torture_one "$@"
- mv $T/last-resdir $T/last-resdir-nodebug || :
+ if test "$do_normal" = "yes"
+ then
+ curflavor=$flavor
+ torture_one "$@"
+ mv $T/last-resdir $T/last-resdir-nodebug || :
+ fi
if test "$do_kasan" = "yes"
then
curflavor=${flavor}-kasan
@@ -448,13 +477,57 @@ fi
if test "$do_rt" = "yes"
then
- # With all post-boot grace periods forced to normal.
- torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1"
- torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+ # In both runs, disable testing of RCU priority boosting because
+ # -rt doesn't like its interaction with testing of callback
+ # flooding.
+
+ # With all post-boot grace periods forced to normal (default for PREEMPT_RT).
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcutorture.preempt_duration=0"
+ torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_RCU_NOCB_CPU=y" --trust-make
# With all post-boot grace periods forced to expedited.
- torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1"
- torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcupdate.rcu_normal_after_boot=0 rcupdate.rcu_expedited=1 rcutorture.preempt_duration=0"
+ torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_FULL=y CONFIG_RCU_NOCB_CPU=y" --trust-make
+fi
+
+if test "$do_rcu_rust" = "yes"
+then
+ echo " --- do-rcu-rust:" Start `date` | tee -a $T/log
+ rrdir="tools/testing/selftests/rcutorture/res/$ds/results-rcu-rust"
+ mkdir -p "$rrdir"
+ echo " --- make LLVM=1 rustavailable " | tee -a $rrdir/log > $rrdir/rustavailable.out
+ make LLVM=1 rustavailable > $T/rustavailable.out 2>&1
+ retcode=$?
+ echo $retcode > $rrdir/rustavailable.exitcode
+ cat $T/rustavailable.out | tee -a $rrdir/log >> $rrdir/rustavailable.out 2>&1
+ buildphase=rustavailable
+ if test "$retcode" -eq 0
+ then
+ echo " --- Running 'make mrproper' in order to run kunit." | tee -a $rrdir/log > $rrdir/mrproper.out
+ make mrproper > $rrdir/mrproper.out 2>&1
+ retcode=$?
+ echo $retcode > $rrdir/mrproper.exitcode
+ buildphase=mrproper
+ fi
+ if test "$retcode" -eq 0
+ then
+ echo " --- Running rust_doctests_kernel." | tee -a $rrdir/log > $rrdir/rust_doctests_kernel.out
+ ./tools/testing/kunit/kunit.py run --make_options LLVM=1 --make_options CLIPPY=1 --arch arm64 --kconfig_add CONFIG_SMP=y --kconfig_add CONFIG_WERROR=y --kconfig_add CONFIG_RUST=y rust_doctests_kernel >> $rrdir/rust_doctests_kernel.out 2>&1
+ # @@@ Remove "--arch arm64" in order to test on native architecture?
+ # @@@ Analyze $rrdir/rust_doctests_kernel.out contents?
+ retcode=$?
+ echo $retcode > $rrdir/rust_doctests_kernel.exitcode
+ buildphase=rust_doctests_kernel
+ fi
+ if test "$retcode" -eq 0
+ then
+ echo "rcu-rust($retcode)" $rrdir >> $T/successes
+ echo Success >> $rrdir/log
+ else
+ echo "rcu-rust($retcode)" $rrdir >> $T/failures
+ echo " --- rcu-rust Test summary:" >> $rrdir/log
+ echo " --- Summary: Exit code $retcode from $buildphase, see $rrdir/$buildphase.out" >> $rrdir/log
+ fi
fi
if test "$do_srcu_lockdep" = "yes"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index 8ae41d5f81a3..54b1600c7eb5 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -8,8 +8,6 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
-CONFIG_MAXSMP=y
-CONFIG_CPUMASK_OFFSTACK=y
CONFIG_RCU_NOCB_CPU=y
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_RCU_BOOST=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index 40af3df0f397..1cc5b47dde28 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -1,4 +1,4 @@
-maxcpus=8 nr_cpus=43
+maxcpus=8 nr_cpus=17
rcutree.gp_preinit_delay=3
rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
index 50e03eefe7ac..0443beacf362 100755
--- a/tools/testing/selftests/run_kselftest.sh
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -3,7 +3,14 @@
#
# Run installed kselftest tests.
#
-BASE_DIR=$(realpath $(dirname $0))
+
+# Fallback to readlink if realpath is not available
+if which realpath > /dev/null; then
+ BASE_DIR=$(realpath $(dirname $0))
+else
+ BASE_DIR=$(readlink -f $(dirname $0))
+fi
+
cd $BASE_DIR
TESTS="$BASE_DIR"/kselftest-list.txt
if [ ! -r "$TESTS" ] ; then
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index f4531327b8e7..9d9d6b4c38b0 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -162,10 +162,10 @@ all_test_bpfprogs := $(foreach prog,$(wildcard *.bpf.c),$(INCLUDE_DIR)/$(patsubs
auto-test-targets := \
create_dsq \
enq_last_no_enq_fails \
- enq_select_cpu_fails \
ddsp_bogus_dsq_fail \
ddsp_vtimelocal_fail \
dsp_local_on \
+ enq_select_cpu \
exit \
hotplug \
init_enable_count \
@@ -173,6 +173,7 @@ auto-test-targets := \
maybe_null \
minimal \
numa \
+ allowed_cpus \
prog_run \
reload_loop \
select_cpu_dfl \
diff --git a/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c b/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c
new file mode 100644
index 000000000000..35923e74a2ec
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A scheduler that validates the behavior of scx_bpf_select_cpu_and() by
+ * selecting idle CPUs strictly within a subset of allowed CPUs.
+ *
+ * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
+ */
+
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+UEI_DEFINE(uei);
+
+private(PREF_CPUS) struct bpf_cpumask __kptr * allowed_cpumask;
+
+static void
+validate_idle_cpu(const struct task_struct *p, const struct cpumask *allowed, s32 cpu)
+{
+ if (scx_bpf_test_and_clear_cpu_idle(cpu))
+ scx_bpf_error("CPU %d should be marked as busy", cpu);
+
+ if (bpf_cpumask_subset(allowed, p->cpus_ptr) &&
+ !bpf_cpumask_test_cpu(cpu, allowed))
+ scx_bpf_error("CPU %d not in the allowed domain for %d (%s)",
+ cpu, p->pid, p->comm);
+}
+
+s32 BPF_STRUCT_OPS(allowed_cpus_select_cpu,
+ struct task_struct *p, s32 prev_cpu, u64 wake_flags)
+{
+ const struct cpumask *allowed;
+ s32 cpu;
+
+ allowed = cast_mask(allowed_cpumask);
+ if (!allowed) {
+ scx_bpf_error("allowed domain not initialized");
+ return -EINVAL;
+ }
+
+ /*
+ * Select an idle CPU strictly within the allowed domain.
+ */
+ cpu = scx_bpf_select_cpu_and(p, prev_cpu, wake_flags, allowed, 0);
+ if (cpu >= 0) {
+ validate_idle_cpu(p, allowed, cpu);
+ scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
+
+ return cpu;
+ }
+
+ return prev_cpu;
+}
+
+void BPF_STRUCT_OPS(allowed_cpus_enqueue, struct task_struct *p, u64 enq_flags)
+{
+ const struct cpumask *allowed;
+ s32 prev_cpu = scx_bpf_task_cpu(p), cpu;
+
+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
+
+ allowed = cast_mask(allowed_cpumask);
+ if (!allowed) {
+ scx_bpf_error("allowed domain not initialized");
+ return;
+ }
+
+ /*
+ * Use scx_bpf_select_cpu_and() to proactively kick an idle CPU
+ * within @allowed_cpumask, usable by @p.
+ */
+ cpu = scx_bpf_select_cpu_and(p, prev_cpu, 0, allowed, 0);
+ if (cpu >= 0) {
+ validate_idle_cpu(p, allowed, cpu);
+ scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE);
+ }
+}
+
+s32 BPF_STRUCT_OPS_SLEEPABLE(allowed_cpus_init)
+{
+ struct bpf_cpumask *mask;
+
+ mask = bpf_cpumask_create();
+ if (!mask)
+ return -ENOMEM;
+
+ mask = bpf_kptr_xchg(&allowed_cpumask, mask);
+ if (mask)
+ bpf_cpumask_release(mask);
+
+ bpf_rcu_read_lock();
+
+ /*
+ * Assign the first online CPU to the allowed domain.
+ */
+ mask = allowed_cpumask;
+ if (mask) {
+ const struct cpumask *online = scx_bpf_get_online_cpumask();
+
+ bpf_cpumask_set_cpu(bpf_cpumask_first(online), mask);
+ scx_bpf_put_cpumask(online);
+ }
+
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+void BPF_STRUCT_OPS(allowed_cpus_exit, struct scx_exit_info *ei)
+{
+ UEI_RECORD(uei, ei);
+}
+
+struct task_cpu_arg {
+ pid_t pid;
+};
+
+SEC("syscall")
+int select_cpu_from_user(struct task_cpu_arg *input)
+{
+ struct task_struct *p;
+ int cpu;
+
+ p = bpf_task_from_pid(input->pid);
+ if (!p)
+ return -EINVAL;
+
+ bpf_rcu_read_lock();
+ cpu = scx_bpf_select_cpu_and(p, bpf_get_smp_processor_id(), 0, p->cpus_ptr, 0);
+ bpf_rcu_read_unlock();
+
+ bpf_task_release(p);
+
+ return cpu;
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops allowed_cpus_ops = {
+ .select_cpu = (void *)allowed_cpus_select_cpu,
+ .enqueue = (void *)allowed_cpus_enqueue,
+ .init = (void *)allowed_cpus_init,
+ .exit = (void *)allowed_cpus_exit,
+ .name = "allowed_cpus",
+};
diff --git a/tools/testing/selftests/sched_ext/allowed_cpus.c b/tools/testing/selftests/sched_ext/allowed_cpus.c
new file mode 100644
index 000000000000..093f285ab4ba
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/allowed_cpus.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
+ */
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "allowed_cpus.bpf.skel.h"
+#include "scx_test.h"
+
+static enum scx_test_status setup(void **ctx)
+{
+ struct allowed_cpus *skel;
+
+ skel = allowed_cpus__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(allowed_cpus__load(skel), "Failed to load skel");
+
+ *ctx = skel;
+
+ return SCX_TEST_PASS;
+}
+
+static int test_select_cpu_from_user(const struct allowed_cpus *skel)
+{
+ int fd, ret;
+ __u64 args[1];
+
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+
+ args[0] = getpid();
+ fd = bpf_program__fd(skel->progs.select_cpu_from_user);
+ if (fd < 0)
+ return fd;
+
+ ret = bpf_prog_test_run_opts(fd, &attr);
+ if (ret < 0)
+ return ret;
+
+ fprintf(stderr, "%s: CPU %d\n", __func__, attr.retval);
+
+ return 0;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+ struct allowed_cpus *skel = ctx;
+ struct bpf_link *link;
+
+ link = bpf_map__attach_struct_ops(skel->maps.allowed_cpus_ops);
+ SCX_FAIL_IF(!link, "Failed to attach scheduler");
+
+ /* Pick an idle CPU from user-space */
+ SCX_FAIL_IF(test_select_cpu_from_user(skel), "Failed to pick idle CPU");
+
+ /* Just sleeping is fine, plenty of scheduling events happening */
+ sleep(1);
+
+ SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE));
+ bpf_link__destroy(link);
+
+ return SCX_TEST_PASS;
+}
+
+static void cleanup(void *ctx)
+{
+ struct allowed_cpus *skel = ctx;
+
+ allowed_cpus__destroy(skel);
+}
+
+struct scx_test allowed_cpus = {
+ .name = "allowed_cpus",
+ .description = "Verify scx_bpf_select_cpu_and()",
+ .setup = setup,
+ .run = run,
+ .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&allowed_cpus)
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c
new file mode 100644
index 000000000000..ee2c9b89716e
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
+ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
+ */
+
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+UEI_DEFINE(uei);
+
+s32 BPF_STRUCT_OPS(enq_select_cpu_select_cpu, struct task_struct *p,
+ s32 prev_cpu, u64 wake_flags)
+{
+ /* Bounce all tasks to ops.enqueue() */
+ return prev_cpu;
+}
+
+void BPF_STRUCT_OPS(enq_select_cpu_enqueue, struct task_struct *p,
+ u64 enq_flags)
+{
+ s32 cpu, prev_cpu = scx_bpf_task_cpu(p);
+ bool found = false;
+
+ cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, 0, &found);
+ if (found) {
+ scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_DFL, enq_flags);
+ return;
+ }
+
+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+}
+
+void BPF_STRUCT_OPS(enq_select_cpu_exit, struct scx_exit_info *ei)
+{
+ UEI_RECORD(uei, ei);
+}
+
+struct task_cpu_arg {
+ pid_t pid;
+};
+
+SEC("syscall")
+int select_cpu_from_user(struct task_cpu_arg *input)
+{
+ struct task_struct *p;
+ bool found = false;
+ s32 cpu;
+
+ p = bpf_task_from_pid(input->pid);
+ if (!p)
+ return -EINVAL;
+
+ bpf_rcu_read_lock();
+ cpu = scx_bpf_select_cpu_dfl(p, bpf_get_smp_processor_id(), 0, &found);
+ if (!found)
+ cpu = -EBUSY;
+ bpf_rcu_read_unlock();
+
+ bpf_task_release(p);
+
+ return cpu;
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops enq_select_cpu_ops = {
+ .select_cpu = (void *)enq_select_cpu_select_cpu,
+ .enqueue = (void *)enq_select_cpu_enqueue,
+ .exit = (void *)enq_select_cpu_exit,
+ .name = "enq_select_cpu",
+ .timeout_ms = 1000U,
+};
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu.c b/tools/testing/selftests/sched_ext/enq_select_cpu.c
new file mode 100644
index 000000000000..340c6f8b86da
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/enq_select_cpu.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2023 David Vernet <dvernet@meta.com>
+ * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
+ */
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "enq_select_cpu.bpf.skel.h"
+#include "scx_test.h"
+
+static enum scx_test_status setup(void **ctx)
+{
+ struct enq_select_cpu *skel;
+
+ skel = enq_select_cpu__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(enq_select_cpu__load(skel), "Failed to load skel");
+
+ *ctx = skel;
+
+ return SCX_TEST_PASS;
+}
+
+static int test_select_cpu_from_user(const struct enq_select_cpu *skel)
+{
+ int fd, ret;
+ __u64 args[1];
+
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+
+ args[0] = getpid();
+ fd = bpf_program__fd(skel->progs.select_cpu_from_user);
+ if (fd < 0)
+ return fd;
+
+ ret = bpf_prog_test_run_opts(fd, &attr);
+ if (ret < 0)
+ return ret;
+
+ fprintf(stderr, "%s: CPU %d\n", __func__, attr.retval);
+
+ return 0;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+ struct enq_select_cpu *skel = ctx;
+ struct bpf_link *link;
+
+ link = bpf_map__attach_struct_ops(skel->maps.enq_select_cpu_ops);
+ if (!link) {
+ SCX_ERR("Failed to attach scheduler");
+ return SCX_TEST_FAIL;
+ }
+
+ /* Pick an idle CPU from user-space */
+ SCX_FAIL_IF(test_select_cpu_from_user(skel), "Failed to pick idle CPU");
+
+ sleep(1);
+
+ SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE));
+ bpf_link__destroy(link);
+
+ return SCX_TEST_PASS;
+}
+
+static void cleanup(void *ctx)
+{
+ struct enq_select_cpu *skel = ctx;
+
+ enq_select_cpu__destroy(skel);
+}
+
+struct scx_test enq_select_cpu = {
+ .name = "enq_select_cpu",
+ .description = "Verify scx_bpf_select_cpu_dfl() from multiple contexts",
+ .setup = setup,
+ .run = run,
+ .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&enq_select_cpu)
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
deleted file mode 100644
index a7cf868d5e31..000000000000
--- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
- * Copyright (c) 2023 David Vernet <dvernet@meta.com>
- * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
- */
-
-#include <scx/common.bpf.h>
-
-char _license[] SEC("license") = "GPL";
-
-/* Manually specify the signature until the kfunc is added to the scx repo. */
-s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
- bool *found) __ksym;
-
-s32 BPF_STRUCT_OPS(enq_select_cpu_fails_select_cpu, struct task_struct *p,
- s32 prev_cpu, u64 wake_flags)
-{
- return prev_cpu;
-}
-
-void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p,
- u64 enq_flags)
-{
- /*
- * Need to initialize the variable or the verifier will fail to load.
- * Improving these semantics is actively being worked on.
- */
- bool found = false;
-
- /* Can only call from ops.select_cpu() */
- scx_bpf_select_cpu_dfl(p, 0, 0, &found);
-
- scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
-}
-
-SEC(".struct_ops.link")
-struct sched_ext_ops enq_select_cpu_fails_ops = {
- .select_cpu = (void *) enq_select_cpu_fails_select_cpu,
- .enqueue = (void *) enq_select_cpu_fails_enqueue,
- .name = "enq_select_cpu_fails",
- .timeout_ms = 1000U,
-};
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
deleted file mode 100644
index a80e3a3b3698..000000000000
--- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
- * Copyright (c) 2023 David Vernet <dvernet@meta.com>
- * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
- */
-#include <bpf/bpf.h>
-#include <scx/common.h>
-#include <sys/wait.h>
-#include <unistd.h>
-#include "enq_select_cpu_fails.bpf.skel.h"
-#include "scx_test.h"
-
-static enum scx_test_status setup(void **ctx)
-{
- struct enq_select_cpu_fails *skel;
-
- skel = enq_select_cpu_fails__open();
- SCX_FAIL_IF(!skel, "Failed to open");
- SCX_ENUM_INIT(skel);
- SCX_FAIL_IF(enq_select_cpu_fails__load(skel), "Failed to load skel");
-
- *ctx = skel;
-
- return SCX_TEST_PASS;
-}
-
-static enum scx_test_status run(void *ctx)
-{
- struct enq_select_cpu_fails *skel = ctx;
- struct bpf_link *link;
-
- link = bpf_map__attach_struct_ops(skel->maps.enq_select_cpu_fails_ops);
- if (!link) {
- SCX_ERR("Failed to attach scheduler");
- return SCX_TEST_FAIL;
- }
-
- sleep(1);
-
- bpf_link__destroy(link);
-
- return SCX_TEST_PASS;
-}
-
-static void cleanup(void *ctx)
-{
- struct enq_select_cpu_fails *skel = ctx;
-
- enq_select_cpu_fails__destroy(skel);
-}
-
-struct scx_test enq_select_cpu_fails = {
- .name = "enq_select_cpu_fails",
- .description = "Verify we fail to call scx_bpf_select_cpu_dfl() "
- "from ops.enqueue()",
- .setup = setup,
- .run = run,
- .cleanup = cleanup,
-};
-REGISTER_SCX_TEST(&enq_select_cpu_fails)
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index 94886c82ae60..5822e25e0217 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -1,6 +1,6 @@
/*
* Strictly speaking, this is not a test. But it can report during test
- * runs so relative performace can be measured.
+ * runs so relative performance can be measured.
*/
#define _GNU_SOURCE
#include <assert.h>
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index b2f76a52215a..61acbd45ffaa 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1629,14 +1629,8 @@ void teardown_trace_fixture(struct __test_metadata *_metadata,
{
if (tracer) {
int status;
- /*
- * Extract the exit code from the other process and
- * adopt it for ourselves in case its asserts failed.
- */
ASSERT_EQ(0, kill(tracer, SIGUSR1));
ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
- if (WEXITSTATUS(status))
- _metadata->exit_code = KSFT_FAIL;
}
}
@@ -3166,12 +3160,15 @@ TEST(syscall_restart)
ret = get_syscall(_metadata, child_pid);
#if defined(__arm__)
/*
- * FIXME:
* - native ARM registers do NOT expose true syscall.
* - compat ARM registers on ARM64 DO expose true syscall.
+ * - values of utsbuf.machine include 'armv8l' or 'armb8b'
+ * for ARM64 running in compat mode.
*/
ASSERT_EQ(0, uname(&utsbuf));
- if (strncmp(utsbuf.machine, "arm", 3) == 0) {
+ if ((strncmp(utsbuf.machine, "arm", 3) == 0) &&
+ (strncmp(utsbuf.machine, "armv8l", 6) != 0) &&
+ (strncmp(utsbuf.machine, "armv8b", 6) != 0)) {
EXPECT_EQ(__NR_nanosleep, ret);
} else
#endif
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index db1616857d89..a10350c8a46e 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -36,6 +36,7 @@ ALL_TESTS="$ALL_TESTS 0008:1:1:match_int:1"
ALL_TESTS="$ALL_TESTS 0009:1:1:unregister_error:0"
ALL_TESTS="$ALL_TESTS 0010:1:1:mnt/mnt_error:0"
ALL_TESTS="$ALL_TESTS 0011:1:1:empty_add:0"
+ALL_TESTS="$ALL_TESTS 0012:1:1:u8_valid:0"
function allow_user_defaults()
{
@@ -851,6 +852,34 @@ sysctl_test_0011()
return 0
}
+sysctl_test_0012()
+{
+ TARGET="${SYSCTL}/$(get_test_target 0012)"
+ echo -n "Testing u8 range check in sysctl table check in ${TARGET} ... "
+ if [ ! -f ${TARGET} ]; then
+ echo -e "FAIL\nCould not create ${TARGET}" >&2
+ rc=1
+ test_rc
+ fi
+
+ local u8over_msg=$(dmesg | grep "u8_over range value" | wc -l)
+ if [ ! ${u8over_msg} -eq 1 ]; then
+ echo -e "FAIL\nu8 overflow not detected" >&2
+ rc=1
+ test_rc
+ fi
+
+ local u8under_msg=$(dmesg | grep "u8_under range value" | wc -l)
+ if [ ! ${u8under_msg} -eq 1 ]; then
+ echo -e "FAIL\nu8 underflow not detected" >&2
+ rc=1
+ test_rc
+ fi
+
+ echo "OK"
+ return 0
+}
+
list_tests()
{
echo "Test ID list:"
@@ -870,6 +899,7 @@ list_tests()
echo "0009 x $(get_test_count 0009) - tests sysct unregister"
echo "0010 x $(get_test_count 0010) - tests sysct mount point"
echo "0011 x $(get_test_count 0011) - tests empty directories"
+ echo "0012 x $(get_test_count 0012) - tests range check for u8 proc_handler"
}
usage()
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index a951c0d33cd2..9aa44d8176d9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -573,5 +573,67 @@
"teardown": [
"$TC qdisc del dev $DEV1 handle 1: root"
]
+ },
+ {
+ "id": "831d",
+ "name": "Test HFSC qlen accounting with DRR/NETEM/BLACKHOLE chain",
+ "category": ["qdisc", "hfsc", "drr", "netem", "blackhole"],
+ "plugins": { "requires": ["nsPlugin", "scapyPlugin"] },
+ "setup": [
+ "$IP link set dev $DEV1 up || true",
+ "$TC qdisc add dev $DEV1 root handle 1: drr",
+ "$TC filter add dev $DEV1 parent 1: basic classid 1:1",
+ "$TC class add dev $DEV1 parent 1: classid 1:1 drr",
+ "$TC qdisc add dev $DEV1 parent 1:1 handle 2: hfsc def 1",
+ "$TC class add dev $DEV1 parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0",
+ "$TC qdisc add dev $DEV1 parent 2:1 handle 3: netem",
+ "$TC qdisc add dev $DEV1 parent 3:1 handle 4: blackhole"
+ ],
+ "scapy": {
+ "iface": "$DEV0",
+ "count": 5,
+ "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()"
+ },
+ "cmdUnderTest": "$TC -s qdisc show dev $DEV1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DEV1",
+ "matchPattern": "qdisc hfsc",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 root handle 1: drr"]
+ },
+ {
+ "id": "309e",
+ "name": "Test HFSC eltree double add with reentrant enqueue behaviour on netem",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 1s",
+ "$TC qdisc add dev $DUMMY parent 1:0 handle 2:0 hfsc",
+ "ping -I $DUMMY -f -c10 -s48 -W0.001 10.10.11.1 || true",
+ "$TC class add dev $DUMMY parent 2:0 classid 2:1 hfsc rt m2 20Kbit",
+ "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%",
+ "$TC class add dev $DUMMY parent 2:0 classid 2:2 hfsc rt m2 20Kbit",
+ "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip dst 10.10.11.2/32 flowid 2:1",
+ "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 2 u32 match ip dst 10.10.11.3/32 flowid 2:2",
+ "ping -c 1 10.10.11.2 -I$DUMMY > /dev/null || true",
+ "$TC filter del dev $DUMMY parent 2:0 protocol ip prio 1",
+ "$TC class del dev $DUMMY classid 2:1",
+ "ping -c 1 10.10.11.3 -I$DUMMY > /dev/null || true"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY parent 2:0 classid 2:2 hfsc sc m2 20Kbit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j class ls dev $DUMMY classid 2:1",
+ "matchJSON": [],
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
index e9469ee71e6f..6d515d0e5ed6 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
@@ -189,5 +189,29 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "deb1",
+ "name": "CODEL test qdisc limit trimming",
+ "category": ["qdisc", "codel"],
+ "plugins": {
+ "requires": ["nsPlugin", "scapyPlugin"]
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root codel limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root codel limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1p target 5ms interval 100ms",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
index 3a537b2ec4c9..24faf4e12dfa 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
@@ -377,5 +377,27 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "9479",
+ "name": "FQ test qdisc limit trimming",
+ "category": ["qdisc", "fq"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root fq limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 1p",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
index 9774b1e8801b..4ce62b857fd7 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
@@ -294,5 +294,27 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "0436",
+ "name": "FQ_CODEL test qdisc limit trimming",
+ "category": ["qdisc", "fq_codel"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root fq_codel limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_codel limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
index d012d88d67fe..229fe1bf4a90 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
@@ -18,5 +18,27 @@
"matchCount": "1",
"teardown": [
]
+ },
+ {
+ "id": "83bf",
+ "name": "FQ_PIE test qdisc limit trimming",
+ "category": ["qdisc", "fq_pie"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root fq_pie limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_pie limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc fq_pie 1: root refcnt [0-9]+ limit 1p",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
index dbef5474b26b..0ca19fac54a5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
@@ -188,5 +188,27 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "385f",
+ "name": "HHF test qdisc limit trimming",
+ "category": ["qdisc", "hhf"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root hhf limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root hhf limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json
new file mode 100644
index 000000000000..1a98b66e8030
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json
@@ -0,0 +1,24 @@
+[
+ {
+ "id": "6158",
+ "name": "PIE test qdisc limit trimming",
+ "category": ["qdisc", "pie"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root pie limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root pie limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc pie 1: root refcnt [0-9]+ limit 1p",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index cddff1772e10..589b18ed758a 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -31,6 +31,10 @@ try_modprobe act_skbedit
try_modprobe act_skbmod
try_modprobe act_tunnel_key
try_modprobe act_vlan
+try_modprobe act_ife
+try_modprobe act_meta_mark
+try_modprobe act_meta_skbtcindex
+try_modprobe act_meta_skbprio
try_modprobe cls_basic
try_modprobe cls_bpf
try_modprobe cls_cgroup
diff --git a/tools/testing/selftests/timens/clock_nanosleep.c b/tools/testing/selftests/timens/clock_nanosleep.c
index 72d41b955fb2..5cc0010e85ff 100644
--- a/tools/testing/selftests/timens/clock_nanosleep.c
+++ b/tools/testing/selftests/timens/clock_nanosleep.c
@@ -38,7 +38,7 @@ void *call_nanosleep(void *_args)
return NULL;
}
-int run_test(int clockid, int abs)
+static int run_test(int clockid, int abs)
{
struct timespec now = {}, rem;
struct thread_args args = { .now = &now, .rem = &rem, .clockid = clockid};
@@ -115,6 +115,8 @@ int main(int argc, char *argv[])
{
int ret, nsfd;
+ ksft_print_header();
+
nscheck();
ksft_set_plan(4);
diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c
index d12ff955de0d..a644162d56fd 100644
--- a/tools/testing/selftests/timens/exec.c
+++ b/tools/testing/selftests/timens/exec.c
@@ -36,6 +36,8 @@ int main(int argc, char *argv[])
return 0;
}
+ ksft_print_header();
+
nscheck();
ksft_set_plan(1);
diff --git a/tools/testing/selftests/timens/futex.c b/tools/testing/selftests/timens/futex.c
index 6b2b9264e851..339633ae037a 100644
--- a/tools/testing/selftests/timens/futex.c
+++ b/tools/testing/selftests/timens/futex.c
@@ -66,6 +66,8 @@ int main(int argc, char *argv[])
pid_t pid;
struct timespec mtime_now;
+ ksft_print_header();
+
nscheck();
ksft_set_plan(2);
diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c
index 6b13dc277724..d6658b7b7548 100644
--- a/tools/testing/selftests/timens/gettime_perf.c
+++ b/tools/testing/selftests/timens/gettime_perf.c
@@ -67,6 +67,8 @@ int main(int argc, char *argv[])
time_t offset = 10;
int nsfd;
+ ksft_print_header();
+
ksft_set_plan(8);
fill_function_pointers();
diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c
index 1833ca97eb24..0a9ff90ee69a 100644
--- a/tools/testing/selftests/timens/procfs.c
+++ b/tools/testing/selftests/timens/procfs.c
@@ -180,6 +180,8 @@ int main(int argc, char *argv[])
{
int ret = 0;
+ ksft_print_header();
+
nscheck();
ksft_set_plan(2);
diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c
index 387220791a05..a9c0534ef8f6 100644
--- a/tools/testing/selftests/timens/timens.c
+++ b/tools/testing/selftests/timens/timens.c
@@ -151,6 +151,8 @@ int main(int argc, char *argv[])
time_t offset;
int ret = 0;
+ ksft_print_header();
+
nscheck();
check_supported_timers();
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c
index 5b939f59dfa4..79543ceb2c0f 100644
--- a/tools/testing/selftests/timens/timer.c
+++ b/tools/testing/selftests/timens/timer.c
@@ -15,7 +15,7 @@
#include "log.h"
#include "timens.h"
-int run_test(int clockid, struct timespec now)
+static int run_test(int clockid, struct timespec now)
{
struct itimerspec new_value;
long long elapsed;
@@ -75,6 +75,8 @@ int main(int argc, char *argv[])
pid_t pid;
struct timespec btime_now, mtime_now;
+ ksft_print_header();
+
nscheck();
check_supported_timers();
diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c
index a4196bbd6e33..402e2e415545 100644
--- a/tools/testing/selftests/timens/timerfd.c
+++ b/tools/testing/selftests/timens/timerfd.c
@@ -15,14 +15,14 @@
#include "log.h"
#include "timens.h"
-static int tclock_gettime(clock_t clockid, struct timespec *now)
+static int tclock_gettime(clockid_t clockid, struct timespec *now)
{
if (clockid == CLOCK_BOOTTIME_ALARM)
clockid = CLOCK_BOOTTIME;
return clock_gettime(clockid, now);
}
-int run_test(int clockid, struct timespec now)
+static int run_test(int clockid, struct timespec now)
{
struct itimerspec new_value;
long long elapsed;
@@ -82,6 +82,8 @@ int main(int argc, char *argv[])
pid_t pid;
struct timespec btime_now, mtime_now;
+ ksft_print_header();
+
nscheck();
check_supported_timers();
diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c
index 5b8907bf451d..b957e1a65124 100644
--- a/tools/testing/selftests/timens/vfork_exec.c
+++ b/tools/testing/selftests/timens/vfork_exec.c
@@ -91,6 +91,8 @@ int main(int argc, char *argv[])
return check("child after exec", &now);
}
+ ksft_print_header();
+
nscheck();
ksft_set_plan(4);
diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index f34ac0bac696..4dde8838261d 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)
+CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)/usr/include
+ifneq ($(WERROR),0)
+ CFLAGS += -Werror
+endif
+
LDLIBS += -lpthread -lm -luring
TEST_PROGS := test_generic_01.sh
@@ -11,6 +15,11 @@ TEST_PROGS += test_generic_05.sh
TEST_PROGS += test_generic_06.sh
TEST_PROGS += test_generic_07.sh
+TEST_PROGS += test_generic_08.sh
+TEST_PROGS += test_generic_09.sh
+TEST_PROGS += test_generic_10.sh
+TEST_PROGS += test_generic_11.sh
+
TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
TEST_PROGS += test_loop_01.sh
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
index 94a8e729ba4c..5421774d7867 100644
--- a/tools/testing/selftests/ublk/fault_inject.c
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -16,6 +16,11 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
unsigned long dev_size = 250UL << 30;
+ if (ctx->auto_zc_fallback) {
+ ublk_err("%s: not support auto_zc_fallback\n", __func__);
+ return -EINVAL;
+ }
+
dev->tgt.dev_size = dev_size;
dev->tgt.params = (struct ublk_params) {
.types = UBLK_PARAM_TYPE_BASIC,
diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c
index 6f34eabfae97..509842df9bee 100644
--- a/tools/testing/selftests/ublk/file_backed.c
+++ b/tools/testing/selftests/ublk/file_backed.c
@@ -29,19 +29,23 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des
static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
{
unsigned ublk_op = ublksrv_get_op(iod);
- int zc = ublk_queue_use_zc(q);
- enum io_uring_op op = ublk_to_uring_op(iod, zc);
+ unsigned zc = ublk_queue_use_zc(q);
+ unsigned auto_zc = ublk_queue_use_auto_zc(q);
+ enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc);
struct io_uring_sqe *sqe[3];
+ void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr;
- if (!zc) {
+ if (!zc || auto_zc) {
ublk_queue_alloc_sqes(q, sqe, 1);
if (!sqe[0])
return -ENOMEM;
io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/,
- (void *)iod->addr,
+ addr,
iod->nr_sectors << 9,
iod->start_sector << 9);
+ if (auto_zc)
+ sqe[0]->buf_index = tag;
io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1);
@@ -145,6 +149,11 @@ static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
},
};
+ if (ctx->auto_zc_fallback) {
+ ublk_err("%s: not support auto_zc_fallback\n", __func__);
+ return -EINVAL;
+ }
+
ret = backing_file_tgt_init(dev);
if (ret)
return ret;
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 842b40736a9b..b5131a000795 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -216,6 +216,30 @@ static int ublk_ctrl_get_features(struct ublk_dev *dev,
return __ublk_ctrl_cmd(dev, &data);
}
+static int ublk_ctrl_update_size(struct ublk_dev *dev,
+ __u64 nr_sects)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_UPDATE_SIZE,
+ .flags = CTRL_CMD_HAS_DATA,
+ };
+
+ data.data[0] = nr_sects;
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev,
+ unsigned int timeout_ms)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_QUIESCE_DEV,
+ .flags = CTRL_CMD_HAS_DATA,
+ };
+
+ data.data[0] = timeout_ms;
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
static const char *ublk_dev_state_desc(struct ublk_dev *dev)
{
switch (dev->dev_info.state) {
@@ -405,7 +429,7 @@ static void ublk_queue_deinit(struct ublk_queue *q)
free(q->ios[i].buf_addr);
}
-static int ublk_queue_init(struct ublk_queue *q)
+static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
{
struct ublk_dev *dev = q->dev;
int depth = dev->dev_info.queue_depth;
@@ -420,10 +444,14 @@ static int ublk_queue_init(struct ublk_queue *q)
q->cmd_inflight = 0;
q->tid = gettid();
- if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
+ if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
q->state |= UBLKSRV_NO_BUF;
- q->state |= UBLKSRV_ZC;
+ if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
+ q->state |= UBLKSRV_ZC;
+ if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG)
+ q->state |= UBLKSRV_AUTO_BUF_REG;
}
+ q->state |= extra_flags;
cmd_buf_size = ublk_queue_cmd_buf_sz(q);
off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
@@ -461,7 +489,7 @@ static int ublk_queue_init(struct ublk_queue *q)
goto fail;
}
- if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
+ if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth);
if (ret) {
ublk_err("ublk dev %d queue %d register spare buffers failed %d",
@@ -525,6 +553,23 @@ static void ublk_dev_unprep(struct ublk_dev *dev)
close(dev->fds[0]);
}
+static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
+ struct io_uring_sqe *sqe,
+ unsigned short tag)
+{
+ struct ublk_auto_buf_reg buf = {};
+
+ if (q->tgt_ops->buf_index)
+ buf.index = q->tgt_ops->buf_index(q, tag);
+ else
+ buf.index = tag;
+
+ if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
+ buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;
+
+ sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf);
+}
+
int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
{
struct ublksrv_io_cmd *cmd;
@@ -579,6 +624,9 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
else
cmd->addr = 0;
+ if (q->state & UBLKSRV_AUTO_BUF_REG)
+ ublk_set_auto_buf_reg(q, sqe[0], tag);
+
user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0);
io_uring_sqe_set_data64(sqe[0], user_data);
@@ -729,6 +777,7 @@ struct ublk_queue_info {
struct ublk_queue *q;
sem_t *queue_sem;
cpu_set_t *affinity;
+ unsigned char auto_zc_fallback;
};
static void *ublk_io_handler_fn(void *data)
@@ -736,9 +785,13 @@ static void *ublk_io_handler_fn(void *data)
struct ublk_queue_info *info = data;
struct ublk_queue *q = info->q;
int dev_id = q->dev->dev_info.dev_id;
+ unsigned extra_flags = 0;
int ret;
- ret = ublk_queue_init(q);
+ if (info->auto_zc_fallback)
+ extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK;
+
+ ret = ublk_queue_init(q, extra_flags);
if (ret) {
ublk_err("ublk dev %d queue %d init queue failed\n",
dev_id, q->q_id);
@@ -831,6 +884,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
qinfo[i].q = &dev->q[i];
qinfo[i].queue_sem = &queue_sem;
qinfo[i].affinity = &affinity_buf[i];
+ qinfo[i].auto_zc_fallback = ctx->auto_zc_fallback;
pthread_create(&dev->q[i].thread, NULL,
ublk_io_handler_fn,
&qinfo[i]);
@@ -1011,6 +1065,9 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
info->nr_hw_queues = nr_queues;
info->queue_depth = depth;
info->flags = ctx->flags;
+ if ((features & UBLK_F_QUIESCE) &&
+ (info->flags & UBLK_F_USER_RECOVERY))
+ info->flags |= UBLK_F_QUIESCE;
dev->tgt.ops = ops;
dev->tgt.sq_depth = depth;
dev->tgt.cq_depth = depth;
@@ -1206,6 +1263,9 @@ static int cmd_dev_get_features(void)
[const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
[const_ilog2(UBLK_F_ZONED)] = "ZONED",
[const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
+ [const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE",
+ [const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG",
+ [const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE",
};
struct ublk_dev *dev;
__u64 features = 0;
@@ -1239,13 +1299,66 @@ static int cmd_dev_get_features(void)
return ret;
}
+static int cmd_dev_update_size(struct dev_ctx *ctx)
+{
+ struct ublk_dev *dev = ublk_ctrl_init();
+ struct ublk_params p;
+ int ret = -EINVAL;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (ctx->dev_id < 0) {
+ fprintf(stderr, "device id isn't provided\n");
+ goto out;
+ }
+
+ dev->dev_info.dev_id = ctx->dev_id;
+ ret = ublk_ctrl_get_params(dev, &p);
+ if (ret < 0) {
+ ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
+ goto out;
+ }
+
+ if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) {
+ ublk_err("size isn't aligned with logical block size\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = ublk_ctrl_update_size(dev, ctx->size >> 9);
+out:
+ ublk_ctrl_deinit(dev);
+ return ret;
+}
+
+static int cmd_dev_quiesce(struct dev_ctx *ctx)
+{
+ struct ublk_dev *dev = ublk_ctrl_init();
+ int ret = -EINVAL;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (ctx->dev_id < 0) {
+ fprintf(stderr, "device id isn't provided for quiesce\n");
+ goto out;
+ }
+ dev->dev_info.dev_id = ctx->dev_id;
+ ret = ublk_ctrl_quiesce_dev(dev, 10000);
+
+out:
+ ublk_ctrl_deinit(dev);
+ return ret;
+}
+
static void __cmd_create_help(char *exe, bool recovery)
{
int i;
printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
exe, recovery ? "recover" : "add");
- printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n");
+ printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
printf("\t[-e 0|1 ] [-i 0|1]\n");
printf("\t[target options] [backfile1] [backfile2] ...\n");
printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
@@ -1281,6 +1394,8 @@ static int cmd_dev_help(char *exe)
printf("%s list [-n dev_id] -a \n", exe);
printf("\t -a list all devices, -n list specified device, default -a \n\n");
printf("%s features\n", exe);
+ printf("%s update_size -n dev_id -s|--size size_in_bytes \n", exe);
+ printf("%s quiesce -n dev_id\n", exe);
return 0;
}
@@ -1300,6 +1415,9 @@ int main(int argc, char *argv[])
{ "recovery_fail_io", 1, NULL, 'e'},
{ "recovery_reissue", 1, NULL, 'i'},
{ "get_data", 1, NULL, 'g'},
+ { "auto_zc", 0, NULL, 0 },
+ { "auto_zc_fallback", 0, NULL, 0 },
+ { "size", 1, NULL, 's'},
{ 0, 0, 0, 0 }
};
const struct ublk_tgt_ops *ops = NULL;
@@ -1321,7 +1439,7 @@ int main(int argc, char *argv[])
opterr = 0;
optind = 2;
- while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz",
+ while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz",
longopts, &option_idx)) != -1) {
switch (opt) {
case 'a':
@@ -1361,6 +1479,9 @@ int main(int argc, char *argv[])
case 'g':
ctx.flags |= UBLK_F_NEED_GET_DATA;
break;
+ case 's':
+ ctx.size = strtoull(optarg, NULL, 10);
+ break;
case 0:
if (!strcmp(longopts[option_idx].name, "debug_mask"))
ublk_dbg_mask = strtol(optarg, NULL, 16);
@@ -1368,6 +1489,10 @@ int main(int argc, char *argv[])
ublk_dbg_mask = 0;
if (!strcmp(longopts[option_idx].name, "foreground"))
ctx.fg = 1;
+ if (!strcmp(longopts[option_idx].name, "auto_zc"))
+ ctx.flags |= UBLK_F_AUTO_BUF_REG;
+ if (!strcmp(longopts[option_idx].name, "auto_zc_fallback"))
+ ctx.auto_zc_fallback = 1;
break;
case '?':
/*
@@ -1391,6 +1516,16 @@ int main(int argc, char *argv[])
}
}
+ /* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */
+ if (ctx.auto_zc_fallback &&
+ !((ctx.flags & UBLK_F_AUTO_BUF_REG) &&
+ (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) {
+ ublk_err("%s: auto_zc_fallback is set but neither "
+ "F_AUTO_BUF_REG nor F_SUPPORT_ZERO_COPY is enabled\n",
+ __func__);
+ return -EINVAL;
+ }
+
i = optind;
while (i < argc && ctx.nr_files < MAX_BACK_FILES) {
ctx.files[ctx.nr_files++] = argv[i++];
@@ -1423,6 +1558,10 @@ int main(int argc, char *argv[])
ret = cmd_dev_help(argv[0]);
else if (!strcmp(cmd, "features"))
ret = cmd_dev_get_features();
+ else if (!strcmp(cmd, "update_size"))
+ ret = cmd_dev_update_size(&ctx);
+ else if (!strcmp(cmd, "quiesce"))
+ ret = cmd_dev_quiesce(&ctx);
else
cmd_dev_help(argv[0]);
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 44ee1e4ac55b..e34508bf5798 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -19,7 +19,6 @@
#include <sys/inotify.h>
#include <sys/wait.h>
#include <sys/eventfd.h>
-#include <sys/uio.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <linux/io_uring.h>
@@ -85,6 +84,7 @@ struct dev_ctx {
unsigned int all:1;
unsigned int fg:1;
unsigned int recovery:1;
+ unsigned int auto_zc_fallback:1;
int _evtfd;
int _shmid;
@@ -92,6 +92,9 @@ struct dev_ctx {
/* built from shmem, only for ublk_dump_dev() */
struct ublk_dev *shadow_dev;
+ /* for 'update_size' command */
+ unsigned long long size;
+
union {
struct stripe_ctx stripe;
struct fault_inject_ctx fault_inject;
@@ -116,6 +119,7 @@ struct ublk_io {
#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
#define UBLKSRV_IO_FREE (1UL << 2)
#define UBLKSRV_NEED_GET_DATA (1UL << 3)
+#define UBLKSRV_NEED_REG_BUF (1UL << 4)
unsigned short flags;
unsigned short refs; /* used by target code only */
@@ -141,6 +145,9 @@ struct ublk_tgt_ops {
*/
void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
void (*usage)(const struct ublk_tgt_ops *ops);
+
+ /* return buffer index for UBLK_F_AUTO_BUF_REG */
+ unsigned short (*buf_index)(const struct ublk_queue *, int tag);
};
struct ublk_tgt {
@@ -169,6 +176,8 @@ struct ublk_queue {
#define UBLKSRV_QUEUE_IDLE (1U << 1)
#define UBLKSRV_NO_BUF (1U << 2)
#define UBLKSRV_ZC (1U << 3)
+#define UBLKSRV_AUTO_BUF_REG (1U << 4)
+#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5)
unsigned state;
pid_t tid;
pthread_t thread;
@@ -204,6 +213,12 @@ struct ublk_dev {
extern unsigned int ublk_dbg_mask;
extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag);
+
+static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod)
+{
+ return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF);
+}
+
static inline int is_target_io(__u64 user_data)
{
return (user_data & (1ULL << 63)) != 0;
@@ -388,6 +403,11 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q)
return q->state & UBLKSRV_ZC;
}
+static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q)
+{
+ return q->state & UBLKSRV_AUTO_BUF_REG;
+}
+
extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c
index 91fec3690d4b..44aca31cf2b0 100644
--- a/tools/testing/selftests/ublk/null.c
+++ b/tools/testing/selftests/ublk/null.c
@@ -42,10 +42,22 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
return 0;
}
+static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod,
+ struct io_uring_sqe *sqe)
+{
+ unsigned ublk_op = ublksrv_get_op(iod);
+
+ io_uring_prep_nop(sqe);
+ sqe->buf_index = tag;
+ sqe->flags |= IOSQE_FIXED_FILE;
+ sqe->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT;
+ sqe->len = iod->nr_sectors << 9; /* injected result */
+ sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
+}
+
static int null_queue_zc_io(struct ublk_queue *q, int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
- unsigned ublk_op = ublksrv_get_op(iod);
struct io_uring_sqe *sqe[3];
ublk_queue_alloc_sqes(q, sqe, 3);
@@ -55,12 +67,8 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag)
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
- io_uring_prep_nop(sqe[1]);
- sqe[1]->buf_index = tag;
- sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
- sqe[1]->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT;
- sqe[1]->len = iod->nr_sectors << 9; /* injected result */
- sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1);
+ __setup_nop_io(tag, iod, sqe[1]);
+ sqe[1]->flags |= IOSQE_IO_HARDLINK;
io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);
sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1);
@@ -69,6 +77,16 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag)
return 2;
}
+static int null_queue_auto_zc_io(struct ublk_queue *q, int tag)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+ struct io_uring_sqe *sqe[1];
+
+ ublk_queue_alloc_sqes(q, sqe, 1);
+ __setup_nop_io(tag, iod, sqe[0]);
+ return 1;
+}
+
static void ublk_null_io_done(struct ublk_queue *q, int tag,
const struct io_uring_cqe *cqe)
{
@@ -94,22 +112,37 @@ static void ublk_null_io_done(struct ublk_queue *q, int tag,
static int ublk_null_queue_io(struct ublk_queue *q, int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
- int zc = ublk_queue_use_zc(q);
+ unsigned auto_zc = ublk_queue_use_auto_zc(q);
+ unsigned zc = ublk_queue_use_zc(q);
int queued;
- if (!zc) {
+ if (auto_zc && !ublk_io_auto_zc_fallback(iod))
+ queued = null_queue_auto_zc_io(q, tag);
+ else if (zc)
+ queued = null_queue_zc_io(q, tag);
+ else {
ublk_complete_io(q, tag, iod->nr_sectors << 9);
return 0;
}
-
- queued = null_queue_zc_io(q, tag);
ublk_queued_tgt_io(q, tag, queued);
return 0;
}
+/*
+ * return invalid buffer index for triggering auto buffer register failure,
+ * then UBLK_IO_RES_NEED_REG_BUF handling is covered
+ */
+static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag)
+{
+ if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
+ return (unsigned short)-1;
+ return tag;
+}
+
const struct ublk_tgt_ops null_tgt_ops = {
.name = "null",
.init_tgt = ublk_null_tgt_init,
.queue_io = ublk_null_queue_io,
.tgt_io_done = ublk_null_io_done,
+ .buf_index = ublk_null_buf_index,
};
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 5dbd6392d83d..404a143bf3d6 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -70,7 +70,7 @@ static void free_stripe_array(struct stripe_array *s)
}
static void calculate_stripe_array(const struct stripe_conf *conf,
- const struct ublksrv_io_desc *iod, struct stripe_array *s)
+ const struct ublksrv_io_desc *iod, struct stripe_array *s, void *base)
{
const unsigned shift = conf->shift - 9;
const unsigned chunk_sects = 1 << shift;
@@ -102,7 +102,7 @@ static void calculate_stripe_array(const struct stripe_conf *conf,
}
assert(this->nr_vec < this->cap);
- this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done);
+ this->vec[this->nr_vec].iov_base = (void *)(base + done);
this->vec[this->nr_vec++].iov_len = nr_sects << 9;
start += nr_sects;
@@ -126,15 +126,17 @@ static inline enum io_uring_op stripe_to_uring_op(
static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
{
const struct stripe_conf *conf = get_chunk_shift(q);
- int zc = !!(ublk_queue_use_zc(q) != 0);
- enum io_uring_op op = stripe_to_uring_op(iod, zc);
+ unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0);
+ unsigned zc = (ublk_queue_use_zc(q) != 0);
+ enum io_uring_op op = stripe_to_uring_op(iod, zc | auto_zc);
struct io_uring_sqe *sqe[NR_STRIPE];
struct stripe_array *s = alloc_stripe_array(conf, iod);
struct ublk_io *io = ublk_get_io(q, tag);
int i, extra = zc ? 2 : 0;
+ void *base = (zc | auto_zc) ? NULL : (void *)iod->addr;
io->private_data = s;
- calculate_stripe_array(conf, iod, s);
+ calculate_stripe_array(conf, iod, s, base);
ublk_queue_alloc_sqes(q, sqe, s->nr + extra);
@@ -153,12 +155,11 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_
(void *)t->vec,
t->nr_vec,
t->start << 9);
- if (zc) {
+ io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
+ if (auto_zc || zc) {
sqe[i]->buf_index = tag;
- io_uring_sqe_set_flags(sqe[i],
- IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK);
- } else {
- io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
+ if (zc)
+ sqe[i]->flags |= IOSQE_IO_HARDLINK;
}
/* bit63 marks us as tgt io */
sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1);
@@ -287,6 +288,11 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
loff_t bytes = 0;
int ret, i, mul = 1;
+ if (ctx->auto_zc_fallback) {
+ ublk_err("%s: not support auto_zc_fallback\n", __func__);
+ return -EINVAL;
+ }
+
if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
ublk_err("invalid chunk size %u\n", chunk_size);
return -EINVAL;
diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh
index a81210ca3e99..0145569ee7e9 100755
--- a/tools/testing/selftests/ublk/test_common.sh
+++ b/tools/testing/selftests/ublk/test_common.sh
@@ -23,6 +23,11 @@ _get_disk_dev_t() {
echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))
}
+_get_disk_size()
+{
+ lsblk -b -o SIZE -n "$1"
+}
+
_run_fio_verify_io() {
fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \
--bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \
@@ -215,6 +220,26 @@ _recover_ublk_dev() {
echo "$state"
}
+# quiesce device and return ublk device state
+__ublk_quiesce_dev()
+{
+ local dev_id=$1
+ local exp_state=$2
+ local state
+
+ if ! ${UBLK_PROG} quiesce -n "${dev_id}"; then
+ state=$(_get_ublk_dev_state "${dev_id}")
+ return "$state"
+ fi
+
+ for ((j=0;j<50;j++)); do
+ state=$(_get_ublk_dev_state "${dev_id}")
+ [ "$state" == "$exp_state" ] && break
+ sleep 1
+ done
+ echo "$state"
+}
+
# kill the ublk daemon and return ublk device state
__ublk_kill_daemon()
{
@@ -251,7 +276,7 @@ __run_io_and_remove()
local kill_server=$3
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
- --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
+ --rw=randrw --norandommap --iodepth=256 --size="${size}" --numjobs="$(nproc)" \
--runtime=20 --time_based > /dev/null 2>&1 &
sleep 2
if [ "${kill_server}" = "yes" ]; then
@@ -303,20 +328,26 @@ run_io_and_kill_daemon()
run_io_and_recover()
{
+ local action=$1
local state
local dev_id
+ shift 1
dev_id=$(_add_ublk_dev "$@")
_check_add_dev "$TID" $?
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
- --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
+ --rw=randread --iodepth=256 --size="${size}" --numjobs=4 \
--runtime=20 --time_based > /dev/null 2>&1 &
sleep 4
- state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
+ if [ "$action" == "kill_daemon" ]; then
+ state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
+ elif [ "$action" == "quiesce_dev" ]; then
+ state=$(__ublk_quiesce_dev "${dev_id}" "QUIESCED")
+ fi
if [ "$state" != "QUIESCED" ]; then
- echo "device isn't quiesced($state) after killing daemon"
+ echo "device isn't quiesced($state) after $action"
return 255
fi
diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh
index 8a3bc080c577..8b533217d4a1 100755
--- a/tools/testing/selftests/ublk/test_generic_04.sh
+++ b/tools/testing/selftests/ublk/test_generic_04.sh
@@ -8,7 +8,7 @@ ERR_CODE=0
ublk_run_recover_test()
{
- run_io_and_recover "$@"
+ run_io_and_recover "kill_daemon" "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh
index 3bb00a347402..398e9e2b58e1 100755
--- a/tools/testing/selftests/ublk/test_generic_05.sh
+++ b/tools/testing/selftests/ublk/test_generic_05.sh
@@ -8,7 +8,7 @@ ERR_CODE=0
ublk_run_recover_test()
{
- run_io_and_recover "$@"
+ run_io_and_recover "kill_daemon" "$@"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
echo "$TID failure: $*"
diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh
index b67230c42c84..fd42062b7b76 100755
--- a/tools/testing/selftests/ublk/test_generic_06.sh
+++ b/tools/testing/selftests/ublk/test_generic_06.sh
@@ -17,7 +17,7 @@ STARTTIME=${SECONDS}
dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 &
dd_pid=$!
-__ublk_kill_daemon ${dev_id} "DEAD"
+__ublk_kill_daemon ${dev_id} "DEAD" >/dev/null
wait $dd_pid
dd_exitcode=$?
diff --git a/tools/testing/selftests/ublk/test_generic_08.sh b/tools/testing/selftests/ublk/test_generic_08.sh
new file mode 100755
index 000000000000..b222f3a77e12
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_08.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_08"
+ERR_CODE=0
+
+if ! _have_feature "AUTO_BUF_REG"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "generic" "test UBLK_F_AUTO_BUF_REG"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t loop -q 2 --auto_zc "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+if ! _mkfs_mount_test /dev/ublkb"${dev_id}"; then
+ _cleanup_test "generic"
+ _show_result $TID 255
+fi
+
+dev_id=$(_add_ublk_dev -t stripe --auto_zc "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "generic"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_09.sh b/tools/testing/selftests/ublk/test_generic_09.sh
new file mode 100755
index 000000000000..bb6f77ca5522
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_09.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_09"
+ERR_CODE=0
+
+if ! _have_feature "AUTO_BUF_REG"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "null" "basic IO test"
+
+dev_id=$(_add_ublk_dev -t null -z --auto_zc --auto_zc_fallback)
+_check_add_dev $TID $?
+
+# run fio over the two disks
+fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1
+ERR_CODE=$?
+
+_cleanup_test "null"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_10.sh b/tools/testing/selftests/ublk/test_generic_10.sh
new file mode 100755
index 000000000000..abc11c3d416b
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_10.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_10"
+ERR_CODE=0
+
+if ! _have_feature "UPDATE_SIZE"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "null" "check update size"
+
+dev_id=$(_add_ublk_dev -t null)
+_check_add_dev $TID $?
+
+size=$(_get_disk_size /dev/ublkb"${dev_id}")
+size=$(( size / 2 ))
+if ! "$UBLK_PROG" update_size -n "$dev_id" -s "$size"; then
+ ERR_CODE=255
+fi
+
+new_size=$(_get_disk_size /dev/ublkb"${dev_id}")
+if [ "$new_size" != "$size" ]; then
+ ERR_CODE=255
+fi
+
+_cleanup_test "null"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_11.sh b/tools/testing/selftests/ublk/test_generic_11.sh
new file mode 100755
index 000000000000..a00357a5ec6b
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_11.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_11"
+ERR_CODE=0
+
+ublk_run_quiesce_recover()
+{
+ run_io_and_recover "quiesce_dev" "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_feature "QUIESCE"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "quiesce" "basic quiesce & recover function verification"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_run_quiesce_recover -t null -q 2 -r 1 &
+ublk_run_quiesce_recover -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_quiesce_recover -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_run_quiesce_recover -t null -q 2 -r 1 -i 1 &
+ublk_run_quiesce_recover -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_quiesce_recover -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "quiesce"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh
index 1a9065125ae1..4bdd921081e5 100755
--- a/tools/testing/selftests/ublk/test_stress_02.sh
+++ b/tools/testing/selftests/ublk/test_stress_02.sh
@@ -25,10 +25,12 @@ _create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M
-ublk_io_and_kill_daemon 8G -t null -q 4 &
-ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
-ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
-wait
+for nr_queue in 1 4; do
+ ublk_io_and_kill_daemon 8G -t null -q "$nr_queue" &
+ ublk_io_and_kill_daemon 256M -t loop -q "$nr_queue" "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_kill_daemon 256M -t stripe -q "$nr_queue" "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ wait
+done
_cleanup_test "stress"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh
index e0854f71d35b..7d728ce50774 100755
--- a/tools/testing/selftests/ublk/test_stress_03.sh
+++ b/tools/testing/selftests/ublk/test_stress_03.sh
@@ -32,6 +32,13 @@ _create_backfile 2 128M
ublk_io_and_remove 8G -t null -q 4 -z &
ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+
+if _have_feature "AUTO_BUF_REG"; then
+ ublk_io_and_remove 8G -t null -q 4 --auto_zc &
+ ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
+fi
wait
_cleanup_test "stress"
diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
index 1798a98387e8..9bcfa64ea1f0 100755
--- a/tools/testing/selftests/ublk/test_stress_04.sh
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -31,6 +31,13 @@ _create_backfile 2 128M
ublk_io_and_kill_daemon 8G -t null -q 4 -z &
ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+
+if _have_feature "AUTO_BUF_REG"; then
+ ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc &
+ ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
+ ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
+fi
wait
_cleanup_test "stress"
diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh
index 88601b48f1cd..bcfc904cefc6 100755
--- a/tools/testing/selftests/ublk/test_stress_05.sh
+++ b/tools/testing/selftests/ublk/test_stress_05.sh
@@ -60,5 +60,14 @@ if _have_feature "ZERO_COPY"; then
done
fi
+if _have_feature "AUTO_BUF_REG"; then
+ for reissue in $(seq 0 1); do
+ ublk_io_and_remove 8G -t null -q 4 -g --auto_zc -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 -g --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
+ ublk_io_and_remove 8G -t null -q 4 -g -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" &
+ wait
+ done
+fi
+
_cleanup_test "stress"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 55500f901fbc..a8f550aecb35 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -611,6 +611,35 @@ n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips"
} < <(n0 wg show wg0 allowed-ips)
ip0 link del wg0
+allowedips=( )
+for i in {1..197}; do
+ allowedips+=( 192.168.0.$i )
+ allowedips+=( abcd::$i )
+done
+saved_ifs="$IFS"
+IFS=,
+allowedips="${allowedips[*]}"
+IFS="$saved_ifs"
+ip0 link add wg0 type wireguard
+n0 wg set wg0 peer "$pub1" allowed-ips "$allowedips"
+n0 wg set wg0 peer "$pub1" allowed-ips -192.168.0.1/32,-192.168.0.20/32,-192.168.0.100/32,-abcd::1/128,-abcd::20/128,-abcd::100/128
+{
+ read -r pub allowedips
+ [[ $pub == "$pub1" ]]
+ i=0
+ for ip in $allowedips; do
+ [[ $ip != "192.168.0.1" ]]
+ [[ $ip != "192.168.0.20" ]]
+ [[ $ip != "192.168.0.100" ]]
+ [[ $ip != "abcd::1" ]]
+ [[ $ip != "abcd::20" ]]
+ [[ $ip != "abcd::100" ]]
+ ((++i))
+ done
+ ((i == 388))
+} < <(n0 wg show wg0 allowed-ips)
+ip0 link del wg0
+
! n0 wg show doesnotexist || false
ip0 link add wg0 type wireguard
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 35856b11c143..791d21b736a5 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -43,7 +43,7 @@ $(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.o
$(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0))
$(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117))
$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac))
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20250521,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,b6f2628b85b1b23cc06517ec9c74f82d52c4cdbd020f3dd2f00c972a1782950e))
export CFLAGS := -O3 -pipe
ifeq ($(HOST_ARCH),$(ARCH))
@@ -401,6 +401,7 @@ $(BASH_PATH)/.installed: $(BASH_TAR)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
touch $@
+$(BASH_PATH)/bash: export CFLAGS_FOR_BUILD += -std=gnu17
$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble
$(MAKE) -C $(BASH_PATH)
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index c305d2f613f0..5d39f43dd667 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -22,7 +22,6 @@ CONFIG_HAVE_ARCH_KASAN=y
CONFIG_KASAN=y
CONFIG_KASAN_INLINE=y
CONFIG_UBSAN=y
-CONFIG_UBSAN_SANITIZE_ALL=y
CONFIG_DEBUG_KMEMLEAK=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_SHIRQ=y
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 28422c32cc8f..f703fcfe9f7c 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -19,7 +19,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
- corrupt_xstate_header amx lam test_shadow_stack avx
+ corrupt_xstate_header amx lam test_shadow_stack avx apx
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
@@ -136,3 +136,4 @@ $(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
$(OUTPUT)/avx_64: CFLAGS += -mno-avx -mno-avx512f
$(OUTPUT)/amx_64: EXTRA_FILES += xstate.c
$(OUTPUT)/avx_64: EXTRA_FILES += xstate.c
+$(OUTPUT)/apx_64: EXTRA_FILES += xstate.c
diff --git a/tools/testing/selftests/x86/apx.c b/tools/testing/selftests/x86/apx.c
new file mode 100644
index 000000000000..d9c8d41b8c5a
--- /dev/null
+++ b/tools/testing/selftests/x86/apx.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include "xstate.h"
+
+int main(void)
+{
+ test_xstate(XFEATURE_APX);
+}
diff --git a/tools/testing/selftests/x86/bugs/Makefile b/tools/testing/selftests/x86/bugs/Makefile
new file mode 100644
index 000000000000..8ff2d7226c7f
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/Makefile
@@ -0,0 +1,3 @@
+TEST_PROGS := its_sysfs.py its_permutations.py its_indirect_alignment.py its_ret_alignment.py
+TEST_FILES := common.py
+include ../../lib.mk
diff --git a/tools/testing/selftests/x86/bugs/common.py b/tools/testing/selftests/x86/bugs/common.py
new file mode 100755
index 000000000000..2f9664a80617
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/common.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# This contains kselftest framework adapted common functions for testing
+# mitigation for x86 bugs.
+
+import os, sys, re, shutil
+
+sys.path.insert(0, '../../kselftest')
+import ksft
+
+def read_file(path):
+ if not os.path.exists(path):
+ return None
+ with open(path, 'r') as file:
+ return file.read().strip()
+
+def cpuinfo_has(arg):
+ cpuinfo = read_file('/proc/cpuinfo')
+ if arg in cpuinfo:
+ return True
+ return False
+
+def cmdline_has(arg):
+ cmdline = read_file('/proc/cmdline')
+ if arg in cmdline:
+ return True
+ return False
+
+def cmdline_has_either(args):
+ cmdline = read_file('/proc/cmdline')
+ for arg in args:
+ if arg in cmdline:
+ return True
+ return False
+
+def cmdline_has_none(args):
+ return not cmdline_has_either(args)
+
+def cmdline_has_all(args):
+ cmdline = read_file('/proc/cmdline')
+ for arg in args:
+ if arg not in cmdline:
+ return False
+ return True
+
+def get_sysfs(bug):
+ return read_file("/sys/devices/system/cpu/vulnerabilities/" + bug)
+
+def sysfs_has(bug, mitigation):
+ status = get_sysfs(bug)
+ if mitigation in status:
+ return True
+ return False
+
+def sysfs_has_either(bugs, mitigations):
+ for bug in bugs:
+ for mitigation in mitigations:
+ if sysfs_has(bug, mitigation):
+ return True
+ return False
+
+def sysfs_has_none(bugs, mitigations):
+ return not sysfs_has_either(bugs, mitigations)
+
+def sysfs_has_all(bugs, mitigations):
+ for bug in bugs:
+ for mitigation in mitigations:
+ if not sysfs_has(bug, mitigation):
+ return False
+ return True
+
+def bug_check_pass(bug, found):
+ ksft.print_msg(f"\nFound: {found}")
+ # ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}")
+ ksft.test_result_pass(f'{bug}: {found}')
+
+def bug_check_fail(bug, found, expected):
+ ksft.print_msg(f'\nFound:\t {found}')
+ ksft.print_msg(f'Expected:\t {expected}')
+ ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}")
+ ksft.test_result_fail(f'{bug}: {found}')
+
+def bug_status_unknown(bug, found):
+ ksft.print_msg(f'\nUnknown status: {found}')
+ ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}")
+ ksft.test_result_fail(f'{bug}: {found}')
+
+def basic_checks_sufficient(bug, mitigation):
+ if not mitigation:
+ bug_status_unknown(bug, "None")
+ return True
+ elif mitigation == "Not affected":
+ ksft.test_result_pass(bug)
+ return True
+ elif mitigation == "Vulnerable":
+ if cmdline_has_either([f'{bug}=off', 'mitigations=off']):
+ bug_check_pass(bug, mitigation)
+ return True
+ return False
+
+def get_section_info(vmlinux, section_name):
+ from elftools.elf.elffile import ELFFile
+ with open(vmlinux, 'rb') as f:
+ elffile = ELFFile(f)
+ section = elffile.get_section_by_name(section_name)
+ if section is None:
+ ksft.print_msg("Available sections in vmlinux:")
+ for sec in elffile.iter_sections():
+ ksft.print_msg(sec.name)
+ raise ValueError(f"Section {section_name} not found in {vmlinux}")
+ return section['sh_addr'], section['sh_offset'], section['sh_size']
+
+def get_patch_sites(vmlinux, offset, size):
+ import struct
+ output = []
+ with open(vmlinux, 'rb') as f:
+ f.seek(offset)
+ i = 0
+ while i < size:
+ data = f.read(4) # s32
+ if not data:
+ break
+ sym_offset = struct.unpack('<i', data)[0] + i
+ i += 4
+ output.append(sym_offset)
+ return output
+
+def get_instruction_from_vmlinux(elffile, section, virtual_address, target_address):
+ from capstone import Cs, CS_ARCH_X86, CS_MODE_64
+ section_start = section['sh_addr']
+ section_end = section_start + section['sh_size']
+
+ if not (section_start <= target_address < section_end):
+ return None
+
+ offset = target_address - section_start
+ code = section.data()[offset:offset + 16]
+
+ cap = init_capstone()
+ for instruction in cap.disasm(code, target_address):
+ if instruction.address == target_address:
+ return instruction
+ return None
+
+def init_capstone():
+ from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_OPT_SYNTAX_ATT
+ cap = Cs(CS_ARCH_X86, CS_MODE_64)
+ cap.syntax = CS_OPT_SYNTAX_ATT
+ return cap
+
+def get_runtime_kernel():
+ import drgn
+ return drgn.program_from_kernel()
+
+def check_dependencies_or_skip(modules, script_name="unknown test"):
+ for mod in modules:
+ try:
+ __import__(mod)
+ except ImportError:
+ ksft.test_result_skip(f"Skipping {script_name}: missing module '{mod}'")
+ ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_indirect_alignment.py b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py
new file mode 100755
index 000000000000..cdc33ae6a91c
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for indirect target selection (ITS) mitigation.
+#
+# Test if indirect CALL/JMP are correctly patched by evaluating
+# the vmlinux .retpoline_sites in /proc/kcore.
+
+# Install dependencies
+# add-apt-repository ppa:michel-slm/kernel-utils
+# apt update
+# apt install -y python3-drgn python3-pyelftools python3-capstone
+#
+# Best to copy the vmlinux at a standard location:
+# mkdir -p /usr/lib/debug/lib/modules/$(uname -r)
+# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux
+#
+# Usage: ./its_indirect_alignment.py [vmlinux]
+
+import os, sys, argparse
+from pathlib import Path
+
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, this_dir + '/../../kselftest')
+import ksft
+import common as c
+
+bug = "indirect_target_selection"
+
+mitigation = c.get_sysfs(bug)
+if not mitigation or "Aligned branch/return thunks" not in mitigation:
+ ksft.test_result_skip("Skipping its_indirect_alignment.py: Aligned branch/return thunks not enabled")
+ ksft.finished()
+
+if c.sysfs_has("spectre_v2", "Retpolines"):
+ ksft.test_result_skip("Skipping its_indirect_alignment.py: Retpolines deployed")
+ ksft.finished()
+
+c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_indirect_alignment.py")
+
+from elftools.elf.elffile import ELFFile
+from drgn.helpers.common.memory import identify_address
+
+cap = c.init_capstone()
+
+if len(os.sys.argv) > 1:
+ arg_vmlinux = os.sys.argv[1]
+ if not os.path.exists(arg_vmlinux):
+ ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at argument path: {arg_vmlinux}")
+ ksft.exit_fail()
+ os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True)
+ os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux')
+
+vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux"
+if not os.path.exists(vmlinux):
+ ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at {vmlinux}")
+ ksft.exit_fail()
+
+ksft.print_msg(f"Using vmlinux: {vmlinux}")
+
+retpolines_start_vmlinux, retpolines_sec_offset, size = c.get_section_info(vmlinux, '.retpoline_sites')
+ksft.print_msg(f"vmlinux: Section .retpoline_sites (0x{retpolines_start_vmlinux:x}) found at 0x{retpolines_sec_offset:x} with size 0x{size:x}")
+
+sites_offset = c.get_patch_sites(vmlinux, retpolines_sec_offset, size)
+total_retpoline_tests = len(sites_offset)
+ksft.print_msg(f"Found {total_retpoline_tests} retpoline sites")
+
+prog = c.get_runtime_kernel()
+retpolines_start_kcore = prog.symbol('__retpoline_sites').address
+ksft.print_msg(f'kcore: __retpoline_sites: 0x{retpolines_start_kcore:x}')
+
+x86_indirect_its_thunk_r15 = prog.symbol('__x86_indirect_its_thunk_r15').address
+ksft.print_msg(f'kcore: __x86_indirect_its_thunk_r15: 0x{x86_indirect_its_thunk_r15:x}')
+
+tests_passed = 0
+tests_failed = 0
+tests_unknown = 0
+
+with open(vmlinux, 'rb') as f:
+ elffile = ELFFile(f)
+ text_section = elffile.get_section_by_name('.text')
+
+ for i in range(0, len(sites_offset)):
+ site = retpolines_start_kcore + sites_offset[i]
+ vmlinux_site = retpolines_start_vmlinux + sites_offset[i]
+ passed = unknown = failed = False
+ try:
+ vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site)
+ kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0]
+ operand = kcore_insn.op_str
+ insn_end = site + kcore_insn.size - 1 # TODO handle Jcc.32 __x86_indirect_thunk_\reg
+ safe_site = insn_end & 0x20
+ site_status = "" if safe_site else "(unsafe)"
+
+ ksft.print_msg(f"\nSite {i}: {identify_address(prog, site)} <0x{site:x}> {site_status}")
+ ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}")
+ ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}")
+
+ if (site & 0x20) ^ (insn_end & 0x20):
+ ksft.print_msg(f"\tSite at safe/unsafe boundary: {str(kcore_insn.bytes)} {kcore_insn.mnemonic} {operand}")
+ if safe_site:
+ tests_passed += 1
+ passed = True
+ ksft.print_msg(f"\tPASSED: At safe address")
+ continue
+
+ if operand.startswith('0xffffffff'):
+ thunk = int(operand, 16)
+ if thunk > x86_indirect_its_thunk_r15:
+ insn_at_thunk = list(cap.disasm(prog.read(thunk, 16), thunk))[0]
+ operand += ' -> ' + insn_at_thunk.mnemonic + ' ' + insn_at_thunk.op_str + ' <dynamic-thunk?>'
+ if 'jmp' in insn_at_thunk.mnemonic and thunk & 0x20:
+ ksft.print_msg(f"\tPASSED: Found {operand} at safe address")
+ passed = True
+ if not passed:
+ if kcore_insn.operands[0].type == capstone.CS_OP_IMM:
+ operand += ' <' + prog.symbol(int(operand, 16)) + '>'
+ if '__x86_indirect_its_thunk_' in operand:
+ ksft.print_msg(f"\tPASSED: Found {operand}")
+ else:
+ ksft.print_msg(f"\tPASSED: Found direct branch: {kcore_insn}, ITS thunk not required.")
+ passed = True
+ else:
+ unknown = True
+ if passed:
+ tests_passed += 1
+ elif unknown:
+ ksft.print_msg(f"UNKNOWN: unexpected operand: {kcore_insn}")
+ tests_unknown += 1
+ else:
+ ksft.print_msg(f'\t************* FAILED *************')
+ ksft.print_msg(f"\tFound {kcore_insn.bytes} {kcore_insn.mnemonic} {operand}")
+ ksft.print_msg(f'\t**********************************')
+ tests_failed += 1
+ except Exception as e:
+ ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}")
+ tests_unknown += 1
+
+ksft.print_msg(f"\n\nSummary:")
+ksft.print_msg(f"PASS: \t{tests_passed} \t/ {total_retpoline_tests}")
+ksft.print_msg(f"FAIL: \t{tests_failed} \t/ {total_retpoline_tests}")
+ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_retpoline_tests}")
+
+if tests_failed == 0:
+ ksft.test_result_pass("All ITS return thunk sites passed")
+else:
+ ksft.test_result_fail(f"{tests_failed} ITS return thunk sites failed")
+ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_permutations.py b/tools/testing/selftests/x86/bugs/its_permutations.py
new file mode 100755
index 000000000000..3204f4728c62
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_permutations.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for indirect target selection (ITS) cmdline permutations with other bugs
+# like spectre_v2 and retbleed.
+
+import os, sys, subprocess, itertools, re, shutil
+
+test_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, test_dir + '/../../kselftest')
+import ksft
+import common as c
+
+bug = "indirect_target_selection"
+mitigation = c.get_sysfs(bug)
+
+if not mitigation or "Not affected" in mitigation:
+ ksft.test_result_skip("Skipping its_permutations.py: not applicable")
+ ksft.finished()
+
+if shutil.which('vng') is None:
+ ksft.test_result_skip("Skipping its_permutations.py: virtme-ng ('vng') not found in PATH.")
+ ksft.finished()
+
+TEST = f"{test_dir}/its_sysfs.py"
+default_kparam = ['clearcpuid=hypervisor', 'panic=5', 'panic_on_warn=1', 'oops=panic', 'nmi_watchdog=1', 'hung_task_panic=1']
+
+DEBUG = " -v "
+
+# Install dependencies
+# https://github.com/arighi/virtme-ng
+# apt install virtme-ng
+BOOT_CMD = f"vng --run {test_dir}/../../../../../arch/x86/boot/bzImage "
+#BOOT_CMD += DEBUG
+
+bug = "indirect_target_selection"
+
+input_options = {
+ 'indirect_target_selection' : ['off', 'on', 'stuff', 'vmexit'],
+ 'retbleed' : ['off', 'stuff', 'auto'],
+ 'spectre_v2' : ['off', 'on', 'eibrs', 'retpoline', 'ibrs', 'eibrs,retpoline'],
+}
+
+def pretty_print(output):
+ OKBLUE = '\033[94m'
+ OKGREEN = '\033[92m'
+ WARNING = '\033[93m'
+ FAIL = '\033[91m'
+ ENDC = '\033[0m'
+ BOLD = '\033[1m'
+
+ # Define patterns and their corresponding colors
+ patterns = {
+ r"^ok \d+": OKGREEN,
+ r"^not ok \d+": FAIL,
+ r"^# Testing .*": OKBLUE,
+ r"^# Found: .*": WARNING,
+ r"^# Totals: .*": BOLD,
+ r"pass:([1-9]\d*)": OKGREEN,
+ r"fail:([1-9]\d*)": FAIL,
+ r"skip:([1-9]\d*)": WARNING,
+ }
+
+ # Apply colors based on patterns
+ for pattern, color in patterns.items():
+ output = re.sub(pattern, lambda match: f"{color}{match.group(0)}{ENDC}", output, flags=re.MULTILINE)
+
+ print(output)
+
+combinations = list(itertools.product(*input_options.values()))
+ksft.print_header()
+ksft.set_plan(len(combinations))
+
+logs = ""
+
+for combination in combinations:
+ append = ""
+ log = ""
+ for p in default_kparam:
+ append += f' --append={p}'
+ command = BOOT_CMD + append
+ test_params = ""
+ for i, key in enumerate(input_options.keys()):
+ param = f'{key}={combination[i]}'
+ test_params += f' {param}'
+ command += f" --append={param}"
+ command += f" -- {TEST}"
+ test_name = f"{bug} {test_params}"
+ pretty_print(f'# Testing {test_name}')
+ t = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ t.wait()
+ output, _ = t.communicate()
+ if t.returncode == 0:
+ ksft.test_result_pass(test_name)
+ else:
+ ksft.test_result_fail(test_name)
+ output = output.decode()
+ log += f" {output}"
+ pretty_print(log)
+ logs += output + "\n"
+
+# Optionally use tappy to parse the output
+# apt install python3-tappy
+with open("logs.txt", "w") as f:
+ f.write(logs)
+
+ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_ret_alignment.py b/tools/testing/selftests/x86/bugs/its_ret_alignment.py
new file mode 100755
index 000000000000..f40078d9f6ff
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_ret_alignment.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for indirect target selection (ITS) mitigation.
+#
+# Tests if the RETs are correctly patched by evaluating the
+# vmlinux .return_sites in /proc/kcore.
+#
+# Install dependencies
+# add-apt-repository ppa:michel-slm/kernel-utils
+# apt update
+# apt install -y python3-drgn python3-pyelftools python3-capstone
+#
+# Run on target machine
+# mkdir -p /usr/lib/debug/lib/modules/$(uname -r)
+# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux
+#
+# Usage: ./its_ret_alignment.py
+
+import os, sys, argparse
+from pathlib import Path
+
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, this_dir + '/../../kselftest')
+import ksft
+import common as c
+
+bug = "indirect_target_selection"
+mitigation = c.get_sysfs(bug)
+if not mitigation or "Aligned branch/return thunks" not in mitigation:
+ ksft.test_result_skip("Skipping its_ret_alignment.py: Aligned branch/return thunks not enabled")
+ ksft.finished()
+
+c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_ret_alignment.py")
+
+from elftools.elf.elffile import ELFFile
+from drgn.helpers.common.memory import identify_address
+
+cap = c.init_capstone()
+
+if len(os.sys.argv) > 1:
+ arg_vmlinux = os.sys.argv[1]
+ if not os.path.exists(arg_vmlinux):
+ ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at user-supplied path: {arg_vmlinux}")
+ ksft.exit_fail()
+ os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True)
+ os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux')
+
+vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux"
+if not os.path.exists(vmlinux):
+ ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at {vmlinux}")
+ ksft.exit_fail()
+
+ksft.print_msg(f"Using vmlinux: {vmlinux}")
+
+rethunks_start_vmlinux, rethunks_sec_offset, size = c.get_section_info(vmlinux, '.return_sites')
+ksft.print_msg(f"vmlinux: Section .return_sites (0x{rethunks_start_vmlinux:x}) found at 0x{rethunks_sec_offset:x} with size 0x{size:x}")
+
+sites_offset = c.get_patch_sites(vmlinux, rethunks_sec_offset, size)
+total_rethunk_tests = len(sites_offset)
+ksft.print_msg(f"Found {total_rethunk_tests} rethunk sites")
+
+prog = c.get_runtime_kernel()
+rethunks_start_kcore = prog.symbol('__return_sites').address
+ksft.print_msg(f'kcore: __rethunk_sites: 0x{rethunks_start_kcore:x}')
+
+its_return_thunk = prog.symbol('its_return_thunk').address
+ksft.print_msg(f'kcore: its_return_thunk: 0x{its_return_thunk:x}')
+
+tests_passed = 0
+tests_failed = 0
+tests_unknown = 0
+tests_skipped = 0
+
+with open(vmlinux, 'rb') as f:
+ elffile = ELFFile(f)
+ text_section = elffile.get_section_by_name('.text')
+
+ for i in range(len(sites_offset)):
+ site = rethunks_start_kcore + sites_offset[i]
+ vmlinux_site = rethunks_start_vmlinux + sites_offset[i]
+ try:
+ passed = unknown = failed = skipped = False
+
+ symbol = identify_address(prog, site)
+ vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site)
+ kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0]
+
+ insn_end = site + kcore_insn.size - 1
+
+ safe_site = insn_end & 0x20
+ site_status = "" if safe_site else "(unsafe)"
+
+ ksft.print_msg(f"\nSite {i}: {symbol} <0x{site:x}> {site_status}")
+ ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}")
+ ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}")
+
+ if safe_site:
+ tests_passed += 1
+ passed = True
+ ksft.print_msg(f"\tPASSED: At safe address")
+ continue
+
+ if "jmp" in kcore_insn.mnemonic:
+ passed = True
+ elif "ret" not in kcore_insn.mnemonic:
+ skipped = True
+
+ if passed:
+ ksft.print_msg(f"\tPASSED: Found {kcore_insn.mnemonic} {kcore_insn.op_str}")
+ tests_passed += 1
+ elif skipped:
+ ksft.print_msg(f"\tSKIPPED: Found '{kcore_insn.mnemonic}'")
+ tests_skipped += 1
+ elif unknown:
+ ksft.print_msg(f"UNKNOWN: An unknown instruction: {kcore_insn}")
+ tests_unknown += 1
+ else:
+ ksft.print_msg(f'\t************* FAILED *************')
+ ksft.print_msg(f"\tFound {kcore_insn.mnemonic} {kcore_insn.op_str}")
+ ksft.print_msg(f'\t**********************************')
+ tests_failed += 1
+ except Exception as e:
+ ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}")
+ tests_unknown += 1
+
+ksft.print_msg(f"\n\nSummary:")
+ksft.print_msg(f"PASSED: \t{tests_passed} \t/ {total_rethunk_tests}")
+ksft.print_msg(f"FAILED: \t{tests_failed} \t/ {total_rethunk_tests}")
+ksft.print_msg(f"SKIPPED: \t{tests_skipped} \t/ {total_rethunk_tests}")
+ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_rethunk_tests}")
+
+if tests_failed == 0:
+ ksft.test_result_pass("All ITS return thunk sites passed.")
+else:
+ ksft.test_result_fail(f"{tests_failed} failed sites need ITS return thunks.")
+ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_sysfs.py b/tools/testing/selftests/x86/bugs/its_sysfs.py
new file mode 100755
index 000000000000..7bca81f2f606
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_sysfs.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for Indirect Target Selection(ITS) mitigation sysfs status.
+
+import sys, os, re
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, this_dir + '/../../kselftest')
+import ksft
+
+from common import *
+
+bug = "indirect_target_selection"
+mitigation = get_sysfs(bug)
+
+ITS_MITIGATION_ALIGNED_THUNKS = "Mitigation: Aligned branch/return thunks"
+ITS_MITIGATION_RETPOLINE_STUFF = "Mitigation: Retpolines, Stuffing RSB"
+ITS_MITIGATION_VMEXIT_ONLY = "Mitigation: Vulnerable, KVM: Not affected"
+ITS_MITIGATION_VULNERABLE = "Vulnerable"
+
+def check_mitigation():
+ if mitigation == ITS_MITIGATION_ALIGNED_THUNKS:
+ if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"):
+ bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF)
+ return
+ if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'):
+ bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_VMEXIT_ONLY)
+ return
+ bug_check_pass(bug, ITS_MITIGATION_ALIGNED_THUNKS)
+ return
+
+ if mitigation == ITS_MITIGATION_RETPOLINE_STUFF:
+ if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"):
+ bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF)
+ return
+ if sysfs_has('retbleed', 'Stuffing'):
+ bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF)
+ return
+ bug_check_fail(bug, ITS_MITIGATION_RETPOLINE_STUFF, ITS_MITIGATION_ALIGNED_THUNKS)
+
+ if mitigation == ITS_MITIGATION_VMEXIT_ONLY:
+ if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'):
+ bug_check_pass(bug, ITS_MITIGATION_VMEXIT_ONLY)
+ return
+ bug_check_fail(bug, ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS)
+
+ if mitigation == ITS_MITIGATION_VULNERABLE:
+ if sysfs_has("spectre_v2", "Vulnerable"):
+ bug_check_pass(bug, ITS_MITIGATION_VULNERABLE)
+ else:
+ bug_check_fail(bug, "Mitigation", ITS_MITIGATION_VULNERABLE)
+
+ bug_status_unknown(bug, mitigation)
+ return
+
+ksft.print_header()
+ksft.set_plan(1)
+ksft.print_msg(f'{bug}: {mitigation} ...')
+
+if not basic_checks_sufficient(bug, mitigation):
+ check_mitigation()
+
+ksft.finished()
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 18d736640ece..0873b0e5f48b 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -682,7 +682,7 @@ int do_uring(unsigned long lam)
return 1;
if (fstat(file_fd, &st) < 0)
- return 1;
+ goto cleanup;
off_t file_sz = st.st_size;
@@ -690,7 +690,7 @@ int do_uring(unsigned long lam)
fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
if (!fi)
- return 1;
+ goto cleanup;
fi->file_sz = file_sz;
fi->file_fd = file_fd;
@@ -698,7 +698,7 @@ int do_uring(unsigned long lam)
ring = malloc(sizeof(*ring));
if (!ring) {
free(fi);
- return 1;
+ goto cleanup;
}
memset(ring, 0, sizeof(struct io_ring));
@@ -729,6 +729,8 @@ out:
}
free(fi);
+cleanup:
+ close(file_fd);
return ret;
}
@@ -1189,6 +1191,7 @@ void *allocate_dsa_pasid(void)
wq = mmap(NULL, 0x1000, PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, 0);
+ close(fd);
if (wq == MAP_FAILED)
perror("mmap");
diff --git a/tools/testing/selftests/x86/xstate.c b/tools/testing/selftests/x86/xstate.c
index 23c1d6c964ea..97fe4bd8bc77 100644
--- a/tools/testing/selftests/x86/xstate.c
+++ b/tools/testing/selftests/x86/xstate.c
@@ -31,7 +31,8 @@
(1 << XFEATURE_OPMASK) | \
(1 << XFEATURE_ZMM_Hi256) | \
(1 << XFEATURE_Hi16_ZMM) | \
- (1 << XFEATURE_XTILEDATA))
+ (1 << XFEATURE_XTILEDATA) | \
+ (1 << XFEATURE_APX))
static inline uint64_t xgetbv(uint32_t index)
{
diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h
index 42af36ec852f..e91e3092b5d2 100644
--- a/tools/testing/selftests/x86/xstate.h
+++ b/tools/testing/selftests/x86/xstate.h
@@ -33,6 +33,7 @@ enum xfeature {
XFEATURE_RSRVD_COMP_16,
XFEATURE_XTILECFG,
XFEATURE_XTILEDATA,
+ XFEATURE_APX,
XFEATURE_MAX,
};
@@ -59,6 +60,7 @@ static const char *xfeature_names[] =
"unknown xstate feature",
"AMX Tile config",
"AMX Tile data",
+ "APX registers",
"unknown xstate feature",
};
diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c
index 44aee49b6cee..1453d38e08bb 100644
--- a/tools/testing/vsock/timeout.c
+++ b/tools/testing/vsock/timeout.c
@@ -21,6 +21,7 @@
#include <stdbool.h>
#include <unistd.h>
#include <stdio.h>
+#include <time.h>
#include "timeout.h"
static volatile bool timeout;
@@ -28,6 +29,8 @@ static volatile bool timeout;
/* SIGALRM handler function. Do not use sleep(2), alarm(2), or
* setitimer(2) while using this API - they may interfere with each
* other.
+ *
+ * If you need to sleep, please use timeout_sleep() provided by this API.
*/
void sigalrm(int signo)
{
@@ -58,3 +61,18 @@ void timeout_end(void)
alarm(0);
timeout = false;
}
+
+/* Sleep in a timeout section.
+ *
+ * nanosleep(2) can be used with this API since POSIX.1 explicitly
+ * specifies that it does not interact with signals.
+ */
+int timeout_usleep(useconds_t usec)
+{
+ struct timespec ts = {
+ .tv_sec = usec / 1000000,
+ .tv_nsec = (usec % 1000000) * 1000,
+ };
+
+ return nanosleep(&ts, NULL);
+}
diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h
index ecb7c840e65a..1c3fcad87a49 100644
--- a/tools/testing/vsock/timeout.h
+++ b/tools/testing/vsock/timeout.h
@@ -11,5 +11,6 @@ void sigalrm(int signo);
void timeout_begin(unsigned int seconds);
void timeout_check(const char *operation);
void timeout_end(void);
+int timeout_usleep(useconds_t usec);
#endif /* TIMEOUT_H */
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index de25892f865f..0c7e9cbcbc85 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -17,6 +17,7 @@
#include <assert.h>
#include <sys/epoll.h>
#include <sys/mman.h>
+#include <linux/sockios.h>
#include "timeout.h"
#include "control.h"
@@ -96,6 +97,30 @@ void vsock_wait_remote_close(int fd)
close(epollfd);
}
+/* Wait until transport reports no data left to be sent.
+ * Return false if transport does not implement the unsent_bytes() callback.
+ */
+bool vsock_wait_sent(int fd)
+{
+ int ret, sock_bytes_unsent;
+
+ timeout_begin(TIMEOUT);
+ do {
+ ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP)
+ break;
+
+ perror("ioctl(SIOCOUTQ)");
+ exit(EXIT_FAILURE);
+ }
+ timeout_check("SIOCOUTQ");
+ } while (sock_bytes_unsent != 0);
+ timeout_end();
+
+ return !ret;
+}
+
/* Create socket <type>, bind to <cid, port> and return the file descriptor. */
int vsock_bind(unsigned int cid, unsigned int port, int type)
{
@@ -798,3 +823,16 @@ void enable_so_zerocopy_check(int fd)
setsockopt_int_check(fd, SOL_SOCKET, SO_ZEROCOPY, 1,
"setsockopt SO_ZEROCOPY");
}
+
+void enable_so_linger(int fd, int timeout)
+{
+ struct linger optval = {
+ .l_onoff = 1,
+ .l_linger = timeout
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
+ perror("setsockopt(SO_LINGER)");
+ exit(EXIT_FAILURE);
+ }
+}
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index d1f765ce3eee..5e2db67072d5 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -54,6 +54,7 @@ int vsock_stream_listen(unsigned int cid, unsigned int port);
int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp);
void vsock_wait_remote_close(int fd);
+bool vsock_wait_sent(int fd);
void send_buf(int fd, const void *buf, size_t len, int flags,
ssize_t expected_ret);
void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret);
@@ -79,4 +80,5 @@ void setsockopt_int_check(int fd, int level, int optname, int val,
void setsockopt_timeval_check(int fd, int level, int optname,
struct timeval val, char const *errmsg);
void enable_so_zerocopy_check(int fd);
+void enable_so_linger(int fd, int timeout);
#endif /* UTIL_H */
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index d0f6d253ac72..f669baaa0dca 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -21,7 +21,6 @@
#include <poll.h>
#include <signal.h>
#include <sys/ioctl.h>
-#include <linux/sockios.h>
#include <linux/time64.h>
#include "vsock_test_zerocopy.h"
@@ -1058,18 +1057,39 @@ static void sigpipe(int signo)
have_sigpipe = 1;
}
+#define SEND_SLEEP_USEC (10 * 1000)
+
static void test_stream_check_sigpipe(int fd)
{
ssize_t res;
have_sigpipe = 0;
- res = send(fd, "A", 1, 0);
- if (res != -1) {
- fprintf(stderr, "expected send(2) failure, got %zi\n", res);
- exit(EXIT_FAILURE);
+ /* When the other peer calls shutdown(SHUT_RD), there is a chance that
+ * the send() call could occur before the message carrying the close
+ * information arrives over the transport. In such cases, the send()
+ * might still succeed. To avoid this race, let's retry the send() call
+ * a few times, ensuring the test is more reliable.
+ */
+ timeout_begin(TIMEOUT);
+ while(1) {
+ res = send(fd, "A", 1, 0);
+ if (res == -1 && errno != EINTR)
+ break;
+
+ /* Sleep a little before trying again to avoid flooding the
+ * other peer and filling its receive buffer, causing
+ * false-negative.
+ */
+ timeout_usleep(SEND_SLEEP_USEC);
+ timeout_check("send");
}
+ timeout_end();
+ if (errno != EPIPE) {
+ fprintf(stderr, "unexpected send(2) errno %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
if (!have_sigpipe) {
fprintf(stderr, "SIGPIPE expected\n");
exit(EXIT_FAILURE);
@@ -1077,12 +1097,21 @@ static void test_stream_check_sigpipe(int fd)
have_sigpipe = 0;
- res = send(fd, "A", 1, MSG_NOSIGNAL);
- if (res != -1) {
- fprintf(stderr, "expected send(2) failure, got %zi\n", res);
- exit(EXIT_FAILURE);
+ timeout_begin(TIMEOUT);
+ while(1) {
+ res = send(fd, "A", 1, MSG_NOSIGNAL);
+ if (res == -1 && errno != EINTR)
+ break;
+
+ timeout_usleep(SEND_SLEEP_USEC);
+ timeout_check("send");
}
+ timeout_end();
+ if (errno != EPIPE) {
+ fprintf(stderr, "unexpected send(2) errno %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
if (have_sigpipe) {
fprintf(stderr, "SIGPIPE not expected\n");
exit(EXIT_FAILURE);
@@ -1250,7 +1279,7 @@ static void test_unsent_bytes_server(const struct test_opts *opts, int type)
static void test_unsent_bytes_client(const struct test_opts *opts, int type)
{
unsigned char buf[MSG_BUF_IOCTL_LEN];
- int ret, fd, sock_bytes_unsent;
+ int fd;
fd = vsock_connect(opts->peer_cid, opts->peer_port, type);
if (fd < 0) {
@@ -1264,20 +1293,14 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type)
send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
control_expectln("RECEIVED");
- ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
- if (ret < 0) {
- if (errno == EOPNOTSUPP) {
- fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
- } else {
- perror("ioctl");
- exit(EXIT_FAILURE);
- }
- } else if (ret == 0 && sock_bytes_unsent != 0) {
- fprintf(stderr,
- "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n",
- sock_bytes_unsent);
- exit(EXIT_FAILURE);
- }
+ /* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though
+ * the "RECEIVED" message means that the other side has received the
+ * data, there can be a delay in our kernel before updating the "unsent
+ * bytes" counter. vsock_wait_sent() will repeat SIOCOUTQ until it
+ * returns 0.
+ */
+ if (!vsock_wait_sent(fd))
+ fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
close(fd);
}
@@ -1790,10 +1813,6 @@ static void test_stream_connect_retry_server(const struct test_opts *opts)
static void test_stream_linger_client(const struct test_opts *opts)
{
- struct linger optval = {
- .l_onoff = 1,
- .l_linger = 1
- };
int fd;
fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
@@ -1802,15 +1821,58 @@ static void test_stream_linger_client(const struct test_opts *opts)
exit(EXIT_FAILURE);
}
- if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
- perror("setsockopt(SO_LINGER)");
+ enable_so_linger(fd, 1);
+ close(fd);
+}
+
+static void test_stream_linger_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ if (fd < 0) {
+ perror("accept");
exit(EXIT_FAILURE);
}
+ vsock_wait_remote_close(fd);
close(fd);
}
-static void test_stream_linger_server(const struct test_opts *opts)
+/* Half of the default to not risk timing out the control channel */
+#define LINGER_TIMEOUT (TIMEOUT / 2)
+
+static void test_stream_nolinger_client(const struct test_opts *opts)
+{
+ bool waited;
+ time_t ns;
+ int fd;
+
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ enable_so_linger(fd, LINGER_TIMEOUT);
+ send_byte(fd, 1, 0); /* Left unread to expose incorrect behaviour. */
+ waited = vsock_wait_sent(fd);
+
+ ns = current_nsec();
+ close(fd);
+ ns = current_nsec() - ns;
+
+ if (!waited) {
+ fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
+ } else if (DIV_ROUND_UP(ns, NSEC_PER_SEC) >= LINGER_TIMEOUT) {
+ fprintf(stderr, "Unexpected lingering\n");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("DONE");
+}
+
+static void test_stream_nolinger_server(const struct test_opts *opts)
{
int fd;
@@ -1820,7 +1882,7 @@ static void test_stream_linger_server(const struct test_opts *opts)
exit(EXIT_FAILURE);
}
- vsock_wait_remote_close(fd);
+ control_expectln("DONE");
close(fd);
}
@@ -1984,6 +2046,11 @@ static struct test_case test_cases[] = {
.run_client = test_stream_linger_client,
.run_server = test_stream_linger_server,
},
+ {
+ .name = "SOCK_STREAM SO_LINGER close() on unread",
+ .run_client = test_stream_nolinger_client,
+ .run_server = test_stream_nolinger_server,
+ },
{},
};
diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt
index dd5621038c55..43e98311d10f 100644
--- a/tools/tracing/rtla/README.txt
+++ b/tools/tracing/rtla/README.txt
@@ -13,6 +13,13 @@ RTLA depends on the following libraries and tools:
- libtraceevent
- libcpupower (optional, for --deepest-idle-state)
+For BPF sample collection support, the following extra dependencies are
+required:
+
+ - libbpf 1.0.0 or later
+ - bpftool with skeleton support
+ - clang with BPF CO-RE support
+
It also depends on python3-docutils to compile man pages.
For development, we suggest the following steps for compiling rtla:
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
index d9d15c8f27c7..8d579bcee709 100644
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -766,8 +766,8 @@ int osnoise_hist_main(int argc, char *argv[])
struct osnoise_params *params;
struct osnoise_tool *record = NULL;
struct osnoise_tool *tool = NULL;
+ enum result return_value = ERROR;
struct trace_instance *trace;
- int return_value = 1;
int retval;
params = osnoise_hist_parse_args(argc, argv);
@@ -889,12 +889,13 @@ int osnoise_hist_main(int argc, char *argv[])
osnoise_print_stats(params, tool);
- return_value = 0;
+ return_value = PASSED;
if (osnoise_trace_is_off(tool, record)) {
printf("rtla osnoise hit stop tracing\n");
save_trace_to_file(record ? record->trace.inst : NULL,
params->trace_output);
+ return_value = FAILED;
}
out_hist:
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index 3455ee73e2e6..2c12780c8aa9 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -594,8 +594,8 @@ int osnoise_top_main(int argc, char **argv)
struct osnoise_params *params;
struct osnoise_tool *record = NULL;
struct osnoise_tool *tool = NULL;
+ enum result return_value = ERROR;
struct trace_instance *trace;
- int return_value = 1;
int retval;
params = osnoise_top_parse_args(argc, argv);
@@ -715,12 +715,13 @@ int osnoise_top_main(int argc, char **argv)
osnoise_print_stats(params, tool);
- return_value = 0;
+ return_value = PASSED;
if (osnoise_trace_is_off(tool, record)) {
printf("osnoise hit stop tracing\n");
save_trace_to_file(record ? record->trace.inst : NULL,
params->trace_output);
+ return_value = FAILED;
}
out_top:
diff --git a/tools/tracing/rtla/src/timerlat_bpf.c b/tools/tracing/rtla/src/timerlat_bpf.c
index 5abee884037a..0bc44ce5d69b 100644
--- a/tools/tracing/rtla/src/timerlat_bpf.c
+++ b/tools/tracing/rtla/src/timerlat_bpf.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#ifdef HAVE_BPF_SKEL
+#define _GNU_SOURCE
#include "timerlat.h"
#include "timerlat_bpf.h"
#include "timerlat.skel.h"
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index 9d9efeedc4c2..36d2294c963d 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -1141,11 +1141,11 @@ int timerlat_hist_main(int argc, char *argv[])
struct timerlat_params *params;
struct osnoise_tool *record = NULL;
struct timerlat_u_params params_u;
+ enum result return_value = ERROR;
struct osnoise_tool *tool = NULL;
struct osnoise_tool *aa = NULL;
struct trace_instance *trace;
int dma_latency_fd = -1;
- int return_value = 1;
pthread_t timerlat_u;
int retval;
int nr_cpus, i;
@@ -1378,7 +1378,7 @@ int timerlat_hist_main(int argc, char *argv[])
timerlat_print_stats(params, tool);
- return_value = 0;
+ return_value = PASSED;
if (osnoise_trace_is_off(tool, record) && !stop_tracing) {
printf("rtla timerlat hit stop tracing\n");
@@ -1388,6 +1388,7 @@ int timerlat_hist_main(int argc, char *argv[])
save_trace_to_file(record ? record->trace.inst : NULL,
params->trace_output);
+ return_value = FAILED;
}
out_hist:
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 79cb6f28967f..7365e08fe986 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -985,12 +985,12 @@ int timerlat_top_main(int argc, char *argv[])
struct timerlat_params *params;
struct osnoise_tool *record = NULL;
struct timerlat_u_params params_u;
+ enum result return_value = ERROR;
struct osnoise_tool *top = NULL;
struct osnoise_tool *aa = NULL;
struct trace_instance *trace;
int dma_latency_fd = -1;
pthread_t timerlat_u;
- int return_value = 1;
char *max_lat;
int retval;
int nr_cpus, i;
@@ -1197,7 +1197,7 @@ int timerlat_top_main(int argc, char *argv[])
timerlat_print_stats(params, top);
- return_value = 0;
+ return_value = PASSED;
if (osnoise_trace_is_off(top, record) && !stop_tracing) {
printf("rtla timerlat hit stop tracing\n");
@@ -1207,6 +1207,7 @@ int timerlat_top_main(int argc, char *argv[])
save_trace_to_file(record ? record->trace.inst : NULL,
params->trace_output);
+ return_value = FAILED;
} else if (params->aa_only) {
/*
* If the trace did not stop with --aa-only, at least print the
diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c
index 4995d35cf3ec..d6ab15dcb490 100644
--- a/tools/tracing/rtla/src/utils.c
+++ b/tools/tracing/rtla/src/utils.c
@@ -227,6 +227,8 @@ long parse_ns_duration(char *val)
# define __NR_sched_setattr 355
# elif __s390x__
# define __NR_sched_setattr 345
+# elif __loongarch__
+# define __NR_sched_setattr 274
# endif
#endif
diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h
index 101d4799a009..a2a6f89f342d 100644
--- a/tools/tracing/rtla/src/utils.h
+++ b/tools/tracing/rtla/src/utils.h
@@ -83,3 +83,9 @@ int auto_house_keeping(cpu_set_t *monitored_cpus);
#define ns_to_usf(x) (((double)x/1000))
#define ns_to_per(total, part) ((part * 100) / (double)total)
+
+enum result {
+ PASSED = 0, /* same as EXIT_SUCCESS */
+ ERROR = 1, /* same as EXIT_FAILURE, an error in arguments */
+ FAILED = 2, /* test hit the stop tracing condition */
+};
diff --git a/tools/tracing/rtla/tests/engine.sh b/tools/tracing/rtla/tests/engine.sh
index b1697b3e3f52..f2616a8e4179 100644
--- a/tools/tracing/rtla/tests/engine.sh
+++ b/tools/tracing/rtla/tests/engine.sh
@@ -39,6 +39,7 @@ reset_osnoise() {
}
check() {
+ expected_exitcode=${3:-0}
# Simple check: run rtla with given arguments and test exit code.
# If TEST_COUNT is set, run the test. Otherwise, just count.
ctr=$(($ctr + 1))
@@ -49,7 +50,7 @@ check() {
# Run rtla; in case of failure, include its output as comment
# in the test results.
result=$(stdbuf -oL $TIMEOUT "$RTLA" $2 2>&1); exitcode=$?
- if [ $exitcode -eq 0 ]
+ if [ $exitcode -eq $expected_exitcode ]
then
echo "ok $ctr - $1"
else
@@ -68,12 +69,14 @@ check_with_osnoise_options() {
# Save original arguments
arg1=$1
arg2=$2
+ arg3=$3
# Apply osnoise options (if not dry run)
if [ -n "$TEST_COUNT" ]
then
[ "$NO_RESET_OSNOISE" == 1 ] || reset_osnoise
shift
+ shift
while shift
do
[ "$1" == "" ] && continue
@@ -84,7 +87,7 @@ check_with_osnoise_options() {
done
fi
- NO_RESET_OSNOISE=1 check "$arg1" "$arg2"
+ NO_RESET_OSNOISE=1 check "$arg1" "$arg2" "$arg3"
}
set_timeout() {
diff --git a/tools/tracing/rtla/tests/hwnoise.t b/tools/tracing/rtla/tests/hwnoise.t
index bbed17580537..5f71401a139e 100644
--- a/tools/tracing/rtla/tests/hwnoise.t
+++ b/tools/tracing/rtla/tests/hwnoise.t
@@ -10,11 +10,11 @@ check "verify help page" \
check "detect noise higher than one microsecond" \
"hwnoise -c 0 -T 1 -d 5s -q"
check "set the automatic trace mode" \
- "hwnoise -a 5 -d 30s"
+ "hwnoise -a 5 -d 30s" 2
check "set scheduling param to the osnoise tracer threads" \
"hwnoise -P F:1 -c 0 -r 900000 -d 1M -q"
check "stop the trace if a single sample is higher than 1 us" \
- "hwnoise -s 1 -T 1 -t -d 30s"
+ "hwnoise -s 1 -T 1 -t -d 30s" 2
check "enable a trace event trigger" \
"hwnoise -t -e osnoise:irq_noise trigger=\"hist:key=desc,duration:sort=desc,duration:vals=hitcount\" -d 1m"
diff --git a/tools/tracing/rtla/tests/osnoise.t b/tools/tracing/rtla/tests/osnoise.t
index e5995c03c790..44908fc01abf 100644
--- a/tools/tracing/rtla/tests/osnoise.t
+++ b/tools/tracing/rtla/tests/osnoise.t
@@ -10,9 +10,9 @@ check "verify help page" \
check "verify the --priority/-P param" \
"osnoise top -P F:1 -c 0 -r 900000 -d 1M -q"
check "verify the --stop/-s param" \
- "osnoise top -s 30 -T 1 -t"
+ "osnoise top -s 30 -T 1 -t" 2
check "verify the --trace param" \
- "osnoise hist -s 30 -T 1 -t"
+ "osnoise hist -s 30 -T 1 -t" 2
check "verify the --entries/-E param" \
"osnoise hist -P F:1 -c 0 -r 900000 -d 1M -b 10 -E 25"
@@ -20,6 +20,6 @@ check "verify the --entries/-E param" \
# and stopping on threshold.
# If default period is not set, this will time out.
check_with_osnoise_options "apply default period" \
- "osnoise hist -s 1" period_us=600000000
+ "osnoise hist -s 1" 2 period_us=600000000
test_end
diff --git a/tools/tracing/rtla/tests/timerlat.t b/tools/tracing/rtla/tests/timerlat.t
index e939ff71d6be..579c12a85e8f 100644
--- a/tools/tracing/rtla/tests/timerlat.t
+++ b/tools/tracing/rtla/tests/timerlat.t
@@ -21,21 +21,21 @@ export RTLA_NO_BPF=$option
check "verify help page" \
"timerlat --help"
check "verify -s/--stack" \
- "timerlat top -s 3 -T 10 -t"
+ "timerlat top -s 3 -T 10 -t" 2
check "verify -P/--priority" \
"timerlat top -P F:1 -c 0 -d 1M -q"
check "test in nanoseconds" \
- "timerlat top -i 2 -c 0 -n -d 30s"
+ "timerlat top -i 2 -c 0 -n -d 30s" 2
check "set the automatic trace mode" \
- "timerlat top -a 5 --dump-tasks"
+ "timerlat top -a 5 --dump-tasks" 2
check "print the auto-analysis if hits the stop tracing condition" \
- "timerlat top --aa-only 5"
+ "timerlat top --aa-only 5" 2
check "disable auto-analysis" \
- "timerlat top -s 3 -T 10 -t --no-aa"
+ "timerlat top -s 3 -T 10 -t --no-aa" 2
check "verify -c/--cpus" \
"timerlat hist -c 0 -d 30s"
check "hist test in nanoseconds" \
- "timerlat hist -i 2 -c 0 -n -d 30s"
+ "timerlat hist -i 2 -c 0 -n -d 30s" 2
done
test_end