diff options
Diffstat (limited to 'tools/testing')
335 files changed, 18931 insertions, 2636 deletions
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c index 35ea65c54ffa..827507844e8f 100644 --- a/tools/testing/crypto/chacha20-s390/test-cipher.c +++ b/tools/testing/crypto/chacha20-s390/test-cipher.c @@ -50,7 +50,7 @@ struct skcipher_def { /* Perform cipher operations with the chacha lib */ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) { - u32 chacha_state[CHACHA_STATE_WORDS]; + struct chacha_state chacha_state; u8 iv[16], key[32]; u64 start, end; @@ -66,10 +66,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) } /* Encrypt */ - chacha_init(chacha_state, (u32 *)key, iv); + chacha_init(&chacha_state, (u32 *)key, iv); start = ktime_get_ns(); - chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20); + chacha_crypt_arch(&chacha_state, cipher, plain, data_size, 20); end = ktime_get_ns(); @@ -81,10 +81,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) pr_info("lib encryption took: %lld nsec", end - start); /* Decrypt */ - chacha_init(chacha_state, (u32 *)key, iv); + chacha_init(&chacha_state, (u32 *)key, iv); start = ktime_get_ns(); - chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20); + chacha_crypt_arch(&chacha_state, revert, cipher, data_size, 20); end = ktime_get_ns(); if (debug) diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index 422e186cf3cf..e70c502a16df 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -10,6 +10,7 @@ CONFIG_KUNIT_EXAMPLE_TEST=y CONFIG_KUNIT_ALL_TESTS=y CONFIG_FORTIFY_SOURCE=y +CONFIG_INIT_STACK_ALL_PATTERN=y CONFIG_IIO=y diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py index 10ff65689dd8..80fa4e354a17 100644 --- a/tools/testing/kunit/kunit_json.py +++ b/tools/testing/kunit/kunit_json.py @@ -39,10 +39,20 @@ def _get_group_json(test: Test, common_fields: JsonObj) -> JsonObj: status = _status_map.get(subtest.status, "FAIL") test_cases.append({"name": subtest.name, "status": status}) + test_counts = test.counts + counts_json = { + "tests": test_counts.total(), + "passed": test_counts.passed, + "failed": test_counts.failed, + "crashed": test_counts.crashed, + "skipped": test_counts.skipped, + "errors": test_counts.errors, + } test_group = { "name": test.name, "sub_groups": sub_groups, "test_cases": test_cases, + "misc": counts_json } test_group.update(common_fields) return test_group diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index d3f39bc1ceec..260d8d9aa1db 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -14,6 +14,7 @@ import os import shlex import shutil import signal +import sys import threading from typing import Iterator, List, Optional, Tuple from types import FrameType @@ -201,6 +202,13 @@ def _default_qemu_config_path(arch: str) -> str: return config_path options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')] + + if arch == 'help': + print('um') + for option in options: + print(option) + sys.exit() + raise ConfigError(arch + ' is not a valid arch, options are ' + str(sorted(options))) def _get_qemu_ops(config_path: str, diff --git a/tools/testing/kunit/qemu_configs/powerpc.py b/tools/testing/kunit/qemu_configs/powerpc.py index 7ec38d4131f7..5b4c895d5d5a 100644 --- a/tools/testing/kunit/qemu_configs/powerpc.py +++ b/tools/testing/kunit/qemu_configs/powerpc.py @@ -3,6 +3,7 @@ from ..qemu_config import QemuArchParams QEMU_ARCH = QemuArchParams(linux_arch='powerpc', kconfig=''' CONFIG_PPC64=y +CONFIG_CPU_BIG_ENDIAN=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_HVC_CONSOLE=y''', diff --git a/tools/testing/kunit/qemu_configs/powerpc32.py b/tools/testing/kunit/qemu_configs/powerpc32.py new file mode 100644 index 000000000000..88bd60dbb948 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/powerpc32.py @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='powerpc', + kconfig=''' +CONFIG_PPC32=y +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_ADB_CUDA=y +CONFIG_SERIAL_PMACZILOG=y +CONFIG_SERIAL_PMACZILOG_TTYS=y +CONFIG_SERIAL_PMACZILOG_CONSOLE=y +''', + qemu_arch='ppc', + kernel_path='vmlinux', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'g3beige', '-cpu', 'max']) diff --git a/tools/testing/kunit/qemu_configs/powerpcle.py b/tools/testing/kunit/qemu_configs/powerpcle.py new file mode 100644 index 000000000000..7ddee8af4bd7 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/powerpcle.py @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='powerpc', + kconfig=''' +CONFIG_PPC64=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_HVC_CONSOLE=y +''', + qemu_arch='ppc64', + kernel_path='vmlinux', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'pseries', '-cpu', 'power8']) diff --git a/tools/testing/kunit/qemu_configs/riscv32.py b/tools/testing/kunit/qemu_configs/riscv32.py new file mode 100644 index 000000000000..b79ba0ae30f8 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/riscv32.py @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='riscv', + kconfig=''' +CONFIG_NONPORTABLE=y +CONFIG_ARCH_RV32I=y +CONFIG_ARCH_VIRT=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +''', + qemu_arch='riscv32', + kernel_path='arch/riscv/boot/Image', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-machine', 'virt']) diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py index 256d9573b446..2019550a1b69 100644 --- a/tools/testing/kunit/qemu_configs/sparc.py +++ b/tools/testing/kunit/qemu_configs/sparc.py @@ -2,6 +2,8 @@ from ..qemu_config import QemuArchParams QEMU_ARCH = QemuArchParams(linux_arch='sparc', kconfig=''' +CONFIG_KUNIT_FAULT_TEST=n +CONFIG_SPARC32=y CONFIG_SERIAL_SUNZILOG=y CONFIG_SERIAL_SUNZILOG_CONSOLE=y ''', diff --git a/tools/testing/kunit/qemu_configs/sparc64.py b/tools/testing/kunit/qemu_configs/sparc64.py new file mode 100644 index 000000000000..53d4e5a8c972 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/sparc64.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='sparc', + kconfig=''' +CONFIG_64BIT=y +CONFIG_SPARC64=y +CONFIG_PCI=y +CONFIG_SERIAL_SUNSU=y +CONFIG_SERIAL_SUNSU_CONSOLE=y +''', + qemu_arch='sparc64', + kernel_path='arch/sparc/boot/image', + kernel_command_line='console=ttyS0 kunit_shutdown=poweroff', + extra_qemu_params=[]) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index c77c8c8e3d9b..6aa11cd3db42 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -48,6 +48,7 @@ TARGETS += ipc TARGETS += ir TARGETS += kcmp TARGETS += kexec +TARGETS += kselftest_harness TARGETS += kvm TARGETS += landlock TARGETS += lib @@ -66,11 +67,13 @@ TARGETS += mseal_system_mappings TARGETS += nci TARGETS += net TARGETS += net/af_unix +TARGETS += net/can TARGETS += net/forwarding TARGETS += net/hsr TARGETS += net/mptcp TARGETS += net/netfilter TARGETS += net/openvswitch +TARGETS += net/ovpn TARGETS += net/packetdrill TARGETS += net/rds TARGETS += net/tcp_ao @@ -121,6 +124,7 @@ TARGETS += user_events TARGETS += vDSO TARGETS += mm TARGETS += x86 +TARGETS += x86/bugs TARGETS += zram #Please keep the TARGETS list alphabetically sorted # Run "make quicktest=1 run_tests" or diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 22029e60eff3..c4c72ee2ef55 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -21,6 +21,8 @@ CFLAGS += $(KHDR_INCLUDES) CFLAGS += -I$(top_srcdir)/tools/include +OUTPUT ?= $(CURDIR) + export CFLAGS export top_srcdir diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index 285c47dd42f6..eb19dcc37a75 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -169,8 +169,10 @@ static int sys_clone(unsigned long clone_flags, unsigned long newsp, child_tidptr); } +#define __STACK_SIZE (8 * 1024 * 1024) + /* - * If we clone with CLONE_SETTLS then the value in the parent should + * If we clone with CLONE_VM then the value in the parent should * be unchanged and the child should start with zero and be able to * set its own value. */ @@ -179,11 +181,19 @@ static int write_clone_read(void) int parent_tid, child_tid; pid_t parent, waiting; int ret, status; + void *stack; parent = getpid(); set_tpidr2(parent); - ret = sys_clone(CLONE_SETTLS, 0, &parent_tid, 0, &child_tid); + stack = malloc(__STACK_SIZE); + if (!stack) { + putstr("# malloc() failed\n"); + return 0; + } + + ret = sys_clone(CLONE_VM, (unsigned long)stack + __STACK_SIZE, + &parent_tid, 0, &child_tid); if (ret == -1) { putstr("# clone() failed\n"); putnum(errno); diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index 4930e03a7b99..191c47ca0ed8 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -439,10 +439,17 @@ static bool check_ptrace_values_sve(pid_t child, struct test_config *config) pass = false; } - if (sve->size != SVE_PT_SIZE(vq, sve->flags)) { - ksft_print_msg("Mismatch in SVE header size: %d != %lu\n", - sve->size, SVE_PT_SIZE(vq, sve->flags)); - pass = false; + if (svcr_in & SVCR_SM) { + if (sve->size != sizeof(sve)) { + ksft_print_msg("NT_ARM_SVE reports data with PSTATE.SM\n"); + pass = false; + } + } else { + if (sve->size != SVE_PT_SIZE(vq, sve->flags)) { + ksft_print_msg("Mismatch in SVE header size: %d != %lu\n", + sve->size, SVE_PT_SIZE(vq, sve->flags)); + pass = false; + } } /* The registers might be in completely different formats! */ @@ -515,10 +522,17 @@ static bool check_ptrace_values_ssve(pid_t child, struct test_config *config) pass = false; } - if (sve->size != SVE_PT_SIZE(vq, sve->flags)) { - ksft_print_msg("Mismatch in SSVE header size: %d != %lu\n", - sve->size, SVE_PT_SIZE(vq, sve->flags)); - pass = false; + if (!(svcr_in & SVCR_SM)) { + if (sve->size != sizeof(sve)) { + ksft_print_msg("NT_ARM_SSVE reports data without PSTATE.SM\n"); + pass = false; + } + } else { + if (sve->size != SVE_PT_SIZE(vq, sve->flags)) { + ksft_print_msg("Mismatch in SSVE header size: %d != %lu\n", + sve->size, SVE_PT_SIZE(vq, sve->flags)); + pass = false; + } } /* The registers might be in completely different formats! */ @@ -891,18 +905,11 @@ static void set_initial_values(struct test_config *config) { int vq = __sve_vq_from_vl(vl_in(config)); int sme_vq = __sve_vq_from_vl(config->sme_vl_in); - bool sm_change; svcr_in = config->svcr_in; svcr_expected = config->svcr_expected; svcr_out = 0; - if (sme_supported() && - (svcr_in & SVCR_SM) != (svcr_expected & SVCR_SM)) - sm_change = true; - else - sm_change = false; - fill_random(&v_in, sizeof(v_in)); memcpy(v_expected, v_in, sizeof(v_in)); memset(v_out, 0, sizeof(v_out)); @@ -953,12 +960,7 @@ static void set_initial_values(struct test_config *config) if (fpmr_supported()) { fill_random(&fpmr_in, sizeof(fpmr_in)); fpmr_in &= FPMR_SAFE_BITS; - - /* Entering or exiting streaming mode clears FPMR */ - if (sm_change) - fpmr_expected = 0; - else - fpmr_expected = fpmr_in; + fpmr_expected = fpmr_in; } else { fpmr_in = 0; fpmr_expected = 0; @@ -1195,18 +1197,8 @@ static void sve_write(pid_t child, struct test_config *config) static bool za_write_supported(struct test_config *config) { - if (config->sme_vl_in != config->sme_vl_expected) { - /* Changing the SME VL exits streaming mode. */ - if (config->svcr_expected & SVCR_SM) { - return false; - } - } else { - /* Otherwise we can't change streaming mode */ - if ((config->svcr_in & SVCR_SM) != - (config->svcr_expected & SVCR_SM)) { - return false; - } - } + if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM)) + return false; return true; } @@ -1224,10 +1216,8 @@ static void za_write_expected(struct test_config *config) memset(zt_expected, 0, sizeof(zt_expected)); } - /* Changing the SME VL flushes ZT, SVE state and exits SM */ + /* Changing the SME VL flushes ZT, SVE state */ if (config->sme_vl_in != config->sme_vl_expected) { - svcr_expected &= ~SVCR_SM; - sve_vq = __sve_vq_from_vl(vl_expected(config)); memset(z_expected, 0, __SVE_ZREGS_SIZE(sve_vq)); memset(p_expected, 0, __SVE_PREGS_SIZE(sve_vq)); diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST index f748f2c33b22..1789a61d0a9b 100644 --- a/tools/testing/selftests/bpf/DENYLIST +++ b/tools/testing/selftests/bpf/DENYLIST @@ -1,5 +1,6 @@ # TEMPORARY # Alphabetical order +dynptr/test_probe_read_user_str_dynptr # disabled until https://patchwork.kernel.org/project/linux-mm/patch/20250422131449.57177-1-mykyta.yatsenko5@gmail.com/ makes it into the bpf-next get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge stacktrace_build_id stacktrace_build_id_nmi diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 6d8feda27ce9..12e99c0277a8 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,3 +1 @@ -fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 -fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524 diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 66bb50356be0..cf5ed3bee573 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -34,6 +34,9 @@ OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0) LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null) LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) +SKIP_DOCS ?= +SKIP_LLVM ?= + ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) @@ -172,6 +175,7 @@ override OUTPUT := $(patsubst %/,%,$(OUTPUT)) endif endif +ifneq ($(SKIP_LLVM),1) ifeq ($(feature-llvm),1) LLVM_CFLAGS += -DHAVE_LLVM_SUPPORT LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets @@ -180,13 +184,14 @@ ifeq ($(feature-llvm),1) # Prefer linking statically if it's available, otherwise fallback to shared ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static) LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS)) - LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)) + LLVM_LDLIBS += $(filter-out -lxml2,$(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))) LLVM_LDLIBS += -lstdc++ else LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS)) endif LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags) endif +endif SCRATCH_DIR := $(OUTPUT)/tools BUILD_DIR := $(SCRATCH_DIR)/build @@ -358,7 +363,9 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin endif +ifneq ($(SKIP_DOCS),1) all: docs +endif docs: $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \ @@ -673,9 +680,6 @@ ifneq ($2:$(OUTPUT),:$(shell pwd)) $(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/ endif -$(OUTPUT)/$(TRUNNER_BINARY): LDLIBS += $$(LLVM_LDLIBS) -$(OUTPUT)/$(TRUNNER_BINARY): LDFLAGS += $$(LLVM_LDFLAGS) - # some X.test.o files have runtime dependencies on Y.bpf.o files $(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS) @@ -686,7 +690,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(OUTPUT)/veristat \ | $(TRUNNER_BINARY)-extras $$(call msg,BINARY,,$$@) - $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@ + $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@ $(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@ $(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \ $(OUTPUT)/$(if $2,$2/)bpftool @@ -811,6 +815,7 @@ $(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.ske $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h $(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h $(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h +$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -831,6 +836,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_local_storage_create.o \ $(OUTPUT)/bench_htab_mem.o \ $(OUTPUT)/bench_bpf_crypto.o \ + $(OUTPUT)/bench_sockmap.o \ # $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 1bd403a5ef7b..ddd73d06a1eb 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -283,6 +283,7 @@ extern struct argp bench_local_storage_create_argp; extern struct argp bench_htab_mem_argp; extern struct argp bench_trigger_batch_argp; extern struct argp bench_crypto_argp; +extern struct argp bench_sockmap_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, @@ -297,6 +298,7 @@ static const struct argp_child bench_parsers[] = { { &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 }, { &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 }, { &bench_crypto_argp, 0, "bpf crypto benchmark", 0 }, + { &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 }, {}, }; @@ -526,6 +528,12 @@ extern const struct bench bench_trig_uprobe_multi_push; extern const struct bench bench_trig_uretprobe_multi_push; extern const struct bench bench_trig_uprobe_multi_ret; extern const struct bench bench_trig_uretprobe_multi_ret; +#ifdef __x86_64__ +extern const struct bench bench_trig_uprobe_nop5; +extern const struct bench bench_trig_uretprobe_nop5; +extern const struct bench bench_trig_uprobe_multi_nop5; +extern const struct bench bench_trig_uretprobe_multi_nop5; +#endif extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; @@ -549,6 +557,7 @@ extern const struct bench bench_local_storage_create; extern const struct bench bench_htab_mem; extern const struct bench bench_crypto_encrypt; extern const struct bench bench_crypto_decrypt; +extern const struct bench bench_sockmap; static const struct bench *benchs[] = { &bench_count_global, @@ -586,6 +595,12 @@ static const struct bench *benchs[] = { &bench_trig_uretprobe_multi_push, &bench_trig_uprobe_multi_ret, &bench_trig_uretprobe_multi_ret, +#ifdef __x86_64__ + &bench_trig_uprobe_nop5, + &bench_trig_uretprobe_nop5, + &bench_trig_uprobe_multi_nop5, + &bench_trig_uretprobe_multi_nop5, +#endif /* ringbuf/perfbuf benchmarks */ &bench_rb_libbpf, &bench_rb_custom, @@ -609,6 +624,7 @@ static const struct bench *benchs[] = { &bench_htab_mem, &bench_crypto_encrypt, &bench_crypto_decrypt, + &bench_sockmap, }; static void find_benchmark(void) diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c index 926ee822143e..297e32390cd1 100644 --- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c +++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c @@ -279,6 +279,7 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value) } got = read(fd, buf, sizeof(buf) - 1); + close(fd); if (got <= 0) { *value = 0; return; @@ -286,8 +287,6 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value) buf[got] = 0; *value = strtoull(buf, NULL, 0); - - close(fd); } static void htab_mem_measure(struct bench_res *res) diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c new file mode 100644 index 000000000000..8ebf563a67a2 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <error.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <sys/sendfile.h> +#include <arpa/inet.h> +#include <fcntl.h> +#include <argp.h> +#include "bench.h" +#include "bench_sockmap_prog.skel.h" + +#define FILE_SIZE (128 * 1024) +#define DATA_REPEAT_SIZE 10 + +static const char snd_data[DATA_REPEAT_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + +/* c1 <-> [p1, p2] <-> c2 + * RX bench(BPF_SK_SKB_STREAM_VERDICT): + * ARG_FW_RX_PASS: + * send(p2) -> recv(c2) -> bpf skb passthrough -> recv(c2) + * ARG_FW_RX_VERDICT_EGRESS: + * send(c1) -> verdict skb to tx queuec of p2 -> recv(c2) + * ARG_FW_RX_VERDICT_INGRESS: + * send(c1) -> verdict skb to rx queuec of c2 -> recv(c2) + * + * TX bench(BPF_SK_MSG_VERDIC): + * ARG_FW_TX_PASS: + * send(p2) -> bpf msg passthrough -> send(p2) -> recv(c2) + * ARG_FW_TX_VERDICT_INGRESS: + * send(p2) -> verdict msg to rx queue of c2 -> recv(c2) + * ARG_FW_TX_VERDICT_EGRESS: + * send(p1) -> verdict msg to tx queue of p2 -> recv(c2) + */ +enum SOCKMAP_ARG_FLAG { + ARG_FW_RX_NORMAL = 11000, + ARG_FW_RX_PASS, + ARG_FW_RX_VERDICT_EGRESS, + ARG_FW_RX_VERDICT_INGRESS, + ARG_FW_TX_NORMAL, + ARG_FW_TX_PASS, + ARG_FW_TX_VERDICT_INGRESS, + ARG_FW_TX_VERDICT_EGRESS, + ARG_CTL_RX_STRP, + ARG_CONSUMER_DELAY_TIME, + ARG_PRODUCER_DURATION, +}; + +#define TXMODE_NORMAL() \ + ((ctx.mode) == ARG_FW_TX_NORMAL) + +#define TXMODE_BPF_INGRESS() \ + ((ctx.mode) == ARG_FW_TX_VERDICT_INGRESS) + +#define TXMODE_BPF_EGRESS() \ + ((ctx.mode) == ARG_FW_TX_VERDICT_EGRESS) + +#define TXMODE_BPF_PASS() \ + ((ctx.mode) == ARG_FW_TX_PASS) + +#define TXMODE_BPF() ( \ + TXMODE_BPF_PASS() || \ + TXMODE_BPF_INGRESS() || \ + TXMODE_BPF_EGRESS()) + +#define TXMODE() ( \ + TXMODE_NORMAL() || \ + TXMODE_BPF()) + +#define RXMODE_NORMAL() \ + ((ctx.mode) == ARG_FW_RX_NORMAL) + +#define RXMODE_BPF_PASS() \ + ((ctx.mode) == ARG_FW_RX_PASS) + +#define RXMODE_BPF_VERDICT_EGRESS() \ + ((ctx.mode) == ARG_FW_RX_VERDICT_EGRESS) + +#define RXMODE_BPF_VERDICT_INGRESS() \ + ((ctx.mode) == ARG_FW_RX_VERDICT_INGRESS) + +#define RXMODE_BPF_VERDICT() ( \ + RXMODE_BPF_VERDICT_INGRESS() || \ + RXMODE_BPF_VERDICT_EGRESS()) + +#define RXMODE_BPF() ( \ + RXMODE_BPF_PASS() || \ + RXMODE_BPF_VERDICT()) + +#define RXMODE() ( \ + RXMODE_NORMAL() || \ + RXMODE_BPF()) + +static struct socmap_ctx { + struct bench_sockmap_prog *skel; + enum SOCKMAP_ARG_FLAG mode; + #define c1 fds[0] + #define p1 fds[1] + #define c2 fds[2] + #define p2 fds[3] + #define sfd fds[4] + int fds[5]; + long send_calls; + long read_calls; + long prod_send; + long user_read; + int file_size; + int delay_consumer; + int prod_run_time; + int strp_size; +} ctx = { + .prod_send = 0, + .user_read = 0, + .file_size = FILE_SIZE, + .mode = ARG_FW_RX_VERDICT_EGRESS, + .fds = {0}, + .delay_consumer = 0, + .prod_run_time = 0, + .strp_size = 0, +}; + +static void bench_sockmap_prog_destroy(void) +{ + int i; + + for (i = 0; i < sizeof(ctx.fds); i++) { + if (ctx.fds[0] > 0) + close(ctx.fds[i]); + } + + bench_sockmap_prog__destroy(ctx.skel); +} + +static void init_addr(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); + + addr4->sin_family = AF_INET; + addr4->sin_port = 0; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + *len = sizeof(*addr4); +} + +static bool set_non_block(int fd, bool blocking) +{ + int flags = fcntl(fd, F_GETFL, 0); + + if (flags == -1) + return false; + flags = blocking ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK); + return (fcntl(fd, F_SETFL, flags) == 0); +} + +static int create_pair(int *c, int *p, int type) +{ + struct sockaddr_storage addr; + int err, cfd, pfd; + socklen_t addr_len = sizeof(struct sockaddr_storage); + + err = getsockname(ctx.sfd, (struct sockaddr *)&addr, &addr_len); + if (err) { + fprintf(stderr, "getsockname error %d\n", errno); + return err; + } + cfd = socket(AF_INET, type, 0); + if (cfd < 0) { + fprintf(stderr, "socket error %d\n", errno); + return err; + } + + err = connect(cfd, (struct sockaddr *)&addr, addr_len); + if (err && errno != EINPROGRESS) { + fprintf(stderr, "connect error %d\n", errno); + return err; + } + + pfd = accept(ctx.sfd, NULL, NULL); + if (pfd < 0) { + fprintf(stderr, "accept error %d\n", errno); + return err; + } + *c = cfd; + *p = pfd; + return 0; +} + +static int create_sockets(void) +{ + struct sockaddr_storage addr; + int err, one = 1; + socklen_t addr_len; + + init_addr(&addr, &addr_len); + ctx.sfd = socket(AF_INET, SOCK_STREAM, 0); + if (ctx.sfd < 0) { + fprintf(stderr, "socket error:%d\n", errno); + return ctx.sfd; + } + err = setsockopt(ctx.sfd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + if (err) { + fprintf(stderr, "setsockopt error:%d\n", errno); + return err; + } + + err = bind(ctx.sfd, (struct sockaddr *)&addr, addr_len); + if (err) { + fprintf(stderr, "bind error:%d\n", errno); + return err; + } + + err = listen(ctx.sfd, SOMAXCONN); + if (err) { + fprintf(stderr, "listen error:%d\n", errno); + return err; + } + + err = create_pair(&ctx.c1, &ctx.p1, SOCK_STREAM); + if (err) { + fprintf(stderr, "create_pair 1 error\n"); + return err; + } + + err = create_pair(&ctx.c2, &ctx.p2, SOCK_STREAM); + if (err) { + fprintf(stderr, "create_pair 2 error\n"); + return err; + } + printf("create socket fd c1:%d p1:%d c2:%d p2:%d\n", + ctx.c1, ctx.p1, ctx.c2, ctx.p2); + return 0; +} + +static void validate(void) +{ + if (env.consumer_cnt != 2 || env.producer_cnt != 1 || + !env.affinity) + goto err; + return; +err: + fprintf(stderr, "argument '-c 2 -p 1 -a' is necessary"); + exit(1); +} + +static int setup_rx_sockmap(void) +{ + int verdict, pass, parser, map; + int zero = 0, one = 1; + int err; + + parser = bpf_program__fd(ctx.skel->progs.prog_skb_parser); + verdict = bpf_program__fd(ctx.skel->progs.prog_skb_verdict); + pass = bpf_program__fd(ctx.skel->progs.prog_skb_pass); + map = bpf_map__fd(ctx.skel->maps.sock_map_rx); + + if (ctx.strp_size != 0) { + ctx.skel->bss->pkt_size = ctx.strp_size; + err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0); + if (err) + return err; + } + + if (RXMODE_BPF_VERDICT()) + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + else if (RXMODE_BPF_PASS()) + err = bpf_prog_attach(pass, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) + return err; + + if (RXMODE_BPF_PASS()) + return bpf_map_update_elem(map, &zero, &ctx.c2, BPF_NOEXIST); + + err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST); + if (err < 0) + return err; + + if (RXMODE_BPF_VERDICT_INGRESS()) { + ctx.skel->bss->verdict_dir = BPF_F_INGRESS; + err = bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST); + } else { + err = bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST); + } + if (err < 0) + return err; + + return 0; +} + +static int setup_tx_sockmap(void) +{ + int zero = 0, one = 1; + int prog, map; + int err; + + map = bpf_map__fd(ctx.skel->maps.sock_map_tx); + prog = TXMODE_BPF_PASS() ? + bpf_program__fd(ctx.skel->progs.prog_skmsg_pass) : + bpf_program__fd(ctx.skel->progs.prog_skmsg_verdict); + + err = bpf_prog_attach(prog, map, BPF_SK_MSG_VERDICT, 0); + if (err) + return err; + + if (TXMODE_BPF_EGRESS()) { + err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST); + err |= bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST); + } else { + ctx.skel->bss->verdict_dir = BPF_F_INGRESS; + err = bpf_map_update_elem(map, &zero, &ctx.p2, BPF_NOEXIST); + err |= bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST); + } + + if (err < 0) + return err; + + return 0; +} + +static void setup(void) +{ + int err; + + ctx.skel = bench_sockmap_prog__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "error loading skel\n"); + exit(1); + } + + if (create_sockets()) { + fprintf(stderr, "create_net_mode error\n"); + goto err; + } + + if (RXMODE_BPF()) { + err = setup_rx_sockmap(); + if (err) { + fprintf(stderr, "setup_rx_sockmap error:%d\n", err); + goto err; + } + } else if (TXMODE_BPF()) { + err = setup_tx_sockmap(); + if (err) { + fprintf(stderr, "setup_tx_sockmap error:%d\n", err); + goto err; + } + } else { + fprintf(stderr, "unknown sockmap bench mode: %d\n", ctx.mode); + goto err; + } + + return; + +err: + bench_sockmap_prog_destroy(); + exit(1); +} + +static void measure(struct bench_res *res) +{ + res->drops = atomic_swap(&ctx.prod_send, 0); + res->hits = atomic_swap(&ctx.skel->bss->process_byte, 0); + res->false_hits = atomic_swap(&ctx.user_read, 0); + res->important_hits = atomic_swap(&ctx.send_calls, 0); + res->important_hits |= atomic_swap(&ctx.read_calls, 0) << 32; +} + +static void verify_data(int *check_pos, char *buf, int rcv) +{ + for (int i = 0 ; i < rcv; i++) { + if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) { + fprintf(stderr, "verify data fail"); + exit(1); + } + (*check_pos)++; + if (*check_pos >= FILE_SIZE) + *check_pos = 0; + } +} + +static void *consumer(void *input) +{ + int rcv, sent; + int check_pos = 0; + int tid = (long)input; + int recv_buf_size = FILE_SIZE; + char *buf = malloc(recv_buf_size); + int delay_read = ctx.delay_consumer; + + if (!buf) { + fprintf(stderr, "fail to init read buffer"); + return NULL; + } + + while (true) { + if (tid == 1) { + /* consumer 1 is unused for tx test and stream verdict test */ + if (RXMODE_BPF() || TXMODE()) + return NULL; + /* it's only for RX_NORMAL which service as reserve-proxy mode */ + rcv = read(ctx.p1, buf, recv_buf_size); + if (rcv < 0) { + fprintf(stderr, "fail to read p1"); + return NULL; + } + + sent = send(ctx.p2, buf, recv_buf_size, 0); + if (sent < 0) { + fprintf(stderr, "fail to send p2"); + return NULL; + } + } else { + if (delay_read != 0) { + if (delay_read < 0) + return NULL; + sleep(delay_read); + delay_read = 0; + } + /* read real endpoint by consumer 0 */ + atomic_inc(&ctx.read_calls); + rcv = read(ctx.c2, buf, recv_buf_size); + if (rcv < 0 && errno != EAGAIN) { + fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno); + return NULL; + } + verify_data(&check_pos, buf, rcv); + atomic_add(&ctx.user_read, rcv); + } + } + + return NULL; +} + +static void *producer(void *input) +{ + int off = 0, fp, need_sent, sent; + int file_size = ctx.file_size; + struct timespec ts1, ts2; + int target; + FILE *file; + + file = tmpfile(); + if (!file) { + fprintf(stderr, "create file for sendfile"); + return NULL; + } + + /* we need simple verify */ + for (int i = 0; i < file_size; i++) { + if (fwrite(&snd_data[off], sizeof(char), 1, file) != 1) { + fprintf(stderr, "init tmpfile error"); + return NULL; + } + if (++off >= sizeof(snd_data)) + off = 0; + } + fflush(file); + fseek(file, 0, SEEK_SET); + + fp = fileno(file); + need_sent = file_size; + clock_gettime(CLOCK_MONOTONIC, &ts1); + + if (RXMODE_BPF_VERDICT()) + target = ctx.c1; + else if (TXMODE_BPF_EGRESS()) + target = ctx.p1; + else + target = ctx.p2; + set_non_block(target, true); + while (true) { + if (ctx.prod_run_time) { + clock_gettime(CLOCK_MONOTONIC, &ts2); + if (ts2.tv_sec - ts1.tv_sec > ctx.prod_run_time) + return NULL; + } + + errno = 0; + atomic_inc(&ctx.send_calls); + sent = sendfile(target, fp, NULL, need_sent); + if (sent < 0) { + if (errno != EAGAIN && errno != ENOMEM && errno != ENOBUFS) { + fprintf(stderr, "sendfile return %d, errorno %d:%s\n", + sent, errno, strerror(errno)); + return NULL; + } + continue; + } else if (sent < need_sent) { + need_sent -= sent; + atomic_add(&ctx.prod_send, sent); + continue; + } + atomic_add(&ctx.prod_send, need_sent); + need_sent = file_size; + lseek(fp, 0, SEEK_SET); + } + + return NULL; +} + +static void report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double speed_mbs, prod_mbs, bpf_mbs, send_hz, read_hz; + + prod_mbs = res->drops / 1000000.0 / (delta_ns / 1000000000.0); + speed_mbs = res->false_hits / 1000000.0 / (delta_ns / 1000000000.0); + bpf_mbs = res->hits / 1000000.0 / (delta_ns / 1000000000.0); + send_hz = (res->important_hits & 0xFFFFFFFF) / (delta_ns / 1000000000.0); + read_hz = (res->important_hits >> 32) / (delta_ns / 1000000000.0); + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + printf("Send Speed %8.3lf MB/s (%8.3lf calls/s), BPF Speed %8.3lf MB/s, " + "Rcv Speed %8.3lf MB/s (%8.3lf calls/s)\n", + prod_mbs, send_hz, bpf_mbs, speed_mbs, read_hz); +} + +static void report_final(struct bench_res res[], int res_cnt) +{ + double verdict_mbs_mean = 0.0; + long verdict_total = 0; + int i; + + for (i = 0; i < res_cnt; i++) { + verdict_mbs_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); + verdict_total += res[i].hits / 1000000.0; + } + + printf("Summary: total trans %8.3lu MB \u00B1 %5.3lf MB/s\n", + verdict_total, verdict_mbs_mean); +} + +static const struct argp_option opts[] = { + { "rx-normal", ARG_FW_RX_NORMAL, NULL, 0, + "simple reserve-proxy mode, no bfp enabled"}, + { "rx-pass", ARG_FW_RX_PASS, NULL, 0, + "run bpf prog but no redir applied"}, + { "rx-strp", ARG_CTL_RX_STRP, "Byte", 0, + "enable strparser and set the encapsulation size"}, + { "rx-verdict-egress", ARG_FW_RX_VERDICT_EGRESS, NULL, 0, + "forward data with bpf(stream verdict)"}, + { "rx-verdict-ingress", ARG_FW_RX_VERDICT_INGRESS, NULL, 0, + "forward data with bpf(stream verdict)"}, + { "tx-normal", ARG_FW_TX_NORMAL, NULL, 0, + "simple c-s mode, no bfp enabled"}, + { "tx-pass", ARG_FW_TX_PASS, NULL, 0, + "run bpf prog but no redir applied"}, + { "tx-verdict-ingress", ARG_FW_TX_VERDICT_INGRESS, NULL, 0, + "forward msg to ingress queue of another socket"}, + { "tx-verdict-egress", ARG_FW_TX_VERDICT_EGRESS, NULL, 0, + "forward msg to egress queue of another socket"}, + { "delay-consumer", ARG_CONSUMER_DELAY_TIME, "SEC", 0, + "delay consumer start"}, + { "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0, + "producer duration"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_FW_RX_NORMAL...ARG_FW_TX_VERDICT_EGRESS: + ctx.mode = key; + break; + case ARG_CONSUMER_DELAY_TIME: + ctx.delay_consumer = strtol(arg, NULL, 10); + break; + case ARG_PRODUCER_DURATION: + ctx.prod_run_time = strtol(arg, NULL, 10); + break; + case ARG_CTL_RX_STRP: + ctx.strp_size = strtol(arg, NULL, 10); + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_sockmap_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* Benchmark performance of creating bpf local storage */ +const struct bench bench_sockmap = { + .name = "sockmap", + .argp = &bench_sockmap_argp, + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = report_progress, + .report_final = report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 32e9f194d449..82327657846e 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -333,6 +333,20 @@ static void *uprobe_producer_ret(void *input) return NULL; } +#ifdef __x86_64__ +__nocf_check __weak void uprobe_target_nop5(void) +{ + asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00"); +} + +static void *uprobe_producer_nop5(void *input) +{ + while (true) + uprobe_target_nop5(); + return NULL; +} +#endif + static void usetup(bool use_retprobe, bool use_multi, void *target_addr) { size_t uprobe_offset; @@ -448,6 +462,28 @@ static void uretprobe_multi_ret_setup(void) usetup(true, true /* use_multi */, &uprobe_target_ret); } +#ifdef __x86_64__ +static void uprobe_nop5_setup(void) +{ + usetup(false, false /* !use_multi */, &uprobe_target_nop5); +} + +static void uretprobe_nop5_setup(void) +{ + usetup(true, false /* !use_multi */, &uprobe_target_nop5); +} + +static void uprobe_multi_nop5_setup(void) +{ + usetup(false, true /* use_multi */, &uprobe_target_nop5); +} + +static void uretprobe_multi_nop5_setup(void) +{ + usetup(true, true /* use_multi */, &uprobe_target_nop5); +} +#endif + const struct bench bench_trig_syscall_count = { .name = "trig-syscall-count", .validate = trigger_validate, @@ -506,3 +542,9 @@ BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret"); BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop"); BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push"); BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret"); +#ifdef __x86_64__ +BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5"); +BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5"); +BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5"); +BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5"); +#endif diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh index af169f831f2f..03f55405484b 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh @@ -2,7 +2,7 @@ set -eufo pipefail -for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret} +for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5} do summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) printf "%-15s: %s\n" $i "$summary" diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 6535c8ae3c46..5e512a1d09d1 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -591,4 +591,9 @@ extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym; extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym; +struct bpf_iter_dmabuf; +extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym; +extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym; +extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym; + #endif diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index c378d5d07e02..f74e1ea0ad3b 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -22,6 +22,8 @@ CONFIG_CRYPTO_AES=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DMABUF_HEAPS=y +CONFIG_DMABUF_HEAPS_SYSTEM=y CONFIG_DUMMY=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FPROBE=y @@ -71,8 +73,10 @@ CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y CONFIG_NET_IPIP=y CONFIG_NET_MPLS_GSO=y +CONFIG_NET_SCH_BPF=y CONFIG_NET_SCH_FQ=y CONFIG_NET_SCH_INGRESS=y +CONFIG_NET_SCH_HTB=y CONFIG_NET_SCHED=y CONFIG_NETDEVSIM=y CONFIG_NETFILTER=y @@ -106,6 +110,7 @@ CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_SYN_COOKIES=y CONFIG_TEST_BPF=m +CONFIG_UDMABUF=y CONFIG_USERFAULTFD=y CONFIG_VSOCKETS=y CONFIG_VXLAN=y diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 3720b7611523..e1495a4bbc99 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -158,7 +158,6 @@ CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TUN=y CONFIG_UNIX=y CONFIG_UPROBES=y -CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y CONFIG_VLAN_8021Q=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 706931a8c2c6..26c3bc2ce11d 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -128,7 +128,6 @@ CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TUN=y CONFIG_UNIX=y CONFIG_UPROBES=y -CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y CONFIG_VLAN_8021Q=y diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c index 7565fc7690c2..0223fce4db2b 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c @@ -51,9 +51,11 @@ static void test_arena_spin_lock_size(int size) struct arena_spin_lock *skel; pthread_t thread_id[16]; int prog_fd, i, err; + int nthreads; void *ret; - if (get_nprocs() < 2) { + nthreads = MIN(get_nprocs(), ARRAY_SIZE(thread_id)); + if (nthreads < 2) { test__skip(); return; } @@ -66,25 +68,25 @@ static void test_arena_spin_lock_size(int size) goto end; } skel->bss->cs_count = size; - skel->bss->limit = repeat * 16; + skel->bss->limit = repeat * nthreads; - ASSERT_OK(pthread_barrier_init(&barrier, NULL, 16), "barrier init"); + ASSERT_OK(pthread_barrier_init(&barrier, NULL, nthreads), "barrier init"); prog_fd = bpf_program__fd(skel->progs.prog); - for (i = 0; i < 16; i++) { + for (i = 0; i < nthreads; i++) { err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd); if (!ASSERT_OK(err, "pthread_create")) goto end_barrier; } - for (i = 0; i < 16; i++) { + for (i = 0; i < nthreads; i++) { if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join")) goto end_barrier; if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd")) goto end_barrier; } - ASSERT_EQ(skel->bss->counter, repeat * 16, "check counter value"); + ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value"); end_barrier: pthread_barrier_destroy(&barrier); diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 329c7862b52d..cabc51c2ca6b 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -122,6 +122,85 @@ cleanup: test_attach_probe_manual__destroy(skel); } +/* attach uprobe/uretprobe long event name testings */ +static void test_attach_uprobe_long_event_name(void) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct bpf_link *uprobe_link, *uretprobe_link; + struct test_attach_probe_manual *skel; + ssize_t uprobe_offset; + char path[PATH_MAX] = {0}; + + skel = test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load")) + return; + + uprobe_offset = get_uprobe_offset(&trigger_func); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) + goto cleanup; + + if (!ASSERT_GT(readlink("/proc/self/exe", path, PATH_MAX - 1), 0, "readlink")) + goto cleanup; + + /* manual-attach uprobe/uretprobe */ + uprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY; + uprobe_opts.ref_ctr_offset = 0; + uprobe_opts.retprobe = false; + uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, + 0 /* self pid */, + path, + uprobe_offset, + &uprobe_opts); + if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_long_event_name")) + goto cleanup; + skel->links.handle_uprobe = uprobe_link; + + uprobe_opts.retprobe = true; + uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, + -1 /* any pid */, + path, + uprobe_offset, &uprobe_opts); + if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_long_event_name")) + goto cleanup; + skel->links.handle_uretprobe = uretprobe_link; + +cleanup: + test_attach_probe_manual__destroy(skel); +} + +/* attach kprobe/kretprobe long event name testings */ +static void test_attach_kprobe_long_event_name(void) +{ + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); + struct bpf_link *kprobe_link, *kretprobe_link; + struct test_attach_probe_manual *skel; + + skel = test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load")) + return; + + /* manual-attach kprobe/kretprobe */ + kprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY; + kprobe_opts.retprobe = false; + kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe, + "bpf_testmod_looooooooooooooooooooooooooooooong_name", + &kprobe_opts); + if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_long_event_name")) + goto cleanup; + skel->links.handle_kprobe = kprobe_link; + + kprobe_opts.retprobe = true; + kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe, + "bpf_testmod_looooooooooooooooooooooooooooooong_name", + &kprobe_opts); + if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_long_event_name")) + goto cleanup; + skel->links.handle_kretprobe = kretprobe_link; + +cleanup: + test_attach_probe_manual__destroy(skel); +} + static void test_attach_probe_auto(struct test_attach_probe *skel) { struct bpf_link *uprobe_err_link; @@ -323,6 +402,11 @@ void test_attach_probe(void) if (test__start_subtest("uprobe-ref_ctr")) test_uprobe_ref_ctr(skel); + if (test__start_subtest("uprobe-long_name")) + test_attach_uprobe_long_event_name(); + if (test__start_subtest("kprobe-long_name")) + test_attach_kprobe_long_event_name(); + cleanup: test_attach_probe__destroy(skel); ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup"); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index dbd13f8e42a7..dd6512fa652b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -63,6 +63,12 @@ static void test_bpf_nf_ct(int mode) .repeat = 1, ); + if (SYS_NOFAIL("iptables-legacy --version")) { + fprintf(stdout, "Missing required iptables-legacy tool\n"); + test__skip(); + return; + } + skel = test_bpf_nf__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c new file mode 100644 index 000000000000..730357cd0c9a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/pkt_sched.h> +#include <linux/rtnetlink.h> +#include <test_progs.h> + +#include "network_helpers.h" +#include "bpf_qdisc_fifo.skel.h" +#include "bpf_qdisc_fq.skel.h" +#include "bpf_qdisc_fail__incompl_ops.skel.h" + +#define LO_IFINDEX 1 + +static const unsigned int total_bytes = 10 * 1024 * 1024; + +static void do_test(char *qdisc) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_ROOT, + .handle = 0x8000000, + .qdisc = qdisc); + int srv_fd = -1, cli_fd = -1; + int err; + + err = bpf_tc_hook_create(&hook); + if (!ASSERT_OK(err, "attach qdisc")) + return; + + srv_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_OK_FD(srv_fd, "start server")) + goto done; + + cli_fd = connect_to_fd(srv_fd, 0); + if (!ASSERT_OK_FD(cli_fd, "connect to client")) + goto done; + + err = send_recv_data(srv_fd, cli_fd, total_bytes); + ASSERT_OK(err, "send_recv_data"); + +done: + if (srv_fd != -1) + close(srv_fd); + if (cli_fd != -1) + close(cli_fd); + + bpf_tc_hook_destroy(&hook); +} + +static void test_fifo(void) +{ + struct bpf_qdisc_fifo *fifo_skel; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + do_test("bpf_fifo"); +out: + bpf_qdisc_fifo__destroy(fifo_skel); +} + +static void test_fq(void) +{ + struct bpf_qdisc_fq *fq_skel; + + fq_skel = bpf_qdisc_fq__open_and_load(); + if (!ASSERT_OK_PTR(fq_skel, "bpf_qdisc_fq__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fq__attach(fq_skel), "bpf_qdisc_fq__attach")) + goto out; + + do_test("bpf_fq"); +out: + bpf_qdisc_fq__destroy(fq_skel); +} + +static void test_qdisc_attach_to_mq(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_MAKE(1 << 16, 1), + .handle = 0x11 << 16, + .qdisc = "bpf_fifo"); + struct bpf_qdisc_fifo *fifo_skel; + int err; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + SYS(out, "ip link add veth0 type veth peer veth1"); + hook.ifindex = if_nametoindex("veth0"); + SYS(out, "tc qdisc add dev veth0 root handle 1: mq"); + + err = bpf_tc_hook_create(&hook); + ASSERT_OK(err, "attach qdisc"); + + bpf_tc_hook_destroy(&hook); + + SYS(out, "tc qdisc delete dev veth0 root mq"); +out: + bpf_qdisc_fifo__destroy(fifo_skel); +} + +static void test_qdisc_attach_to_non_root(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, + .attach_point = BPF_TC_QDISC, + .parent = TC_H_MAKE(1 << 16, 1), + .handle = 0x11 << 16, + .qdisc = "bpf_fifo"); + struct bpf_qdisc_fifo *fifo_skel; + int err; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + SYS(out, "tc qdisc add dev lo root handle 1: htb"); + SYS(out_del_htb, "tc class add dev lo parent 1: classid 1:1 htb rate 75Kbit"); + + err = bpf_tc_hook_create(&hook); + if (!ASSERT_ERR(err, "attach qdisc")) + bpf_tc_hook_destroy(&hook); + +out_del_htb: + SYS(out, "tc qdisc delete dev lo root htb"); +out: + bpf_qdisc_fifo__destroy(fifo_skel); +} + +static void test_incompl_ops(void) +{ + struct bpf_qdisc_fail__incompl_ops *skel; + struct bpf_link *link; + + skel = bpf_qdisc_fail__incompl_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_qdisc_fifo__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.test); + if (!ASSERT_ERR_PTR(link, "bpf_map__attach_struct_ops")) + bpf_link__destroy(link); + + bpf_qdisc_fail__incompl_ops__destroy(skel); +} + +static int get_default_qdisc(char *qdisc_name) +{ + FILE *f; + int num; + + f = fopen("/proc/sys/net/core/default_qdisc", "r"); + if (!f) + return -errno; + + num = fscanf(f, "%s", qdisc_name); + fclose(f); + + return num == 1 ? 0 : -EFAULT; +} + +static void test_default_qdisc_attach_to_mq(void) +{ + char default_qdisc[IFNAMSIZ] = {}; + struct bpf_qdisc_fifo *fifo_skel; + struct netns_obj *netns = NULL; + int err; + + fifo_skel = bpf_qdisc_fifo__open_and_load(); + if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load")) + return; + + if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach")) + goto out; + + err = get_default_qdisc(default_qdisc); + if (!ASSERT_OK(err, "read sysctl net.core.default_qdisc")) + goto out; + + err = write_sysctl("/proc/sys/net/core/default_qdisc", "bpf_fifo"); + if (!ASSERT_OK(err, "write sysctl net.core.default_qdisc")) + goto out; + + netns = netns_new("bpf_qdisc_ns", true); + if (!ASSERT_OK_PTR(netns, "netns_new")) + goto out; + + SYS(out, "ip link add veth0 type veth peer veth1"); + SYS(out, "tc qdisc add dev veth0 root handle 1: mq"); + + ASSERT_EQ(fifo_skel->bss->init_called, true, "init_called"); + + SYS(out, "tc qdisc delete dev veth0 root mq"); +out: + netns_free(netns); + if (default_qdisc[0]) + write_sysctl("/proc/sys/net/core/default_qdisc", default_qdisc); + + bpf_qdisc_fifo__destroy(fifo_skel); +} + +void test_ns_bpf_qdisc(void) +{ + if (test__start_subtest("fifo")) + test_fifo(); + if (test__start_subtest("fq")) + test_fq(); + if (test__start_subtest("attach to mq")) + test_qdisc_attach_to_mq(); + if (test__start_subtest("attach to non root")) + test_qdisc_attach_to_non_root(); + if (test__start_subtest("incompl_ops")) + test_incompl_ops(); +} + +void serial_test_bpf_qdisc_default(void) +{ + test_default_qdisc_attach_to_mq(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c index d9024c7a892a..5bc15bb6b7ce 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -440,6 +440,105 @@ cleanup: btf__free(btf1); } +/* Ensure module split BTF dedup worked correctly; when dedup fails badly + * core kernel types are in split BTF also, so ensure that references to + * such types point at base - not split - BTF. + * + * bpf_testmod_test_write() has multiple core kernel type parameters; + * + * ssize_t + * bpf_testmod_test_write(struct file *file, struct kobject *kobj, + * struct bin_attribute *bin_attr, + * char *buf, loff_t off, size_t len); + * + * Ensure each of the FUNC_PROTO params is a core kernel type. + * + * Do the same for + * + * __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk); + * + * ...and + * + * __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb); + * + */ +const char *mod_funcs[] = { + "bpf_testmod_test_write", + "bpf_kfunc_call_test3", + "bpf_kfunc_call_test_pass_ctx" +}; + +static void test_split_module(void) +{ + struct btf *vmlinux_btf, *btf1 = NULL; + int i, nr_base_types; + + vmlinux_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux_btf")) + return; + nr_base_types = btf__type_cnt(vmlinux_btf); + if (!ASSERT_GT(nr_base_types, 0, "nr_base_types")) + goto cleanup; + + btf1 = btf__parse_split("/sys/kernel/btf/bpf_testmod", vmlinux_btf); + if (!ASSERT_OK_PTR(btf1, "split_btf")) + return; + + for (i = 0; i < ARRAY_SIZE(mod_funcs); i++) { + const struct btf_param *p; + const struct btf_type *t; + __u16 vlen; + __u32 id; + int j; + + id = btf__find_by_name_kind(btf1, mod_funcs[i], BTF_KIND_FUNC); + if (!ASSERT_GE(id, nr_base_types, "func_id")) + goto cleanup; + t = btf__type_by_id(btf1, id); + if (!ASSERT_OK_PTR(t, "func_id_type")) + goto cleanup; + t = btf__type_by_id(btf1, t->type); + if (!ASSERT_OK_PTR(t, "func_proto_id_type")) + goto cleanup; + if (!ASSERT_EQ(btf_is_func_proto(t), true, "is_func_proto")) + goto cleanup; + vlen = btf_vlen(t); + + for (j = 0, p = btf_params(t); j < vlen; j++, p++) { + /* bpf_testmod uses resilient split BTF, so any + * reference types will be added to split BTF and their + * associated targets will be base BTF types; for example + * for a "struct sock *" the PTR will be in split BTF + * while the "struct sock" will be in base. + * + * In some cases like loff_t we have to resolve + * multiple typedefs hence the while() loop below. + * + * Note that resilient split BTF generation depends + * on pahole version, so we do not assert that + * reference types are in split BTF, as if pahole + * does not support resilient split BTF they will + * also be base BTF types. + */ + id = p->type; + do { + t = btf__type_by_id(btf1, id); + if (!ASSERT_OK_PTR(t, "param_ref_type")) + goto cleanup; + if (!btf_is_mod(t) && !btf_is_ptr(t) && !btf_is_typedef(t)) + break; + id = t->type; + } while (true); + + if (!ASSERT_LT(id, nr_base_types, "verify_base_type")) + goto cleanup; + } + } +cleanup: + btf__free(btf1); + btf__free(vmlinux_btf); +} + void test_btf_dedup_split() { if (test__start_subtest("split_simple")) @@ -450,4 +549,6 @@ void test_btf_dedup_split() test_split_fwd_resolve(); if (test__start_subtest("split_dup_struct_in_cu")) test_split_dup_struct_in_cu(); + if (test__start_subtest("split_module")) + test_split_module(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c index eef1158676ed..3696fb9a05ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c @@ -12,10 +12,11 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args) vfprintf(ctx, fmt, args); } -void test_btf_split() { +static void __test_btf_split(bool multi) +{ struct btf_dump *d = NULL; const struct btf_type *t; - struct btf *btf1, *btf2; + struct btf *btf1, *btf2, *btf3 = NULL; int str_off, i, err; btf1 = btf__new_empty(); @@ -63,14 +64,46 @@ void test_btf_split() { ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen"); ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name"); + if (multi) { + btf3 = btf__new_empty_split(btf2); + if (!ASSERT_OK_PTR(btf3, "multi_split_btf")) + goto cleanup; + } else { + btf3 = btf2; + } + + btf__add_union(btf3, "u1", 16); /* [5] union u1 { */ + btf__add_field(btf3, "f1", 4, 0, 0); /* struct s2 f1; */ + btf__add_field(btf3, "uf2", 1, 0, 0); /* int f2; */ + /* } */ + + if (multi) { + t = btf__type_by_id(btf2, 5); + ASSERT_NULL(t, "multisplit_type_in_first_split"); + } + + t = btf__type_by_id(btf3, 5); + if (!ASSERT_OK_PTR(t, "split_union_type")) + goto cleanup; + ASSERT_EQ(btf_is_union(t), true, "split_union_kind"); + ASSERT_EQ(btf_vlen(t), 2, "split_union_vlen"); + ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "u1", "split_union_name"); + ASSERT_EQ(btf__type_cnt(btf3), 6, "split_type_cnt"); + + t = btf__type_by_id(btf3, 1); + if (!ASSERT_OK_PTR(t, "split_base_type")) + goto cleanup; + ASSERT_EQ(btf_is_int(t), true, "split_base_int"); + ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "int", "split_base_type_name"); + /* BTF-to-C dump of split BTF */ dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) return; - d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL); + d = btf_dump__new(btf3, btf_dump_printf, dump_buf_file, NULL); if (!ASSERT_OK_PTR(d, "btf_dump__new")) goto cleanup; - for (i = 1; i < btf__type_cnt(btf2); i++) { + for (i = 1; i < btf__type_cnt(btf3); i++) { err = btf_dump__dump_type(d, i); ASSERT_OK(err, "dump_type_ok"); } @@ -79,12 +112,15 @@ void test_btf_split() { ASSERT_STREQ(dump_buf, "struct s1 {\n" " int f1;\n" -"};\n" -"\n" +"};\n\n" "struct s2 {\n" " struct s1 f1;\n" " int f2;\n" " int *f3;\n" +"};\n\n" +"union u1 {\n" +" struct s2 f1;\n" +" int uf2;\n" "};\n\n", "c_dump"); cleanup: @@ -94,4 +130,14 @@ cleanup: btf_dump__free(d); btf__free(btf1); btf__free(btf2); + if (btf2 != btf3) + btf__free(btf3); +} + +void test_btf_split(void) +{ + if (test__start_subtest("single_split")) + __test_btf_split(false); + if (test__start_subtest("multi_split")) + __test_btf_split(true); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c new file mode 100644 index 000000000000..3923e64c4c1d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2025 Isovalent */ + +#include <test_progs.h> +#include <bpf/btf.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> + +static void test_btf_mmap_sysfs(const char *path, struct btf *base) +{ + struct stat st; + __u64 btf_size, end; + void *raw_data = NULL; + int fd = -1; + long page_size; + struct btf *btf = NULL; + + page_size = sysconf(_SC_PAGESIZE); + if (!ASSERT_GE(page_size, 0, "get_page_size")) + goto cleanup; + + if (!ASSERT_OK(stat(path, &st), "stat_btf")) + goto cleanup; + + btf_size = st.st_size; + end = (btf_size + page_size - 1) / page_size * page_size; + + fd = open(path, O_RDONLY); + if (!ASSERT_GE(fd, 0, "open_btf")) + goto cleanup; + + raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable")) + goto cleanup; + + raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared")) + goto cleanup; + + raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0); + if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size")) + goto cleanup; + + raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0); + if (!ASSERT_OK_PTR(raw_data, "mmap_btf")) + goto cleanup; + + if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1, + "mprotect_writable")) + goto cleanup; + + if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1, + "mprotect_executable")) + goto cleanup; + + /* Check padding is zeroed */ + for (int i = btf_size; i < end; i++) { + if (((__u8 *)raw_data)[i] != 0) { + PRINT_FAIL("tail of BTF is not zero at page offset %d\n", i); + goto cleanup; + } + } + + btf = btf__new_split(raw_data, btf_size, base); + if (!ASSERT_OK_PTR(btf, "parse_btf")) + goto cleanup; + +cleanup: + btf__free(btf); + if (raw_data && raw_data != MAP_FAILED) + munmap(raw_data, btf_size); + if (fd >= 0) + close(fd); +} + +void test_btf_sysfs(void) +{ + test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c new file mode 100644 index 000000000000..6c2b0c3dbcd8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Google */ + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include "dmabuf_iter.skel.h" + +#include <fcntl.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <linux/dma-buf.h> +#include <linux/dma-heap.h> +#include <linux/udmabuf.h> + +static int udmabuf = -1; +static const char udmabuf_test_buffer_name[DMA_BUF_NAME_LEN] = "udmabuf_test_buffer_for_iter"; +static size_t udmabuf_test_buffer_size; +static int sysheap_dmabuf = -1; +static const char sysheap_test_buffer_name[DMA_BUF_NAME_LEN] = "sysheap_test_buffer_for_iter"; +static size_t sysheap_test_buffer_size; + +static int create_udmabuf(void) +{ + struct udmabuf_create create; + int dev_udmabuf, memfd, local_udmabuf; + + udmabuf_test_buffer_size = 10 * getpagesize(); + + if (!ASSERT_LE(sizeof(udmabuf_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG")) + return -1; + + memfd = memfd_create("memfd_test", MFD_ALLOW_SEALING); + if (!ASSERT_OK_FD(memfd, "memfd_create")) + return -1; + + if (!ASSERT_OK(ftruncate(memfd, udmabuf_test_buffer_size), "ftruncate")) + goto close_memfd; + + if (!ASSERT_OK(fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK), "seal")) + goto close_memfd; + + dev_udmabuf = open("/dev/udmabuf", O_RDONLY); + if (!ASSERT_OK_FD(dev_udmabuf, "open udmabuf")) + goto close_memfd; + + memset(&create, 0, sizeof(create)); + create.memfd = memfd; + create.flags = UDMABUF_FLAGS_CLOEXEC; + create.offset = 0; + create.size = udmabuf_test_buffer_size; + + local_udmabuf = ioctl(dev_udmabuf, UDMABUF_CREATE, &create); + close(dev_udmabuf); + if (!ASSERT_OK_FD(local_udmabuf, "udmabuf_create")) + goto close_memfd; + + if (!ASSERT_OK(ioctl(local_udmabuf, DMA_BUF_SET_NAME_B, udmabuf_test_buffer_name), "name")) + goto close_udmabuf; + + return local_udmabuf; + +close_udmabuf: + close(local_udmabuf); +close_memfd: + close(memfd); + return -1; +} + +static int create_sys_heap_dmabuf(void) +{ + sysheap_test_buffer_size = 20 * getpagesize(); + + struct dma_heap_allocation_data data = { + .len = sysheap_test_buffer_size, + .fd = 0, + .fd_flags = O_RDWR | O_CLOEXEC, + .heap_flags = 0, + }; + int heap_fd, ret; + + if (!ASSERT_LE(sizeof(sysheap_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG")) + return -1; + + heap_fd = open("/dev/dma_heap/system", O_RDONLY); + if (!ASSERT_OK_FD(heap_fd, "open dma heap")) + return -1; + + ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data); + close(heap_fd); + if (!ASSERT_OK(ret, "syheap alloc")) + return -1; + + if (!ASSERT_OK(ioctl(data.fd, DMA_BUF_SET_NAME_B, sysheap_test_buffer_name), "name")) + goto close_sysheap_dmabuf; + + return data.fd; + +close_sysheap_dmabuf: + close(data.fd); + return -1; +} + +static int create_test_buffers(void) +{ + udmabuf = create_udmabuf(); + sysheap_dmabuf = create_sys_heap_dmabuf(); + + if (udmabuf < 0 || sysheap_dmabuf < 0) + return -1; + + return 0; +} + +static void destroy_test_buffers(void) +{ + close(udmabuf); + udmabuf = -1; + + close(sysheap_dmabuf); + sysheap_dmabuf = -1; +} + +enum Fields { INODE, SIZE, NAME, EXPORTER, FIELD_COUNT }; +struct DmabufInfo { + unsigned long inode; + unsigned long size; + char name[DMA_BUF_NAME_LEN]; + char exporter[32]; +}; + +static bool check_dmabuf_info(const struct DmabufInfo *bufinfo, + unsigned long size, + const char *name, const char *exporter) +{ + return size == bufinfo->size && + !strcmp(name, bufinfo->name) && + !strcmp(exporter, bufinfo->exporter); +} + +static void subtest_dmabuf_iter_check_no_infinite_reads(struct dmabuf_iter *skel) +{ + int iter_fd; + char buf[256]; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector)); + if (!ASSERT_OK_FD(iter_fd, "iter_create")) + return; + + while (read(iter_fd, buf, sizeof(buf)) > 0) + ; /* Read out all contents */ + + /* Next reads should return 0 */ + ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read"); + + close(iter_fd); +} + +static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel) +{ + bool found_test_sysheap_dmabuf = false; + bool found_test_udmabuf = false; + struct DmabufInfo bufinfo; + size_t linesize = 0; + char *line = NULL; + FILE *iter_file; + int iter_fd, f = INODE; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector)); + if (!ASSERT_OK_FD(iter_fd, "iter_create")) + return; + + iter_file = fdopen(iter_fd, "r"); + if (!ASSERT_OK_PTR(iter_file, "fdopen")) + goto close_iter_fd; + + while (getline(&line, &linesize, iter_file) != -1) { + if (f % FIELD_COUNT == INODE) { + ASSERT_EQ(sscanf(line, "%ld", &bufinfo.inode), 1, + "read inode"); + } else if (f % FIELD_COUNT == SIZE) { + ASSERT_EQ(sscanf(line, "%ld", &bufinfo.size), 1, + "read size"); + } else if (f % FIELD_COUNT == NAME) { + ASSERT_EQ(sscanf(line, "%s", bufinfo.name), 1, + "read name"); + } else if (f % FIELD_COUNT == EXPORTER) { + ASSERT_EQ(sscanf(line, "%31s", bufinfo.exporter), 1, + "read exporter"); + + if (check_dmabuf_info(&bufinfo, + sysheap_test_buffer_size, + sysheap_test_buffer_name, + "system")) + found_test_sysheap_dmabuf = true; + else if (check_dmabuf_info(&bufinfo, + udmabuf_test_buffer_size, + udmabuf_test_buffer_name, + "udmabuf")) + found_test_udmabuf = true; + } + ++f; + } + + ASSERT_EQ(f % FIELD_COUNT, INODE, "number of fields"); + + ASSERT_TRUE(found_test_sysheap_dmabuf, "found_test_sysheap_dmabuf"); + ASSERT_TRUE(found_test_udmabuf, "found_test_udmabuf"); + + free(line); + fclose(iter_file); +close_iter_fd: + close(iter_fd); +} + +static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + char key[DMA_BUF_NAME_LEN]; + int err, fd; + bool found; + + /* No need to attach it, just run it directly */ + fd = bpf_program__fd(skel->progs.iter_dmabuf_for_each); + + err = bpf_prog_test_run_opts(fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, key), "get next key")) + return; + + do { + ASSERT_OK(bpf_map_lookup_elem(map_fd, key, &found), "lookup"); + ASSERT_TRUE(found, "found test buffer"); + } while (bpf_map_get_next_key(map_fd, key, key)); +} + +void test_dmabuf_iter(void) +{ + struct dmabuf_iter *skel = NULL; + int map_fd; + const bool f = false; + + skel = dmabuf_iter__open_and_load(); + if (!ASSERT_OK_PTR(skel, "dmabuf_iter__open_and_load")) + return; + + map_fd = bpf_map__fd(skel->maps.testbuf_hash); + if (!ASSERT_OK_FD(map_fd, "map_fd")) + goto destroy_skel; + + if (!ASSERT_OK(bpf_map_update_elem(map_fd, udmabuf_test_buffer_name, &f, BPF_ANY), + "insert udmabuf")) + goto destroy_skel; + if (!ASSERT_OK(bpf_map_update_elem(map_fd, sysheap_test_buffer_name, &f, BPF_ANY), + "insert sysheap buffer")) + goto destroy_skel; + + if (!ASSERT_OK(create_test_buffers(), "create_test_buffers")) + goto destroy; + + if (!ASSERT_OK(dmabuf_iter__attach(skel), "skel_attach")) + goto destroy; + + if (test__start_subtest("no_infinite_reads")) + subtest_dmabuf_iter_check_no_infinite_reads(skel); + if (test__start_subtest("default_iter")) + subtest_dmabuf_iter_check_default_iter(skel); + if (test__start_subtest("open_coded")) + subtest_dmabuf_iter_check_open_coded(skel, map_fd); + +destroy: + destroy_test_buffers(); +destroy_skel: + dmabuf_iter__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index e29cc16124c2..62e7ec775f24 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -33,10 +33,19 @@ static struct { {"test_dynptr_skb_no_buff", SETUP_SKB_PROG}, {"test_dynptr_skb_strcmp", SETUP_SKB_PROG}, {"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP}, + {"test_probe_read_user_dynptr", SETUP_XDP_PROG}, + {"test_probe_read_kernel_dynptr", SETUP_XDP_PROG}, + {"test_probe_read_user_str_dynptr", SETUP_XDP_PROG}, + {"test_probe_read_kernel_str_dynptr", SETUP_XDP_PROG}, + {"test_copy_from_user_dynptr", SETUP_SYSCALL_SLEEP}, + {"test_copy_from_user_str_dynptr", SETUP_SYSCALL_SLEEP}, + {"test_copy_from_user_task_dynptr", SETUP_SYSCALL_SLEEP}, + {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP}, }; static void verify_success(const char *prog_name, enum test_setup_type setup_type) { + char user_data[384] = {[0 ... 382] = 'a', '\0'}; struct dynptr_success *skel; struct bpf_program *prog; struct bpf_link *link; @@ -58,6 +67,10 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ if (!ASSERT_OK(err, "dynptr_success__load")) goto cleanup; + skel->bss->user_ptr = user_data; + skel->data->test_len[0] = sizeof(user_data); + memcpy(skel->bss->expected_str, user_data, sizeof(user_data)); + switch (setup_type) { case SETUP_SYSCALL_SLEEP: link = bpf_program__attach(prog); diff --git a/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c new file mode 100644 index 000000000000..ca46fdd6e1ae --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <stdbool.h> +#include <test_progs.h> +#include "fd_htab_lookup.skel.h" + +struct htab_op_ctx { + int fd; + int loop; + unsigned int entries; + bool stop; +}; + +#define ERR_TO_RETVAL(where, err) ((void *)(long)(((where) << 12) | (-err))) + +static void *htab_lookup_fn(void *arg) +{ + struct htab_op_ctx *ctx = arg; + int i = 0; + + while (i++ < ctx->loop && !ctx->stop) { + unsigned int j; + + for (j = 0; j < ctx->entries; j++) { + unsigned int key = j, zero = 0, value; + int inner_fd, err; + + err = bpf_map_lookup_elem(ctx->fd, &key, &value); + if (err) { + ctx->stop = true; + return ERR_TO_RETVAL(1, err); + } + + inner_fd = bpf_map_get_fd_by_id(value); + if (inner_fd < 0) { + /* The old map has been freed */ + if (inner_fd == -ENOENT) + continue; + ctx->stop = true; + return ERR_TO_RETVAL(2, inner_fd); + } + + err = bpf_map_lookup_elem(inner_fd, &zero, &value); + if (err) { + close(inner_fd); + ctx->stop = true; + return ERR_TO_RETVAL(3, err); + } + close(inner_fd); + + if (value != key) { + ctx->stop = true; + return ERR_TO_RETVAL(4, -EINVAL); + } + } + } + + return NULL; +} + +static void *htab_update_fn(void *arg) +{ + struct htab_op_ctx *ctx = arg; + int i = 0; + + while (i++ < ctx->loop && !ctx->stop) { + unsigned int j; + + for (j = 0; j < ctx->entries; j++) { + unsigned int key = j, zero = 0; + int inner_fd, err; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); + if (inner_fd < 0) { + ctx->stop = true; + return ERR_TO_RETVAL(1, inner_fd); + } + + err = bpf_map_update_elem(inner_fd, &zero, &key, 0); + if (err) { + close(inner_fd); + ctx->stop = true; + return ERR_TO_RETVAL(2, err); + } + + err = bpf_map_update_elem(ctx->fd, &key, &inner_fd, BPF_EXIST); + if (err) { + close(inner_fd); + ctx->stop = true; + return ERR_TO_RETVAL(3, err); + } + close(inner_fd); + } + } + + return NULL; +} + +static int setup_htab(int fd, unsigned int entries) +{ + unsigned int i; + + for (i = 0; i < entries; i++) { + unsigned int key = i, zero = 0; + int inner_fd, err; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "new array")) + return -1; + + err = bpf_map_update_elem(inner_fd, &zero, &key, 0); + if (!ASSERT_OK(err, "init array")) { + close(inner_fd); + return -1; + } + + err = bpf_map_update_elem(fd, &key, &inner_fd, 0); + if (!ASSERT_OK(err, "init outer")) { + close(inner_fd); + return -1; + } + close(inner_fd); + } + + return 0; +} + +static int get_int_from_env(const char *name, int dft) +{ + const char *value; + + value = getenv(name); + if (!value) + return dft; + + return atoi(value); +} + +void test_fd_htab_lookup(void) +{ + unsigned int i, wr_nr = 8, rd_nr = 16; + pthread_t tids[wr_nr + rd_nr]; + struct fd_htab_lookup *skel; + struct htab_op_ctx ctx; + int err; + + skel = fd_htab_lookup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "fd_htab_lookup__open_and_load")) + return; + + ctx.fd = bpf_map__fd(skel->maps.outer_map); + ctx.loop = get_int_from_env("FD_HTAB_LOOP_NR", 5); + ctx.stop = false; + ctx.entries = 8; + + err = setup_htab(ctx.fd, ctx.entries); + if (err) + goto destroy; + + memset(tids, 0, sizeof(tids)); + for (i = 0; i < wr_nr; i++) { + err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx); + if (!ASSERT_OK(err, "pthread_create")) { + ctx.stop = true; + goto reap; + } + } + for (i = 0; i < rd_nr; i++) { + err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx); + if (!ASSERT_OK(err, "pthread_create")) { + ctx.stop = true; + goto reap; + } + } + +reap: + for (i = 0; i < wr_nr + rd_nr; i++) { + void *ret = NULL; + char desc[32]; + + if (!tids[i]) + continue; + + snprintf(desc, sizeof(desc), "thread %u", i + 1); + err = pthread_join(tids[i], &ret); + ASSERT_OK(err, desc); + ASSERT_EQ(ret, NULL, desc); + } +destroy: + fd_htab_lookup__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index e59af2aa6601..e40114620751 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -37,6 +37,7 @@ static noinline void uprobe_func(void) static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr, ssize_t offset, ssize_t entry_offset) { + ssize_t ref_ctr_offset = entry_offset /* ref_ctr_offset for uprobes */; struct bpf_link_info info; __u32 len = sizeof(info); char buf[PATH_MAX]; @@ -97,6 +98,7 @@ again: case BPF_PERF_EVENT_UPROBE: case BPF_PERF_EVENT_URETPROBE: ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset"); + ASSERT_EQ(info.perf_event.uprobe.ref_ctr_offset, ref_ctr_offset, "uprobe_ref_ctr_offset"); ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1, "name_len"); @@ -241,20 +243,32 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, .retprobe = type == BPF_PERF_EVENT_URETPROBE, .bpf_cookie = PERF_EVENT_COOKIE, ); + const char *sema[1] = { + "uprobe_link_info_sema_1", + }; + __u64 *ref_ctr_offset; struct bpf_link *link; int link_fd, err; + err = elf_resolve_syms_offsets("/proc/self/exe", 1, sema, + (unsigned long **) &ref_ctr_offset, STT_OBJECT); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object")) + return; + + opts.ref_ctr_offset = *ref_ctr_offset; link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run, 0, /* self pid */ UPROBE_FILE, uprobe_offset, &opts); if (!ASSERT_OK_PTR(link, "attach_uprobe")) - return; + goto out; link_fd = bpf_link__fd(link); - err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0); + err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, *ref_ctr_offset); ASSERT_OK(err, "verify_perf_link_info"); bpf_link__destroy(link); +out: + free(ref_ctr_offset); } static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies) diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c index 8e13a3416a21..1de14b111931 100644 --- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c @@ -104,7 +104,7 @@ void test_kmem_cache_iter(void) goto destroy; memset(buf, 0, sizeof(buf)); - while (read(iter_fd, buf, sizeof(buf) > 0)) { + while (read(iter_fd, buf, sizeof(buf)) > 0) { /* Read out all contents */ printf("%s", buf); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 77d07e0a4a55..5266c7022863 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -7,6 +7,7 @@ #include "linked_list.skel.h" #include "linked_list_fail.skel.h" +#include "linked_list_peek.skel.h" static char log_buf[1024 * 1024]; @@ -805,3 +806,8 @@ void test_linked_list(void) test_linked_list_success(LIST_IN_LIST, true); test_linked_list_success(TEST_ALL, false); } + +void test_linked_list_peek(void) +{ + RUN_TESTS(linked_list_peek); +} diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c index 9818f06c97c5..d8f3d7a45fe9 100644 --- a/tools/testing/selftests/bpf/prog_tests/rbtree.c +++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c @@ -8,6 +8,7 @@ #include "rbtree_fail.skel.h" #include "rbtree_btf_fail__wrong_node_type.skel.h" #include "rbtree_btf_fail__add_wrong_type.skel.h" +#include "rbtree_search.skel.h" static void test_rbtree_add_nodes(void) { @@ -187,3 +188,8 @@ void test_rbtree_fail(void) { RUN_TESTS(rbtree_fail); } + +void test_rbtree_search(void) +{ + RUN_TESTS(rbtree_search); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index 0b9bd1d6f7cc..10a0ab954b8a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -37,8 +37,10 @@ configure_stack(void) tc = popen("tc -V", "r"); if (CHECK_FAIL(!tc)) return false; - if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) + if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) { + pclose(tc); return false; + } if (strstr(tc_version, ", libbpf ")) prog = "test_sk_assign_libbpf.bpf.o"; else diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index d56e18b25528..a4517bee34d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -7,14 +7,433 @@ #define TEST_NS "sock_iter_batch_netns" +static const int init_batch_size = 16; static const int nr_soreuse = 4; +struct iter_out { + int idx; + __u64 cookie; +} __packed; + +struct sock_count { + __u64 cookie; + int count; +}; + +static int insert(__u64 cookie, struct sock_count counts[], int counts_len) +{ + int insert = -1; + int i = 0; + + for (; i < counts_len; i++) { + if (!counts[i].cookie) { + insert = i; + } else if (counts[i].cookie == cookie) { + insert = i; + break; + } + } + if (insert < 0) + return insert; + + counts[insert].cookie = cookie; + counts[insert].count++; + + return counts[insert].count; +} + +static int read_n(int iter_fd, int n, struct sock_count counts[], + int counts_len) +{ + struct iter_out out; + int nread = 1; + int i = 0; + + for (; nread > 0 && (n < 0 || i < n); i++) { + nread = read(iter_fd, &out, sizeof(out)); + if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread")) + break; + ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert"); + } + + ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n"); + + return i; +} + +static __u64 socket_cookie(int fd) +{ + __u64 cookie; + socklen_t cookie_len = sizeof(cookie); + + if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, + &cookie_len), "getsockopt(SO_COOKIE)")) + return 0; + return cookie; +} + +static bool was_seen(int fd, struct sock_count counts[], int counts_len) +{ + __u64 cookie = socket_cookie(fd); + int i = 0; + + for (; cookie && i < counts_len; i++) + if (cookie == counts[i].cookie) + return true; + + return false; +} + +static int get_seen_socket(int *fds, struct sock_count counts[], int n) +{ + int i = 0; + + for (; i < n; i++) + if (was_seen(fds[i], counts, n)) + return i; + return -1; +} + +static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n) +{ + int i, nread, iter_fd; + int nth_sock_idx = -1; + struct iter_out out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) + return -1; + + for (; n >= 0; n--) { + nread = read(iter_fd, &out, sizeof(out)); + if (!nread || !ASSERT_GE(nread, 1, "nread")) + goto done; + } + + for (i = 0; i < fds_len && nth_sock_idx < 0; i++) + if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie) + nth_sock_idx = i; +done: + close(iter_fd); + return nth_sock_idx; +} + +static int get_seen_count(int fd, struct sock_count counts[], int n) +{ + __u64 cookie = socket_cookie(fd); + int count = 0; + int i = 0; + + for (; cookie && !count && i < n; i++) + if (cookie == counts[i].cookie) + count = counts[i].count; + + return count; +} + +static void check_n_were_seen_once(int *fds, int fds_len, int n, + struct sock_count counts[], int counts_len) +{ + int seen_once = 0; + int seen_cnt; + int i = 0; + + for (; i < fds_len; i++) { + /* Skip any sockets that were closed or that weren't seen + * exactly once. + */ + if (fds[i] < 0) + continue; + seen_cnt = get_seen_count(fds[i], counts, counts_len); + if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt")) + seen_once++; + } + + ASSERT_EQ(seen_once, n, "seen_once"); +} + +static void remove_seen(int family, int sock_type, const char *addr, __u16 port, + int *socks, int socks_len, struct sock_count *counts, + int counts_len, struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through the first socks_len - 1 sockets. */ + read_n(iter_fd, socks_len - 1, counts, counts_len); + + /* Make sure we saw socks_len - 1 sockets exactly once. */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); + + /* Close a socket we've already seen to remove it from the bucket. */ + close_idx = get_seen_socket(socks, counts, counts_len); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + close(socks[close_idx]); + socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the last socket wasn't skipped and that there were no + * repeats. + */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); +} + +static void remove_unseen(int family, int sock_type, const char *addr, + __u16 port, int *socks, int socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through the first socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw a socket from fds. */ + check_n_were_seen_once(socks, socks_len, 1, counts, counts_len); + + /* Close what would be the next socket in the bucket to exercise the + * condition where we need to skip past the first cookie we remembered. + */ + close_idx = get_nth_socket(socks, socks_len, link, 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + close(socks[close_idx]); + socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the remaining sockets were seen exactly once and that we + * didn't repeat the socket that was already seen. + */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); +} + +static void remove_all(int family, int sock_type, const char *addr, + __u16 port, int *socks, int socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx, i; + + /* Iterate through the first socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw a socket from fds. */ + check_n_were_seen_once(socks, socks_len, 1, counts, counts_len); + + /* Close all remaining sockets to exhaust the list of saved cookies and + * exit without putting any sockets into the batch on the next read. + */ + for (i = 0; i < socks_len - 1; i++) { + close_idx = get_nth_socket(socks, socks_len, link, 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + close(socks[close_idx]); + socks[close_idx] = -1; + } + + /* Make sure there are no more sockets returned */ + ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); +} + +static void add_some(int family, int sock_type, const char *addr, __u16 port, + int *socks, int socks_len, struct sock_count *counts, + int counts_len, struct bpf_link *link, int iter_fd) +{ + int *new_socks = NULL; + + /* Iterate through the first socks_len - 1 sockets. */ + read_n(iter_fd, socks_len - 1, counts, counts_len); + + /* Make sure we saw socks_len - 1 sockets exactly once. */ + check_n_were_seen_once(socks, socks_len, socks_len - 1, counts, + counts_len); + + /* Double the number of sockets in the bucket. */ + new_socks = start_reuseport_server(family, sock_type, addr, port, 0, + socks_len); + if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server")) + goto done; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each of the original sockets was seen exactly once. */ + check_n_were_seen_once(socks, socks_len, socks_len, counts, + counts_len); +done: + free_fds(new_socks, socks_len); +} + +static void force_realloc(int family, int sock_type, const char *addr, + __u16 port, int *socks, int socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int *new_socks = NULL; + + /* Iterate through the first socket just to initialize the batch. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Double the number of sockets in the bucket to force a realloc on the + * next read. + */ + new_socks = start_reuseport_server(family, sock_type, addr, port, 0, + socks_len); + if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server")) + goto done; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each socket from the first set was seen exactly once. */ + check_n_were_seen_once(socks, socks_len, socks_len, counts, + counts_len); +done: + free_fds(new_socks, socks_len); +} + +struct test_case { + void (*test)(int family, int sock_type, const char *addr, __u16 port, + int *socks, int socks_len, struct sock_count *counts, + int counts_len, struct bpf_link *link, int iter_fd); + const char *description; + int init_socks; + int max_socks; + int sock_type; + int family; +}; + +static struct test_case resume_tests[] = { + { + .description = "udp: resume after removing a seen socket", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + .family = AF_INET6, + .test = remove_seen, + }, + { + .description = "udp: resume after removing one unseen socket", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + .family = AF_INET6, + .test = remove_unseen, + }, + { + .description = "udp: resume after removing all unseen sockets", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + .family = AF_INET6, + .test = remove_all, + }, + { + .description = "udp: resume after adding a few sockets", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_DGRAM, + /* Use AF_INET so that new sockets are added to the head of the + * bucket's list. + */ + .family = AF_INET, + .test = add_some, + }, + { + .description = "udp: force a realloc to occur", + .init_socks = init_batch_size, + .max_socks = init_batch_size * 2, + .sock_type = SOCK_DGRAM, + /* Use AF_INET6 so that new sockets are added to the tail of the + * bucket's list, needing to be added to the next batch to force + * a realloc. + */ + .family = AF_INET6, + .test = force_realloc, + }, +}; + +static void do_resume_test(struct test_case *tc) +{ + struct sock_iter_batch *skel = NULL; + static const __u16 port = 10001; + struct bpf_link *link = NULL; + struct sock_count *counts; + int err, iter_fd = -1; + const char *addr; + int *fds = NULL; + int local_port; + + counts = calloc(tc->max_socks, sizeof(*counts)); + if (!ASSERT_OK_PTR(counts, "counts")) + goto done; + skel = sock_iter_batch__open(); + if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) + goto done; + + /* Prepare a bucket of sockets in the kernel hashtable */ + addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1"; + fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0, + tc->init_socks); + if (!ASSERT_OK_PTR(fds, "start_reuseport_server")) + goto done; + local_port = get_socket_local_port(*fds); + if (!ASSERT_GE(local_port, 0, "get_socket_local_port")) + goto done; + skel->rodata->ports[0] = ntohs(local_port); + skel->rodata->sf = tc->family; + + err = sock_iter_batch__load(skel); + if (!ASSERT_OK(err, "sock_iter_batch__load")) + goto done; + + link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ? + skel->progs.iter_tcp_soreuse : + skel->progs.iter_udp_soreuse, + NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto done; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) + goto done; + + tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks, + counts, tc->max_socks, link, iter_fd); +done: + free(counts); + free_fds(fds, tc->init_socks); + if (iter_fd >= 0) + close(iter_fd); + bpf_link__destroy(link); + sock_iter_batch__destroy(skel); +} + +static void do_resume_tests(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(resume_tests); i++) { + if (test__start_subtest(resume_tests[i].description)) { + do_resume_test(&resume_tests[i]); + } + } +} + static void do_test(int sock_type, bool onebyone) { int err, i, nread, to_read, total_read, iter_fd = -1; - int first_idx, second_idx, indices[nr_soreuse]; + struct iter_out outputs[nr_soreuse]; struct bpf_link *link = NULL; struct sock_iter_batch *skel; + int first_idx, second_idx; int *fds[2] = {}; skel = sock_iter_batch__open(); @@ -34,6 +453,7 @@ static void do_test(int sock_type, bool onebyone) goto done; skel->rodata->ports[i] = ntohs(local_port); } + skel->rodata->sf = AF_INET6; err = sock_iter_batch__load(skel); if (!ASSERT_OK(err, "sock_iter_batch__load")) @@ -55,38 +475,38 @@ static void do_test(int sock_type, bool onebyone) * from a bucket and leave one socket out from * that bucket on purpose. */ - to_read = (nr_soreuse - 1) * sizeof(*indices); + to_read = (nr_soreuse - 1) * sizeof(*outputs); total_read = 0; first_idx = -1; do { - nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read); - if (nread <= 0 || nread % sizeof(*indices)) + nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read); + if (nread <= 0 || nread % sizeof(*outputs)) break; total_read += nread; if (first_idx == -1) - first_idx = indices[0]; - for (i = 0; i < nread / sizeof(*indices); i++) - ASSERT_EQ(indices[i], first_idx, "first_idx"); + first_idx = outputs[0].idx; + for (i = 0; i < nread / sizeof(*outputs); i++) + ASSERT_EQ(outputs[i].idx, first_idx, "first_idx"); } while (total_read < to_read); - ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread"); + ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread"); ASSERT_EQ(total_read, to_read, "total_read"); free_fds(fds[first_idx], nr_soreuse); fds[first_idx] = NULL; /* Read the "whole" second bucket */ - to_read = nr_soreuse * sizeof(*indices); + to_read = nr_soreuse * sizeof(*outputs); total_read = 0; second_idx = !first_idx; do { - nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read); - if (nread <= 0 || nread % sizeof(*indices)) + nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read); + if (nread <= 0 || nread % sizeof(*outputs)) break; total_read += nread; - for (i = 0; i < nread / sizeof(*indices); i++) - ASSERT_EQ(indices[i], second_idx, "second_idx"); + for (i = 0; i < nread / sizeof(*outputs); i++) + ASSERT_EQ(outputs[i].idx, second_idx, "second_idx"); } while (total_read <= to_read); ASSERT_EQ(nread, 0, "nread"); /* Both so_reuseport ports should be in different buckets, so @@ -128,6 +548,7 @@ void test_sock_iter_batch(void) do_test(SOCK_DGRAM, true); do_test(SOCK_DGRAM, false); } + do_resume_tests(); close_netns(nstoken); done: diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h index 1bdfb79ef009..e02cabcc814e 100644 --- a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h @@ -3,6 +3,7 @@ #ifndef __SOCKET_HELPERS__ #define __SOCKET_HELPERS__ +#include <sys/un.h> #include <linux/vm_sockets.h> /* include/linux/net.h */ @@ -169,6 +170,15 @@ static inline void init_addr_loopback6(struct sockaddr_storage *ss, *len = sizeof(*addr6); } +static inline void init_addr_loopback_unix(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_un *addr = memset(ss, 0, sizeof(*ss)); + + addr->sun_family = AF_UNIX; + *len = sizeof(sa_family_t); +} + static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, socklen_t *len) { @@ -190,6 +200,9 @@ static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, case AF_INET6: init_addr_loopback6(ss, len); return; + case AF_UNIX: + init_addr_loopback_unix(ss, len); + return; case AF_VSOCK: init_addr_loopback_vsock(ss, len); return; @@ -315,21 +328,27 @@ static inline int create_pair(int family, int sotype, int *p0, int *p1) { __close_fd int s, c = -1, p = -1; struct sockaddr_storage addr; - socklen_t len = sizeof(addr); + socklen_t len; int err; s = socket_loopback(family, sotype); if (s < 0) return s; - err = xgetsockname(s, sockaddr(&addr), &len); - if (err) - return err; - c = xsocket(family, sotype, 0); if (c < 0) return c; + init_addr_loopback(family, &addr, &len); + err = xbind(c, sockaddr(&addr), len); + if (err) + return err; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + return err; + err = connect(c, sockaddr(&addr), len); if (err) { if (errno != EINPROGRESS) { @@ -391,4 +410,59 @@ static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, return err; } +static inline const char *socket_kind_to_str(int sock_fd) +{ + socklen_t opt_len; + int domain, type; + + opt_len = sizeof(domain); + if (getsockopt(sock_fd, SOL_SOCKET, SO_DOMAIN, &domain, &opt_len)) + FAIL_ERRNO("getsockopt(SO_DOMAIN)"); + + opt_len = sizeof(type); + if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &type, &opt_len)) + FAIL_ERRNO("getsockopt(SO_TYPE)"); + + switch (domain) { + case AF_INET: + switch (type) { + case SOCK_STREAM: + return "tcp4"; + case SOCK_DGRAM: + return "udp4"; + } + break; + case AF_INET6: + switch (type) { + case SOCK_STREAM: + return "tcp6"; + case SOCK_DGRAM: + return "udp6"; + } + break; + case AF_UNIX: + switch (type) { + case SOCK_STREAM: + return "u_str"; + case SOCK_DGRAM: + return "u_dgr"; + case SOCK_SEQPACKET: + return "u_seq"; + } + break; + case AF_VSOCK: + switch (type) { + case SOCK_STREAM: + return "v_str"; + case SOCK_DGRAM: + return "v_dgr"; + case SOCK_SEQPACKET: + return "v_seq"; + } + break; + } + + return "???"; +} + #endif // __SOCKET_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index 3e5571dd578d..d815efac52fd 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -5,12 +5,15 @@ #define MAX_TEST_NAME 80 +#define u32(v) ((u32){(v)}) +#define u64(v) ((u64){(v)}) + #define __always_unused __attribute__((__unused__)) #define xbpf_map_delete_elem(fd, key) \ ({ \ int __ret = bpf_map_delete_elem((fd), (key)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("map_delete"); \ __ret; \ }) @@ -18,7 +21,7 @@ #define xbpf_map_lookup_elem(fd, key, val) \ ({ \ int __ret = bpf_map_lookup_elem((fd), (key), (val)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("map_lookup"); \ __ret; \ }) @@ -26,7 +29,7 @@ #define xbpf_map_update_elem(fd, key, val, flags) \ ({ \ int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("map_update"); \ __ret; \ }) @@ -35,7 +38,7 @@ ({ \ int __ret = \ bpf_prog_attach((prog), (target), (type), (flags)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("prog_attach(" #type ")"); \ __ret; \ }) @@ -43,7 +46,7 @@ #define xbpf_prog_detach2(prog, target, type) \ ({ \ int __ret = bpf_prog_detach2((prog), (target), (type)); \ - if (__ret < 0) \ + if (__ret < 0) \ FAIL_ERRNO("prog_detach2(" #type ")"); \ __ret; \ }) @@ -66,21 +69,15 @@ __ret; \ }) -static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) +static inline int add_to_sockmap(int mapfd, int fd1, int fd2) { - u64 value; - u32 key; int err; - key = 0; - value = fd1; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + err = xbpf_map_update_elem(mapfd, &u32(0), &u64(fd1), BPF_NOEXIST); if (err) return err; - key = 1; - value = fd2; - return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + return xbpf_map_update_elem(mapfd, &u32(1), &u64(fd2), BPF_NOEXIST); } #endif // __SOCKMAP_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 0a99fd404f6d..b6c471da5c28 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -3,76 +3,62 @@ /* * Tests for sockmap/sockhash holding kTLS sockets. */ - +#include <error.h> #include <netinet/tcp.h> +#include <linux/tls.h> #include "test_progs.h" +#include "sockmap_helpers.h" +#include "test_skmsg_load_helpers.skel.h" +#include "test_sockmap_ktls.skel.h" #define MAX_TEST_NAME 80 #define TCP_ULP 31 -static int tcp_server(int family) +static int init_ktls_pairs(int c, int p) { - int err, s; - - s = socket(family, SOCK_STREAM, 0); - if (!ASSERT_GE(s, 0, "socket")) - return -1; - - err = listen(s, SOMAXCONN); - if (!ASSERT_OK(err, "listen")) - return -1; - - return s; -} + int err; + struct tls12_crypto_info_aes_gcm_128 crypto_rx; + struct tls12_crypto_info_aes_gcm_128 crypto_tx; -static int disconnect(int fd) -{ - struct sockaddr unspec = { AF_UNSPEC }; + err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) + goto out; - return connect(fd, &unspec, sizeof(unspec)); + err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) + goto out; + + memset(&crypto_rx, 0, sizeof(crypto_rx)); + memset(&crypto_tx, 0, sizeof(crypto_tx)); + crypto_rx.info.version = TLS_1_2_VERSION; + crypto_tx.info.version = TLS_1_2_VERSION; + crypto_rx.info.cipher_type = TLS_CIPHER_AES_GCM_128; + crypto_tx.info.cipher_type = TLS_CIPHER_AES_GCM_128; + + err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_tx, sizeof(crypto_tx)); + if (!ASSERT_OK(err, "setsockopt(TLS_TX)")) + goto out; + + err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_rx, sizeof(crypto_rx)); + if (!ASSERT_OK(err, "setsockopt(TLS_RX)")) + goto out; + return 0; +out: + return -1; } -/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */ -static void test_sockmap_ktls_disconnect_after_delete(int family, int map) +static int create_ktls_pairs(int family, int sotype, int *c, int *p) { - struct sockaddr_storage addr = {0}; - socklen_t len = sizeof(addr); - int err, cli, srv, zero = 0; - - srv = tcp_server(family); - if (srv == -1) - return; - - err = getsockname(srv, (struct sockaddr *)&addr, &len); - if (!ASSERT_OK(err, "getsockopt")) - goto close_srv; + int err; - cli = socket(family, SOCK_STREAM, 0); - if (!ASSERT_GE(cli, 0, "socket")) - goto close_srv; - - err = connect(cli, (struct sockaddr *)&addr, len); - if (!ASSERT_OK(err, "connect")) - goto close_cli; - - err = bpf_map_update_elem(map, &zero, &cli, 0); - if (!ASSERT_OK(err, "bpf_map_update_elem")) - goto close_cli; - - err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); - if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) - goto close_cli; - - err = bpf_map_delete_elem(map, &zero); - if (!ASSERT_OK(err, "bpf_map_delete_elem")) - goto close_cli; - - err = disconnect(cli); + err = create_pair(family, sotype, c, p); + if (!ASSERT_OK(err, "create_pair()")) + return -1; -close_cli: - close(cli); -close_srv: - close(srv); + err = init_ktls_pairs(*c, *p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + return -1; + return 0; } static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map) @@ -145,6 +131,189 @@ static const char *fmt_test_name(const char *subtest_name, int family, return test_name; } +static void test_sockmap_ktls_offload(int family, int sotype) +{ + int err; + int c = 0, p = 0, sent, recvd; + char msg[12] = "hello world\0"; + char rcv[13]; + + err = create_ktls_pairs(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_ktls_pairs()")) + goto out; + + sent = send(c, msg, sizeof(msg), 0); + if (!ASSERT_OK(err, "send(msg)")) + goto out; + + recvd = recv(p, rcv, sizeof(rcv), 0); + if (!ASSERT_OK(err, "recv(msg)") || + !ASSERT_EQ(recvd, sent, "length mismatch")) + goto out; + + ASSERT_OK(memcmp(msg, rcv, sizeof(msg)), "data mismatch"); + +out: + if (c) + close(c); + if (p) + close(p); +} + +static void test_sockmap_ktls_tx_cork(int family, int sotype, bool push) +{ + int err, off; + int i, j; + int start_push = 0, push_len = 0; + int c = 0, p = 0, one = 1, sent, recvd; + int prog_fd, map_fd; + char msg[12] = "hello world\0"; + char rcv[20] = {0}; + struct test_sockmap_ktls *skel; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open ktls skel")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_sk_policy); + map_fd = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) + goto out; + + err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) + goto out; + + err = init_ktls_pairs(c, p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + goto out; + + skel->bss->cork_byte = sizeof(msg); + if (push) { + start_push = 1; + push_len = 2; + } + skel->bss->push_start = start_push; + skel->bss->push_end = push_len; + + off = sizeof(msg) / 2; + sent = send(c, msg, off, 0); + if (!ASSERT_EQ(sent, off, "send(msg)")) + goto out; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(-1, recvd, "expected no data")) + goto out; + + /* send remaining msg */ + sent = send(c, msg + off, sizeof(msg) - off, 0); + if (!ASSERT_EQ(sent, sizeof(msg) - off, "send remaining data")) + goto out; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_OK(err, "recv(msg)") || + !ASSERT_EQ(recvd, sizeof(msg) + push_len, "check length mismatch")) + goto out; + + for (i = 0, j = 0; i < recvd;) { + /* skip checking the data that has been pushed in */ + if (i >= start_push && i <= start_push + push_len - 1) { + i++; + continue; + } + if (!ASSERT_EQ(rcv[i], msg[j], "data mismatch")) + goto out; + i++; + j++; + } +out: + if (c) + close(c); + if (p) + close(p); + test_sockmap_ktls__destroy(skel); +} + +static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push) +{ + int c = -1, p = -1, one = 1, two = 2; + struct test_sockmap_ktls *skel; + unsigned char *data = NULL; + struct msghdr msg = {0}; + struct iovec iov[2]; + int prog_fd, map_fd; + int txrx_buf = 1024; + int iov_length = 8192; + int err; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open ktls skel")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + + err = setsockopt(c, SOL_SOCKET, SO_RCVBUFFORCE, &txrx_buf, sizeof(int)); + err |= setsockopt(p, SOL_SOCKET, SO_SNDBUFFORCE, &txrx_buf, sizeof(int)); + if (!ASSERT_OK(err, "set buf limit")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_sk_policy_redir); + map_fd = bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) + goto out; + + err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) + goto out; + + err = bpf_map_update_elem(map_fd, &two, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) + goto out; + + skel->bss->apply_bytes = 1024; + + err = init_ktls_pairs(c, p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + goto out; + + data = calloc(iov_length, sizeof(char)); + if (!data) + goto out; + + iov[0].iov_base = data; + iov[0].iov_len = iov_length; + iov[1].iov_base = data; + iov[1].iov_len = iov_length; + msg.msg_iov = iov; + msg.msg_iovlen = 2; + + for (;;) { + err = sendmsg(c, &msg, MSG_DONTWAIT); + if (err <= 0) + break; + } + +out: + if (data) + free(data); + if (c != -1) + close(c); + if (p != -1) + close(p); + + test_sockmap_ktls__destroy(skel); +} + static void run_tests(int family, enum bpf_map_type map_type) { int map; @@ -153,18 +322,30 @@ static void run_tests(int family, enum bpf_map_type map_type) if (!ASSERT_GE(map, 0, "bpf_map_create")) return; - if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type))) - test_sockmap_ktls_disconnect_after_delete(family, map); if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type))) test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map); close(map); } +static void run_ktls_test(int family, int sotype) +{ + if (test__start_subtest("tls simple offload")) + test_sockmap_ktls_offload(family, sotype); + if (test__start_subtest("tls tx cork")) + test_sockmap_ktls_tx_cork(family, sotype, false); + if (test__start_subtest("tls tx cork with push")) + test_sockmap_ktls_tx_cork(family, sotype, true); + if (test__start_subtest("tls tx egress with no buf")) + test_sockmap_ktls_tx_no_buf(family, sotype, true); +} + void test_sockmap_ktls(void) { run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP); run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH); run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP); run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH); + run_ktls_test(AF_INET, SOCK_STREAM); + run_ktls_test(AF_INET6, SOCK_STREAM); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 4ee1148d22be..1d98eee7a2c3 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -1366,237 +1366,6 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } } -static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, - int sock_mapfd, int nop_mapfd, - int verd_mapfd, enum redir_mode mode, - int send_flags) -{ - const char *log_prefix = redir_mode_str(mode); - unsigned int pass; - int err, n; - u32 key; - char b; - - zero_verdict_count(verd_mapfd); - - err = add_to_sockmap(sock_mapfd, peer0, peer1); - if (err) - return; - - if (nop_mapfd >= 0) { - err = add_to_sockmap(nop_mapfd, cli0, cli1); - if (err) - return; - } - - /* Last byte is OOB data when send_flags has MSG_OOB bit set */ - n = xsend(cli1, "ab", 2, send_flags); - if (n >= 0 && n < 2) - FAIL("%s: incomplete send", log_prefix); - if (n < 2) - return; - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - return; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); - - n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC); - if (n < 0) - FAIL_ERRNO("%s: recv_timeout", log_prefix); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); - - if (send_flags & MSG_OOB) { - /* Check that we can't read OOB while in sockmap */ - errno = 0; - n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT); - if (n != -1 || errno != EOPNOTSUPP) - FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d", - log_prefix, n, errno); - - /* Remove peer1 from sockmap */ - xbpf_map_delete_elem(sock_mapfd, &(int){ 1 }); - - /* Check that OOB was dropped on redirect */ - errno = 0; - n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT); - if (n != -1 || errno != EINVAL) - FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d", - log_prefix, n, errno); - } -} - -static void unix_redir_to_connected(int sotype, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) -{ - int c0, c1, p0, p1; - int sfd[2]; - - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - return; - c0 = sfd[0], p0 = sfd[1]; - - if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd)) - goto close0; - c1 = sfd[0], p1 = sfd[1]; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, - mode, NO_FLAGS); - - xclose(c1); - xclose(p1); -close0: - xclose(c0); - xclose(p0); -} - -static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int sotype) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS); - skel->bss->test_ingress = true; - unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, - int sotype) -{ - const char *family_name, *map_name; - char s[MAX_TEST_NAME]; - - family_name = family_str(AF_UNIX); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - unix_skb_redir_to_connected(skel, map, sotype); -} - -/* Returns two connected loopback vsock sockets */ -static int vsock_socketpair_connectible(int sotype, int *v0, int *v1) -{ - return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1); -} - -static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd, - enum redir_mode mode, int sotype) -{ - const char *log_prefix = redir_mode_str(mode); - char a = 'a', b = 'b'; - int u0, u1, v0, v1; - int sfd[2]; - unsigned int pass; - int err, n; - u32 key; - - zero_verdict_count(verd_mapfd); - - if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd)) - return; - - u0 = sfd[0]; - u1 = sfd[1]; - - err = vsock_socketpair_connectible(sotype, &v0, &v1); - if (err) { - FAIL("vsock_socketpair_connectible() failed"); - goto close_uds; - } - - err = add_to_sockmap(sock_mapfd, u0, v0); - if (err) { - FAIL("add_to_sockmap failed"); - goto close_vsock; - } - - n = write(v1, &a, sizeof(a)); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) - goto out; - - n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0); - if (n < 0) - FAIL("%s: recv() err, errno=%d", log_prefix, errno); - if (n == 0) - FAIL("%s: incomplete recv", log_prefix); - if (b != a) - FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b); - - key = SK_PASS; - err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); - if (err) - goto out; - if (pass != 1) - FAIL("%s: want pass count 1, have %d", log_prefix, pass); -out: - key = 0; - bpf_map_delete_elem(sock_mapfd, &key); - key = 1; - bpf_map_delete_elem(sock_mapfd, &key); - -close_vsock: - close(v0); - close(v1); - -close_uds: - close(u0); - close(u1); -} - -static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, - int sotype) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype); - skel->bss->test_ingress = true; - vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map) -{ - const char *family_name, *map_name; - char s[MAX_TEST_NAME]; - - family_name = family_str(AF_VSOCK); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - - vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM); - vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET); -} - static void test_reuseport(struct test_sockmap_listen *skel, struct bpf_map *map, int family, int sotype) { @@ -1637,224 +1406,6 @@ static void test_reuseport(struct test_sockmap_listen *skel, } } -static int inet_socketpair(int family, int type, int *s, int *c) -{ - return create_pair(family, type | SOCK_NONBLOCK, s, c); -} - -static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, - enum redir_mode mode) -{ - int c0, c1, p0, p1; - int err; - - err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); - if (err) - return; - err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1); - if (err) - goto close_cli0; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, - mode, NO_FLAGS); - - xclose(c1); - xclose(p1); -close_cli0: - xclose(c0); - xclose(p0); -} - -static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int family) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS); - skel->bss->test_ingress = true; - udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map, - int family) -{ - const char *family_name, *map_name; - char s[MAX_TEST_NAME]; - - family_name = family_str(family); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - udp_skb_redir_to_connected(skel, map, family); -} - -static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, - int verd_mapfd, enum redir_mode mode) -{ - int c0, c1, p0, p1; - int sfd[2]; - int err; - - if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd)) - return; - c0 = sfd[0], p0 = sfd[1]; - - err = inet_socketpair(family, type, &p1, &c1); - if (err) - goto close; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, - mode, NO_FLAGS); - - xclose(c1); - xclose(p1); -close: - xclose(c0); - xclose(p0); -} - -static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int family) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, - REDIR_EGRESS); - inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, - REDIR_EGRESS); - skel->bss->test_ingress = true; - inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map, - REDIR_INGRESS); - inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map, - REDIR_INGRESS); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, - int nop_mapfd, int verd_mapfd, - enum redir_mode mode, int send_flags) -{ - int c0, c1, p0, p1; - int sfd[2]; - int err; - - err = inet_socketpair(family, type, &p0, &c0); - if (err) - return; - - if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd)) - goto close_cli0; - c1 = sfd[0], p1 = sfd[1]; - - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd, - verd_mapfd, mode, send_flags); - - xclose(c1); - xclose(p1); -close_cli0: - xclose(c0); - xclose(p0); -} - -static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, - struct bpf_map *inner_map, int family) -{ - int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); - int nop_map = bpf_map__fd(skel->maps.nop_map); - int verdict_map = bpf_map__fd(skel->maps.verdict_map); - int sock_map = bpf_map__fd(inner_map); - int err; - - err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); - if (err) - return; - - skel->bss->test_ingress = false; - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, -1, verdict_map, - REDIR_EGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, -1, verdict_map, - REDIR_EGRESS, NO_FLAGS); - - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, nop_map, verdict_map, - REDIR_EGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_EGRESS, NO_FLAGS); - - /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */ - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_EGRESS, MSG_OOB); - - skel->bss->test_ingress = true; - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, -1, verdict_map, - REDIR_INGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, -1, verdict_map, - REDIR_INGRESS, NO_FLAGS); - - unix_inet_redir_to_connected(family, SOCK_DGRAM, - sock_map, nop_map, verdict_map, - REDIR_INGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_INGRESS, NO_FLAGS); - - /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */ - unix_inet_redir_to_connected(family, SOCK_STREAM, - sock_map, nop_map, verdict_map, - REDIR_INGRESS, MSG_OOB); - - xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); -} - -static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map, - int family) -{ - const char *family_name, *map_name; - struct netns_obj *netns; - char s[MAX_TEST_NAME]; - - family_name = family_str(family); - map_name = map_type_str(map); - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); - if (!test__start_subtest(s)) - return; - - netns = netns_new("sockmap_listen", true); - if (!ASSERT_OK_PTR(netns, "netns_new")) - return; - - inet_unix_skb_redir_to_connected(skel, map, family); - unix_inet_skb_redir_to_connected(skel, map, family); - - netns_free(netns); -} - static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, int family) { @@ -1863,8 +1414,6 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, test_redir(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, family, SOCK_STREAM); test_reuseport(skel, map, family, SOCK_DGRAM); - test_udp_redir(skel, map, family); - test_udp_unix_redir(skel, map, family); } void serial_test_sockmap_listen(void) @@ -1880,16 +1429,10 @@ void serial_test_sockmap_listen(void) skel->bss->test_sockmap = true; run_tests(skel, skel->maps.sock_map, AF_INET); run_tests(skel, skel->maps.sock_map, AF_INET6); - test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM); - test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM); - test_vsock_redir(skel, skel->maps.sock_map); skel->bss->test_sockmap = false; run_tests(skel, skel->maps.sock_hash, AF_INET); run_tests(skel, skel->maps.sock_hash, AF_INET6); - test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM); - test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM); - test_vsock_redir(skel, skel->maps.sock_hash); test_sockmap_listen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c new file mode 100644 index 000000000000..9c461d93113d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for sockmap/sockhash redirection. + * + * BPF_MAP_TYPE_SOCKMAP + * BPF_MAP_TYPE_SOCKHASH + * x + * sk_msg-to-egress + * sk_msg-to-ingress + * sk_skb-to-egress + * sk_skb-to-ingress + * x + * AF_INET, SOCK_STREAM + * AF_INET6, SOCK_STREAM + * AF_INET, SOCK_DGRAM + * AF_INET6, SOCK_DGRAM + * AF_UNIX, SOCK_STREAM + * AF_UNIX, SOCK_DGRAM + * AF_VSOCK, SOCK_STREAM + * AF_VSOCK, SOCK_SEQPACKET + */ + +#include <errno.h> +#include <error.h> +#include <sched.h> +#include <stdio.h> +#include <unistd.h> + +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <linux/string.h> +#include <linux/vm_sockets.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "linux/const.h" +#include "test_progs.h" +#include "sockmap_helpers.h" +#include "test_sockmap_redir.skel.h" + +/* The meaning of SUPPORTED is "will redirect packet as expected". + */ +#define SUPPORTED _BITUL(0) + +/* Note on sk_skb-to-ingress ->af_vsock: + * + * Peer socket may receive the packet some time after the return from sendmsg(). + * In a typical usage scenario, recvmsg() will block until the redirected packet + * appears in the destination queue, or timeout if the packet was dropped. By + * that point, the verdict map has already been updated to reflect what has + * happened. + * + * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg() + * takes place. Which means we may race the execution of the verdict logic and + * read map_verd before it has been updated, i.e. we might observe + * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1. + * + * This confuses the selftest logic: if there was no packet dropped, where's the + * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0 + * (which implies the verdict program has not been ran) just re-read the verdict + * map again. + */ +#define UNSUPPORTED_RACY_VERD _BITUL(1) + +enum prog_type { + SK_MSG_EGRESS, + SK_MSG_INGRESS, + SK_SKB_EGRESS, + SK_SKB_INGRESS, +}; + +enum { + SEND_INNER = 0, + SEND_OUTER, +}; + +enum { + RECV_INNER = 0, + RECV_OUTER, +}; + +struct maps { + int in; + int out; + int verd; +}; + +struct combo_spec { + enum prog_type prog_type; + const char *in, *out; +}; + +struct redir_spec { + const char *name; + int idx_send; + int idx_recv; + enum prog_type prog_type; +}; + +struct socket_spec { + int family; + int sotype; + int send_flags; + int in[2]; + int out[2]; +}; + +static int socket_spec_pairs(struct socket_spec *s) +{ + return create_socket_pairs(s->family, s->sotype, + &s->in[0], &s->out[0], + &s->in[1], &s->out[1]); +} + +static void socket_spec_close(struct socket_spec *s) +{ + xclose(s->in[0]); + xclose(s->in[1]); + xclose(s->out[0]); + xclose(s->out[1]); +} + +static void get_redir_params(struct redir_spec *redir, + struct test_sockmap_redir *skel, int *prog_fd, + enum bpf_attach_type *attach_type, + int *redirect_flags) +{ + enum prog_type type = redir->prog_type; + struct bpf_program *prog; + bool sk_msg; + + sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS; + prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict; + + *prog_fd = bpf_program__fd(prog); + *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT; + + if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS) + *redirect_flags = BPF_F_INGRESS; + else + *redirect_flags = 0; +} + +static void try_recv(const char *prefix, int fd, int flags, bool expect_success) +{ + ssize_t n; + char buf; + + errno = 0; + n = recv(fd, &buf, 1, flags); + if (n < 0 && expect_success) + FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n); + if (!n && !expect_success) + FAIL("%s: expected failure: retval=%zd", prefix, n); +} + +static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out, + int sd_recv, int map_verd, int status) +{ + unsigned int drop, pass; + char recv_buf; + ssize_t n; + +get_verdict: + if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) || + xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass)) + return; + + if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) { + sched_yield(); + goto get_verdict; + } + + if (pass != 0) { + FAIL("unsupported: wanted verdict pass 0, have %u", pass); + return; + } + + /* If nothing was dropped, packet should have reached the peer */ + if (drop == 0) { + errno = 0; + n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC); + if (n != 1) + FAIL_ERRNO("unsupported: packet missing, retval=%zd", n); + } + + /* Ensure queues are empty */ + try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false); + if (sd_in != sd_send) + try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false); + + try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false); + if (sd_recv != sd_out) + try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false); +} + +static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer, + int sd_in, int sd_out, int sd_recv, + struct maps *maps, int status) +{ + unsigned int drop, pass; + char *send_buf = "ab"; + char recv_buf = '\0'; + ssize_t n, len = 1; + + /* Zero out the verdict map */ + if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) || + xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY)) + return; + + if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST)) + return; + + if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST)) + goto del_in; + + /* Last byte is OOB data when send_flags has MSG_OOB bit set */ + if (send_flags & MSG_OOB) + len++; + n = send(sd_send, send_buf, len, send_flags); + if (n >= 0 && n < len) + FAIL("incomplete send"); + if (n < 0) { + /* sk_msg redirect combo not supported? */ + if (status & SUPPORTED || errno != EACCES) + FAIL_ERRNO("send"); + goto out; + } + + if (!(status & SUPPORTED)) { + handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv, + maps->verd, status); + goto out; + } + + errno = 0; + n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC); + if (n != 1) { + FAIL_ERRNO("recv_timeout()"); + goto out; + } + + /* Check verdict _after_ recv(); af_vsock may need time to catch up */ + if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) || + xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass)) + goto out; + + if (drop != 0 || pass != 1) + FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u", + drop, pass); + + if (recv_buf != send_buf[0]) + FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]); + + if (send_flags & MSG_OOB) { + /* Fail reading OOB while in sockmap */ + try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out, + MSG_OOB | MSG_DONTWAIT, false); + + /* Remove sd_out from sockmap */ + xbpf_map_delete_elem(maps->out, &u32(0)); + + /* Check that OOB was dropped on redirect */ + try_recv("recv(sd_out, MSG_OOB)", sd_out, + MSG_OOB | MSG_DONTWAIT, false); + + goto del_in; + } +out: + xbpf_map_delete_elem(maps->out, &u32(0)); +del_in: + xbpf_map_delete_elem(maps->in, &u32(0)); +} + +static int is_redir_supported(enum prog_type type, const char *in, + const char *out) +{ + /* Matching based on strings returned by socket_kind_to_str(): + * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq + * Plus a wildcard: any + * Not in use: u_seq, v_dgr + */ + struct combo_spec *c, combos[] = { + /* Send to local: TCP -> any, but vsock */ + { SK_MSG_INGRESS, "tcp", "tcp" }, + { SK_MSG_INGRESS, "tcp", "udp" }, + { SK_MSG_INGRESS, "tcp", "u_str" }, + { SK_MSG_INGRESS, "tcp", "u_dgr" }, + + /* Send to egress: TCP -> TCP */ + { SK_MSG_EGRESS, "tcp", "tcp" }, + + /* Ingress to egress: any -> any */ + { SK_SKB_EGRESS, "any", "any" }, + + /* Ingress to local: any -> any, but vsock */ + { SK_SKB_INGRESS, "any", "tcp" }, + { SK_SKB_INGRESS, "any", "udp" }, + { SK_SKB_INGRESS, "any", "u_str" }, + { SK_SKB_INGRESS, "any", "u_dgr" }, + }; + + for (c = combos; c < combos + ARRAY_SIZE(combos); c++) { + if (c->prog_type == type && + (!strcmp(c->in, "any") || strstarts(in, c->in)) && + (!strcmp(c->out, "any") || strstarts(out, c->out))) + return SUPPORTED; + } + + return 0; +} + +static int get_support_status(enum prog_type type, const char *in, + const char *out) +{ + int status = is_redir_supported(type, in, out); + + if (type == SK_SKB_INGRESS && strstarts(out, "v_")) + status |= UNSUPPORTED_RACY_VERD; + + return status; +} + +static void test_socket(enum bpf_map_type type, struct redir_spec *redir, + struct maps *maps, struct socket_spec *s_in, + struct socket_spec *s_out) +{ + int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status; + const char *in_str, *out_str; + char s[MAX_TEST_NAME]; + + fd_in = s_in->in[0]; + fd_out = s_out->out[0]; + fd_send = s_in->in[redir->idx_send]; + fd_peer = s_in->in[redir->idx_send ^ 1]; + fd_recv = s_out->out[redir->idx_recv]; + flags = s_in->send_flags; + + in_str = socket_kind_to_str(fd_in); + out_str = socket_kind_to_str(fd_out); + status = get_support_status(redir->prog_type, in_str, out_str); + + snprintf(s, sizeof(s), + "%-4s %-17s %-5s %s %-5s%6s", + /* hash sk_skb-to-ingress u_str → v_str (OOB) */ + type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash", + redir->name, + in_str, + status & SUPPORTED ? "→" : " ", + out_str, + (flags & MSG_OOB) ? "(OOB)" : ""); + + if (!test__start_subtest(s)) + return; + + test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv, + maps, status); +} + +static void test_redir(enum bpf_map_type type, struct redir_spec *redir, + struct maps *maps) +{ + struct socket_spec *s, sockets[] = { + { AF_INET, SOCK_STREAM }, + // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */ + { AF_INET6, SOCK_STREAM }, + { AF_INET, SOCK_DGRAM }, + { AF_INET6, SOCK_DGRAM }, + { AF_UNIX, SOCK_STREAM }, + { AF_UNIX, SOCK_STREAM, MSG_OOB }, + { AF_UNIX, SOCK_DGRAM }, + // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */ + { AF_VSOCK, SOCK_STREAM }, + // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */ + { AF_VSOCK, SOCK_SEQPACKET }, + }; + + for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) + if (socket_spec_pairs(s)) + goto out; + + /* Intra-proto */ + for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++) + test_socket(type, redir, maps, s, s); + + /* Cross-proto */ + for (int i = 0; i < ARRAY_SIZE(sockets); i++) { + for (int j = 0; j < ARRAY_SIZE(sockets); j++) { + struct socket_spec *out = &sockets[j]; + struct socket_spec *in = &sockets[i]; + + /* Skip intra-proto and between variants */ + if (out->send_flags || + (in->family == out->family && + in->sotype == out->sotype)) + continue; + + test_socket(type, redir, maps, in, out); + } + } +out: + while (--s >= sockets) + socket_spec_close(s); +} + +static void test_map(enum bpf_map_type type) +{ + struct redir_spec *r, redirs[] = { + { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS }, + { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS }, + { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS }, + { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS }, + }; + + for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) { + enum bpf_attach_type attach_type; + struct test_sockmap_redir *skel; + struct maps maps; + int prog_fd; + + skel = test_sockmap_redir__open_and_load(); + if (!skel) { + FAIL("open_and_load"); + return; + } + + switch (type) { + case BPF_MAP_TYPE_SOCKMAP: + maps.in = bpf_map__fd(skel->maps.nop_map); + maps.out = bpf_map__fd(skel->maps.sock_map); + break; + case BPF_MAP_TYPE_SOCKHASH: + maps.in = bpf_map__fd(skel->maps.nop_hash); + maps.out = bpf_map__fd(skel->maps.sock_hash); + break; + default: + FAIL("Unsupported bpf_map_type"); + return; + } + + skel->bss->redirect_type = type; + maps.verd = bpf_map__fd(skel->maps.verdict_map); + get_redir_params(r, skel, &prog_fd, &attach_type, + &skel->bss->redirect_flags); + + if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0)) + return; + + test_redir(type, r, &maps); + + if (xbpf_prog_detach2(prog_fd, maps.in, attach_type)) + return; + + test_sockmap_redir__destroy(skel); + } +} + +void serial_test_sockmap_redir(void) +{ + test_map(BPF_MAP_TYPE_SOCKMAP); + test_map(BPF_MAP_TYPE_SOCKHASH); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index c85798966aec..76d72a59365e 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -56,6 +56,8 @@ #define MAC_DST_FWD "00:11:22:33:44:55" #define MAC_DST "00:22:33:44:55:66" +#define MAC_SRC_FWD "00:33:44:55:66:77" +#define MAC_SRC "00:44:55:66:77:88" #define IFADDR_STR_LEN 18 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q" @@ -207,11 +209,10 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) int err; if (result->dev_mode == MODE_VETH) { - SYS(fail, "ip link add src type veth peer name src_fwd"); - SYS(fail, "ip link add dst type veth peer name dst_fwd"); - - SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD); - SYS(fail, "ip link set dst address " MAC_DST); + SYS(fail, "ip link add src address " MAC_SRC " type veth " + "peer name src_fwd address " MAC_SRC_FWD); + SYS(fail, "ip link add dst address " MAC_DST " type veth " + "peer name dst_fwd address " MAC_DST_FWD); } else if (result->dev_mode == MODE_NETKIT) { err = create_netkit(NETKIT_L3, "src", "src_fwd"); if (!ASSERT_OK(err, "create_ifindex_src")) diff --git a/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c new file mode 100644 index 000000000000..7d1b478c99a0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms Inc. */ +#include <test_progs.h> +#include "test_btf_ext.skel.h" +#include "btf_helpers.h" + +static void subtest_line_func_info(void) +{ + struct test_btf_ext *skel; + struct bpf_prog_info info; + struct bpf_line_info line_info[128], *libbpf_line_info; + struct bpf_func_info func_info[128], *libbpf_func_info; + __u32 info_len = sizeof(info), libbbpf_line_info_cnt, libbbpf_func_info_cnt; + int err, fd; + + skel = test_btf_ext__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + fd = bpf_program__fd(skel->progs.global_func); + + memset(&info, 0, sizeof(info)); + info.line_info = ptr_to_u64(&line_info); + info.nr_line_info = sizeof(line_info); + info.line_info_rec_size = sizeof(*line_info); + err = bpf_prog_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(err, "prog_line_info")) + goto out; + + libbpf_line_info = bpf_program__line_info(skel->progs.global_func); + libbbpf_line_info_cnt = bpf_program__line_info_cnt(skel->progs.global_func); + + memset(&info, 0, sizeof(info)); + info.func_info = ptr_to_u64(&func_info); + info.nr_func_info = sizeof(func_info); + info.func_info_rec_size = sizeof(*func_info); + err = bpf_prog_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(err, "prog_func_info")) + goto out; + + libbpf_func_info = bpf_program__func_info(skel->progs.global_func); + libbbpf_func_info_cnt = bpf_program__func_info_cnt(skel->progs.global_func); + + if (!ASSERT_OK_PTR(libbpf_line_info, "bpf_program__line_info")) + goto out; + if (!ASSERT_EQ(libbbpf_line_info_cnt, info.nr_line_info, "line_info_cnt")) + goto out; + if (!ASSERT_OK_PTR(libbpf_func_info, "bpf_program__func_info")) + goto out; + if (!ASSERT_EQ(libbbpf_func_info_cnt, info.nr_func_info, "func_info_cnt")) + goto out; + ASSERT_MEMEQ(libbpf_line_info, line_info, libbbpf_line_info_cnt * sizeof(*line_info), + "line_info"); + ASSERT_MEMEQ(libbpf_func_info, func_info, libbbpf_func_info_cnt * sizeof(*func_info), + "func_info"); +out: + test_btf_ext__destroy(skel); +} + +void test_btf_ext(void) +{ + if (test__start_subtest("line_func_info")) + subtest_line_func_info(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c index a95b42bf744a..47b56c258f3f 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c @@ -63,6 +63,9 @@ static void test_set_global_vars_succeeds(void) " -G \"var_eb = EB2\" "\ " -G \"var_ec = EC2\" "\ " -G \"var_b = 1\" "\ + " -G \"struct1.struct2.u.var_u8 = 170\" "\ + " -G \"union1.struct3.var_u8_l = 0xaa\" "\ + " -G \"union1.struct3.var_u8_h = 0xaa\" "\ "-vl2 > %s", fix->veristat, fix->tmpfile); read(fix->fd, fix->output, fix->sz); @@ -78,6 +81,8 @@ static void test_set_global_vars_succeeds(void) __CHECK_STR("_w=12 ", "var_eb = EB2"); __CHECK_STR("_w=13 ", "var_ec = EC2"); __CHECK_STR("_w=1 ", "var_b = 1"); + __CHECK_STR("_w=170 ", "struct1.struct2.u.var_u8 = 170"); + __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa"); out: teardown_fixture(fix); diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index e66a57970d28..c9da06741104 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -14,6 +14,7 @@ #include "verifier_bounds_deduction_non_const.skel.h" #include "verifier_bounds_mix_sign_unsign.skel.h" #include "verifier_bpf_get_stack.skel.h" +#include "verifier_bpf_trap.skel.h" #include "verifier_bswap.skel.h" #include "verifier_btf_ctx_access.skel.h" #include "verifier_btf_unreliable_prog.skel.h" @@ -148,6 +149,7 @@ void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); } void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); } void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); } +void test_verifier_bpf_trap(void) { RUN(verifier_bpf_trap); } void test_verifier_bswap(void) { RUN(verifier_bswap); } void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); } void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 3d47878ef6bf..19f92affc2da 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -351,9 +351,10 @@ void test_xdp_metadata(void) struct xdp_metadata2 *bpf_obj2 = NULL; struct xdp_metadata *bpf_obj = NULL; struct bpf_program *new_prog, *prog; + struct bpf_devmap_val devmap_e = {}; + struct bpf_map *prog_arr, *devmap; struct nstoken *tok = NULL; __u32 queue_id = QUEUE_ID; - struct bpf_map *prog_arr; struct xsk tx_xsk = {}; struct xsk rx_xsk = {}; __u32 val, key = 0; @@ -409,6 +410,13 @@ void test_xdp_metadata(void) bpf_program__set_ifindex(prog, rx_ifindex); bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); + /* Make sure we can load a dev-bound program that performs + * XDP_REDIRECT into a devmap. + */ + new_prog = bpf_object__find_program_by_name(bpf_obj->obj, "redirect"); + bpf_program__set_ifindex(new_prog, rx_ifindex); + bpf_program__set_flags(new_prog, BPF_F_XDP_DEV_BOUND_ONLY); + if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton")) goto out; @@ -423,6 +431,18 @@ void test_xdp_metadata(void) "update prog_arr")) goto out; + /* Make sure we can't add dev-bound programs to devmaps. */ + devmap = bpf_object__find_map_by_name(bpf_obj->obj, "dev_map"); + if (!ASSERT_OK_PTR(devmap, "no dev_map found")) + goto out; + + devmap_e.bpf_prog.fd = val; + if (!ASSERT_ERR(bpf_map__update_elem(devmap, &key, sizeof(key), + &devmap_e, sizeof(devmap_e), + BPF_ANY), + "update dev_map")) + goto out; + /* Attach BPF program to RX interface. */ ret = bpf_xdp_attach(rx_ifindex, diff --git a/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c new file mode 100644 index 000000000000..079bf3794b3a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +long process_byte = 0; +int verdict_dir = 0; +int dropped = 0; +int pkt_size = 0; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map_rx SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map_tx SEC(".maps"); + +SEC("sk_skb/stream_parser") +int prog_skb_parser(struct __sk_buff *skb) +{ + return pkt_size; +} + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict(struct __sk_buff *skb) +{ + int one = 1; + int ret = bpf_sk_redirect_map(skb, &sock_map_rx, one, verdict_dir); + + if (ret == SK_DROP) + dropped++; + __sync_fetch_and_add(&process_byte, skb->len); + return ret; +} + +SEC("sk_skb/stream_verdict") +int prog_skb_pass(struct __sk_buff *skb) +{ + __sync_fetch_and_add(&process_byte, skb->len); + return SK_PASS; +} + +SEC("sk_msg") +int prog_skmsg_verdict(struct sk_msg_md *msg) +{ + int one = 1; + + __sync_fetch_and_add(&process_byte, msg->size); + return bpf_msg_redirect_map(msg, &sock_map_tx, one, verdict_dir); +} + +SEC("sk_msg") +int prog_skmsg_pass(struct sk_msg_md *msg) +{ + __sync_fetch_and_add(&process_byte, msg->size); + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h index fb8dc0768999..d67466c1ff77 100644 --- a/tools/testing/selftests/bpf/bpf_arena_spin_lock.h +++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h @@ -32,6 +32,7 @@ extern unsigned long CONFIG_NR_CPUS __kconfig; struct __qspinlock { union { atomic_t val; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ struct { u8 locked; u8 pending; @@ -40,6 +41,17 @@ struct __qspinlock { u16 locked_pending; u16 tail; }; +#else + struct { + u16 tail; + u16 locked_pending; + }; + struct { + u8 reserved[2]; + u8 pending; + u8 locked; + }; +#endif }; }; @@ -95,9 +107,6 @@ struct arena_qnode { #define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) #define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET) -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES]; static inline u32 encode_tail(int cpu, int idx) diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 863df7c0fdd0..6e208e24ba3b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -225,8 +225,9 @@ #define CAN_USE_BPF_ST #endif -#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) +#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) #define CAN_USE_LOAD_ACQ_STORE_REL #endif diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h new file mode 100644 index 000000000000..3754f581b328 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BPF_QDISC_COMMON_H +#define _BPF_QDISC_COMMON_H + +#define NET_XMIT_SUCCESS 0x00 +#define NET_XMIT_DROP 0x01 /* skb dropped */ +#define NET_XMIT_CN 0x02 /* congestion notification */ + +#define TC_PRIO_CONTROL 7 +#define TC_PRIO_MAX 15 + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) + +struct bpf_sk_buff_ptr; + +static struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb) +{ + return (struct qdisc_skb_cb *)skb->cb; +} + +static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb) +{ + return qdisc_skb_cb(skb)->pkt_len; +} + +#endif diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c new file mode 100644 index 000000000000..f188062ed730 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include "bpf_experimental.h" +#include "bpf_qdisc_common.h" + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops") +int BPF_PROG(bpf_qdisc_test_enqueue, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + bpf_qdisc_skb_drop(skb, to_free); + return NET_XMIT_DROP; +} + +SEC("struct_ops") +struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch) +{ + return NULL; +} + +SEC("struct_ops") +void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch) +{ +} + +SEC("struct_ops") +void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch) +{ +} + +SEC(".struct_ops") +struct Qdisc_ops test = { + .enqueue = (void *)bpf_qdisc_test_enqueue, + .dequeue = (void *)bpf_qdisc_test_dequeue, + .reset = (void *)bpf_qdisc_test_reset, + .destroy = (void *)bpf_qdisc_test_destroy, + .id = "bpf_qdisc_test", +}; + diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c new file mode 100644 index 000000000000..1de2be3e370b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include "bpf_experimental.h" +#include "bpf_qdisc_common.h" + +char _license[] SEC("license") = "GPL"; + +struct skb_node { + struct sk_buff __kptr * skb; + struct bpf_list_node node; +}; + +private(A) struct bpf_spin_lock q_fifo_lock; +private(A) struct bpf_list_head q_fifo __contains(skb_node, node); + +bool init_called; + +SEC("struct_ops/bpf_fifo_enqueue") +int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + struct skb_node *skbn; + u32 pkt_len; + + if (sch->q.qlen == sch->limit) + goto drop; + + skbn = bpf_obj_new(typeof(*skbn)); + if (!skbn) + goto drop; + + pkt_len = qdisc_pkt_len(skb); + + sch->q.qlen++; + skb = bpf_kptr_xchg(&skbn->skb, skb); + if (skb) + bpf_qdisc_skb_drop(skb, to_free); + + bpf_spin_lock(&q_fifo_lock); + bpf_list_push_back(&q_fifo, &skbn->node); + bpf_spin_unlock(&q_fifo_lock); + + sch->qstats.backlog += pkt_len; + return NET_XMIT_SUCCESS; +drop: + bpf_qdisc_skb_drop(skb, to_free); + return NET_XMIT_DROP; +} + +SEC("struct_ops/bpf_fifo_dequeue") +struct sk_buff *BPF_PROG(bpf_fifo_dequeue, struct Qdisc *sch) +{ + struct bpf_list_node *node; + struct sk_buff *skb = NULL; + struct skb_node *skbn; + + bpf_spin_lock(&q_fifo_lock); + node = bpf_list_pop_front(&q_fifo); + bpf_spin_unlock(&q_fifo_lock); + if (!node) + return NULL; + + skbn = container_of(node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + bpf_obj_drop(skbn); + if (!skb) + return NULL; + + sch->qstats.backlog -= qdisc_pkt_len(skb); + bpf_qdisc_bstats_update(sch, skb); + sch->q.qlen--; + + return skb; +} + +SEC("struct_ops/bpf_fifo_init") +int BPF_PROG(bpf_fifo_init, struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + sch->limit = 1000; + init_called = true; + return 0; +} + +SEC("struct_ops/bpf_fifo_reset") +void BPF_PROG(bpf_fifo_reset, struct Qdisc *sch) +{ + struct bpf_list_node *node; + struct skb_node *skbn; + int i; + + bpf_for(i, 0, sch->q.qlen) { + struct sk_buff *skb = NULL; + + bpf_spin_lock(&q_fifo_lock); + node = bpf_list_pop_front(&q_fifo); + bpf_spin_unlock(&q_fifo_lock); + + if (!node) + break; + + skbn = container_of(node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + if (skb) + bpf_kfree_skb(skb); + bpf_obj_drop(skbn); + } + sch->q.qlen = 0; +} + +SEC("struct_ops") +void BPF_PROG(bpf_fifo_destroy, struct Qdisc *sch) +{ +} + +SEC(".struct_ops") +struct Qdisc_ops fifo = { + .enqueue = (void *)bpf_fifo_enqueue, + .dequeue = (void *)bpf_fifo_dequeue, + .init = (void *)bpf_fifo_init, + .reset = (void *)bpf_fifo_reset, + .destroy = (void *)bpf_fifo_destroy, + .id = "bpf_fifo", +}; + diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c new file mode 100644 index 000000000000..1a3233a275c7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c @@ -0,0 +1,756 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* bpf_fq is intended for testing the bpf qdisc infrastructure and not a direct + * copy of sch_fq. bpf_fq implements the scheduling algorithm of sch_fq before + * 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") was + * introduced. It gives each flow a fair chance to transmit packets in a + * round-robin fashion. Note that for flow pacing, bpf_fq currently only + * respects skb->tstamp but not skb->sk->sk_pacing_rate. In addition, if there + * are multiple bpf_fq instances, they will have a shared view of flows and + * configuration since some key data structure such as fq_prio_flows, + * fq_nonprio_flows, and fq_bpf_data are global. + * + * To use bpf_fq alone without running selftests, use the following commands. + * + * 1. Register bpf_fq to the kernel + * bpftool struct_ops register bpf_qdisc_fq.bpf.o /sys/fs/bpf + * 2. Add bpf_fq to an interface + * tc qdisc add dev <interface name> root handle <handle> bpf_fq + * 3. Delete bpf_fq attached to the interface + * tc qdisc delete dev <interface name> root + * 4. Unregister bpf_fq + * bpftool struct_ops unregister name fq + * + * The qdisc name, bpf_fq, used in tc commands is defined by Qdisc_ops.id. + * The struct_ops_map_name, fq, used in the bpftool command is the name of the + * Qdisc_ops. + * + * SEC(".struct_ops") + * struct Qdisc_ops fq = { + * ... + * .id = "bpf_fq", + * }; + */ + +#include <vmlinux.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" +#include "bpf_qdisc_common.h" + +char _license[] SEC("license") = "GPL"; + +#define NSEC_PER_USEC 1000L +#define NSEC_PER_SEC 1000000000L + +#define NUM_QUEUE (1 << 20) + +struct fq_bpf_data { + u32 quantum; + u32 initial_quantum; + u32 flow_refill_delay; + u32 flow_plimit; + u64 horizon; + u32 orphan_mask; + u32 timer_slack; + u64 time_next_delayed_flow; + u64 unthrottle_latency_ns; + u8 horizon_drop; + u32 new_flow_cnt; + u32 old_flow_cnt; + u64 ktime_cache; +}; + +enum { + CLS_RET_PRIO = 0, + CLS_RET_NONPRIO = 1, + CLS_RET_ERR = 2, +}; + +struct skb_node { + u64 tstamp; + struct sk_buff __kptr * skb; + struct bpf_rb_node node; +}; + +struct fq_flow_node { + int credit; + u32 qlen; + u64 age; + u64 time_next_packet; + struct bpf_list_node list_node; + struct bpf_rb_node rb_node; + struct bpf_rb_root queue __contains(skb_node, node); + struct bpf_spin_lock lock; + struct bpf_refcount refcount; +}; + +struct dequeue_nonprio_ctx { + bool stop_iter; + u64 expire; + u64 now; +}; + +struct remove_flows_ctx { + bool gc_only; + u32 reset_cnt; + u32 reset_max; +}; + +struct unset_throttled_flows_ctx { + bool unset_all; + u64 now; +}; + +struct fq_stashed_flow { + struct fq_flow_node __kptr * flow; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u64); + __type(value, struct fq_stashed_flow); + __uint(max_entries, NUM_QUEUE); +} fq_nonprio_flows SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u64); + __type(value, struct fq_stashed_flow); + __uint(max_entries, 1); +} fq_prio_flows SEC(".maps"); + +private(A) struct bpf_spin_lock fq_delayed_lock; +private(A) struct bpf_rb_root fq_delayed __contains(fq_flow_node, rb_node); + +private(B) struct bpf_spin_lock fq_new_flows_lock; +private(B) struct bpf_list_head fq_new_flows __contains(fq_flow_node, list_node); + +private(C) struct bpf_spin_lock fq_old_flows_lock; +private(C) struct bpf_list_head fq_old_flows __contains(fq_flow_node, list_node); + +private(D) struct fq_bpf_data q; + +/* Wrapper for bpf_kptr_xchg that expects NULL dst */ +static void bpf_kptr_xchg_back(void *map_val, void *ptr) +{ + void *ret; + + ret = bpf_kptr_xchg(map_val, ptr); + if (ret) + bpf_obj_drop(ret); +} + +static bool skbn_tstamp_less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct skb_node *skbn_a; + struct skb_node *skbn_b; + + skbn_a = container_of(a, struct skb_node, node); + skbn_b = container_of(b, struct skb_node, node); + + return skbn_a->tstamp < skbn_b->tstamp; +} + +static bool fn_time_next_packet_less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct fq_flow_node *flow_a; + struct fq_flow_node *flow_b; + + flow_a = container_of(a, struct fq_flow_node, rb_node); + flow_b = container_of(b, struct fq_flow_node, rb_node); + + return flow_a->time_next_packet < flow_b->time_next_packet; +} + +static void +fq_flows_add_head(struct bpf_list_head *head, struct bpf_spin_lock *lock, + struct fq_flow_node *flow, u32 *flow_cnt) +{ + bpf_spin_lock(lock); + bpf_list_push_front(head, &flow->list_node); + bpf_spin_unlock(lock); + *flow_cnt += 1; +} + +static void +fq_flows_add_tail(struct bpf_list_head *head, struct bpf_spin_lock *lock, + struct fq_flow_node *flow, u32 *flow_cnt) +{ + bpf_spin_lock(lock); + bpf_list_push_back(head, &flow->list_node); + bpf_spin_unlock(lock); + *flow_cnt += 1; +} + +static void +fq_flows_remove_front(struct bpf_list_head *head, struct bpf_spin_lock *lock, + struct bpf_list_node **node, u32 *flow_cnt) +{ + bpf_spin_lock(lock); + *node = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + *flow_cnt -= 1; +} + +static bool +fq_flows_is_empty(struct bpf_list_head *head, struct bpf_spin_lock *lock) +{ + struct bpf_list_node *node; + + bpf_spin_lock(lock); + node = bpf_list_pop_front(head); + if (node) { + bpf_list_push_front(head, node); + bpf_spin_unlock(lock); + return false; + } + bpf_spin_unlock(lock); + + return true; +} + +/* flow->age is used to denote the state of the flow (not-detached, detached, throttled) + * as well as the timestamp when the flow is detached. + * + * 0: not-detached + * 1 - (~0ULL-1): detached + * ~0ULL: throttled + */ +static void fq_flow_set_detached(struct fq_flow_node *flow) +{ + flow->age = bpf_jiffies64(); +} + +static bool fq_flow_is_detached(struct fq_flow_node *flow) +{ + return flow->age != 0 && flow->age != ~0ULL; +} + +static bool sk_listener(struct sock *sk) +{ + return (1 << sk->__sk_common.skc_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); +} + +static void fq_gc(void); + +static int fq_new_flow(void *flow_map, struct fq_stashed_flow **sflow, u64 hash) +{ + struct fq_stashed_flow tmp = {}; + struct fq_flow_node *flow; + int ret; + + flow = bpf_obj_new(typeof(*flow)); + if (!flow) + return -ENOMEM; + + flow->credit = q.initial_quantum, + flow->qlen = 0, + flow->age = 1, + flow->time_next_packet = 0, + + ret = bpf_map_update_elem(flow_map, &hash, &tmp, 0); + if (ret == -ENOMEM || ret == -E2BIG) { + fq_gc(); + bpf_map_update_elem(&fq_nonprio_flows, &hash, &tmp, 0); + } + + *sflow = bpf_map_lookup_elem(flow_map, &hash); + if (!*sflow) { + bpf_obj_drop(flow); + return -ENOMEM; + } + + bpf_kptr_xchg_back(&(*sflow)->flow, flow); + return 0; +} + +static int +fq_classify(struct sk_buff *skb, struct fq_stashed_flow **sflow) +{ + struct sock *sk = skb->sk; + int ret = CLS_RET_NONPRIO; + u64 hash = 0; + + if ((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL) { + *sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash); + ret = CLS_RET_PRIO; + } else { + if (!sk || sk_listener(sk)) { + hash = bpf_skb_get_hash(skb) & q.orphan_mask; + /* Avoid collision with an existing flow hash, which + * only uses the lower 32 bits of hash, by setting the + * upper half of hash to 1. + */ + hash |= (1ULL << 32); + } else if (sk->__sk_common.skc_state == TCP_CLOSE) { + hash = bpf_skb_get_hash(skb) & q.orphan_mask; + hash |= (1ULL << 32); + } else { + hash = sk->__sk_common.skc_hash; + } + *sflow = bpf_map_lookup_elem(&fq_nonprio_flows, &hash); + } + + if (!*sflow) + ret = fq_new_flow(&fq_nonprio_flows, sflow, hash) < 0 ? + CLS_RET_ERR : CLS_RET_NONPRIO; + + return ret; +} + +static bool fq_packet_beyond_horizon(struct sk_buff *skb) +{ + return (s64)skb->tstamp > (s64)(q.ktime_cache + q.horizon); +} + +SEC("struct_ops/bpf_fq_enqueue") +int BPF_PROG(bpf_fq_enqueue, struct sk_buff *skb, struct Qdisc *sch, + struct bpf_sk_buff_ptr *to_free) +{ + struct fq_flow_node *flow = NULL, *flow_copy; + struct fq_stashed_flow *sflow; + u64 time_to_send, jiffies; + struct skb_node *skbn; + int ret; + + if (sch->q.qlen >= sch->limit) + goto drop; + + if (!skb->tstamp) { + time_to_send = q.ktime_cache = bpf_ktime_get_ns(); + } else { + if (fq_packet_beyond_horizon(skb)) { + q.ktime_cache = bpf_ktime_get_ns(); + if (fq_packet_beyond_horizon(skb)) { + if (q.horizon_drop) + goto drop; + + skb->tstamp = q.ktime_cache + q.horizon; + } + } + time_to_send = skb->tstamp; + } + + ret = fq_classify(skb, &sflow); + if (ret == CLS_RET_ERR) + goto drop; + + flow = bpf_kptr_xchg(&sflow->flow, flow); + if (!flow) + goto drop; + + if (ret == CLS_RET_NONPRIO) { + if (flow->qlen >= q.flow_plimit) { + bpf_kptr_xchg_back(&sflow->flow, flow); + goto drop; + } + + if (fq_flow_is_detached(flow)) { + flow_copy = bpf_refcount_acquire(flow); + + jiffies = bpf_jiffies64(); + if ((s64)(jiffies - (flow_copy->age + q.flow_refill_delay)) > 0) { + if (flow_copy->credit < q.quantum) + flow_copy->credit = q.quantum; + } + flow_copy->age = 0; + fq_flows_add_tail(&fq_new_flows, &fq_new_flows_lock, flow_copy, + &q.new_flow_cnt); + } + } + + skbn = bpf_obj_new(typeof(*skbn)); + if (!skbn) { + bpf_kptr_xchg_back(&sflow->flow, flow); + goto drop; + } + + skbn->tstamp = skb->tstamp = time_to_send; + + sch->qstats.backlog += qdisc_pkt_len(skb); + + skb = bpf_kptr_xchg(&skbn->skb, skb); + if (skb) + bpf_qdisc_skb_drop(skb, to_free); + + bpf_spin_lock(&flow->lock); + bpf_rbtree_add(&flow->queue, &skbn->node, skbn_tstamp_less); + bpf_spin_unlock(&flow->lock); + + flow->qlen++; + bpf_kptr_xchg_back(&sflow->flow, flow); + + sch->q.qlen++; + return NET_XMIT_SUCCESS; + +drop: + bpf_qdisc_skb_drop(skb, to_free); + sch->qstats.drops++; + return NET_XMIT_DROP; +} + +static int fq_unset_throttled_flows(u32 index, struct unset_throttled_flows_ctx *ctx) +{ + struct bpf_rb_node *node = NULL; + struct fq_flow_node *flow; + + bpf_spin_lock(&fq_delayed_lock); + + node = bpf_rbtree_first(&fq_delayed); + if (!node) { + bpf_spin_unlock(&fq_delayed_lock); + return 1; + } + + flow = container_of(node, struct fq_flow_node, rb_node); + if (!ctx->unset_all && flow->time_next_packet > ctx->now) { + q.time_next_delayed_flow = flow->time_next_packet; + bpf_spin_unlock(&fq_delayed_lock); + return 1; + } + + node = bpf_rbtree_remove(&fq_delayed, &flow->rb_node); + + bpf_spin_unlock(&fq_delayed_lock); + + if (!node) + return 1; + + flow = container_of(node, struct fq_flow_node, rb_node); + flow->age = 0; + fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt); + + return 0; +} + +static void fq_flow_set_throttled(struct fq_flow_node *flow) +{ + flow->age = ~0ULL; + + if (q.time_next_delayed_flow > flow->time_next_packet) + q.time_next_delayed_flow = flow->time_next_packet; + + bpf_spin_lock(&fq_delayed_lock); + bpf_rbtree_add(&fq_delayed, &flow->rb_node, fn_time_next_packet_less); + bpf_spin_unlock(&fq_delayed_lock); +} + +static void fq_check_throttled(u64 now) +{ + struct unset_throttled_flows_ctx ctx = { + .unset_all = false, + .now = now, + }; + unsigned long sample; + + if (q.time_next_delayed_flow > now) + return; + + sample = (unsigned long)(now - q.time_next_delayed_flow); + q.unthrottle_latency_ns -= q.unthrottle_latency_ns >> 3; + q.unthrottle_latency_ns += sample >> 3; + + q.time_next_delayed_flow = ~0ULL; + bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &ctx, 0); +} + +static struct sk_buff* +fq_dequeue_nonprio_flows(u32 index, struct dequeue_nonprio_ctx *ctx) +{ + u64 time_next_packet, time_to_send; + struct bpf_rb_node *rb_node; + struct sk_buff *skb = NULL; + struct bpf_list_head *head; + struct bpf_list_node *node; + struct bpf_spin_lock *lock; + struct fq_flow_node *flow; + struct skb_node *skbn; + bool is_empty; + u32 *cnt; + + if (q.new_flow_cnt) { + head = &fq_new_flows; + lock = &fq_new_flows_lock; + cnt = &q.new_flow_cnt; + } else if (q.old_flow_cnt) { + head = &fq_old_flows; + lock = &fq_old_flows_lock; + cnt = &q.old_flow_cnt; + } else { + if (q.time_next_delayed_flow != ~0ULL) + ctx->expire = q.time_next_delayed_flow; + goto break_loop; + } + + fq_flows_remove_front(head, lock, &node, cnt); + if (!node) + goto break_loop; + + flow = container_of(node, struct fq_flow_node, list_node); + if (flow->credit <= 0) { + flow->credit += q.quantum; + fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt); + return NULL; + } + + bpf_spin_lock(&flow->lock); + rb_node = bpf_rbtree_first(&flow->queue); + if (!rb_node) { + bpf_spin_unlock(&flow->lock); + is_empty = fq_flows_is_empty(&fq_old_flows, &fq_old_flows_lock); + if (head == &fq_new_flows && !is_empty) { + fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt); + } else { + fq_flow_set_detached(flow); + bpf_obj_drop(flow); + } + return NULL; + } + + skbn = container_of(rb_node, struct skb_node, node); + time_to_send = skbn->tstamp; + + time_next_packet = (time_to_send > flow->time_next_packet) ? + time_to_send : flow->time_next_packet; + if (ctx->now < time_next_packet) { + bpf_spin_unlock(&flow->lock); + flow->time_next_packet = time_next_packet; + fq_flow_set_throttled(flow); + return NULL; + } + + rb_node = bpf_rbtree_remove(&flow->queue, rb_node); + bpf_spin_unlock(&flow->lock); + + if (!rb_node) + goto add_flow_and_break; + + skbn = container_of(rb_node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + bpf_obj_drop(skbn); + + if (!skb) + goto add_flow_and_break; + + flow->credit -= qdisc_skb_cb(skb)->pkt_len; + flow->qlen--; + +add_flow_and_break: + fq_flows_add_head(head, lock, flow, cnt); + +break_loop: + ctx->stop_iter = true; + return skb; +} + +static struct sk_buff *fq_dequeue_prio(void) +{ + struct fq_flow_node *flow = NULL; + struct fq_stashed_flow *sflow; + struct bpf_rb_node *rb_node; + struct sk_buff *skb = NULL; + struct skb_node *skbn; + u64 hash = 0; + + sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash); + if (!sflow) + return NULL; + + flow = bpf_kptr_xchg(&sflow->flow, flow); + if (!flow) + return NULL; + + bpf_spin_lock(&flow->lock); + rb_node = bpf_rbtree_first(&flow->queue); + if (!rb_node) { + bpf_spin_unlock(&flow->lock); + goto out; + } + + skbn = container_of(rb_node, struct skb_node, node); + rb_node = bpf_rbtree_remove(&flow->queue, &skbn->node); + bpf_spin_unlock(&flow->lock); + + if (!rb_node) + goto out; + + skbn = container_of(rb_node, struct skb_node, node); + skb = bpf_kptr_xchg(&skbn->skb, skb); + bpf_obj_drop(skbn); + +out: + bpf_kptr_xchg_back(&sflow->flow, flow); + + return skb; +} + +SEC("struct_ops/bpf_fq_dequeue") +struct sk_buff *BPF_PROG(bpf_fq_dequeue, struct Qdisc *sch) +{ + struct dequeue_nonprio_ctx cb_ctx = {}; + struct sk_buff *skb = NULL; + int i; + + if (!sch->q.qlen) + goto out; + + skb = fq_dequeue_prio(); + if (skb) + goto dequeue; + + q.ktime_cache = cb_ctx.now = bpf_ktime_get_ns(); + fq_check_throttled(q.ktime_cache); + bpf_for(i, 0, sch->limit) { + skb = fq_dequeue_nonprio_flows(i, &cb_ctx); + if (cb_ctx.stop_iter) + break; + }; + + if (skb) { +dequeue: + sch->q.qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); + bpf_qdisc_bstats_update(sch, skb); + return skb; + } + + if (cb_ctx.expire) + bpf_qdisc_watchdog_schedule(sch, cb_ctx.expire, q.timer_slack); +out: + return NULL; +} + +static int fq_remove_flows_in_list(u32 index, void *ctx) +{ + struct bpf_list_node *node; + struct fq_flow_node *flow; + + bpf_spin_lock(&fq_new_flows_lock); + node = bpf_list_pop_front(&fq_new_flows); + bpf_spin_unlock(&fq_new_flows_lock); + if (!node) { + bpf_spin_lock(&fq_old_flows_lock); + node = bpf_list_pop_front(&fq_old_flows); + bpf_spin_unlock(&fq_old_flows_lock); + if (!node) + return 1; + } + + flow = container_of(node, struct fq_flow_node, list_node); + bpf_obj_drop(flow); + + return 0; +} + +extern unsigned CONFIG_HZ __kconfig; + +/* limit number of collected flows per round */ +#define FQ_GC_MAX 8 +#define FQ_GC_AGE (3*CONFIG_HZ) + +static bool fq_gc_candidate(struct fq_flow_node *flow) +{ + u64 jiffies = bpf_jiffies64(); + + return fq_flow_is_detached(flow) && + ((s64)(jiffies - (flow->age + FQ_GC_AGE)) > 0); +} + +static int +fq_remove_flows(struct bpf_map *flow_map, u64 *hash, + struct fq_stashed_flow *sflow, struct remove_flows_ctx *ctx) +{ + if (sflow->flow && + (!ctx->gc_only || fq_gc_candidate(sflow->flow))) { + bpf_map_delete_elem(flow_map, hash); + ctx->reset_cnt++; + } + + return ctx->reset_cnt < ctx->reset_max ? 0 : 1; +} + +static void fq_gc(void) +{ + struct remove_flows_ctx cb_ctx = { + .gc_only = true, + .reset_cnt = 0, + .reset_max = FQ_GC_MAX, + }; + + bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &cb_ctx, 0); +} + +SEC("struct_ops/bpf_fq_reset") +void BPF_PROG(bpf_fq_reset, struct Qdisc *sch) +{ + struct unset_throttled_flows_ctx utf_ctx = { + .unset_all = true, + }; + struct remove_flows_ctx rf_ctx = { + .gc_only = false, + .reset_cnt = 0, + .reset_max = NUM_QUEUE, + }; + struct fq_stashed_flow *sflow; + u64 hash = 0; + + sch->q.qlen = 0; + sch->qstats.backlog = 0; + + bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &rf_ctx, 0); + + rf_ctx.reset_cnt = 0; + bpf_for_each_map_elem(&fq_prio_flows, fq_remove_flows, &rf_ctx, 0); + fq_new_flow(&fq_prio_flows, &sflow, hash); + + bpf_loop(NUM_QUEUE, fq_remove_flows_in_list, NULL, 0); + q.new_flow_cnt = 0; + q.old_flow_cnt = 0; + + bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &utf_ctx, 0); +} + +SEC("struct_ops/bpf_fq_init") +int BPF_PROG(bpf_fq_init, struct Qdisc *sch, struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = sch->dev_queue->dev; + u32 psched_mtu = dev->mtu + dev->hard_header_len; + struct fq_stashed_flow *sflow; + u64 hash = 0; + + if (fq_new_flow(&fq_prio_flows, &sflow, hash) < 0) + return -ENOMEM; + + sch->limit = 10000; + q.initial_quantum = 10 * psched_mtu; + q.quantum = 2 * psched_mtu; + q.flow_refill_delay = 40; + q.flow_plimit = 100; + q.horizon = 10ULL * NSEC_PER_SEC; + q.horizon_drop = 1; + q.orphan_mask = 1024 - 1; + q.timer_slack = 10 * NSEC_PER_USEC; + q.time_next_delayed_flow = ~0ULL; + q.unthrottle_latency_ns = 0ULL; + q.new_flow_cnt = 0; + q.old_flow_cnt = 0; + + return 0; +} + +SEC("struct_ops") +void BPF_PROG(bpf_fq_destroy, struct Qdisc *sch) +{ +} + +SEC(".struct_ops") +struct Qdisc_ops fq = { + .enqueue = (void *)bpf_fq_enqueue, + .dequeue = (void *)bpf_fq_dequeue, + .reset = (void *)bpf_fq_reset, + .init = (void *)bpf_fq_init, + .destroy = (void *)bpf_fq_destroy, + .id = "bpf_fq", +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 659694162739..17db400f0e0d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -128,6 +128,7 @@ #define sk_refcnt __sk_common.skc_refcnt #define sk_state __sk_common.skc_state #define sk_net __sk_common.skc_net +#define sk_rcv_saddr __sk_common.skc_rcv_saddr #define sk_v6_daddr __sk_common.skc_v6_daddr #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr #define sk_flags __sk_common.skc_flags diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c index 38f78d9345de..69f81cb555ca 100644 --- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c +++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c @@ -30,22 +30,27 @@ int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg) /* trace_cgroup_mkdir(struct cgroup *cgrp, const char *path) * - * struct cgroup_rstat_cpu { + * struct css_rstat_cpu { * ... - * struct cgroup *updated_children; + * struct cgroup_subsys_state *updated_children; * ... * }; * - * struct cgroup { + * struct cgroup_subsys_state { + * ... + * struct css_rstat_cpu __percpu *rstat_cpu; * ... - * struct cgroup_rstat_cpu __percpu *rstat_cpu; + * }; + * + * struct cgroup { + * struct cgroup_subsys_state self; * ... * }; */ SEC("tp_btf/cgroup_mkdir") int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path) { - g = (__u64)cgrp->rstat_cpu->updated_children; + g = (__u64)cgrp->self.rstat_cpu->updated_children; return 0; } @@ -56,7 +61,8 @@ int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path) __u32 cpu; cpu = bpf_get_smp_processor_id(); - rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(cgrp->rstat_cpu, cpu); + rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr( + cgrp->self.rstat_cpu, cpu); if (rstat) { /* READ_ONCE */ *(volatile int *)rstat; diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c index c74362854948..ff189a736ad8 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c +++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c @@ -37,8 +37,9 @@ struct { __type(value, struct attach_counter); } attach_counters SEC(".maps"); -extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym; -extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym; +extern void css_rstat_updated( + struct cgroup_subsys_state *css, int cpu) __ksym; +extern void css_rstat_flush(struct cgroup_subsys_state *css) __ksym; static uint64_t cgroup_id(struct cgroup *cgrp) { @@ -75,7 +76,7 @@ int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader, else if (create_percpu_attach_counter(cg_id, 1)) return 0; - cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id()); + css_rstat_updated(&dst_cgrp->self, bpf_get_smp_processor_id()); return 0; } @@ -141,7 +142,7 @@ int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp) return 1; /* Flush the stats to make sure we get the most updated numbers */ - cgroup_rstat_flush(cgrp); + css_rstat_flush(&cgrp->self); total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id); if (!total_counter) { diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c new file mode 100644 index 000000000000..13cdb11fdeb2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Google LLC */ +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> + +/* From uapi/linux/dma-buf.h */ +#define DMA_BUF_NAME_LEN 32 + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, DMA_BUF_NAME_LEN); + __type(value, bool); + __uint(max_entries, 5); +} testbuf_hash SEC(".maps"); + +/* + * Fields output by this iterator are delimited by newlines. Convert any + * newlines in user-provided printed strings to spaces. + */ +static void sanitize_string(char *src, size_t size) +{ + for (char *c = src; (size_t)(c - src) < size && *c; ++c) + if (*c == '\n') + *c = ' '; +} + +SEC("iter/dmabuf") +int dmabuf_collector(struct bpf_iter__dmabuf *ctx) +{ + const struct dma_buf *dmabuf = ctx->dmabuf; + struct seq_file *seq = ctx->meta->seq; + unsigned long inode = 0; + size_t size; + const char *pname, *exporter; + char name[DMA_BUF_NAME_LEN] = {'\0'}; + + if (!dmabuf) + return 0; + + if (BPF_CORE_READ_INTO(&inode, dmabuf, file, f_inode, i_ino) || + bpf_core_read(&size, sizeof(size), &dmabuf->size) || + bpf_core_read(&pname, sizeof(pname), &dmabuf->name) || + bpf_core_read(&exporter, sizeof(exporter), &dmabuf->exp_name)) + return 1; + + /* Buffers are not required to be named */ + if (pname) { + if (bpf_probe_read_kernel(name, sizeof(name), pname)) + return 1; + + /* Name strings can be provided by userspace */ + sanitize_string(name, sizeof(name)); + } + + BPF_SEQ_PRINTF(seq, "%lu\n%llu\n%s\n%s\n", inode, size, name, exporter); + return 0; +} + +SEC("syscall") +int iter_dmabuf_for_each(const void *ctx) +{ + struct dma_buf *d; + + bpf_for_each(dmabuf, d) { + char name[DMA_BUF_NAME_LEN]; + const char *pname; + bool *found; + long len; + int i; + + if (bpf_core_read(&pname, sizeof(pname), &d->name)) + return 1; + + /* Buffers are not required to be named */ + if (!pname) + continue; + + len = bpf_probe_read_kernel_str(name, sizeof(name), pname); + if (len < 0) + return 1; + + /* + * The entire name buffer is used as a map key. + * Zeroize any uninitialized trailing bytes after the NUL. + */ + bpf_for(i, len, DMA_BUF_NAME_LEN) + name[i] = 0; + + found = bpf_map_lookup_elem(&testbuf_hash, name); + if (found) { + bool t = true; + + bpf_map_update_elem(&testbuf_hash, name, &t, BPF_EXIST); + } + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index e1fba28e4a86..a0391f9da2d4 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -680,3 +680,233 @@ out: bpf_ringbuf_discard_dynptr(&ptr_buf, 0); return XDP_DROP; } + +void *user_ptr; +/* Contains the copy of the data pointed by user_ptr. + * Size 384 to make it not fit into a single kernel chunk when copying + * but less than the maximum bpf stack size (512). + */ +char expected_str[384]; +__u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257}; + +typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off, + u32 size, const void *unsafe_ptr); + +/* Returns the offset just before the end of the maximum sized xdp fragment. + * Any write larger than 32 bytes will be split between 2 fragments. + */ +__u32 xdp_near_frag_end_offset(void) +{ + const __u32 headroom = 256; + const __u32 max_frag_size = __PAGE_SIZE - headroom - sizeof(struct skb_shared_info); + + /* 32 bytes before the approximate end of the fragment */ + return max_frag_size - 32; +} + +/* Use __always_inline on test_dynptr_probe[_str][_xdp]() and callbacks + * of type bpf_read_dynptr_fn_t to prevent compiler from generating + * indirect calls that make program fail to load with "unknown opcode" error. + */ +static __always_inline void test_dynptr_probe(void *ptr, bpf_read_dynptr_fn_t bpf_read_dynptr_fn) +{ + char buf[sizeof(expected_str)]; + struct bpf_dynptr ptr_buf; + int i; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + err = bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf); + + bpf_for(i, 0, ARRAY_SIZE(test_len)) { + __u32 len = test_len[i]; + + err = err ?: bpf_read_dynptr_fn(&ptr_buf, 0, test_len[i], ptr); + if (len > sizeof(buf)) + break; + err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0); + + if (err || bpf_memcmp(expected_str, buf, len)) + err = 1; + + /* Reset buffer and dynptr */ + __builtin_memset(buf, 0, sizeof(buf)); + err = err ?: bpf_dynptr_write(&ptr_buf, 0, buf, len, 0); + } + bpf_ringbuf_discard_dynptr(&ptr_buf, 0); +} + +static __always_inline void test_dynptr_probe_str(void *ptr, + bpf_read_dynptr_fn_t bpf_read_dynptr_fn) +{ + char buf[sizeof(expected_str)]; + struct bpf_dynptr ptr_buf; + __u32 cnt, i; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf); + + bpf_for(i, 0, ARRAY_SIZE(test_len)) { + __u32 len = test_len[i]; + + cnt = bpf_read_dynptr_fn(&ptr_buf, 0, len, ptr); + if (cnt != len) + err = 1; + + if (len > sizeof(buf)) + continue; + err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0); + if (!len) + continue; + if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0') + err = 1; + } + bpf_ringbuf_discard_dynptr(&ptr_buf, 0); +} + +static __always_inline void test_dynptr_probe_xdp(struct xdp_md *xdp, void *ptr, + bpf_read_dynptr_fn_t bpf_read_dynptr_fn) +{ + struct bpf_dynptr ptr_xdp; + char buf[sizeof(expected_str)]; + __u32 off, i; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + off = xdp_near_frag_end_offset(); + err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + + bpf_for(i, 0, ARRAY_SIZE(test_len)) { + __u32 len = test_len[i]; + + err = err ?: bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr); + if (len > sizeof(buf)) + continue; + err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0); + if (err || bpf_memcmp(expected_str, buf, len)) + err = 1; + /* Reset buffer and dynptr */ + __builtin_memset(buf, 0, sizeof(buf)); + err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0); + } +} + +static __always_inline void test_dynptr_probe_str_xdp(struct xdp_md *xdp, void *ptr, + bpf_read_dynptr_fn_t bpf_read_dynptr_fn) +{ + struct bpf_dynptr ptr_xdp; + char buf[sizeof(expected_str)]; + __u32 cnt, off, i; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return; + + off = xdp_near_frag_end_offset(); + err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + if (err) + return; + + bpf_for(i, 0, ARRAY_SIZE(test_len)) { + __u32 len = test_len[i]; + + cnt = bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr); + if (cnt != len) + err = 1; + + if (len > sizeof(buf)) + continue; + err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0); + + if (!len) + continue; + if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0') + err = 1; + + __builtin_memset(buf, 0, sizeof(buf)); + err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0); + } +} + +SEC("xdp") +int test_probe_read_user_dynptr(struct xdp_md *xdp) +{ + test_dynptr_probe(user_ptr, bpf_probe_read_user_dynptr); + if (!err) + test_dynptr_probe_xdp(xdp, user_ptr, bpf_probe_read_user_dynptr); + return XDP_PASS; +} + +SEC("xdp") +int test_probe_read_kernel_dynptr(struct xdp_md *xdp) +{ + test_dynptr_probe(expected_str, bpf_probe_read_kernel_dynptr); + if (!err) + test_dynptr_probe_xdp(xdp, expected_str, bpf_probe_read_kernel_dynptr); + return XDP_PASS; +} + +SEC("xdp") +int test_probe_read_user_str_dynptr(struct xdp_md *xdp) +{ + test_dynptr_probe_str(user_ptr, bpf_probe_read_user_str_dynptr); + if (!err) + test_dynptr_probe_str_xdp(xdp, user_ptr, bpf_probe_read_user_str_dynptr); + return XDP_PASS; +} + +SEC("xdp") +int test_probe_read_kernel_str_dynptr(struct xdp_md *xdp) +{ + test_dynptr_probe_str(expected_str, bpf_probe_read_kernel_str_dynptr); + if (!err) + test_dynptr_probe_str_xdp(xdp, expected_str, bpf_probe_read_kernel_str_dynptr); + return XDP_PASS; +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int test_copy_from_user_dynptr(void *ctx) +{ + test_dynptr_probe(user_ptr, bpf_copy_from_user_dynptr); + return 0; +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int test_copy_from_user_str_dynptr(void *ctx) +{ + test_dynptr_probe_str(user_ptr, bpf_copy_from_user_str_dynptr); + return 0; +} + +static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off, + u32 size, const void *unsafe_ptr) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task); +} + +static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off, + u32 size, const void *unsafe_ptr) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + return bpf_copy_from_user_task_str_dynptr(dptr, off, size, unsafe_ptr, task); +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int test_copy_from_user_task_dynptr(void *ctx) +{ + test_dynptr_probe(user_ptr, bpf_copy_data_from_user_task); + return 0; +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int test_copy_from_user_task_str_dynptr(void *ctx) +{ + test_dynptr_probe_str(user_ptr, bpf_copy_data_from_user_task_str); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fd_htab_lookup.c b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c new file mode 100644 index 000000000000..a4a9e1db626f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025. Huawei Technologies Co., Ltd */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 64); + __type(key, int); + __type(value, int); + __array(values, struct inner_map_type); +} outer_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 427b72954b87..76adf4a8f2da 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -7,8 +7,6 @@ #include "bpf_misc.h" #include "bpf_compiler.h" -#define unlikely(x) __builtin_expect(!!(x), 0) - static volatile int zero = 0; int my_pid; diff --git a/tools/testing/selftests/bpf/progs/linked_list_peek.c b/tools/testing/selftests/bpf/progs/linked_list_peek.c new file mode 100644 index 000000000000..264e81bfb287 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/linked_list_peek.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + struct bpf_list_node l; + int key; +}; + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) +private(A) struct bpf_spin_lock glock; +private(A) struct bpf_list_head ghead __contains(node_data, l); + +#define list_entry(ptr, type, member) container_of(ptr, type, member) +#define NR_NODES 16 + +int zero = 0; + +SEC("syscall") +__retval(0) +long list_peek(void *ctx) +{ + struct bpf_list_node *l_n; + struct node_data *n; + int i, err = 0; + + bpf_spin_lock(&glock); + l_n = bpf_list_front(&ghead); + bpf_spin_unlock(&glock); + if (l_n) + return __LINE__; + + bpf_spin_lock(&glock); + l_n = bpf_list_back(&ghead); + bpf_spin_unlock(&glock); + if (l_n) + return __LINE__; + + for (i = zero; i < NR_NODES && can_loop; i++) { + n = bpf_obj_new(typeof(*n)); + if (!n) + return __LINE__; + n->key = i; + bpf_spin_lock(&glock); + bpf_list_push_back(&ghead, &n->l); + bpf_spin_unlock(&glock); + } + + bpf_spin_lock(&glock); + + l_n = bpf_list_front(&ghead); + if (!l_n) { + err = __LINE__; + goto done; + } + + n = list_entry(l_n, struct node_data, l); + if (n->key != 0) { + err = __LINE__; + goto done; + } + + l_n = bpf_list_back(&ghead); + if (!l_n) { + err = __LINE__; + goto done; + } + + n = list_entry(l_n, struct node_data, l); + if (n->key != NR_NODES - 1) { + err = __LINE__; + goto done; + } + +done: + bpf_spin_unlock(&glock); + return err; +} + +#define TEST_FB(op, dolock) \ +SEC("syscall") \ +__failure __msg(MSG) \ +long test_##op##_spinlock_##dolock(void *ctx) \ +{ \ + struct bpf_list_node *l_n; \ + __u64 jiffies = 0; \ + \ + if (dolock) \ + bpf_spin_lock(&glock); \ + l_n = bpf_list_##op(&ghead); \ + if (l_n) \ + jiffies = bpf_jiffies64(); \ + if (dolock) \ + bpf_spin_unlock(&glock); \ + \ + return !!jiffies; \ +} + +#define MSG "call bpf_list_{{(front|back).+}}; R0{{(_w)?}}=ptr_or_null_node_data(id={{[0-9]+}},non_own_ref" +TEST_FB(front, true) +TEST_FB(back, true) +#undef MSG + +#define MSG "bpf_spin_lock at off=0 must be held for bpf_list_head" +TEST_FB(front, false) +TEST_FB(back, false) +#undef MSG + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c index 1f1dd547e4ee..cfc1f48e0d28 100644 --- a/tools/testing/selftests/bpf/progs/prepare.c +++ b/tools/testing/selftests/bpf/progs/prepare.c @@ -2,7 +2,6 @@ /* Copyright (c) 2025 Meta */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> -//#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c index 5927054b6dd9..efa416f53968 100644 --- a/tools/testing/selftests/bpf/progs/raw_tp_null.c +++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c @@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL"; int tid; int i; -SEC("tp_btf/bpf_testmod_test_raw_tp_null") +SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp") int BPF_PROG(test_raw_tp_null, struct sk_buff *skb) { struct task_struct *task = bpf_get_current_task_btf(); diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c index 38d669957bf1..0d58114a4955 100644 --- a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c +++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c @@ -8,7 +8,7 @@ char _license[] SEC("license") = "GPL"; /* Ensure module parameter has PTR_MAYBE_NULL */ -SEC("tp_btf/bpf_testmod_test_raw_tp_null") +SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp") __failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) { asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c index dbd5eee8e25e..4acb6af2dfe3 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_fail.c +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -69,11 +69,11 @@ long rbtree_api_nolock_first(void *ctx) } SEC("?tc") -__failure __msg("rbtree_remove node input must be non-owning ref") +__retval(0) long rbtree_api_remove_unadded_node(void *ctx) { struct node_data *n, *m; - struct bpf_rb_node *res; + struct bpf_rb_node *res_n, *res_m; n = bpf_obj_new(typeof(*n)); if (!n) @@ -88,19 +88,20 @@ long rbtree_api_remove_unadded_node(void *ctx) bpf_spin_lock(&glock); bpf_rbtree_add(&groot, &n->node, less); - /* This remove should pass verifier */ - res = bpf_rbtree_remove(&groot, &n->node); - n = container_of(res, struct node_data, node); + res_n = bpf_rbtree_remove(&groot, &n->node); - /* This remove shouldn't, m isn't in an rbtree */ - res = bpf_rbtree_remove(&groot, &m->node); - m = container_of(res, struct node_data, node); + res_m = bpf_rbtree_remove(&groot, &m->node); bpf_spin_unlock(&glock); - if (n) - bpf_obj_drop(n); - if (m) - bpf_obj_drop(m); + bpf_obj_drop(m); + if (res_n) + bpf_obj_drop(container_of(res_n, struct node_data, node)); + if (res_m) { + bpf_obj_drop(container_of(res_m, struct node_data, node)); + /* m was not added to the rbtree */ + return 2; + } + return 0; } @@ -178,7 +179,7 @@ err_out: } SEC("?tc") -__failure __msg("rbtree_remove node input must be non-owning ref") +__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer") long rbtree_api_add_release_unlock_escape(void *ctx) { struct node_data *n; @@ -202,7 +203,7 @@ long rbtree_api_add_release_unlock_escape(void *ctx) } SEC("?tc") -__failure __msg("rbtree_remove node input must be non-owning ref") +__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer") long rbtree_api_first_release_unlock_escape(void *ctx) { struct bpf_rb_node *res; diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c new file mode 100644 index 000000000000..098ef970fac1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rbtree_search.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + struct bpf_refcount ref; + struct bpf_rb_node r0; + struct bpf_rb_node r1; + int key0; + int key1; +}; + +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) +private(A) struct bpf_spin_lock glock0; +private(A) struct bpf_rb_root groot0 __contains(node_data, r0); + +private(B) struct bpf_spin_lock glock1; +private(B) struct bpf_rb_root groot1 __contains(node_data, r1); + +#define rb_entry(ptr, type, member) container_of(ptr, type, member) +#define NR_NODES 16 + +int zero = 0; + +static bool less0(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = rb_entry(a, struct node_data, r0); + node_b = rb_entry(b, struct node_data, r0); + + return node_a->key0 < node_b->key0; +} + +static bool less1(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = rb_entry(a, struct node_data, r1); + node_b = rb_entry(b, struct node_data, r1); + + return node_a->key1 < node_b->key1; +} + +SEC("syscall") +__retval(0) +long rbtree_search(void *ctx) +{ + struct bpf_rb_node *rb_n, *rb_m, *gc_ns[NR_NODES]; + long lookup_key = NR_NODES / 2; + struct node_data *n, *m; + int i, nr_gc = 0; + + for (i = zero; i < NR_NODES && can_loop; i++) { + n = bpf_obj_new(typeof(*n)); + if (!n) + return __LINE__; + + m = bpf_refcount_acquire(n); + + n->key0 = i; + m->key1 = i; + + bpf_spin_lock(&glock0); + bpf_rbtree_add(&groot0, &n->r0, less0); + bpf_spin_unlock(&glock0); + + bpf_spin_lock(&glock1); + bpf_rbtree_add(&groot1, &m->r1, less1); + bpf_spin_unlock(&glock1); + } + + n = NULL; + bpf_spin_lock(&glock0); + rb_n = bpf_rbtree_root(&groot0); + while (can_loop) { + if (!rb_n) { + bpf_spin_unlock(&glock0); + return __LINE__; + } + + n = rb_entry(rb_n, struct node_data, r0); + if (lookup_key == n->key0) + break; + if (nr_gc < NR_NODES) + gc_ns[nr_gc++] = rb_n; + if (lookup_key < n->key0) + rb_n = bpf_rbtree_left(&groot0, rb_n); + else + rb_n = bpf_rbtree_right(&groot0, rb_n); + } + + if (!n || lookup_key != n->key0) { + bpf_spin_unlock(&glock0); + return __LINE__; + } + + for (i = 0; i < nr_gc; i++) { + rb_n = gc_ns[i]; + gc_ns[i] = bpf_rbtree_remove(&groot0, rb_n); + } + + m = bpf_refcount_acquire(n); + bpf_spin_unlock(&glock0); + + for (i = 0; i < nr_gc; i++) { + rb_n = gc_ns[i]; + if (rb_n) { + n = rb_entry(rb_n, struct node_data, r0); + bpf_obj_drop(n); + } + } + + if (!m) + return __LINE__; + + bpf_spin_lock(&glock1); + rb_m = bpf_rbtree_remove(&groot1, &m->r1); + bpf_spin_unlock(&glock1); + bpf_obj_drop(m); + if (!rb_m) + return __LINE__; + bpf_obj_drop(rb_entry(rb_m, struct node_data, r1)); + + return 0; +} + +#define TEST_ROOT(dolock) \ +SEC("syscall") \ +__failure __msg(MSG) \ +long test_root_spinlock_##dolock(void *ctx) \ +{ \ + struct bpf_rb_node *rb_n; \ + __u64 jiffies = 0; \ + \ + if (dolock) \ + bpf_spin_lock(&glock0); \ + rb_n = bpf_rbtree_root(&groot0); \ + if (rb_n) \ + jiffies = bpf_jiffies64(); \ + if (dolock) \ + bpf_spin_unlock(&glock0); \ + \ + return !!jiffies; \ +} + +#define TEST_LR(op, dolock) \ +SEC("syscall") \ +__failure __msg(MSG) \ +long test_##op##_spinlock_##dolock(void *ctx) \ +{ \ + struct bpf_rb_node *rb_n; \ + struct node_data *n; \ + __u64 jiffies = 0; \ + \ + bpf_spin_lock(&glock0); \ + rb_n = bpf_rbtree_root(&groot0); \ + if (!rb_n) { \ + bpf_spin_unlock(&glock0); \ + return 1; \ + } \ + n = rb_entry(rb_n, struct node_data, r0); \ + n = bpf_refcount_acquire(n); \ + bpf_spin_unlock(&glock0); \ + if (!n) \ + return 1; \ + \ + if (dolock) \ + bpf_spin_lock(&glock0); \ + rb_n = bpf_rbtree_##op(&groot0, &n->r0); \ + if (rb_n) \ + jiffies = bpf_jiffies64(); \ + if (dolock) \ + bpf_spin_unlock(&glock0); \ + \ + return !!jiffies; \ +} + +/* + * Use a spearate MSG macro instead of passing to TEST_XXX(..., MSG) + * to ensure the message itself is not in the bpf prog lineinfo + * which the verifier includes in its log. + * Otherwise, the test_loader will incorrectly match the prog lineinfo + * instead of the log generated by the verifier. + */ +#define MSG "call bpf_rbtree_root{{.+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref" +TEST_ROOT(true) +#undef MSG +#define MSG "call bpf_rbtree_{{(left|right).+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref" +TEST_LR(left, true) +TEST_LR(right, true) +#undef MSG + +#define MSG "bpf_spin_lock at off=0 must be held for bpf_rb_root" +TEST_ROOT(false) +TEST_LR(left, false) +TEST_LR(right, false) +#undef MSG + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c index 9adb5ba4cd4d..90f5656c3991 100644 --- a/tools/testing/selftests/bpf/progs/set_global_vars.c +++ b/tools/testing/selftests/bpf/progs/set_global_vars.c @@ -24,6 +24,44 @@ const volatile enum Enumu64 var_eb = EB1; const volatile enum Enums64 var_ec = EC1; const volatile bool var_b = false; +struct Struct { + int:16; + __u16 filler; + struct { + const __u16 filler2; + }; + struct Struct2 { + __u16 filler; + volatile struct { + const int:1; + union { + const volatile __u8 var_u8; + const volatile __s16 filler3; + const int:1; + } u; + }; + } struct2; +}; + +const volatile __u32 stru = 0; /* same prefix as below */ +const volatile struct Struct struct1 = {.struct2 = {.u = {.var_u8 = 1}}}; + +union Union { + __u16 var_u16; + struct Struct3 { + struct { + __u8 var_u8_l; + }; + struct { + struct { + __u8 var_u8_h; + }; + }; + } struct3; +}; + +const volatile union Union union1 = {.var_u16 = -1}; + char arr[4] = {0}; SEC("socket") @@ -43,5 +81,8 @@ int test_set_globals(void *ctx) a = var_eb; a = var_ec; a = var_b; + a = struct1.struct2.u.var_u8; + a = union1.var_u16; + return a; } diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c index 0107a24b7522..d330b1511979 100644 --- a/tools/testing/selftests/bpf/progs/setget_sockopt.c +++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c @@ -83,6 +83,14 @@ struct loop_ctx { struct sock *sk; }; +static bool sk_is_tcp(struct sock *sk) +{ + return (sk->__sk_common.skc_family == AF_INET || + sk->__sk_common.skc_family == AF_INET6) && + sk->sk_type == SOCK_STREAM && + sk->sk_protocol == IPPROTO_TCP; +} + static int bpf_test_sockopt_flip(void *ctx, struct sock *sk, const struct sockopt_test *t, int level) @@ -91,6 +99,9 @@ static int bpf_test_sockopt_flip(void *ctx, struct sock *sk, opt = t->opt; + if (opt == SO_TXREHASH && !sk_is_tcp(sk)) + return 0; + if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old))) return 1; /* kernel initialized txrehash to 255 */ diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c index 96531b0d9d55..8f483337e103 100644 --- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -17,6 +17,12 @@ static bool ipv6_addr_loopback(const struct in6_addr *a) a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0; } +static bool ipv4_addr_loopback(__be32 a) +{ + return a == bpf_ntohl(0x7f000001); +} + +volatile const unsigned int sf; volatile const __u16 ports[2]; unsigned int bucket[2]; @@ -26,16 +32,20 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) struct sock *sk = (struct sock *)ctx->sk_common; struct inet_hashinfo *hinfo; unsigned int hash; + __u64 sock_cookie; struct net *net; int idx; if (!sk) return 0; + sock_cookie = bpf_get_socket_cookie(sk); sk = bpf_core_cast(sk, struct sock); - if (sk->sk_family != AF_INET6 || + if (sk->sk_family != sf || sk->sk_state != TCP_LISTEN || - !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) + sk->sk_family == AF_INET6 ? + !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) : + !ipv4_addr_loopback(sk->sk_rcv_saddr)) return 0; if (sk->sk_num == ports[0]) @@ -52,6 +62,7 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) hinfo = net->ipv4.tcp_death_row.hashinfo; bucket[idx] = hash & hinfo->lhash2_mask; bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx)); + bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie)); return 0; } @@ -63,14 +74,18 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx) { struct sock *sk = (struct sock *)ctx->udp_sk; struct udp_table *udptable; + __u64 sock_cookie; int idx; if (!sk) return 0; + sock_cookie = bpf_get_socket_cookie(sk); sk = bpf_core_cast(sk, struct sock); - if (sk->sk_family != AF_INET6 || - !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) + if (sk->sk_family != sf || + sk->sk_family == AF_INET6 ? + !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) : + !ipv4_addr_loopback(sk->sk_rcv_saddr)) return 0; if (sk->sk_num == ports[0]) @@ -84,6 +99,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx) udptable = sk->sk_net.net->ipv4.udp_table; bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask; bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx)); + bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie)); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_btf_ext.c b/tools/testing/selftests/bpf/progs/test_btf_ext.c new file mode 100644 index 000000000000..cdf20331db04 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_btf_ext.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Meta Platforms Inc. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +__noinline static void f0(void) +{ + __u64 a = 1; + + __sink(a); +} + +SEC("xdp") +__u64 global_func(struct xdp_md *xdp) +{ + f0(); + return XDP_DROP; +} diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index 7f3c233943b3..03d7f89787a1 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -19,7 +19,7 @@ int BPF_PROG(handle_raw_tp, __u32 raw_tp_bare_write_sz = 0; -SEC("raw_tp/bpf_testmod_test_write_bare") +SEC("raw_tp/bpf_testmod_test_write_bare_tp") int BPF_PROG(handle_raw_tp_bare, struct task_struct *task, struct bpf_testmod_test_write_ctx *write_ctx) { @@ -31,7 +31,7 @@ int raw_tp_writable_bare_in_val = 0; int raw_tp_writable_bare_early_ret = 0; int raw_tp_writable_bare_out_val = 0; -SEC("raw_tp.w/bpf_testmod_test_writable_bare") +SEC("raw_tp.w/bpf_testmod_test_writable_bare_tp") int BPF_PROG(handle_raw_tp_writable_bare, struct bpf_testmod_test_writable_ctx *writable) { diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c new file mode 100644 index 000000000000..8bdb9987c0c7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +int cork_byte; +int push_start; +int push_end; +int apply_bytes; + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map SEC(".maps"); + +SEC("sk_msg") +int prog_sk_policy(struct sk_msg_md *msg) +{ + if (cork_byte > 0) + bpf_msg_cork_bytes(msg, cork_byte); + if (push_start > 0 && push_end > 0) + bpf_msg_push_data(msg, push_start, push_end, 0); + + return SK_PASS; +} + +SEC("sk_msg") +int prog_sk_policy_redir(struct sk_msg_md *msg) +{ + int two = 2; + + bpf_msg_apply_bytes(msg, apply_bytes); + return bpf_msg_redirect_map(msg, &sock_map, two, 0); +} diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_redir.c b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c new file mode 100644 index 000000000000..34d9f4f2f0a2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC(".maps") struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} nop_map, sock_map; + +SEC(".maps") struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} nop_hash, sock_hash; + +SEC(".maps") struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, unsigned int); +} verdict_map; + +/* Set by user space */ +int redirect_type; +int redirect_flags; + +#define redirect_map(__data) \ + _Generic((__data), \ + struct __sk_buff * : bpf_sk_redirect_map, \ + struct sk_msg_md * : bpf_msg_redirect_map \ + )((__data), &sock_map, (__u32){0}, redirect_flags) + +#define redirect_hash(__data) \ + _Generic((__data), \ + struct __sk_buff * : bpf_sk_redirect_hash, \ + struct sk_msg_md * : bpf_msg_redirect_hash \ + )((__data), &sock_hash, &(__u32){0}, redirect_flags) + +#define DEFINE_PROG(__type, __param) \ +SEC("sk_" XSTR(__type)) \ +int prog_ ## __type ## _verdict(__param data) \ +{ \ + unsigned int *count; \ + int verdict; \ + \ + if (redirect_type == BPF_MAP_TYPE_SOCKMAP) \ + verdict = redirect_map(data); \ + else if (redirect_type == BPF_MAP_TYPE_SOCKHASH) \ + verdict = redirect_hash(data); \ + else \ + verdict = redirect_type - __MAX_BPF_MAP_TYPE; \ + \ + count = bpf_map_lookup_elem(&verdict_map, &verdict); \ + if (count) \ + (*count)++; \ + \ + return verdict; \ +} + +DEFINE_PROG(skb, struct __sk_buff *); +DEFINE_PROG(msg, struct sk_msg_md *); + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c index eb5cca1fce16..7d5293de1952 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c @@ -294,7 +294,9 @@ static int tcp_validate_sysctl(struct tcp_syncookie *ctx) (ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6)) goto err; - if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7) + if (!ctx->attrs.wscale_ok || + !ctx->attrs.snd_wscale || + ctx->attrs.snd_wscale >= BPF_SYNCOOKIE_WSCALE_MASK) goto err; if (!ctx->attrs.tstamp_ok) diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c index 39ff06f2c834..cf0547a613ff 100644 --- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c +++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c @@ -6,14 +6,14 @@ #include "../test_kmods/bpf_testmod.h" #include "bpf_misc.h" -SEC("tp_btf/bpf_testmod_test_nullable_bare") +SEC("tp_btf/bpf_testmod_test_nullable_bare_tp") __failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx) { return nullable_ctx->len; } -SEC("tp_btf/bpf_testmod_test_nullable_bare") +SEC("tp_btf/bpf_testmod_test_nullable_bare_tp") int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx) { if (nullable_ctx) diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c new file mode 100644 index 000000000000..35e2cdc00a01 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#if __clang_major__ >= 21 && 0 +SEC("socket") +__description("__builtin_trap with simple c code") +__failure __msg("unexpected __bpf_trap() due to uninitialized variable?") +void bpf_builtin_trap_with_simple_c(void) +{ + __builtin_trap(); +} +#endif + +SEC("socket") +__description("__bpf_trap with simple c code") +__failure __msg("unexpected __bpf_trap() due to uninitialized variable?") +void bpf_trap_with_simple_c(void) +{ + __bpf_trap(); +} + +SEC("socket") +__description("__bpf_trap as the second-from-last insn") +__failure __msg("unexpected __bpf_trap() due to uninitialized variable?") +__naked void bpf_trap_at_func_end(void) +{ + asm volatile ( + "r0 = 0;" + "call %[__bpf_trap];" + "exit;" + : + : __imm(__bpf_trap) + : __clobber_all); +} + +SEC("socket") +__description("dead code __bpf_trap in the middle of code") +__success +__naked void dead_bpf_trap_in_middle(void) +{ + asm volatile ( + "r0 = 0;" + "if r0 == 0 goto +1;" + "call %[__bpf_trap];" + "r0 = 2;" + "exit;" + : + : __imm(__bpf_trap) + : __clobber_all); +} + +SEC("socket") +__description("reachable __bpf_trap in the middle of code") +__failure __msg("unexpected __bpf_trap() due to uninitialized variable?") +__naked void live_bpf_trap_in_middle(void) +{ + asm volatile ( + "r0 = 0;" + "if r0 == 1 goto +1;" + "call %[__bpf_trap];" + "r0 = 2;" + "exit;" + : + : __imm(__bpf_trap) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c index 28b939572cda..03942cec07e5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c @@ -65,4 +65,16 @@ __naked void ctx_access_u32_pointer_reject_8(void) " ::: __clobber_all); } +SEC("fentry/bpf_fentry_test10") +__description("btf_ctx_access const void pointer accept") +__success __retval(0) +__naked void ctx_access_const_void_pointer_accept(void) +{ + asm volatile (" \ + r2 = *(u64 *)(r1 + 0); /* load 1st argument value (const void pointer) */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c index 77698d5a19e4..74f4f19c10b8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c +++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c @@ -10,65 +10,81 @@ SEC("socket") __description("load-acquire, 8-bit") -__success __success_unpriv __retval(0x12) +__success __success_unpriv __retval(0) __naked void load_acquire_8(void) { asm volatile ( - "w1 = 0x12;" + "r0 = 0;" + "w1 = 0xfe;" "*(u8 *)(r10 - 1) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r10 - 1)); + ".8byte %[load_acquire_insn];" // w2 = load_acquire((u8 *)(r10 - 1)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -1)) + BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -1)) : __clobber_all); } SEC("socket") __description("load-acquire, 16-bit") -__success __success_unpriv __retval(0x1234) +__success __success_unpriv __retval(0) __naked void load_acquire_16(void) { asm volatile ( - "w1 = 0x1234;" + "r0 = 0;" + "w1 = 0xfedc;" "*(u16 *)(r10 - 2) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u16 *)(r10 - 2)); + ".8byte %[load_acquire_insn];" // w2 = load_acquire((u16 *)(r10 - 2)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -2)) + BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -2)) : __clobber_all); } SEC("socket") __description("load-acquire, 32-bit") -__success __success_unpriv __retval(0x12345678) +__success __success_unpriv __retval(0) __naked void load_acquire_32(void) { asm volatile ( - "w1 = 0x12345678;" + "r0 = 0;" + "w1 = 0xfedcba09;" "*(u32 *)(r10 - 4) = w1;" - ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 4)); + ".8byte %[load_acquire_insn];" // w2 = load_acquire((u32 *)(r10 - 4)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -4)) + BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -4)) : __clobber_all); } SEC("socket") __description("load-acquire, 64-bit") -__success __success_unpriv __retval(0x1234567890abcdef) +__success __success_unpriv __retval(0) __naked void load_acquire_64(void) { asm volatile ( - "r1 = 0x1234567890abcdef ll;" + "r0 = 0;" + "r1 = 0xfedcba0987654321 ll;" "*(u64 *)(r10 - 8) = r1;" - ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r10 - 8)); + ".8byte %[load_acquire_insn];" // r2 = load_acquire((u64 *)(r10 - 8)); + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(load_acquire_insn, - BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -8)) + BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8)) : __clobber_all); } diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 6662d4b39969..9fe5d255ee37 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -91,8 +91,7 @@ __naked int bpf_end_bswap(void) ::: __clobber_all); } -#if defined(ENABLE_ATOMICS_TESTS) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) +#ifdef CAN_USE_LOAD_ACQ_STORE_REL SEC("?raw_tp") __success __log_level(2) @@ -138,7 +137,7 @@ __naked int bpf_store_release(void) : __clobber_all); } -#endif /* load-acquire, store-release */ +#endif /* CAN_USE_LOAD_ACQ_STORE_REL */ #endif /* v4 instruction */ SEC("?raw_tp") @@ -179,4 +178,57 @@ __naked int state_loop_first_last_equal(void) ); } +__used __naked static void __bpf_cond_op_r10(void) +{ + asm volatile ( + "r2 = 2314885393468386424 ll;" + "goto +0;" + "if r2 <= r10 goto +3;" + "if r1 >= -1835016 goto +0;" + "if r2 <= 8 goto +0;" + "if r3 <= 0 goto +0;" + "exit;" + ::: __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("8: (bd) if r2 <= r10 goto pc+3") +__msg("9: (35) if r1 >= 0xffe3fff8 goto pc+0") +__msg("10: (b5) if r2 <= 0x8 goto pc+0") +__msg("mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0") +__msg("mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3") +__msg("mark_precise: frame1: regs=r2 stack= before 7: (05) goto pc+0") +__naked void bpf_cond_op_r10(void) +{ + asm volatile ( + "r3 = 0 ll;" + "call __bpf_cond_op_r10;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("3: (bf) r3 = r10") +__msg("4: (bd) if r3 <= r2 goto pc+1") +__msg("5: (b5) if r2 <= 0x8 goto pc+2") +__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 4: (bd) if r3 <= r2 goto pc+1") +__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10") +__naked void bpf_cond_op_not_r10(void) +{ + asm volatile ( + "r0 = 0;" + "r2 = 2314885393468386424 ll;" + "r3 = r10;" + "if r3 <= r2 goto +1;" + "if r2 <= 8 goto +2;" + "r0 = 2 ll;" + "exit;" + ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c index c0442d5bb049..72f1eb006074 100644 --- a/tools/testing/selftests/bpf/progs/verifier_store_release.c +++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c @@ -6,18 +6,21 @@ #include "../../../include/linux/filter.h" #include "bpf_misc.h" -#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) +#ifdef CAN_USE_LOAD_ACQ_STORE_REL SEC("socket") __description("store-release, 8-bit") -__success __success_unpriv __retval(0x12) +__success __success_unpriv __retval(0) __naked void store_release_8(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12;" ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1); - "w0 = *(u8 *)(r10 - 1);" + "w2 = *(u8 *)(r10 - 1);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -27,13 +30,17 @@ __naked void store_release_8(void) SEC("socket") __description("store-release, 16-bit") -__success __success_unpriv __retval(0x1234) +__success __success_unpriv __retval(0) __naked void store_release_16(void) { asm volatile ( + "r0 = 0;" "w1 = 0x1234;" ".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1); - "w0 = *(u16 *)(r10 - 2);" + "w2 = *(u16 *)(r10 - 2);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -43,13 +50,17 @@ __naked void store_release_16(void) SEC("socket") __description("store-release, 32-bit") -__success __success_unpriv __retval(0x12345678) +__success __success_unpriv __retval(0) __naked void store_release_32(void) { asm volatile ( + "r0 = 0;" "w1 = 0x12345678;" ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1); - "w0 = *(u32 *)(r10 - 4);" + "w2 = *(u32 *)(r10 - 4);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -59,13 +70,17 @@ __naked void store_release_32(void) SEC("socket") __description("store-release, 64-bit") -__success __success_unpriv __retval(0x1234567890abcdef) +__success __success_unpriv __retval(0) __naked void store_release_64(void) { asm volatile ( + "r0 = 0;" "r1 = 0x1234567890abcdef ll;" ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1); - "r0 = *(u64 *)(r10 - 8);" + "r2 = *(u64 *)(r10 - 8);" + "if r2 == r1 goto 1f;" + "r0 = 1;" +"1:" "exit;" : : __imm_insn(store_release_insn, @@ -271,7 +286,7 @@ __naked void store_release_with_invalid_reg(void) : __clobber_all); } -#else +#else /* CAN_USE_LOAD_ACQ_STORE_REL */ SEC("socket") __description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test") @@ -281,6 +296,6 @@ int dummy_test(void) return 0; } -#endif +#endif /* CAN_USE_LOAD_ACQ_STORE_REL */ char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c index 31ca229bb3c0..09bb8a038d52 100644 --- a/tools/testing/selftests/bpf/progs/xdp_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c @@ -19,6 +19,13 @@ struct { __type(value, __u32); } prog_arr SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 1); +} dev_map SEC(".maps"); + extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, @@ -95,4 +102,10 @@ int rx(struct xdp_md *ctx) return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); } +SEC("xdp") +int redirect(struct xdp_md *ctx) +{ + return bpf_redirect_map(&dev_map, ctx->rx_queue_index, XDP_PASS); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c index ccde6a4c6319..683306db8594 100644 --- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c +++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c @@ -4,6 +4,8 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/errno.h> #include "xsk_xdp_common.h" struct { @@ -14,6 +16,7 @@ struct { } xsk SEC(".maps"); static unsigned int idx; +int adjust_value = 0; int count = 0; SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp) @@ -70,4 +73,51 @@ SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp) return bpf_redirect_map(&xsk, idx, XDP_DROP); } +SEC("xdp.frags") int xsk_xdp_adjust_tail(struct xdp_md *xdp) +{ + __u32 buff_len, curr_buff_len; + int ret; + + buff_len = bpf_xdp_get_buff_len(xdp); + if (buff_len == 0) + return XDP_DROP; + + ret = bpf_xdp_adjust_tail(xdp, adjust_value); + if (ret < 0) { + /* Handle unsupported cases */ + if (ret == -EOPNOTSUPP) { + /* Set adjust_value to -EOPNOTSUPP to indicate to userspace that this case + * is unsupported + */ + adjust_value = -EOPNOTSUPP; + return bpf_redirect_map(&xsk, 0, XDP_DROP); + } + + return XDP_DROP; + } + + curr_buff_len = bpf_xdp_get_buff_len(xdp); + if (curr_buff_len != buff_len + adjust_value) + return XDP_DROP; + + if (curr_buff_len > buff_len) { + __u32 *pkt_data = (void *)(long)xdp->data; + __u32 len, words_to_end, seq_num; + + len = curr_buff_len - PKT_HDR_ALIGN; + words_to_end = len / sizeof(*pkt_data) - 1; + seq_num = words_to_end; + + /* Convert sequence number to network byte order. Store this in the last 4 bytes of + * the packet. Use 'adjust_value' to determine the position at the end of the + * packet for storing the sequence number. + */ + seq_num = __constant_htonl(words_to_end); + bpf_xdp_store_bytes(xdp, curr_buff_len - sizeof(seq_num), &seq_num, + sizeof(seq_num)); + } + + return bpf_redirect_map(&xsk, 0, XDP_DROP); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index 3220f1d28697..e6c248e3ae54 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -134,6 +134,10 @@ bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) { return bpf_testmod_test_struct_arg_result; } +__weak noinline void bpf_testmod_looooooooooooooooooooooooooooooong_name(void) +{ +} + __bpf_kfunc void bpf_testmod_test_mod_kfunc(int i) { @@ -413,7 +417,7 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); - (void)trace_bpf_testmod_test_raw_tp_null(NULL); + (void)trace_bpf_testmod_test_raw_tp_null_tp(NULL); bpf_testmod_test_struct_ops3(); @@ -431,14 +435,14 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, if (bpf_testmod_loop_test(101) > 100) trace_bpf_testmod_test_read(current, &ctx); - trace_bpf_testmod_test_nullable_bare(NULL); + trace_bpf_testmod_test_nullable_bare_tp(NULL); /* Magic number to enable writable tp */ if (len == 64) { struct bpf_testmod_test_writable_ctx writable = { .val = 1024, }; - trace_bpf_testmod_test_writable_bare(&writable); + trace_bpf_testmod_test_writable_bare_tp(&writable); if (writable.early_ret) return snprintf(buf, len, "%d\n", writable.val); } @@ -470,7 +474,7 @@ bpf_testmod_test_write(struct file *file, struct kobject *kobj, .len = len, }; - trace_bpf_testmod_test_write_bare(current, &ctx); + trace_bpf_testmod_test_write_bare_tp(current, &ctx); return -EIO; /* always fail */ } @@ -1340,7 +1344,7 @@ static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a)); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2); *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0); - *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id), + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id); *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8); *insn++ = prog->insnsi[0]; @@ -1379,7 +1383,7 @@ static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struc *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a)); *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2); *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0); - *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id), + *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id); *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6); *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2); *insn++ = BPF_EXIT_INSN(); diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 49f2fc61061f..9551d8d5f8f9 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -1042,6 +1042,14 @@ void run_subtest(struct test_loader *tester, emit_verifier_log(tester->log_buf, false /*force*/); validate_msgs(tester->log_buf, &subspec->expect_msgs, emit_verifier_log); + /* Restore capabilities because the kernel will silently ignore requests + * for program info (such as xlated program text) if we are not + * bpf-capable. Also, for some reason test_verifier executes programs + * with all capabilities restored. Do the same here. + */ + if (restore_capabilities(&caps)) + goto tobj_cleanup; + if (subspec->expect_xlated.cnt) { err = get_xlated_program_text(bpf_program__fd(tprog), tester->log_buf, tester->log_buf_sz); @@ -1067,12 +1075,6 @@ void run_subtest(struct test_loader *tester, } if (should_do_test_run(spec, subspec)) { - /* For some reason test_verifier executes programs - * with all capabilities restored. Do the same here. - */ - if (restore_capabilities(&caps)) - goto tobj_cleanup; - /* Do bpf_map__attach_struct_ops() for each struct_ops map. * This should trigger bpf_struct_ops->reg callback on kernel side. */ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 447b68509d76..27db34ecf3f5 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -734,7 +734,7 @@ static __u32 btf_raw_types[] = { BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */ }; -static char bpf_vlog[UINT_MAX >> 8]; +static char bpf_vlog[UINT_MAX >> 5]; static int load_btf_spec(__u32 *types, int types_len, const char *strings, int strings_len) @@ -1559,10 +1559,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv, test->errstr_unpriv : test->errstr; opts.expected_attach_type = test->expected_attach_type; - if (verbose) - opts.log_level = verif_log_level | 4; /* force stats */ - else if (expected_ret == VERBOSE_ACCEPT) + if (expected_ret == VERBOSE_ACCEPT) opts.log_level = 2; + else if (verbose) + opts.log_level = verif_log_level | 4; /* force stats */ else opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; opts.prog_flags = pflags; diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index a18972ffdeb6..b2bb20b00952 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -1486,7 +1486,84 @@ static bool is_preset_supported(const struct btf_type *t) return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t); } -static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct btf_type *t, +const int btf_find_member(const struct btf *btf, + const struct btf_type *parent_type, + __u32 parent_offset, + const char *member_name, + int *member_tid, + __u32 *member_offset) +{ + int i; + + if (!btf_is_composite(parent_type)) + return -EINVAL; + + for (i = 0; i < btf_vlen(parent_type); ++i) { + const struct btf_member *member; + const struct btf_type *member_type; + int tid; + + member = btf_members(parent_type) + i; + tid = btf__resolve_type(btf, member->type); + if (tid < 0) + return -EINVAL; + + member_type = btf__type_by_id(btf, tid); + if (member->name_off) { + const char *name = btf__name_by_offset(btf, member->name_off); + + if (strcmp(member_name, name) == 0) { + if (btf_member_bitfield_size(parent_type, i) != 0) { + fprintf(stderr, "Bitfield presets are not supported %s\n", + name); + return -EINVAL; + } + *member_offset = parent_offset + member->offset; + *member_tid = tid; + return 0; + } + } else if (btf_is_composite(member_type)) { + int err; + + err = btf_find_member(btf, member_type, parent_offset + member->offset, + member_name, member_tid, member_offset); + if (!err) + return 0; + } + } + + return -EINVAL; +} + +static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t, + struct btf_var_secinfo *sinfo, const char *var) +{ + char expr[256], *saveptr; + const struct btf_type *base_type, *member_type; + int err, member_tid; + char *name; + __u32 member_offset = 0; + + base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); + snprintf(expr, sizeof(expr), "%s", var); + strtok_r(expr, ".", &saveptr); + + while ((name = strtok_r(NULL, ".", &saveptr))) { + err = btf_find_member(btf, base_type, 0, name, &member_tid, &member_offset); + if (err) { + fprintf(stderr, "Could not find member %s for variable %s\n", name, var); + return err; + } + member_type = btf__type_by_id(btf, member_tid); + sinfo->offset += member_offset / 8; + sinfo->size = member_type->size; + sinfo->type = member_tid; + base_type = member_type; + } + return 0; +} + +static int set_global_var(struct bpf_object *obj, struct btf *btf, struct bpf_map *map, struct btf_var_secinfo *sinfo, struct var_preset *preset) { @@ -1495,9 +1572,9 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct long long value = preset->ivalue; size_t size; - base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); + base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type)); if (!base_type) { - fprintf(stderr, "Failed to resolve type %d\n", t->type); + fprintf(stderr, "Failed to resolve type %d\n", sinfo->type); return -EINVAL; } if (!is_preset_supported(base_type)) { @@ -1530,7 +1607,7 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, const struct if (value >= max_val || value < -max_val) { fprintf(stderr, "Variable %s value %lld is out of range [%lld; %lld]\n", - btf__name_by_offset(btf, t->name_off), value, + btf__name_by_offset(btf, base_type->name_off), value, is_signed ? -max_val : 0, max_val - 1); return -EINVAL; } @@ -1583,14 +1660,20 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i for (j = 0; j < n; ++j, ++sinfo) { const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); const char *var_name; + int var_len; if (!btf_is_var(var_type)) continue; var_name = btf__name_by_offset(btf, var_type->name_off); + var_len = strlen(var_name); for (k = 0; k < npresets; ++k) { - if (strcmp(var_name, presets[k].name) != 0) + struct btf_var_secinfo tmp_sinfo; + + if (strncmp(var_name, presets[k].name, var_len) != 0 || + (presets[k].name[var_len] != '\0' && + presets[k].name[var_len] != '.')) continue; if (presets[k].applied) { @@ -1598,13 +1681,17 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i var_name); return -EINVAL; } + tmp_sinfo = *sinfo; + err = adjust_var_secinfo(btf, var_type, + &tmp_sinfo, presets[k].name); + if (err) + return err; - err = set_global_var(obj, btf, var_type, map, sinfo, presets + k); + err = set_global_var(obj, btf, map, &tmp_sinfo, presets + k); if (err) return err; presets[k].applied = true; - break; } } } diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h index 5a6f36f07383..45810ff552da 100644 --- a/tools/testing/selftests/bpf/xsk_xdp_common.h +++ b/tools/testing/selftests/bpf/xsk_xdp_common.h @@ -4,6 +4,7 @@ #define XSK_XDP_COMMON_H_ #define MAX_SOCKETS 2 +#define PKT_HDR_ALIGN (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ struct xdp_info { __u64 count; diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 11f047b8af75..0ced4026ee44 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -524,6 +524,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, test->nb_sockets = 1; test->fail = false; test->set_ring = false; + test->adjust_tail = false; + test->adjust_tail_support = false; test->mtu = MAX_ETH_PKT_SIZE; test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog; test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk; @@ -757,14 +759,15 @@ static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream) return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len); } -static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) +static void pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len) { - struct pkt_stream *pkt_stream; + ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); +} - pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); - test->ifobj_tx->xsk->pkt_stream = pkt_stream; - pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); - test->ifobj_rx->xsk->pkt_stream = pkt_stream; +static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) +{ + pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len); + pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len); } static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, @@ -991,6 +994,31 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) return true; } +static bool is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx) +{ + struct bpf_map *data_map; + int adjust_value = 0; + int key = 0; + int ret; + + data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss"); + if (!data_map || !bpf_map__is_internal(data_map)) { + ksft_print_msg("Error: could not find bss section of XDP program\n"); + exit_with_error(errno); + } + + ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value); + if (ret) { + ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret); + exit_with_error(errno); + } + + /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail + * helper is not supported. Skip the adjust_tail test case in this scenario. + */ + return adjust_value != -EOPNOTSUPP; +} + static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb, u32 bytes_processed) { @@ -1767,8 +1795,13 @@ static void *worker_testapp_validate_rx(void *arg) if (!err && ifobject->validation_func) err = ifobject->validation_func(ifobject); - if (err) - report_failure(test); + + if (err) { + if (test->adjust_tail && !is_adjust_tail_supported(ifobject->xdp_progs)) + test->adjust_tail_support = false; + else + report_failure(test); + } pthread_exit(NULL); } @@ -2515,6 +2548,71 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test) return testapp_validate_traffic(test); } +static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail, + skel_tx->progs.xsk_xdp_adjust_tail, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + skel_rx->bss->adjust_value = adjust_value; + + return testapp_validate_traffic(test); +} + +static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len) +{ + int ret; + + test->adjust_tail_support = true; + test->adjust_tail = true; + test->total_steps = 1; + + pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len); + pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value); + + ret = testapp_xdp_adjust_tail(test, value); + if (ret) + return ret; + + if (!test->adjust_tail_support) { + ksft_test_result_skip("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n", + mode_string(test), busy_poll_string(test)); + return TEST_SKIP; + } + + return 0; +} + +static int testapp_adjust_tail_shrink(struct test_spec *test) +{ + /* Shrink by 4 bytes for testing purpose */ + return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2); +} + +static int testapp_adjust_tail_shrink_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + /* Shrink by the frag size */ + return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2); +} + +static int testapp_adjust_tail_grow(struct test_spec *test) +{ + /* Grow by 4 bytes for testing purpose */ + return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2); +} + +static int testapp_adjust_tail_grow_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */ + return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1, + XSK_UMEM__LARGE_FRAME_SIZE * 2); +} + static void run_pkt_test(struct test_spec *test) { int ret; @@ -2621,6 +2719,10 @@ static const struct test_spec tests[] = { {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags}, {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size}, {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size}, + {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink}, + {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb}, + {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow}, + {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb}, }; static void print_tests(void) diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index e46e823f6a1a..67fc44b2813b 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -173,6 +173,8 @@ struct test_spec { u16 nb_sockets; bool fail; bool set_ring; + bool adjust_tail; + bool adjust_tail_support; enum test_mode mode; char name[MAX_TEST_NAME_SIZE]; }; diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 137b2364a082..9984413be9f0 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -1,14 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 #include <fcntl.h> +#include <inttypes.h> #include <libgen.h> #include <linux/limits.h> #include <pthread.h> #include <string.h> +#include <sys/mount.h> #include <sys/resource.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> #include <unistd.h> #include "../kselftest_harness.h" +#include "../pidfd/pidfd.h" #define STACKDUMP_FILE "stack_values" #define STACKDUMP_SCRIPT "stackdump" @@ -35,6 +41,7 @@ static void crashing_child(void) FIXTURE(coredump) { char original_core_pattern[256]; + pid_t pid_coredump_server; }; FIXTURE_SETUP(coredump) @@ -44,6 +51,7 @@ FIXTURE_SETUP(coredump) char *dir; int ret; + self->pid_coredump_server = -ESRCH; file = fopen("/proc/sys/kernel/core_pattern", "r"); ASSERT_NE(NULL, file); @@ -61,10 +69,17 @@ FIXTURE_TEARDOWN(coredump) { const char *reason; FILE *file; - int ret; + int ret, status; unlink(STACKDUMP_FILE); + if (self->pid_coredump_server > 0) { + kill(self->pid_coredump_server, SIGTERM); + waitpid(self->pid_coredump_server, &status, 0); + } + unlink("/tmp/coredump.file"); + unlink("/tmp/coredump.socket"); + file = fopen("/proc/sys/kernel/core_pattern", "w"); if (!file) { reason = "Unable to open core_pattern"; @@ -89,14 +104,14 @@ fail: fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason); } -TEST_F(coredump, stackdump) +TEST_F_TIMEOUT(coredump, stackdump, 120) { struct sigaction action = {}; unsigned long long stack; char *test_dir, *line; size_t line_length; char buf[PATH_MAX]; - int ret, i; + int ret, i, status; FILE *file; pid_t pid; @@ -129,6 +144,10 @@ TEST_F(coredump, stackdump) /* * Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file */ + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + for (i = 0; i < 10; ++i) { file = fopen(STACKDUMP_FILE, "r"); if (file) @@ -138,14 +157,466 @@ TEST_F(coredump, stackdump) ASSERT_NE(file, NULL); /* Step 4: Make sure all stack pointer values are non-zero */ + line = NULL; for (i = 0; -1 != getline(&line, &line_length, file); ++i) { stack = strtoull(line, NULL, 10); ASSERT_NE(stack, 0); } + free(line); ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN); fclose(file); } +TEST_F(coredump, socket) +{ + int fd, pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + struct stat st; + char core_file[PATH_MAX]; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + ret = listen(fd_server, 1); + if (ret < 0) { + fprintf(stderr, "Failed to listen on coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + fprintf(stderr, "Failed to accept coredump socket connection\n"); + close(fd_server); + _exit(EXIT_FAILURE); + } + + fd_peer_pidfd_len = sizeof(fd_peer_pidfd); + ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, + &fd_peer_pidfd, &fd_peer_pidfd_len); + if (ret < 0) { + fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + _exit(EXIT_FAILURE); + } + + memset(&info, 0, sizeof(info)); + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); + if (ret < 0) { + fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.mask & PIDFD_INFO_COREDUMP)) { + fprintf(stderr, "Missing coredump information from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) { + fprintf(stderr, "Received connection from non-coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + fd_core_file = creat("/tmp/coredump.file", 0644); + if (fd_core_file < 0) { + fprintf(stderr, "Failed to create coredump file\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + for (;;) { + char buffer[4096]; + ssize_t bytes_read, bytes_write; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) { + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_FAILURE); + } + + if (bytes_read == 0) + break; + + bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_read != bytes_write) { + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_FAILURE); + } + } + + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + ASSERT_EQ(stat("/tmp/coredump.file", &st), 0); + ASSERT_GT(st.st_size, 0); + /* + * We should somehow validate the produced core file. + * For now just allow for visual inspection + */ + system("file /tmp/coredump.file"); +} + +TEST_F(coredump, socket_detect_userspace_client) +{ + int fd, pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + struct stat st; + char core_file[PATH_MAX]; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + ret = listen(fd_server, 1); + if (ret < 0) { + fprintf(stderr, "Failed to listen on coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + fprintf(stderr, "Failed to accept coredump socket connection\n"); + close(fd_server); + _exit(EXIT_FAILURE); + } + + fd_peer_pidfd_len = sizeof(fd_peer_pidfd); + ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, + &fd_peer_pidfd, &fd_peer_pidfd_len); + if (ret < 0) { + fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + _exit(EXIT_FAILURE); + } + + memset(&info, 0, sizeof(info)); + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); + if (ret < 0) { + fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.mask & PIDFD_INFO_COREDUMP)) { + fprintf(stderr, "Missing coredump information from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (info.coredump_mask & PIDFD_COREDUMPED) { + fprintf(stderr, "Received unexpected connection from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) { + int fd_socket; + ssize_t ret; + + fd_socket = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd_socket < 0) + _exit(EXIT_FAILURE); + + + ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) + _exit(EXIT_FAILURE); + + (void *)write(fd_socket, &(char){ 0 }, 1); + close(fd_socket); + _exit(EXIT_SUCCESS); + } + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + ASSERT_NE(stat("/tmp/coredump.file", &st), 0); + ASSERT_EQ(errno, ENOENT); +} + +TEST_F(coredump, socket_enoent) +{ + int pidfd, ret, status; + FILE *file; + pid_t pid; + char core_file[PATH_MAX]; + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); +} + +TEST_F(coredump, socket_no_listener) +{ + int pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh index e350c521b467..9927b654fb8f 100755 --- a/tools/testing/selftests/cpufreq/cpufreq.sh +++ b/tools/testing/selftests/cpufreq/cpufreq.sh @@ -52,7 +52,14 @@ read_cpufreq_files_in_dir() for file in $files; do if [ -f $1/$file ]; then printf "$file:" - cat $1/$file + #file is readable ? + local rfile=$(ls -l $1/$file | awk '$1 ~ /^.*r.*/ { print $NF; }') + + if [ ! -z $rfile ]; then + cat $1/$file + else + printf "$file is not readable\n" + fi else printf "\n" read_cpufreq_files_in_dir "$1/$file" @@ -83,10 +90,10 @@ update_cpufreq_files_in_dir() for file in $files; do if [ -f $1/$file ]; then - # file is writable ? - local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }') + # file is readable and writable ? + local rwfile=$(ls -l $1/$file | awk '$1 ~ /^.*rw.*/ { print $NF; }') - if [ ! -z $wfile ]; then + if [ ! -z $rwfile ]; then # scaling_setspeed is a special file and we # should skip updating it if [ $file != "scaling_setspeed" ]; then @@ -244,9 +251,10 @@ do_suspend() printf "Failed to suspend using RTC wake alarm\n" return 1 fi + else + echo $filename > $SYSFS/power/state fi - echo $filename > $SYSFS/power/state printf "Came out of $1\n" printf "Do basic tests after finishing $1 to verify cpufreq state\n\n" diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore index ec746f374e85..d634d8395d90 100644 --- a/tools/testing/selftests/drivers/net/.gitignore +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -xdp_helper +napi_id_helper diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 0c95bd944d56..be780bcb73a3 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -3,12 +3,14 @@ CFLAGS += $(KHDR_INCLUDES) TEST_INCLUDES := $(wildcard lib/py/*.py) \ $(wildcard lib/sh/*.sh) \ - ../../net/net_helper.sh \ ../../net/lib.sh \ -TEST_GEN_FILES := xdp_helper +TEST_GEN_FILES := \ + napi_id_helper \ +# end of TEST_GEN_FILES TEST_PROGS := \ + napi_id.py \ netcons_basic.sh \ netcons_fragmented_msg.sh \ netcons_overflow.sh \ diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 07cddb19ba35..df2c047ffa90 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -15,12 +15,11 @@ TEST_PROGS = \ iou-zcrx.py \ irq.py \ loopback.sh \ - nic_link_layer.py \ - nic_performance.py \ pp_alloc_fail.py \ rss_ctx.py \ rss_input_xfrm.py \ tso.py \ + xsk_reconfig.py \ # TEST_FILES := \ diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 3947e9157115..7947650210a0 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +from os import path from lib.py import ksft_run, ksft_exit from lib.py import ksft_eq, KsftSkipEx from lib.py import NetDrvEpEnv @@ -10,8 +11,7 @@ from lib.py import ksft_disruptive def require_devmem(cfg): if not hasattr(cfg, "_devmem_probed"): - port = rand_port() - probe_command = f"./ncdevmem -f {cfg.ifname}" + probe_command = f"{cfg.bin_local} -f {cfg.ifname}" cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0 cfg._devmem_probed = True @@ -21,22 +21,55 @@ def require_devmem(cfg): @ksft_disruptive def check_rx(cfg) -> None: - cfg.require_ipver("6") require_devmem(cfg) port = rand_port() - listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port}" + socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}" + listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7" + + with bkg(listen_cmd, exit_wait=True) as ncdevmem: + wait_port_listen(port) + cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \ + head -c 1K | {socat}", host=cfg.remote, shell=True) + + ksft_eq(ncdevmem.ret, 0) + + +@ksft_disruptive +def check_tx(cfg) -> None: + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" with bkg(listen_cmd) as socat: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.addr_v['6']}]:{port}", host=cfg.remote, shell=True) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port}", host=cfg.remote, shell=True) + + ksft_eq(socat.stdout.strip(), "hello\nworld") + + +@ksft_disruptive +def check_tx_chunks(cfg) -> None: + cfg.require_ipver("6") + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"socat -U - TCP6-LISTEN:{port}" + + with bkg(listen_cmd, exit_wait=True) as socat: + wait_port_listen(port) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") def main() -> None: with NetDrvEpEnv(__file__) as cfg: - ksft_run([check_rx], + cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem") + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + ksft_run([check_rx, check_tx, check_tx_chunks], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c index c26b4180eddd..62456df947bc 100644 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c @@ -37,8 +37,8 @@ #include <liburing.h> -#define PAGE_SIZE (4096) -#define AREA_SIZE (8192 * PAGE_SIZE) +static long page_size; +#define AREA_SIZE (8192 * page_size) #define SEND_SIZE (512 * 4096) #define min(a, b) \ ({ \ @@ -66,7 +66,7 @@ static int cfg_oneshot_recvs; static int cfg_send_size = SEND_SIZE; static struct sockaddr_in6 cfg_addr; -static char payload[SEND_SIZE] __attribute__((aligned(PAGE_SIZE))); +static char *payload; static void *area_ptr; static void *ring_ptr; static size_t ring_size; @@ -114,8 +114,8 @@ static inline size_t get_refill_ring_size(unsigned int rq_entries) ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe); /* add space for the header (head/tail/etc.) */ - ring_size += PAGE_SIZE; - return ALIGN_UP(ring_size, 4096); + ring_size += page_size; + return ALIGN_UP(ring_size, page_size); } static void setup_zcrx(struct io_uring *ring) @@ -219,7 +219,7 @@ static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe) connfd = cqe->res; if (cfg_oneshot) - add_recvzc_oneshot(ring, connfd, PAGE_SIZE); + add_recvzc_oneshot(ring, connfd, page_size); else add_recvzc(ring, connfd); } @@ -245,7 +245,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe) if (cfg_oneshot) { if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) { - add_recvzc_oneshot(ring, connfd, PAGE_SIZE); + add_recvzc_oneshot(ring, connfd, page_size); cfg_oneshot_recvs--; } } else if (!(cqe->flags & IORING_CQE_F_MORE)) { @@ -260,7 +260,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe) for (i = 0; i < n; i++) { if (*(data + i) != payload[(received + i)]) - error(1, 0, "payload mismatch at ", i); + error(1, 0, "payload mismatch at %d", i); } received += n; @@ -354,7 +354,7 @@ static void run_client(void) chunk = min_t(ssize_t, cfg_payload_len, to_send); res = send(fd, src, chunk, 0); if (res < 0) - error(1, 0, "send(): %d", sent); + error(1, 0, "send(): %zd", sent); sent += res; to_send -= res; } @@ -370,7 +370,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { - const int max_payload_len = sizeof(payload) - + const int max_payload_len = SEND_SIZE - sizeof(struct ipv6hdr) - sizeof(struct tcphdr) - 40 /* max tcp options */; @@ -443,6 +443,13 @@ int main(int argc, char **argv) const char *cfg_test = argv[argc - 1]; int i; + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 0) + return 1; + + if (posix_memalign((void **)&payload, page_size, SEND_SIZE)) + return 1; + parse_opts(argc, argv); for (i = 0; i < SEND_SIZE; i++) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 6a0378e06cab..9c03fd777f3d 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -5,13 +5,12 @@ import re from os import path from lib.py import ksft_run, ksft_exit from lib.py import NetDrvEpEnv -from lib.py import bkg, cmd, ethtool, wait_port_listen +from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen -def _get_rx_ring_entries(cfg): - output = ethtool(f"-g {cfg.ifname}", host=cfg.remote).stdout - values = re.findall(r'RX:\s+(\d+)', output) - return int(values[1]) +def _get_current_settings(cfg): + output = ethtool(f"-g {cfg.ifname}", json=True, host=cfg.remote)[0] + return (output['rx'], output['hds-thresh']) def _get_combined_channels(cfg): @@ -20,8 +19,21 @@ def _get_combined_channels(cfg): return int(values[1]) -def _set_flow_rule(cfg, chan): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port 9999 action {chan}", host=cfg.remote).stdout +def _create_rss_ctx(cfg, chan): + output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout + values = re.search(r'New RSS context is (\d+)', output).group(1) + ctx_id = int(values) + return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote)) + + +def _set_flow_rule(cfg, port, chan): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout + values = re.search(r'ID (\d+)', output).group(1) + return int(values) + + +def _set_flow_rule_rss(cfg, port, ctx_id): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout values = re.search(r'ID (\d+)', output).group(1) return int(values) @@ -32,24 +44,29 @@ def test_zcrx(cfg) -> None: combined_chans = _get_combined_channels(cfg) if combined_chans < 2: raise KsftSkipEx('at least 2 combined channels required') - rx_ring = _get_rx_ring_entries(cfg) - - try: - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) - flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) - - rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 12840" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(9999, proto="tcp", host=cfg.remote) - cmd(tx_cmd) - finally: - ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) - ethtool(f"-X {cfg.ifname} default", host=cfg.remote) - ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) - ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) + defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + + flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + + rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(port, proto="tcp", host=cfg.remote) + cmd(tx_cmd) def test_zcrx_oneshot(cfg) -> None: @@ -58,24 +75,61 @@ def test_zcrx_oneshot(cfg) -> None: combined_chans = _get_combined_channels(cfg) if combined_chans < 2: raise KsftSkipEx('at least 2 combined channels required') - rx_ring = _get_rx_ring_entries(cfg) - - try: - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) - flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) - - rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 4096 -z 16384" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(9999, proto="tcp", host=cfg.remote) - cmd(tx_cmd) - finally: - ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) - ethtool(f"-X {cfg.ifname} default", host=cfg.remote) - ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) - ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) + defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + + flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + + rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384" + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(port, proto="tcp", host=cfg.remote) + cmd(tx_cmd) + + +def test_zcrx_rss(cfg) -> None: + cfg.require_ipver('6') + + combined_chans = _get_combined_channels(cfg) + if combined_chans < 2: + raise KsftSkipEx('at least 2 combined channels required') + (rx_ring, hds_thresh) = _get_current_settings(cfg) + port = rand_port() + + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + + ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) + defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + + (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1) + flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + + rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(port, proto="tcp", host=cfg.remote) + cmd(tx_cmd) def main() -> None: diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 399789a9676a..b582885786f5 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -9,7 +9,6 @@ try: sys.path.append(KSFT_DIR.as_posix()) from net.lib.py import * from drivers.net.lib.py import * - from .linkconfig import LinkConfig except ModuleNotFoundError as e: ksft_pr("Failed importing `net` library from kernel sources") ksft_pr(str(e)) diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py deleted file mode 100644 index 79fde603cbbc..000000000000 --- a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py +++ /dev/null @@ -1,222 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -from lib.py import cmd, ethtool, ip -from lib.py import ksft_pr, ksft_eq, KsftSkipEx -from typing import Optional -import re -import time -import json - -#The LinkConfig class is implemented to handle the link layer configurations. -#Required minimum ethtool version is 6.10 - -class LinkConfig: - """Class for handling the link layer configurations""" - def __init__(self, cfg: object) -> None: - self.cfg = cfg - self.partner_netif = self.get_partner_netif_name() - - """Get the initial link configuration of local interface""" - self.common_link_modes = self.get_common_link_modes() - - def get_partner_netif_name(self) -> Optional[str]: - partner_netif = None - try: - if not self.verify_link_up(): - return None - """Get partner interface name""" - partner_json_output = ip("addr show", json=True, host=self.cfg.remote) - for interface in partner_json_output: - for addr in interface.get('addr_info', []): - if addr.get('local') == self.cfg.remote_addr: - partner_netif = interface['ifname'] - ksft_pr(f"Partner Interface name: {partner_netif}") - if partner_netif is None: - ksft_pr("Unable to get the partner interface name") - except Exception as e: - print(f"Unexpected error occurred while getting partner interface name: {e}") - self.partner_netif = partner_netif - return partner_netif - - def verify_link_up(self) -> bool: - """Verify whether the local interface link is up""" - with open(f"/sys/class/net/{self.cfg.ifname}/operstate", "r") as fp: - link_state = fp.read().strip() - - if link_state == "down": - ksft_pr(f"Link state of interface {self.cfg.ifname} is DOWN") - return False - else: - return True - - def reset_interface(self, local: bool = True, remote: bool = True) -> bool: - ksft_pr("Resetting interfaces in local and remote") - if remote: - if self.verify_link_up(): - if self.partner_netif is not None: - ifname = self.partner_netif - link_up_cmd = f"ip link set up {ifname}" - link_down_cmd = f"ip link set down {ifname}" - reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}" - try: - cmd(reset_cmd, host=self.cfg.remote) - except Exception as e: - ksft_pr(f"Unexpected error occurred while resetting remote: {e}") - else: - ksft_pr("Partner interface not available") - if local: - ifname = self.cfg.ifname - link_up_cmd = f"ip link set up {ifname}" - link_down_cmd = f"ip link set down {ifname}" - reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}" - try: - cmd(reset_cmd) - except Exception as e: - ksft_pr(f"Unexpected error occurred while resetting local: {e}") - time.sleep(10) - if self.verify_link_up() and self.get_ethtool_field("link-detected"): - ksft_pr("Local and remote interfaces reset to original state") - return True - else: - ksft_pr("Error occurred after resetting interfaces. Link is DOWN.") - return False - - def set_speed_and_duplex(self, speed: str, duplex: str, autoneg: bool = True) -> bool: - """Set the speed and duplex state for the interface""" - autoneg_state = "on" if autoneg is True else "off" - process = None - try: - process = ethtool(f"--change {self.cfg.ifname} speed {speed} duplex {duplex} autoneg {autoneg_state}") - except Exception as e: - ksft_pr(f"Unexpected error occurred while setting speed/duplex: {e}") - if process is None or process.ret != 0: - return False - else: - ksft_pr(f"Speed: {speed} Mbps, Duplex: {duplex} set for Interface: {self.cfg.ifname}") - return True - - def verify_speed_and_duplex(self, expected_speed: str, expected_duplex: str) -> bool: - if not self.verify_link_up(): - return False - """Verifying the speed and duplex state for the interface""" - with open(f"/sys/class/net/{self.cfg.ifname}/speed", "r") as fp: - actual_speed = fp.read().strip() - with open(f"/sys/class/net/{self.cfg.ifname}/duplex", "r") as fp: - actual_duplex = fp.read().strip() - - ksft_eq(actual_speed, expected_speed) - ksft_eq(actual_duplex, expected_duplex) - return True - - def set_autonegotiation_state(self, state: str, remote: bool = False) -> bool: - common_link_modes = self.common_link_modes - speeds, duplex_modes = self.get_speed_duplex_values(self.common_link_modes) - speed = speeds[0] - duplex = duplex_modes[0] - if not speed or not duplex: - ksft_pr("No speed or duplex modes found") - return False - - speed_duplex_cmd = f"speed {speed} duplex {duplex}" if state == "off" else "" - if remote: - if not self.verify_link_up(): - return False - """Set the autonegotiation state for the partner""" - command = f"-s {self.partner_netif} {speed_duplex_cmd} autoneg {state}" - partner_autoneg_change = None - """Set autonegotiation state for interface in remote pc""" - try: - partner_autoneg_change = ethtool(command, host=self.cfg.remote) - except Exception as e: - ksft_pr(f"Unexpected error occurred while changing auto-neg in remote: {e}") - if partner_autoneg_change is None or partner_autoneg_change.ret != 0: - ksft_pr(f"Not able to set autoneg parameter for interface {self.partner_netif}.") - return False - ksft_pr(f"Autoneg set as {state} for {self.partner_netif}") - else: - """Set the autonegotiation state for the interface""" - try: - process = ethtool(f"-s {self.cfg.ifname} {speed_duplex_cmd} autoneg {state}") - if process.ret != 0: - ksft_pr(f"Not able to set autoneg parameter for interface {self.cfg.ifname}") - return False - except Exception as e: - ksft_pr(f"Unexpected error occurred while changing auto-neg in local: {e}") - return False - ksft_pr(f"Autoneg set as {state} for {self.cfg.ifname}") - return True - - def check_autoneg_supported(self, remote: bool = False) -> bool: - if not remote: - local_autoneg = self.get_ethtool_field("supports-auto-negotiation") - if local_autoneg is None: - ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.cfg.ifname}") - """Return autoneg status of the local interface""" - return local_autoneg - else: - if not self.verify_link_up(): - raise KsftSkipEx("Link is DOWN") - """Check remote auto-negotiation support status""" - partner_autoneg = False - if self.partner_netif is not None: - partner_autoneg = self.get_ethtool_field("supports-auto-negotiation", remote=True) - if partner_autoneg is None: - ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.partner_netif}") - return partner_autoneg - - def get_common_link_modes(self) -> set[str]: - common_link_modes = [] - """Populate common link modes""" - link_modes = self.get_ethtool_field("supported-link-modes") - partner_link_modes = self.get_ethtool_field("link-partner-advertised-link-modes") - if link_modes is None: - raise KsftSkipEx(f"Link modes not available for {self.cfg.ifname}") - if partner_link_modes is None: - raise KsftSkipEx(f"Partner link modes not available for {self.cfg.ifname}") - common_link_modes = set(link_modes) and set(partner_link_modes) - return common_link_modes - - def get_speed_duplex_values(self, link_modes: list[str]) -> tuple[list[str], list[str]]: - speed = [] - duplex = [] - """Check the link modes""" - for data in link_modes: - parts = data.split('/') - speed_value = re.match(r'\d+', parts[0]) - if speed_value: - speed.append(speed_value.group()) - else: - ksft_pr(f"No speed value found for interface {self.ifname}") - return None, None - duplex.append(parts[1].lower()) - return speed, duplex - - def get_ethtool_field(self, field: str, remote: bool = False) -> Optional[str]: - process = None - if not remote: - """Get the ethtool field value for the local interface""" - try: - process = ethtool(self.cfg.ifname, json=True) - except Exception as e: - ksft_pr("Required minimum ethtool version is 6.10") - ksft_pr(f"Unexpected error occurred while getting ethtool field in local: {e}") - return None - else: - if not self.verify_link_up(): - return None - """Get the ethtool field value for the remote interface""" - self.cfg.require_cmd("ethtool", remote=True) - if self.partner_netif is None: - ksft_pr(f"Partner interface name is unavailable.") - return None - try: - process = ethtool(self.partner_netif, json=True, host=self.cfg.remote) - except Exception as e: - ksft_pr("Required minimum ethtool version is 6.10") - ksft_pr(f"Unexpected error occurred while getting ethtool field in remote: {e}") - return None - json_data = process[0] - """Check if the field exist in the json data""" - if field not in json_data: - raise KsftSkipEx(f'Field {field} does not exist in the output of interface {json_data["ifname"]}') - return json_data[field] diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 2bf14ac2b8c6..02e4d3d7ded2 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -9,22 +9,31 @@ * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 * * On client: - * echo -n "hello\nworld" | nc -s <server IP> 5201 -p 5201 + * echo -n "hello\nworld" | \ + * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1 * - * Test data validation: + * Note this is compatible with regular netcat. i.e. the sender or receiver can + * be replaced with regular netcat to test the RX or TX path in isolation. + * + * Test data validation (devmem TCP on RX only): * * On server: * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7 * * On client: * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ - * tr \\n \\0 | \ - * head -c 5G | \ + * head -c 1G | \ * nc <server IP> 5201 -p 5201 * + * Test data validation (devmem TCP on RX and TX, validation happens on RX): * - * Note this is compatible with regular netcat. i.e. the sender or receiver can - * be replaced with regular netcat to test the RX or TX path in isolation. + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \ + * head -c 1M | \ + * ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1 */ #define _GNU_SOURCE #define __EXPORTED_HEADERS__ @@ -40,15 +49,18 @@ #include <fcntl.h> #include <malloc.h> #include <error.h> +#include <poll.h> #include <arpa/inet.h> #include <sys/socket.h> #include <sys/mman.h> #include <sys/ioctl.h> #include <sys/syscall.h> +#include <sys/time.h> #include <linux/memfd.h> #include <linux/dma-buf.h> +#include <linux/errqueue.h> #include <linux/udmabuf.h> #include <linux/types.h> #include <linux/netlink.h> @@ -70,6 +82,9 @@ #define MSG_SOCK_DEVMEM 0x2000000 #endif +#define MAX_IOV 1024 + +static size_t max_chunk; static char *server_ip; static char *client_ip; static char *port; @@ -79,6 +94,8 @@ static int num_queues = -1; static char *ifname; static unsigned int ifindex; static unsigned int dmabuf_id; +static uint32_t tx_dmabuf_id; +static int waittime_ms = 500; struct memory_buffer { int fd; @@ -92,6 +109,8 @@ struct memory_buffer { struct memory_provider { struct memory_buffer *(*alloc)(size_t size); void (*free)(struct memory_buffer *ctx); + void (*memcpy_to_device)(struct memory_buffer *dst, size_t off, + void *src, int n); void (*memcpy_from_device)(void *dst, struct memory_buffer *src, size_t off, int n); }; @@ -152,6 +171,20 @@ static void udmabuf_free(struct memory_buffer *ctx) free(ctx); } +static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off, + void *src, int n) +{ + struct dma_buf_sync sync = {}; + + sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE; + ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); + + memcpy(dst->buf_mem + off, src, n); + + sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE; + ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync); +} + static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, size_t off, int n) { @@ -169,6 +202,7 @@ static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, static struct memory_provider udmabuf_memory_provider = { .alloc = udmabuf_alloc, .free = udmabuf_free, + .memcpy_to_device = udmabuf_memcpy_to_device, .memcpy_from_device = udmabuf_memcpy_from_device, }; @@ -187,14 +221,16 @@ void validate_buffer(void *line, size_t size) { static unsigned char seed = 1; unsigned char *ptr = line; - int errors = 0; + unsigned char expected; + static int errors; size_t i; for (i = 0; i < size; i++) { - if (ptr[i] != seed) { + expected = seed ? seed : '\n'; + if (ptr[i] != expected) { fprintf(stderr, "Failed validation: expected=%u, actual=%u, index=%lu\n", - seed, ptr[i], i); + expected, ptr[i], i); errors++; if (errors > 20) error(1, 0, "validation failed."); @@ -337,7 +373,8 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) server_addr = strrchr(server_addr, ':') + 1; } - return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", + /* Try configure 5-tuple */ + if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", ifname, type, client_ip ? "src-ip" : "", @@ -345,7 +382,17 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) server_addr, client_ip ? "src-port" : "", client_ip ? port : "", - port, start_queue); + port, start_queue)) + /* If that fails, try configure 3-tuple */ + if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2", + ifname, + type, + server_addr, + port, start_queue)) + /* If that fails, return error */ + return -1; + + return 0; } static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, @@ -393,6 +440,49 @@ err_close: return -1; } +static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd, + struct ynl_sock **ys) +{ + struct netdev_bind_tx_req *req = NULL; + struct netdev_bind_tx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!*ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = netdev_bind_tx_req_alloc(); + netdev_bind_tx_req_set_ifindex(req, ifindex); + netdev_bind_tx_req_set_fd(req, dmabuf_fd); + + rsp = netdev_bind_tx(*ys, req); + if (!rsp) { + perror("netdev_bind_tx"); + goto err_close; + } + + if (!rsp->_present.id) { + perror("id not present"); + goto err_close; + } + + fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id); + tx_dmabuf_id = rsp->id; + + netdev_bind_tx_req_free(req); + netdev_bind_tx_rsp_free(rsp); + + return 0; + +err_close: + fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); + netdev_bind_tx_req_free(req); + ynl_sock_destroy(*ys); + return -1; +} + static void enable_reuseaddr(int fd) { int opt = 1; @@ -431,10 +521,25 @@ static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) return 0; } -int do_server(struct memory_buffer *mem) +static struct netdev_queue_id *create_queues(void) { - char ctrl_data[sizeof(int) * 20000]; struct netdev_queue_id *queues; + size_t i = 0; + + queues = calloc(num_queues, sizeof(*queues)); + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + return queues; +} + +static int do_server(struct memory_buffer *mem) +{ + char ctrl_data[sizeof(int) * 20000]; size_t non_page_aligned_frags = 0; struct sockaddr_in6 client_addr; struct sockaddr_in6 server_sin; @@ -448,7 +553,6 @@ int do_server(struct memory_buffer *mem) char buffer[256]; int socket_fd; int client_fd; - size_t i = 0; int ret; ret = parse_address(server_ip, atoi(port), &server_sin); @@ -471,16 +575,7 @@ int do_server(struct memory_buffer *mem) sleep(1); - queues = malloc(sizeof(*queues) * num_queues); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); tmp_mem = malloc(mem->size); @@ -545,7 +640,6 @@ int do_server(struct memory_buffer *mem) goto cleanup; } - i++; for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_SOCKET || (cm->cmsg_type != SCM_DEVMEM_DMABUF && @@ -630,10 +724,8 @@ cleanup: void run_devmem_tests(void) { - struct netdev_queue_id *queues; struct memory_buffer *mem; struct ynl_sock *ys; - size_t i = 0; mem = provider->alloc(getpagesize() * NUM_PAGES); @@ -641,38 +733,24 @@ void run_devmem_tests(void) if (configure_rss()) error(1, 0, "rss error\n"); - queues = calloc(num_queues, sizeof(*queues)); - if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, + calloc(num_queues, sizeof(struct netdev_queue_id)), + num_queues, &ys)) error(1, 0, "Binding empty queues array should have failed\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - if (configure_headersplit(0)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Configure dmabuf with header split off should have failed\n"); if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); /* Deactivating a bound queue should not be legal */ @@ -685,13 +763,216 @@ void run_devmem_tests(void) provider->free(mem); } +static uint64_t gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL); +} + +static int do_poll(int fd) +{ + struct pollfd pfd; + int ret; + + pfd.revents = 0; + pfd.fd = fd; + + ret = poll(&pfd, 1, waittime_ms); + if (ret == -1) + error(1, errno, "poll"); + + return ret && (pfd.revents & POLLERR); +} + +static void wait_compl(int fd) +{ + int64_t tstop = gettimeofday_ms() + waittime_ms; + char control[CMSG_SPACE(100)] = {}; + struct sock_extended_err *serr; + struct msghdr msg = {}; + struct cmsghdr *cm; + __u32 hi, lo; + int ret; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + while (gettimeofday_ms() < tstop) { + if (!do_poll(fd)) + continue; + + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret < 0) { + if (errno == EAGAIN) + continue; + error(1, errno, "recvmsg(MSG_ERRQUEUE)"); + return; + } + if (msg.msg_flags & MSG_CTRUNC) + error(1, 0, "MSG_CTRUNC\n"); + + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if (cm->cmsg_level != SOL_IP && + cm->cmsg_level != SOL_IPV6) + continue; + if (cm->cmsg_level == SOL_IP && + cm->cmsg_type != IP_RECVERR) + continue; + if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type != IPV6_RECVERR) + continue; + + serr = (void *)CMSG_DATA(cm); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) + error(1, 0, "wrong origin %u", serr->ee_origin); + if (serr->ee_errno != 0) + error(1, 0, "wrong errno %d", serr->ee_errno); + + hi = serr->ee_data; + lo = serr->ee_info; + + fprintf(stderr, "tx complete [%d,%d]\n", lo, hi); + return; + } + } + + error(1, 0, "did not receive tx completion"); +} + +static int do_client(struct memory_buffer *mem) +{ + char ctrl_data[CMSG_SPACE(sizeof(__u32))]; + struct sockaddr_in6 server_sin; + struct sockaddr_in6 client_sin; + struct ynl_sock *ys = NULL; + struct iovec iov[MAX_IOV]; + struct msghdr msg = {}; + ssize_t line_size = 0; + struct cmsghdr *cmsg; + char *line = NULL; + unsigned long mid; + size_t len = 0; + int socket_fd; + __u32 ddmabuf; + int opt = 1; + int ret; + + ret = parse_address(server_ip, atoi(port), &server_sin); + if (ret < 0) + error(1, 0, "parse server address"); + + socket_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (socket_fd < 0) + error(1, socket_fd, "create socket"); + + enable_reuseaddr(socket_fd); + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, + strlen(ifname) + 1); + if (ret) + error(1, errno, "bindtodevice"); + + if (bind_tx_queue(ifindex, mem->fd, &ys)) + error(1, 0, "Failed to bind\n"); + + if (client_ip) { + ret = parse_address(client_ip, atoi(port), &client_sin); + if (ret < 0) + error(1, 0, "parse client address"); + + ret = bind(socket_fd, &client_sin, sizeof(client_sin)); + if (ret) + error(1, errno, "bind"); + } + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)); + if (ret) + error(1, errno, "set sock opt"); + + fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip, + ntohs(server_sin.sin6_port), ifname); + + ret = connect(socket_fd, &server_sin, sizeof(server_sin)); + if (ret) + error(1, errno, "connect"); + + while (1) { + free(line); + line = NULL; + line_size = getline(&line, &len, stdin); + + if (line_size < 0) + break; + + if (max_chunk) { + msg.msg_iovlen = + (line_size + max_chunk - 1) / max_chunk; + if (msg.msg_iovlen > MAX_IOV) + error(1, 0, + "can't partition %zd bytes into maximum of %d chunks", + line_size, MAX_IOV); + + for (int i = 0; i < msg.msg_iovlen; i++) { + iov[i].iov_base = (void *)(i * max_chunk); + iov[i].iov_len = max_chunk; + } + + iov[msg.msg_iovlen - 1].iov_len = + line_size - (msg.msg_iovlen - 1) * max_chunk; + } else { + iov[0].iov_base = 0; + iov[0].iov_len = line_size; + msg.msg_iovlen = 1; + } + + msg.msg_iov = iov; + provider->memcpy_to_device(mem, 0, line, line_size); + + msg.msg_control = ctrl_data; + msg.msg_controllen = sizeof(ctrl_data); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_DEVMEM_DMABUF; + cmsg->cmsg_len = CMSG_LEN(sizeof(__u32)); + + ddmabuf = tx_dmabuf_id; + + *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf; + + ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY); + if (ret < 0) + error(1, errno, "Failed sendmsg"); + + fprintf(stderr, "sendmsg_ret=%d\n", ret); + + if (ret != line_size) + error(1, errno, "Did not send all bytes %d vs %zd", ret, + line_size); + + wait_compl(socket_fd); + } + + fprintf(stderr, "%s: tx ok\n", TEST_PREFIX); + + free(line); + close(socket_fd); + + if (ys) + ynl_sock_destroy(ys); + + return 0; +} + int main(int argc, char *argv[]) { struct memory_buffer *mem; int is_server = 0, opt; int ret; - while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) { + while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) { switch (opt) { case 'l': is_server = 1; @@ -717,6 +998,9 @@ int main(int argc, char *argv[]) case 'f': ifname = optarg; break; + case 'z': + max_chunk = atoi(optarg); + break; case '?': fprintf(stderr, "unknown option: %c\n", optopt); break; @@ -728,6 +1012,8 @@ int main(int argc, char *argv[]) ifindex = if_nametoindex(ifname); + fprintf(stderr, "using ifindex=%u\n", ifindex); + if (!server_ip && !client_ip) { if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); @@ -778,7 +1064,7 @@ int main(int argc, char *argv[]) error(1, 0, "Missing -p argument\n"); mem = provider->alloc(getpagesize() * NUM_PAGES); - ret = is_server ? do_server(mem) : 1; + ret = is_server ? do_server(mem) : do_client(mem); provider->free(mem); return ret; diff --git a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py b/tools/testing/selftests/drivers/net/hw/nic_link_layer.py deleted file mode 100644 index efd921180532..000000000000 --- a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 - -#Introduction: -#This file has basic link layer tests for generic NIC drivers. -#The test comprises of auto-negotiation, speed and duplex checks. -# -#Setup: -#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1) -# -# DUT PC Partner PC -#┌───────────────────────┐ ┌──────────────────────────┐ -#│ │ │ │ -#│ │ │ │ -#│ ┌───────────┐ │ │ -#│ │DUT NIC │ Eth │ │ -#│ │Interface ─┼─────────────────────────┼─ any eth Interface │ -#│ └───────────┘ │ │ -#│ │ │ │ -#│ │ │ │ -#└───────────────────────┘ └──────────────────────────┘ -# -#Configurations: -#Required minimum ethtool version is 6.10 (supports json) -#Default values: -#time_delay = 8 #time taken to wait for transitions to happen, in seconds. - -import time -import argparse -from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_eq -from lib.py import KsftFailEx, KsftSkipEx -from lib.py import NetDrvEpEnv -from lib.py import LinkConfig - -def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None: - if link_config.partner_netif is None: - KsftSkipEx("Partner interface is not available") - if not link_config.check_autoneg_supported() or not link_config.check_autoneg_supported(remote=True): - KsftSkipEx(f"Auto-negotiation not supported for interface {cfg.ifname} or {link_config.partner_netif}") - if not link_config.verify_link_up(): - raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - -def verify_autonegotiation(cfg: object, expected_state: str, link_config: LinkConfig) -> None: - if not link_config.verify_link_up(): - raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - """Verifying the autonegotiation state in partner""" - partner_autoneg_output = link_config.get_ethtool_field("auto-negotiation", remote=True) - if partner_autoneg_output is None: - KsftSkipEx(f"Auto-negotiation state not available for interface {link_config.partner_netif}") - partner_autoneg_state = "on" if partner_autoneg_output is True else "off" - - ksft_eq(partner_autoneg_state, expected_state) - - """Verifying the autonegotiation state of local""" - autoneg_output = link_config.get_ethtool_field("auto-negotiation") - if autoneg_output is None: - KsftSkipEx(f"Auto-negotiation state not available for interface {cfg.ifname}") - actual_state = "on" if autoneg_output is True else "off" - - ksft_eq(actual_state, expected_state) - - """Verifying the link establishment""" - link_available = link_config.get_ethtool_field("link-detected") - if link_available is None: - KsftSkipEx(f"Link status not available for interface {cfg.ifname}") - if link_available != True: - raise KsftSkipEx("Link not established at interface {cfg.ifname} after changing auto-negotiation") - -def test_autonegotiation(cfg: object, link_config: LinkConfig, time_delay: int) -> None: - _pre_test_checks(cfg, link_config) - for state in ["off", "on"]: - if not link_config.set_autonegotiation_state(state, remote=True): - raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {link_config.partner_netif}") - if not link_config.set_autonegotiation_state(state): - raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {cfg.ifname}") - time.sleep(time_delay) - verify_autonegotiation(cfg, state, link_config) - -def test_network_speed(cfg: object, link_config: LinkConfig, time_delay: int) -> None: - _pre_test_checks(cfg, link_config) - common_link_modes = link_config.common_link_modes - if not common_link_modes: - KsftSkipEx("No common link modes exist") - speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes) - - if speeds and duplex_modes and len(speeds) == len(duplex_modes): - for idx in range(len(speeds)): - speed = speeds[idx] - duplex = duplex_modes[idx] - if not link_config.set_speed_and_duplex(speed, duplex): - raise KsftFailEx(f"Unable to set speed and duplex parameters for {cfg.ifname}") - time.sleep(time_delay) - if not link_config.verify_speed_and_duplex(speed, duplex): - raise KsftSkipEx(f"Error occurred while verifying speed and duplex states for interface {cfg.ifname}") - else: - if not speeds or not duplex_modes: - KsftSkipEx(f"No supported speeds or duplex modes found for interface {cfg.ifname}") - else: - KsftSkipEx("Mismatch in the number of speeds and duplex modes") - -def main() -> None: - parser = argparse.ArgumentParser(description="Run basic link layer tests for NIC driver") - parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.') - args = parser.parse_args() - time_delay = args.time_delay - with NetDrvEpEnv(__file__, nsim_test=False) as cfg: - link_config = LinkConfig(cfg) - ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, time_delay,)) - link_config.reset_interface() - ksft_exit() - -if __name__ == "__main__": - main() diff --git a/tools/testing/selftests/drivers/net/hw/nic_performance.py b/tools/testing/selftests/drivers/net/hw/nic_performance.py deleted file mode 100644 index 201403b76ea3..000000000000 --- a/tools/testing/selftests/drivers/net/hw/nic_performance.py +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 - -#Introduction: -#This file has basic performance test for generic NIC drivers. -#The test comprises of throughput check for TCP and UDP streams. -# -#Setup: -#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1) -# -# DUT PC Partner PC -#┌───────────────────────┐ ┌──────────────────────────┐ -#│ │ │ │ -#│ │ │ │ -#│ ┌───────────┐ │ │ -#│ │DUT NIC │ Eth │ │ -#│ │Interface ─┼─────────────────────────┼─ any eth Interface │ -#│ └───────────┘ │ │ -#│ │ │ │ -#│ │ │ │ -#└───────────────────────┘ └──────────────────────────┘ -# -#Configurations: -#To prevent interruptions, Add ethtool, ip to the sudoers list in remote PC and get the ssh key from remote. -#Required minimum ethtool version is 6.10 -#Change the below configuration based on your hw needs. -# """Default values""" -#time_delay = 8 #time taken to wait for transitions to happen, in seconds. -#test_duration = 10 #performance test duration for the throughput check, in seconds. -#send_throughput_threshold = 80 #percentage of send throughput required to pass the check -#receive_throughput_threshold = 50 #percentage of receive throughput required to pass the check - -import time -import json -import argparse -from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_true -from lib.py import KsftFailEx, KsftSkipEx, GenerateTraffic -from lib.py import NetDrvEpEnv, bkg, wait_port_listen -from lib.py import cmd -from lib.py import LinkConfig - -class TestConfig: - def __init__(self, time_delay: int, test_duration: int, send_throughput_threshold: int, receive_throughput_threshold: int) -> None: - self.time_delay = time_delay - self.test_duration = test_duration - self.send_throughput_threshold = send_throughput_threshold - self.receive_throughput_threshold = receive_throughput_threshold - -def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None: - if not link_config.verify_link_up(): - KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - common_link_modes = link_config.common_link_modes - if common_link_modes is None: - KsftSkipEx("No common link modes found") - if link_config.partner_netif == None: - KsftSkipEx("Partner interface is not available") - if link_config.check_autoneg_supported(): - KsftSkipEx("Auto-negotiation not supported by local") - if link_config.check_autoneg_supported(remote=True): - KsftSkipEx("Auto-negotiation not supported by remote") - cfg.require_cmd("iperf3", remote=True) - -def check_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, protocol: str, traffic: GenerateTraffic) -> None: - common_link_modes = link_config.common_link_modes - speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes) - """Test duration in seconds""" - duration = test_config.test_duration - - ksft_pr(f"{protocol} test") - test_type = "-u" if protocol == "UDP" else "" - - send_throughput = [] - receive_throughput = [] - for idx in range(0, len(speeds)): - if link_config.set_speed_and_duplex(speeds[idx], duplex_modes[idx]) == False: - raise KsftFailEx(f"Not able to set speed and duplex parameters for {cfg.ifname}") - time.sleep(test_config.time_delay) - if not link_config.verify_link_up(): - raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") - - send_command=f"{test_type} -b 0 -t {duration} --json" - receive_command=f"{test_type} -b 0 -t {duration} --reverse --json" - - send_result = traffic.run_remote_test(cfg, command=send_command) - if send_result.ret != 0: - raise KsftSkipEx("Error occurred during data transmit: {send_result.stdout}") - - send_output = send_result.stdout - send_data = json.loads(send_output) - - """Convert throughput to Mbps""" - send_throughput.append(round(send_data['end']['sum_sent']['bits_per_second'] / 1e6, 2)) - ksft_pr(f"{protocol}: Send throughput: {send_throughput[idx]} Mbps") - - receive_result = traffic.run_remote_test(cfg, command=receive_command) - if receive_result.ret != 0: - raise KsftSkipEx("Error occurred during data receive: {receive_result.stdout}") - - receive_output = receive_result.stdout - receive_data = json.loads(receive_output) - - """Convert throughput to Mbps""" - receive_throughput.append(round(receive_data['end']['sum_received']['bits_per_second'] / 1e6, 2)) - ksft_pr(f"{protocol}: Receive throughput: {receive_throughput[idx]} Mbps") - - """Check whether throughput is not below the threshold (default values set at start)""" - for idx in range(0, len(speeds)): - send_threshold = float(speeds[idx]) * float(test_config.send_throughput_threshold / 100) - receive_threshold = float(speeds[idx]) * float(test_config.receive_throughput_threshold / 100) - ksft_true(send_throughput[idx] >= send_threshold, f"{protocol}: Send throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex") - ksft_true(receive_throughput[idx] >= receive_threshold, f"{protocol}: Receive throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex") - -def test_tcp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None: - _pre_test_checks(cfg, link_config) - check_throughput(cfg, link_config, test_config, 'TCP', traffic) - -def test_udp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None: - _pre_test_checks(cfg, link_config) - check_throughput(cfg, link_config, test_config, 'UDP', traffic) - -def main() -> None: - parser = argparse.ArgumentParser(description="Run basic performance test for NIC driver") - parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.') - parser.add_argument('--test-duration', type=int, default=10, help='Performance test duration for the throughput check, in seconds. Default is 10 seconds.') - parser.add_argument('--stt', type=int, default=80, help='Send throughput Threshold: Percentage of send throughput upon actual throughput required to pass the throughput check (in percentage). Default is 80.') - parser.add_argument('--rtt', type=int, default=50, help='Receive throughput Threshold: Percentage of receive throughput upon actual throughput required to pass the throughput check (in percentage). Default is 50.') - args=parser.parse_args() - test_config = TestConfig(args.time_delay, args.test_duration, args.stt, args.rtt) - with NetDrvEpEnv(__file__, nsim_test=False) as cfg: - traffic = GenerateTraffic(cfg) - link_config = LinkConfig(cfg) - ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, test_config, traffic, )) - link_config.reset_interface() - ksft_exit() - -if __name__ == "__main__": - main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py index 53bb08cc29ec..f439c434ba36 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -32,6 +32,11 @@ def test_rss_input_xfrm(cfg, ipver): if multiprocessing.cpu_count() < 2: raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash") + cfg.require_cmd("socat", remote=True) + + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + input_xfrm = cfg.ethnl.rss_get( {'header': {'dev-name': cfg.ifname}}).get('input_xfrm') diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py new file mode 100755 index 000000000000..d19d1d518208 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +# This is intended to be run on a virtio-net guest interface. +# The test binds the XDP socket to the interface without setting +# the fill ring to trigger delayed refill_work. This helps to +# make it easier to reproduce the deadlock when XDP program, +# XDP socket bind/unbind, rx ring resize race with refill_work on +# the buggy kernel. +# +# The Qemu command to setup virtio-net +# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no +# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on + +from lib.py import ksft_exit, ksft_run +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import NetDrvEnv +from lib.py import bkg, ip, cmd, ethtool +import time + +def _get_rx_ring_entries(cfg): + output = ethtool(f"-g {cfg.ifname}", json=True) + return output[0]["rx"] + +def setup_xsk(cfg, xdp_queue_id = 0) -> bkg: + # Probe for support + xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False) + if xdp.ret == 255: + raise KsftSkipEx('AF_XDP unsupported') + elif xdp.ret > 0: + raise KsftFailEx('unable to create AF_XDP socket') + + try: + return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \ + '{xdp_queue_id} -z', ksft_wait=3) + except: + raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \ + 'Please consider adding iommu_platform=on ' \ + 'when setting up virtio-net-pci') + +def check_xdp_bind(cfg): + with setup_xsk(cfg): + ip(f"link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o")) + ip(f"link set dev %s xdp off" % cfg.ifname) + +def check_rx_resize(cfg): + with setup_xsk(cfg): + rx_ring = _get_rx_ring_entries(cfg) + ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring // 2)) + ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring)) + +def main(): + with NetDrvEnv(__file__, nsim_test=False) as cfg: + ksft_run([check_xdp_bind, check_rx_resize], + args=(cfg, )) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index ad5ff645183a..3bccddf8cbc5 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -12,7 +12,7 @@ from .remote import Remote class NetDrvEnvBase: """ - Base class for a NIC / host envirnoments + Base class for a NIC / host environments Attributes: test_dir: Path to the source directory of the test diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index da5af2c680fa..d9c10613ae67 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -2,7 +2,7 @@ import time -from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen, bkg +from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen class GenerateTraffic: def __init__(self, env, port=None): @@ -23,24 +23,6 @@ class GenerateTraffic: self.stop(verbose=True) raise Exception("iperf3 traffic did not ramp up") - def run_remote_test(self, env: object, port=None, command=None): - if port is None: - port = rand_port() - try: - server_cmd = f"iperf3 -s 1 -p {port} --one-off" - with bkg(server_cmd, host=env.remote): - #iperf3 opens TCP connection as default in server - #-u to be specified in client command for UDP - wait_port_listen(port, host=env.remote) - except Exception as e: - raise Exception(f"Unexpected error occurred while running server command: {e}") - try: - client_cmd = f"iperf3 -c {env.remote_addr} -p {port} {command}" - proc = cmd(client_cmd) - return proc - except Exception as e: - raise Exception(f"Unexpected error occurred while running client command: {e}") - def _wait_pkts(self, pkt_cnt=None, pps=None): """ Wait until we've seen pkt_cnt or until traffic ramps up to pps. diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 3c96b022954d..29b01b8e2215 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -33,7 +33,6 @@ NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" # Used to create and delete namespaces source "${LIBDIR}"/../../../../net/lib.sh -source "${LIBDIR}"/../../../../net/net_helper.sh # Create netdevsim interfaces create_ifaces() { diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py new file mode 100755 index 000000000000..356bac46ba04 --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_id.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, NetDrvEpEnv +from lib.py import bkg, cmd, rand_port, NetNSEnter + +def test_napi_id(cfg) -> None: + port = rand_port() + listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}" + + with bkg(listen_cmd, ksft_wait=3) as server: + cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True) + + ksft_eq(0, server.ret) + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + ksft_run([test_napi_id], args=(cfg,)) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c new file mode 100644 index 000000000000..eecd610c2109 --- /dev/null +++ b/tools/testing/selftests/drivers/net/napi_id_helper.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> + +#include "../../net/lib/ksft.h" + +int main(int argc, char *argv[]) +{ + struct sockaddr_in address; + unsigned int napi_id; + unsigned int port; + socklen_t optlen; + char buf[1024]; + int opt = 1; + int server; + int client; + int ret; + + server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (server < 0) { + perror("socket creation failed"); + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + port = atoi(argv[2]); + + if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) { + perror("setsockopt"); + return 1; + } + + address.sin_family = AF_INET; + inet_pton(AF_INET, argv[1], &address.sin_addr); + address.sin_port = htons(port); + + if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) { + perror("bind failed"); + return 1; + } + + if (listen(server, 1) < 0) { + perror("listen"); + return 1; + } + + ksft_ready(); + + client = accept(server, NULL, 0); + if (client < 0) { + perror("accept"); + return 1; + } + + optlen = sizeof(napi_id); + ret = getsockopt(client, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, + &optlen); + if (ret != 0) { + perror("getsockopt"); + return 1; + } + + read(client, buf, 1024); + + ksft_wait(); + + if (napi_id == 0) { + fprintf(stderr, "napi ID is 0\n"); + return 1; + } + + close(client); + close(server); + + return 0; +} diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh index aed62d9e6c0a..1bb46ec435d4 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0-only -source ../../../net/net_helper.sh +source ../../../net/lib.sh NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index af8df2313a3b..e0f114612c1a 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -50,6 +50,16 @@ def _test_tcp(cfg) -> None: cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) ksft_eq(nc.stdout.strip(), test_string) +def _schedule_checksum_reset(cfg, netnl) -> None: + features = ethtool(f"-k {cfg.ifname}", json=True) + setting = "" + for side in ["tx", "rx"]: + f = features[0][side + "-checksumming"] + if not f["fixed"]: + setting += " " + side + setting += " " + ("on" if f["requested"] or f["active"] else "off") + defer(ethtool, f" -K {cfg.ifname} " + setting) + def _set_offload_checksum(cfg, netnl, on) -> None: try: ethtool(f" -K {cfg.ifname} rx {on} tx {on} ") @@ -139,6 +149,7 @@ def set_interface_init(cfg) -> None: def test_default_v4(cfg, netnl) -> None: cfg.require_ipver("4") + _schedule_checksum_reset(cfg, netnl) _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) _test_tcp(cfg) @@ -149,6 +160,7 @@ def test_default_v4(cfg, netnl) -> None: def test_default_v6(cfg, netnl) -> None: cfg.require_ipver("6") + _schedule_checksum_reset(cfg, netnl) _set_offload_checksum(cfg, netnl, "off") _test_v6(cfg) _test_tcp(cfg) @@ -157,6 +169,7 @@ def test_default_v6(cfg, netnl) -> None: _test_tcp(cfg) def test_xdp_generic_sb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) _set_xdp_generic_sb_on(cfg) _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) @@ -168,6 +181,7 @@ def test_xdp_generic_sb(cfg, netnl) -> None: _test_tcp(cfg) def test_xdp_generic_mb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) _set_xdp_generic_mb_on(cfg) _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) @@ -179,6 +193,7 @@ def test_xdp_generic_mb(cfg, netnl) -> None: _test_tcp(cfg) def test_xdp_native_sb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) _set_xdp_native_sb_on(cfg) _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) @@ -190,6 +205,7 @@ def test_xdp_native_sb(cfg, netnl) -> None: _test_tcp(cfg) def test_xdp_native_mb(cfg, netnl) -> None: + _schedule_checksum_reset(cfg, netnl) _set_xdp_native_mb_on(cfg) _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 06abd3f233e1..236005290a33 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -26,13 +26,13 @@ def nl_get_queues(cfg, nl, qtype='rx'): def check_xsk(cfg, nl, xdp_queue_id=0) -> None: # Probe for support - xdp = cmd(f'{cfg.test_dir / "xdp_helper"} - -', fail=False) + xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False) if xdp.ret == 255: raise KsftSkipEx('AF_XDP unsupported') elif xdp.ret > 0: raise KsftFailEx('unable to create AF_XDP socket') - with bkg(f'{cfg.test_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}', + with bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}', ksft_wait=3): rx = tx = False diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 2d5a76d99181..eaf6938f100e 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for net selftests -TEST_PROGS := dev_addr_lists.sh +TEST_PROGS := dev_addr_lists.sh propagation.sh TEST_INCLUDES := \ ../bonding/lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index b5e3a3aad4bf..636b3525b679 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -1,5 +1,6 @@ CONFIG_DUMMY=y CONFIG_IPV6=y CONFIG_MACVLAN=y +CONFIG_NETDEVSIM=m CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y diff --git a/tools/testing/selftests/drivers/net/team/propagation.sh b/tools/testing/selftests/drivers/net/team/propagation.sh new file mode 100755 index 000000000000..4bea75b79878 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/propagation.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e + +NSIM_LRO_ID=$((256 + RANDOM % 256)) +NSIM_LRO_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_LRO_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device + +cleanup() +{ + set +e + ip link del dummyteam &>/dev/null + ip link del team0 &>/dev/null + echo $NSIM_LRO_ID > $NSIM_DEV_SYS_DEL + modprobe -r netdevsim +} + +# Trigger LRO propagation to the lower. +# https://lore.kernel.org/netdev/aBvOpkIoxcr9PfDg@mini-arch/ +team_lro() +{ + # using netdevsim because it supports NETIF_F_LRO + NSIM_LRO_NAME=$(find $NSIM_LRO_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_LRO_SYS/net -exec basename {} \;) + + ip link add name team0 type team + ip link set $NSIM_LRO_NAME down + ip link set dev $NSIM_LRO_NAME master team0 + ip link set team0 up + ethtool -K team0 large-receive-offload off + + ip link del team0 +} + +# Trigger promisc propagation to the lower during IFLA_MASTER. +# https://lore.kernel.org/netdev/20250506032328.3003050-1-sdf@fomichev.me/ +team_promisc() +{ + ip link add name dummyteam type dummy + ip link add name team0 type team + ip link set dummyteam down + ip link set team0 promisc on + ip link set dev dummyteam master team0 + ip link set team0 up + + ip link del team0 + ip link del dummyteam +} + +# Trigger promisc propagation to the lower via netif_change_flags (aka +# ndo_change_rx_flags). +# https://lore.kernel.org/netdev/20250514220319.3505158-1-stfomichev@gmail.com/ +team_change_flags() +{ + ip link add name dummyteam type dummy + ip link add name team0 type team + ip link set dummyteam down + ip link set dev dummyteam master team0 + ip link set team0 up + ip link set team0 promisc on + + # Make sure we can add more L2 addresses without any issues. + ip link add link team0 address 00:00:00:00:00:01 team0.1 type macvlan + ip link set team0.1 up + + ip link del team0.1 + ip link del team0 + ip link del dummyteam +} + +trap cleanup EXIT +modprobe netdevsim || : +echo $NSIM_LRO_ID > $NSIM_DEV_SYS_NEW +udevadm settle +team_lro +team_promisc +team_change_flags diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index 828b66a10c63..7afa58e2bb20 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -2,3 +2,4 @@ dnotify_test devpts_pts file_stressor +anon_inode_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 66305fc34c60..b02326193fee 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts file_stressor +TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c new file mode 100644 index 000000000000..e8e0ef1460d2 --- /dev/null +++ b/tools/testing/selftests/filesystems/anon_inode_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ + +#include <fcntl.h> +#include <stdio.h> +#include <sys/stat.h> + +#include "../kselftest_harness.h" +#include "overlayfs/wrappers.h" + +TEST(anon_inode_no_chown) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(fchown(fd_context, 1234, 5678), 0); + ASSERT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_chmod) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(fchmod(fd_context, 0777), 0); + ASSERT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_exec) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(execveat(fd_context, "", NULL, NULL, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, EACCES); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_open) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_GE(dup2(fd_context, 500), 0); + ASSERT_EQ(close(fd_context), 0); + fd_context = 500; + + ASSERT_LT(open("/proc/self/fd/500", 0), 0); + ASSERT_EQ(errno, ENXIO); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST_HARNESS_MAIN + diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore index 82a4846cbc4b..124339ea7845 100644 --- a/tools/testing/selftests/filesystems/mount-notify/.gitignore +++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only /*_test +/*_test_ns diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile index 10be0227b5ae..836a4eb7be06 100644 --- a/tools/testing/selftests/filesystems/mount-notify/Makefile +++ b/tools/testing/selftests/filesystems/mount-notify/Makefile @@ -1,6 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) -TEST_GEN_PROGS := mount-notify_test +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS += -lcap + +TEST_GEN_PROGS := mount-notify_test mount-notify_test_ns include ../../lib.mk + +$(OUTPUT)/mount-notify_test: ../utils.c +$(OUTPUT)/mount-notify_test_ns: ../utils.c diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 59a71f22fb11..63ce708d93ed 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -8,43 +8,21 @@ #include <string.h> #include <sys/stat.h> #include <sys/mount.h> -#include <linux/fanotify.h> #include <unistd.h> -#include <sys/fanotify.h> #include <sys/syscall.h> #include "../../kselftest_harness.h" #include "../statmount/statmount.h" +#include "../utils.h" -#ifndef FAN_MNT_ATTACH -struct fanotify_event_info_mnt { - struct fanotify_event_info_header hdr; - __u64 mnt_id; -}; -#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ -#endif - -#ifndef FAN_MNT_DETACH -#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ -#endif - -#ifndef FAN_REPORT_MNT -#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ +// Needed for linux/fanotify.h +#ifndef __kernel_fsid_t +typedef struct { + int val[2]; +} __kernel_fsid_t; #endif -#ifndef FAN_MARK_MNTNS -#define FAN_MARK_MNTNS 0x00000110 -#endif - -static uint64_t get_mnt_id(struct __test_metadata *const _metadata, - const char *path) -{ - struct statx sx; - - ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0); - ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE)); - return sx.stx_mnt_id; -} +#include <sys/fanotify.h> static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; @@ -94,7 +72,7 @@ FIXTURE_SETUP(fanotify) ASSERT_EQ(mkdir("b", 0700), 0); - self->root_id = get_mnt_id(_metadata, "/"); + self->root_id = get_unique_mnt_id("/"); ASSERT_NE(self->root_id, 0); for (i = 0; i < NUM_FAN_FDS; i++) { diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c new file mode 100644 index 000000000000..090a5ca65004 --- /dev/null +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c @@ -0,0 +1,557 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> +#include <sys/syscall.h> + +#include "../../kselftest_harness.h" +#include "../../pidfd/pidfd.h" +#include "../statmount/statmount.h" +#include "../utils.h" + +// Needed for linux/fanotify.h +#ifndef __kernel_fsid_t +typedef struct { + int val[2]; +} __kernel_fsid_t; +#endif + +#include <sys/fanotify.h> + +static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; + +static const int mark_types[] = { + FAN_MARK_FILESYSTEM, + FAN_MARK_MOUNT, + FAN_MARK_INODE +}; + +static const int mark_cmds[] = { + FAN_MARK_ADD, + FAN_MARK_REMOVE, + FAN_MARK_FLUSH +}; + +#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) + +FIXTURE(fanotify) { + int fan_fd[NUM_FAN_FDS]; + char buf[256]; + unsigned int rem; + void *next; + char root_mntpoint[sizeof(root_mntpoint_templ)]; + int orig_root; + int orig_ns_fd; + int ns_fd; + uint64_t root_id; +}; + +FIXTURE_SETUP(fanotify) +{ + int i, ret; + + self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(self->orig_ns_fd, 0); + + ret = setup_userns(); + ASSERT_EQ(ret, 0); + + self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(self->ns_fd, 0); + + strcpy(self->root_mntpoint, root_mntpoint_templ); + ASSERT_NE(mkdtemp(self->root_mntpoint), NULL); + + self->orig_root = open("/", O_PATH | O_CLOEXEC); + ASSERT_GE(self->orig_root, 0); + + ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0); + + ASSERT_EQ(chroot(self->root_mntpoint), 0); + + ASSERT_EQ(chdir("/"), 0); + + ASSERT_EQ(mkdir("a", 0700), 0); + + ASSERT_EQ(mkdir("b", 0700), 0); + + self->root_id = get_unique_mnt_id("/"); + ASSERT_NE(self->root_id, 0); + + for (i = 0; i < NUM_FAN_FDS; i++) { + int fan_fd = fanotify_init(FAN_REPORT_FID, 0); + // Verify that watching tmpfs mounted inside userns is allowed + ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i], + FAN_OPEN, AT_FDCWD, "/"); + ASSERT_EQ(ret, 0); + // ...but watching entire orig root filesystem is not allowed + ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM, + FAN_OPEN, self->orig_root, "."); + ASSERT_NE(ret, 0); + close(fan_fd); + + self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, + 0); + ASSERT_GE(self->fan_fd[i], 0); + // Verify that watching mntns where group was created is allowed + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + // ...but watching orig mntns is not allowed + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->orig_ns_fd, NULL); + ASSERT_NE(ret, 0); + // On fd[0] we do an extra ADD that changes nothing. + // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. + ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + } + + self->rem = 0; +} + +FIXTURE_TEARDOWN(fanotify) +{ + int i; + + ASSERT_EQ(self->rem, 0); + for (i = 0; i < NUM_FAN_FDS; i++) + close(self->fan_fd[i]); + + ASSERT_EQ(fchdir(self->orig_root), 0); + + ASSERT_EQ(chroot("."), 0); + + EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0); + EXPECT_EQ(chdir(self->root_mntpoint), 0); + EXPECT_EQ(chdir("/"), 0); + EXPECT_EQ(rmdir(self->root_mntpoint), 0); +} + +static uint64_t expect_notify(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t *mask) +{ + struct fanotify_event_metadata *meta; + struct fanotify_event_info_mnt *mnt; + unsigned int thislen; + + if (!self->rem) { + ssize_t len; + int i; + + for (i = NUM_FAN_FDS - 1; i >= 0; i--) { + len = read(self->fan_fd[i], self->buf, + sizeof(self->buf)); + if (i > 0) { + // Groups 1,2 should get EAGAIN + ASSERT_EQ(len, -1); + ASSERT_EQ(errno, EAGAIN); + } else { + // Group 0 should get events + ASSERT_GT(len, 0); + } + } + + self->rem = len; + self->next = (void *) self->buf; + } + + meta = self->next; + ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem)); + + thislen = meta->event_len; + self->rem -= thislen; + self->next += thislen; + + *mask = meta->mask; + thislen -= sizeof(*meta); + + mnt = ((void *) meta) + meta->event_len - thislen; + + ASSERT_EQ(thislen, sizeof(*mnt)); + + return mnt->mnt_id; +} + +static void expect_notify_n(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + unsigned int n, uint64_t mask[], uint64_t mnts[]) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mnts[i] = expect_notify(_metadata, self, &mask[i]); +} + +static uint64_t expect_notify_mask(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t expect_mask) +{ + uint64_t mntid, mask; + + mntid = expect_notify(_metadata, self, &mask); + ASSERT_EQ(expect_mask, mask); + + return mntid; +} + + +static void expect_notify_mask_n(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t mask, unsigned int n, uint64_t mnts[]) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mnts[i] = expect_notify_mask(_metadata, self, mask); +} + +static void verify_mount_ids(struct __test_metadata *const _metadata, + const uint64_t list1[], const uint64_t list2[], + size_t num) +{ + unsigned int i, j; + + // Check that neither list has any duplicates + for (i = 0; i < num; i++) { + for (j = 0; j < num; j++) { + if (i != j) { + ASSERT_NE(list1[i], list1[j]); + ASSERT_NE(list2[i], list2[j]); + } + } + } + // Check that all list1 memebers can be found in list2. Together with + // the above it means that the list1 and list2 represent the same sets. + for (i = 0; i < num; i++) { + for (j = 0; j < num; j++) { + if (list1[i] == list2[j]) + break; + } + ASSERT_NE(j, num); + } +} + +static void check_mounted(struct __test_metadata *const _metadata, + const uint64_t mnts[], size_t num) +{ + ssize_t ret; + uint64_t *list; + + list = malloc((num + 1) * sizeof(list[0])); + ASSERT_NE(list, NULL); + + ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0); + ASSERT_EQ(ret, num); + + verify_mount_ids(_metadata, mnts, list, num); + + free(list); +} + +static void setup_mount_tree(struct __test_metadata *const _metadata, + int log2_num) +{ + int ret, i; + + ret = mount("", "/", NULL, MS_SHARED, NULL); + ASSERT_EQ(ret, 0); + + for (i = 0; i < log2_num; i++) { + ret = mount("/", "/", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + } +} + +TEST_F(fanotify, bind) +{ + int ret; + uint64_t mnts[2] = { self->root_id }; + + ret = mount("/", "/", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + // Cleanup + uint64_t detach_id; + ret = umount("/"); + ASSERT_EQ(ret, 0); + + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); + ASSERT_EQ(detach_id, mnts[1]); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, move) +{ + int ret; + uint64_t mnts[2] = { self->root_id }; + uint64_t move_id; + + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0); + ASSERT_EQ(ret, 0); + + move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH); + ASSERT_EQ(move_id, mnts[1]); + + // Cleanup + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, propagate) +{ + const unsigned int log2_num = 4; + const unsigned int num = (1 << log2_num); + uint64_t mnts[num]; + + setup_mount_tree(_metadata, log2_num); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1); + + mnts[0] = self->root_id; + check_mounted(_metadata, mnts, num); + + // Cleanup + int ret; + uint64_t mnts2[num]; + ret = umount2("/", MNT_DETACH); + ASSERT_EQ(ret, 0); + + ret = mount("", "/", NULL, MS_PRIVATE, NULL); + ASSERT_EQ(ret, 0); + + mnts2[0] = self->root_id; + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1); + verify_mount_ids(_metadata, mnts, mnts2, num); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, fsmount) +{ + int ret, fs, mnt; + uint64_t mnts[2] = { self->root_id }; + + fs = fsopen("tmpfs", 0); + ASSERT_GE(fs, 0); + + ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0); + ASSERT_EQ(ret, 0); + + mnt = fsmount(fs, 0, 0); + ASSERT_GE(mnt, 0); + + close(fs); + + ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH); + ASSERT_EQ(ret, 0); + + close(mnt); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + // Cleanup + uint64_t detach_id; + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); + ASSERT_EQ(detach_id, mnts[1]); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, reparent) +{ + uint64_t mnts[6] = { self->root_id }; + uint64_t dmnts[3]; + uint64_t masks[3]; + unsigned int i; + int ret; + + // Create setup with a[1] -> b[2] propagation + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("", "/a", NULL, MS_SHARED, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("/a", "/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("", "/b", NULL, MS_SLAVE, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); + + check_mounted(_metadata, mnts, 3); + + // Mount on a[3], which is propagated to b[4] + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3); + + check_mounted(_metadata, mnts, 5); + + // Mount on b[5], not propagated + ret = mount("/", "/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + + check_mounted(_metadata, mnts, 6); + + // Umount a[3], which is propagated to b[4], but not b[5] + // This will result in b[5] "falling" on b[2] + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + expect_notify_n(_metadata, self, 3, masks, dmnts); + verify_mount_ids(_metadata, mnts + 3, dmnts, 3); + + for (i = 0; i < 3; i++) { + if (dmnts[i] == mnts[5]) { + ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH); + } else { + ASSERT_EQ(masks[i], FAN_MNT_DETACH); + } + } + + mnts[3] = mnts[5]; + check_mounted(_metadata, mnts, 4); + + // Cleanup + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts); + verify_mount_ids(_metadata, mnts + 1, dmnts, 3); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, rmdir) +{ + uint64_t mnts[3] = { self->root_id }; + int ret; + + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("/", "/a/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); + + check_mounted(_metadata, mnts, 3); + + ret = chdir("/a"); + ASSERT_EQ(ret, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret == 0) { + chdir("/"); + unshare(CLONE_NEWNS); + mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + umount2("/a", MNT_DETACH); + // This triggers a detach in the other namespace + rmdir("/a"); + exit(0); + } + wait(NULL); + + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1); + check_mounted(_metadata, mnts, 1); + + // Cleanup + ret = chdir("/"); + ASSERT_EQ(ret, 0); +} + +TEST_F(fanotify, pivot_root) +{ + uint64_t mnts[3] = { self->root_id }; + uint64_t mnts2[3]; + int ret; + + ret = mount("tmpfs", "/a", "tmpfs", 0, NULL); + ASSERT_EQ(ret, 0); + + mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + + ret = mkdir("/a/new", 0700); + ASSERT_EQ(ret, 0); + + ret = mkdir("/a/old", 0700); + ASSERT_EQ(ret, 0); + + ret = mount("/a", "/a/new", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + check_mounted(_metadata, mnts, 3); + + ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old"); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2); + verify_mount_ids(_metadata, mnts, mnts2, 2); + check_mounted(_metadata, mnts, 3); + + // Cleanup + ret = syscall(SYS_pivot_root, "/old", "/old/a/new"); + ASSERT_EQ(ret, 0); + + ret = umount("/a/new"); + ASSERT_EQ(ret, 0); + + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + check_mounted(_metadata, mnts, 1); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile index 6c661232b3b5..d3ad4a77db9b 100644 --- a/tools/testing/selftests/filesystems/overlayfs/Makefile +++ b/tools/testing/selftests/filesystems/overlayfs/Makefile @@ -4,7 +4,7 @@ CFLAGS += -Wall CFLAGS += $(KHDR_INCLUDES) LDLIBS += -lcap -LOCAL_HDRS += wrappers.h log.h +LOCAL_HDRS += ../wrappers.h log.h TEST_GEN_PROGS := dev_in_maps TEST_GEN_PROGS += set_layers_via_fds diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c index 3b796264223f..31db54b00e64 100644 --- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c +++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c @@ -17,7 +17,7 @@ #include "../../kselftest.h" #include "log.h" -#include "wrappers.h" +#include "../wrappers.h" static long get_file_dev_and_inode(void *addr, struct statx *stx) { diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c index 5074e64e74a8..dc0449fa628f 100644 --- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c +++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c @@ -16,7 +16,7 @@ #include "../../pidfd/pidfd.h" #include "log.h" #include "../utils.h" -#include "wrappers.h" +#include "../wrappers.h" FIXTURE(set_layers_via_fds) { int pidfd; diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile index 14ee91a41650..8e354fe99b44 100644 --- a/tools/testing/selftests/filesystems/statmount/Makefile +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS += -lcap + TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test include ../../lib.mk + +$(OUTPUT)/statmount_test_ns: ../utils.c diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h index a7a5289ddae9..99e5ad082fb1 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount.h +++ b/tools/testing/selftests/filesystems/statmount/statmount.h @@ -7,6 +7,42 @@ #include <linux/mount.h> #include <asm/unistd.h> +#ifndef __NR_statmount + #if defined __alpha__ + #define __NR_statmount 567 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_statmount 4457 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_statmount 6457 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_statmount 5457 + #endif + #else + #define __NR_statmount 457 + #endif +#endif + +#ifndef __NR_listmount + #if defined __alpha__ + #define __NR_listmount 568 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_listmount 4458 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_listmount 6458 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_listmount 5458 + #endif + #else + #define __NR_listmount 458 + #endif +#endif + static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask, struct statmount *buf, size_t bufsize, unsigned int flags) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c index 70cb0c8b21cf..605a3fa16bf7 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c @@ -14,6 +14,7 @@ #include <linux/stat.h> #include "statmount.h" +#include "../utils.h" #include "../../kselftest.h" #define NSID_PASS 0 @@ -78,87 +79,10 @@ static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id) return NSID_PASS; } -static int get_mnt_id(const char *path, uint64_t *mnt_id) -{ - struct statx sx; - int ret; - - ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx); - if (ret == -1) { - ksft_print_msg("retrieving unique mount ID for %s: %s\n", path, - strerror(errno)); - return NSID_ERROR; - } - - if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) { - ksft_print_msg("no unique mount ID available for %s\n", path); - return NSID_ERROR; - } - - *mnt_id = sx.stx_mnt_id; - return NSID_PASS; -} - -static int write_file(const char *path, const char *val) -{ - int fd = open(path, O_WRONLY); - size_t len = strlen(val); - int ret; - - if (fd == -1) { - ksft_print_msg("opening %s for write: %s\n", path, strerror(errno)); - return NSID_ERROR; - } - - ret = write(fd, val, len); - if (ret == -1) { - ksft_print_msg("writing to %s: %s\n", path, strerror(errno)); - return NSID_ERROR; - } - if (ret != len) { - ksft_print_msg("short write to %s\n", path); - return NSID_ERROR; - } - - ret = close(fd); - if (ret == -1) { - ksft_print_msg("closing %s\n", path); - return NSID_ERROR; - } - - return NSID_PASS; -} - static int setup_namespace(void) { - int ret; - char buf[32]; - uid_t uid = getuid(); - gid_t gid = getgid(); - - ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); - if (ret == -1) - ksft_exit_fail_msg("unsharing mountns and userns: %s\n", - strerror(errno)); - - sprintf(buf, "0 %d 1", uid); - ret = write_file("/proc/self/uid_map", buf); - if (ret != NSID_PASS) - return ret; - ret = write_file("/proc/self/setgroups", "deny"); - if (ret != NSID_PASS) - return ret; - sprintf(buf, "0 %d 1", gid); - ret = write_file("/proc/self/gid_map", buf); - if (ret != NSID_PASS) - return ret; - - ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); - if (ret == -1) { - ksft_print_msg("making mount tree private: %s\n", - strerror(errno)); + if (setup_userns() != 0) return NSID_ERROR; - } return NSID_PASS; } @@ -174,9 +98,9 @@ static int _test_statmount_mnt_ns_id(void) if (ret != NSID_PASS) return ret; - ret = get_mnt_id("/", &root_id); - if (ret != NSID_PASS) - return ret; + root_id = get_unique_mnt_id("/"); + if (!root_id) + return NSID_ERROR; ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); if (ret == -1) { diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c index e553c89c5b19..c43a69dffd83 100644 --- a/tools/testing/selftests/filesystems/utils.c +++ b/tools/testing/selftests/filesystems/utils.c @@ -18,7 +18,10 @@ #include <sys/types.h> #include <sys/wait.h> #include <sys/xattr.h> +#include <sys/mount.h> +#include "../kselftest.h" +#include "wrappers.h" #include "utils.h" #define MAX_USERNS_LEVEL 32 @@ -447,6 +450,71 @@ out_close: return fret; } +static int write_file(const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + int ret; + + if (fd == -1) { + ksft_print_msg("opening %s for write: %s\n", path, strerror(errno)); + return -1; + } + + ret = write(fd, val, len); + if (ret == -1) { + ksft_print_msg("writing to %s: %s\n", path, strerror(errno)); + return -1; + } + if (ret != len) { + ksft_print_msg("short write to %s\n", path); + return -1; + } + + ret = close(fd); + if (ret == -1) { + ksft_print_msg("closing %s\n", path); + return -1; + } + + return 0; +} + +int setup_userns(void) +{ + int ret; + char buf[32]; + uid_t uid = getuid(); + gid_t gid = getgid(); + + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); + if (ret) { + ksft_exit_fail_msg("unsharing mountns and userns: %s\n", + strerror(errno)); + return ret; + } + + sprintf(buf, "0 %d 1", uid); + ret = write_file("/proc/self/uid_map", buf); + if (ret) + return ret; + ret = write_file("/proc/self/setgroups", "deny"); + if (ret) + return ret; + sprintf(buf, "0 %d 1", gid); + ret = write_file("/proc/self/gid_map", buf); + if (ret) + return ret; + + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + if (ret) { + ksft_print_msg("making mount tree private: %s\n", strerror(errno)); + return ret; + } + + return 0; +} + /* caps_down - lower all effective caps */ int caps_down(void) { @@ -499,3 +567,23 @@ out: cap_free(caps); return fret; } + +uint64_t get_unique_mnt_id(const char *path) +{ + struct statx sx; + int ret; + + ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx); + if (ret == -1) { + ksft_print_msg("retrieving unique mount ID for %s: %s\n", path, + strerror(errno)); + return 0; + } + + if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) { + ksft_print_msg("no unique mount ID available for %s\n", path); + return 0; + } + + return sx.stx_mnt_id; +} diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h index 7f1df2a3e94c..70f7ccc607f4 100644 --- a/tools/testing/selftests/filesystems/utils.h +++ b/tools/testing/selftests/filesystems/utils.h @@ -27,6 +27,7 @@ extern int caps_down(void); extern int cap_down(cap_value_t down); extern bool switch_ids(uid_t uid, gid_t gid); +extern int setup_userns(void); static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps) { @@ -42,4 +43,6 @@ static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps) return true; } +extern uint64_t get_unique_mnt_id(const char *path); + #endif /* __IDMAP_UTILS_H */ diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/wrappers.h index c38bc48e0cfa..420ae4f908cf 100644 --- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h +++ b/tools/testing/selftests/filesystems/wrappers.h @@ -9,6 +9,10 @@ #include <linux/mount.h> #include <sys/syscall.h> +#ifndef STATX_MNT_ID_UNIQUE +#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ +#endif + static inline int sys_fsopen(const char *fsname, unsigned int flags) { return syscall(__NR_fsopen, fsname, flags); @@ -36,6 +40,28 @@ static inline int sys_mount(const char *src, const char *tgt, const char *fst, #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ #endif +#ifndef MOVE_MOUNT_T_EMPTY_PATH +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#endif + +#ifndef __NR_move_mount + #if defined __alpha__ + #define __NR_move_mount 539 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_move_mount 4429 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_move_mount 6429 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_move_mount 5429 + #endif + #else + #define __NR_move_mount 429 + #endif +#endif + static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, unsigned int flags) @@ -53,7 +79,25 @@ static inline int sys_move_mount(int from_dfd, const char *from_pathname, #endif #ifndef AT_RECURSIVE -#define AT_RECURSIVE 0x8000 +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +#endif + +#ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #else + #define __NR_open_tree 428 + #endif #endif static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index 49d96bb16355..7c12263f8260 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -6,6 +6,6 @@ TEST_PROGS := ftracetest-ktap TEST_FILES := test.d settings EXTRA_CLEAN := $(OUTPUT)/logs/* -TEST_GEN_PROGS = poll +TEST_GEN_FILES := poll include ../lib.mk diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index fbcbdb6963b3..7b24ae89594a 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -1,11 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only +futex_numa_mpol +futex_priv_hash +futex_requeue futex_requeue_pi futex_requeue_pi_mismatched_ops futex_requeue_pi_signal_restart +futex_wait futex_wait_private_mapped_file futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock -futex_wait -futex_requeue futex_waitv diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index f79f9bac7918..8cfb87f7f7c5 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 INCLUDES := -I../include -I../../ $(KHDR_INCLUDES) CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES) -LDLIBS := -lpthread -lrt +LDLIBS := -lpthread -lrt -lnuma LOCAL_HDRS := \ ../include/futextest.h \ @@ -17,7 +17,10 @@ TEST_GEN_PROGS := \ futex_wait_private_mapped_file \ futex_wait \ futex_requeue \ - futex_waitv + futex_priv_hash \ + futex_numa_mpol \ + futex_waitv \ + futex_numa TEST_PROGS := run.sh diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c new file mode 100644 index 000000000000..f29e4d627e79 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_numa.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <pthread.h> +#include <sys/shm.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <stdbool.h> +#include <time.h> +#include <assert.h> +#include "logging.h" +#include "futextest.h" +#include "futex2test.h" + +typedef u_int32_t u32; +typedef int32_t s32; +typedef u_int64_t u64; + +static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE); +static int fnode = FUTEX_NO_NODE; + +/* fairly stupid test-and-set lock with a waiter flag */ + +#define N_LOCK 0x0000001 +#define N_WAITERS 0x0001000 + +struct futex_numa_32 { + union { + u64 full; + struct { + u32 val; + u32 node; + }; + }; +}; + +void futex_numa_32_lock(struct futex_numa_32 *lock) +{ + for (;;) { + struct futex_numa_32 new, old = { + .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED), + }; + + for (;;) { + new = old; + if (old.val == 0) { + /* no waiter, no lock -> first lock, set no-node */ + new.node = fnode; + } + if (old.val & N_LOCK) { + /* contention, set waiter */ + new.val |= N_WAITERS; + } + new.val |= N_LOCK; + + /* nothing changed, ready to block */ + if (old.full == new.full) + break; + + /* + * Use u64 cmpxchg to set the futex value and node in a + * consistent manner. + */ + if (__atomic_compare_exchange_n(&lock->full, + &old.full, new.full, + /* .weak */ false, + __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED)) { + + /* if we just set N_LOCK, we own it */ + if (!(old.val & N_LOCK)) + return; + + /* go block */ + break; + } + } + + futex2_wait(lock, new.val, fflags, NULL, 0); + } +} + +void futex_numa_32_unlock(struct futex_numa_32 *lock) +{ + u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE); + assert((s32)val >= 0); + if (val & N_WAITERS) { + int woken = futex2_wake(lock, 1, fflags); + assert(val == N_WAITERS); + if (!woken) { + __atomic_compare_exchange_n(&lock->val, &val, 0U, + false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); + } + } +} + +static long nanos = 50000; + +struct thread_args { + pthread_t tid; + volatile int * done; + struct futex_numa_32 *lock; + int val; + int *val1, *val2; + int node; +}; + +static void *threadfn(void *_arg) +{ + struct thread_args *args = _arg; + struct timespec ts = { + .tv_nsec = nanos, + }; + int node; + + while (!*args->done) { + + futex_numa_32_lock(args->lock); + args->val++; + + assert(*args->val1 == *args->val2); + (*args->val1)++; + nanosleep(&ts, NULL); + (*args->val2)++; + + node = args->lock->node; + futex_numa_32_unlock(args->lock); + + if (node != args->node) { + args->node = node; + printf("node: %d\n", node); + } + + nanosleep(&ts, NULL); + } + + return NULL; +} + +static void *contendfn(void *_arg) +{ + struct thread_args *args = _arg; + + while (!*args->done) { + /* + * futex2_wait() will take hb-lock, verify *var == val and + * queue/abort. By knowingly setting val 'wrong' this will + * abort and thereby generate hb-lock contention. + */ + futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0); + args->val++; + } + + return NULL; +} + +static volatile int done = 0; +static struct futex_numa_32 lock = { .val = 0, }; +static int val1, val2; + +int main(int argc, char *argv[]) +{ + struct thread_args *tas[512], *cas[512]; + int c, t, threads = 2, contenders = 0; + int sleeps = 10; + int total = 0; + + while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) { + switch (c) { + case 'c': + contenders = atoi(optarg); + break; + case 't': + threads = atoi(optarg); + break; + case 's': + sleeps = atoi(optarg); + break; + case 'n': + nanos = atoi(optarg); + break; + case 'N': + fflags |= FUTEX2_NUMA; + if (optarg) + fnode = atoi(optarg); + break; + default: + exit(1); + break; + } + } + + for (t = 0; t < contenders; t++) { + struct thread_args *args = calloc(1, sizeof(*args)); + if (!args) { + perror("thread_args"); + exit(-1); + } + + args->done = &done; + args->lock = &lock; + args->val1 = &val1; + args->val2 = &val2; + args->node = -1; + + if (pthread_create(&args->tid, NULL, contendfn, args)) { + perror("pthread_create"); + exit(-1); + } + + cas[t] = args; + } + + for (t = 0; t < threads; t++) { + struct thread_args *args = calloc(1, sizeof(*args)); + if (!args) { + perror("thread_args"); + exit(-1); + } + + args->done = &done; + args->lock = &lock; + args->val1 = &val1; + args->val2 = &val2; + args->node = -1; + + if (pthread_create(&args->tid, NULL, threadfn, args)) { + perror("pthread_create"); + exit(-1); + } + + tas[t] = args; + } + + sleep(sleeps); + + done = true; + + for (t = 0; t < threads; t++) { + struct thread_args *args = tas[t]; + + pthread_join(args->tid, NULL); + total += args->val; +// printf("tval: %d\n", args->val); + } + printf("total: %d\n", total); + + if (contenders) { + total = 0; + for (t = 0; t < contenders; t++) { + struct thread_args *args = cas[t]; + + pthread_join(args->tid, NULL); + total += args->val; + // printf("tval: %d\n", args->val); + } + printf("contenders: %d\n", total); + } + + return 0; +} + diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c new file mode 100644 index 000000000000..20a9d3ecf743 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de> + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <numa.h> +#include <numaif.h> + +#include <linux/futex.h> +#include <sys/mman.h> + +#include "logging.h" +#include "futextest.h" +#include "futex2test.h" + +#define MAX_THREADS 64 + +static pthread_barrier_t barrier_main; +static pthread_t threads[MAX_THREADS]; + +struct thread_args { + void *futex_ptr; + unsigned int flags; + int result; +}; + +static struct thread_args thread_args[MAX_THREADS]; + +#ifndef FUTEX_NO_NODE +#define FUTEX_NO_NODE (-1) +#endif + +#ifndef FUTEX2_MPOL +#define FUTEX2_MPOL 0x08 +#endif + +static void *thread_lock_fn(void *arg) +{ + struct thread_args *args = arg; + int ret; + + pthread_barrier_wait(&barrier_main); + ret = futex2_wait(args->futex_ptr, 0, args->flags, NULL, 0); + args->result = ret; + return NULL; +} + +static void create_max_threads(void *futex_ptr) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + thread_args[i].futex_ptr = futex_ptr; + thread_args[i].flags = FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA; + thread_args[i].result = 0; + ret = pthread_create(&threads[i], NULL, thread_lock_fn, &thread_args[i]); + if (ret) + ksft_exit_fail_msg("pthread_create failed\n"); + } +} + +static void join_max_threads(void) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + ret = pthread_join(threads[i], NULL); + if (ret) + ksft_exit_fail_msg("pthread_join failed for thread %d\n", i); + } +} + +static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flags) +{ + int to_wake, ret, i, need_exit = 0; + + pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1); + create_max_threads(futex_ptr); + pthread_barrier_wait(&barrier_main); + to_wake = MAX_THREADS; + + do { + ret = futex2_wake(futex_ptr, to_wake, futex_flags); + if (must_fail) { + if (ret < 0) + break; + ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n", + to_wake, futex_flags); + } + if (ret < 0) { + ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n", + to_wake, futex_flags); + } + if (!ret) + usleep(50); + to_wake -= ret; + + } while (to_wake); + join_max_threads(); + + for (i = 0; i < MAX_THREADS; i++) { + if (must_fail && thread_args[i].result != -1) { + ksft_print_msg("Thread %d should fail but succeeded (%d)\n", + i, thread_args[i].result); + need_exit = 1; + } + if (!must_fail && thread_args[i].result != 0) { + ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result); + need_exit = 1; + } + } + if (need_exit) + ksft_exit_fail_msg("Aborting due to earlier errors.\n"); +} + +static void test_futex(void *futex_ptr, int must_fail) +{ + __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA); +} + +static void test_futex_mpol(void *futex_ptr, int must_fail) +{ + __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); +} + +static void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int main(int argc, char *argv[]) +{ + struct futex32_numa *futex_numa; + int mem_size, i; + void *futex_ptr; + char c; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + ksft_print_header(); + ksft_set_plan(1); + + mem_size = sysconf(_SC_PAGE_SIZE); + futex_ptr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + if (futex_ptr == MAP_FAILED) + ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size); + + futex_numa = futex_ptr; + + ksft_print_msg("Regular test\n"); + futex_numa->futex = 0; + futex_numa->numa = FUTEX_NO_NODE; + test_futex(futex_ptr, 0); + + if (futex_numa->numa == FUTEX_NO_NODE) + ksft_exit_fail_msg("NUMA node is left uninitialized\n"); + + ksft_print_msg("Memory too small\n"); + test_futex(futex_ptr + mem_size - 4, 1); + + ksft_print_msg("Memory out of range\n"); + test_futex(futex_ptr + mem_size, 1); + + futex_numa->numa = FUTEX_NO_NODE; + mprotect(futex_ptr, mem_size, PROT_READ); + ksft_print_msg("Memory, RO\n"); + test_futex(futex_ptr, 1); + + mprotect(futex_ptr, mem_size, PROT_NONE); + ksft_print_msg("Memory, no access\n"); + test_futex(futex_ptr, 1); + + mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE); + ksft_print_msg("Memory back to RW\n"); + test_futex(futex_ptr, 0); + + /* MPOL test. Does not work as expected */ + for (i = 0; i < 4; i++) { + unsigned long nodemask; + int ret; + + nodemask = 1 << i; + ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask, + sizeof(nodemask) * 8, 0); + if (ret == 0) { + ksft_print_msg("Node %d test\n", i); + futex_numa->futex = 0; + futex_numa->numa = FUTEX_NO_NODE; + + ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); + if (ret < 0) + ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n"); + if (0) + test_futex_mpol(futex_numa, 0); + if (futex_numa->numa != i) { + ksft_test_result_fail("Returned NUMA node is %d expected %d\n", + futex_numa->numa, i); + } + } + } + ksft_test_result_pass("NUMA MPOL tests passed\n"); + ksft_finished(); + return 0; +} diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c new file mode 100644 index 000000000000..2dca18fefedc --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de> + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <linux/prctl.h> +#include <sys/prctl.h> + +#include "logging.h" + +#define MAX_THREADS 64 + +static pthread_barrier_t barrier_main; +static pthread_mutex_t global_lock; +static pthread_t threads[MAX_THREADS]; +static int counter; + +#ifndef PR_FUTEX_HASH +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 +#endif + +static int futex_hash_slots_set(unsigned int slots, int flags) +{ + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags); +} + +static int futex_hash_slots_get(void) +{ + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); +} + +static int futex_hash_immutable_get(void) +{ + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE); +} + +static void futex_hash_slots_set_verify(int slots) +{ + int ret; + + ret = futex_hash_slots_set(slots, 0); + if (ret != 0) { + ksft_test_result_fail("Failed to set slots to %d: %m\n", slots); + ksft_finished(); + } + ret = futex_hash_slots_get(); + if (ret != slots) { + ksft_test_result_fail("Set %d slots but PR_FUTEX_HASH_GET_SLOTS returns: %d, %m\n", + slots, ret); + ksft_finished(); + } + ksft_test_result_pass("SET and GET slots %d passed\n", slots); +} + +static void futex_hash_slots_set_must_fail(int slots, int flags) +{ + int ret; + + ret = futex_hash_slots_set(slots, flags); + ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n", + slots, flags); +} + +static void *thread_return_fn(void *arg) +{ + return NULL; +} + +static void *thread_lock_fn(void *arg) +{ + pthread_barrier_wait(&barrier_main); + + pthread_mutex_lock(&global_lock); + counter++; + usleep(20); + pthread_mutex_unlock(&global_lock); + return NULL; +} + +static void create_max_threads(void *(*thread_fn)(void *)) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + ret = pthread_create(&threads[i], NULL, thread_fn, NULL); + if (ret) + ksft_exit_fail_msg("pthread_create failed: %m\n"); + } +} + +static void join_max_threads(void) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + ret = pthread_join(threads[i], NULL); + if (ret) + ksft_exit_fail_msg("pthread_join failed for thread %d\n", i); + } +} + +static void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -g Test global hash instead intead local immutable \n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n"; +static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n"; + +int main(int argc, char *argv[]) +{ + int futex_slots1, futex_slotsn, online_cpus; + pthread_mutexattr_t mutex_attr_pi; + int use_global_hash = 0; + int ret; + char c; + + while ((c = getopt(argc, argv, "cghv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'g': + use_global_hash = 1; + break; + case 'h': + usage(basename(argv[0])); + exit(0); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + ksft_print_header(); + ksft_set_plan(22); + + ret = pthread_mutexattr_init(&mutex_attr_pi); + ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT); + ret |= pthread_mutex_init(&global_lock, &mutex_attr_pi); + if (ret != 0) { + ksft_exit_fail_msg("Failed to initialize pthread mutex.\n"); + } + /* First thread, expect to be 0, not yet initialized */ + ret = futex_hash_slots_get(); + if (ret != 0) + ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret); + + ret = futex_hash_immutable_get(); + if (ret != 0) + ksft_exit_fail_msg("futex_hash_immutable_get() failed: %d, %m\n", ret); + + ksft_test_result_pass("Basic get slots and immutable status.\n"); + ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL); + if (ret != 0) + ksft_exit_fail_msg("pthread_create() failed: %d, %m\n", ret); + + ret = pthread_join(threads[0], NULL); + if (ret != 0) + ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret); + + /* First thread, has to initialiaze private hash */ + futex_slots1 = futex_hash_slots_get(); + if (futex_slots1 <= 0) { + ksft_print_msg("Current hash buckets: %d\n", futex_slots1); + ksft_exit_fail_msg(test_msg_auto_create); + } + + ksft_test_result_pass(test_msg_auto_create); + + online_cpus = sysconf(_SC_NPROCESSORS_ONLN); + ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1); + if (ret != 0) + ksft_exit_fail_msg("pthread_barrier_init failed: %m.\n"); + + ret = pthread_mutex_lock(&global_lock); + if (ret != 0) + ksft_exit_fail_msg("pthread_mutex_lock failed: %m.\n"); + + counter = 0; + create_max_threads(thread_lock_fn); + pthread_barrier_wait(&barrier_main); + + /* + * The current default size of hash buckets is 16. The auto increase + * works only if more than 16 CPUs are available. + */ + ksft_print_msg("Online CPUs: %d\n", online_cpus); + if (online_cpus > 16) { + futex_slotsn = futex_hash_slots_get(); + if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) { + ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n", + futex_slots1, futex_slotsn); + ksft_exit_fail_msg(test_msg_auto_inc); + } + ksft_test_result_pass(test_msg_auto_inc); + } else { + ksft_test_result_skip(test_msg_auto_inc); + } + ret = pthread_mutex_unlock(&global_lock); + + /* Once the user changes it, it has to be what is set */ + futex_hash_slots_set_verify(2); + futex_hash_slots_set_verify(4); + futex_hash_slots_set_verify(8); + futex_hash_slots_set_verify(32); + futex_hash_slots_set_verify(16); + + ret = futex_hash_slots_set(15, 0); + ksft_test_result(ret < 0, "Use 15 slots\n"); + + futex_hash_slots_set_verify(2); + join_max_threads(); + ksft_test_result(counter == MAX_THREADS, "Created of waited for %d of %d threads\n", + counter, MAX_THREADS); + counter = 0; + /* Once the user set something, auto reisze must be disabled */ + ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); + + create_max_threads(thread_lock_fn); + join_max_threads(); + + ret = futex_hash_slots_get(); + ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n", + ret); + + futex_hash_slots_set_must_fail(1 << 29, 0); + + /* + * Once the private hash has been made immutable or global hash has been requested, + * then this requested can not be undone. + */ + if (use_global_hash) { + ret = futex_hash_slots_set(0, 0); + ksft_test_result(ret == 0, "Global hash request\n"); + } else { + ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE); + ksft_test_result(ret == 0, "Immutable resize to 4\n"); + } + if (ret != 0) + goto out; + + futex_hash_slots_set_must_fail(4, 0); + futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(8, 0); + futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE); + + ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); + if (ret != 0) { + ksft_exit_fail_msg("pthread_barrier_init failed: %m\n"); + return 1; + } + create_max_threads(thread_lock_fn); + join_max_threads(); + + ret = futex_hash_slots_get(); + if (use_global_hash) { + ksft_test_result(ret == 0, "Continue to use global hash\n"); + } else { + ksft_test_result(ret == 4, "Continue to use the 4 hash buckets\n"); + } + + ret = futex_hash_immutable_get(); + ksft_test_result(ret == 1, "Hash reports to be immutable\n"); + +out: + ksft_finished(); + return 0; +} diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 5ccd599da6c3..81739849f299 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -82,3 +82,10 @@ echo echo ./futex_waitv $COLOR + +echo +./futex_priv_hash $COLOR +./futex_priv_hash -g $COLOR + +echo +./futex_numa_mpol $COLOR diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index 9d305520e849..ea79662405bc 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -8,6 +8,53 @@ #define u64_to_ptr(x) ((void *)(uintptr_t)(x)) +#ifndef __NR_futex_waitv +#define __NR_futex_waitv 449 +struct futex_waitv { + __u64 val; + __u64 uaddr; + __u32 flags; + __u32 __reserved; +}; +#endif + +#ifndef __NR_futex_wake +#define __NR_futex_wake 454 +#endif + +#ifndef __NR_futex_wait +#define __NR_futex_wait 455 +#endif + +#ifndef FUTEX2_SIZE_U32 +#define FUTEX2_SIZE_U32 0x02 +#endif + +#ifndef FUTEX2_NUMA +#define FUTEX2_NUMA 0x04 +#endif + +#ifndef FUTEX2_MPOL +#define FUTEX2_MPOL 0x08 +#endif + +#ifndef FUTEX2_PRIVATE +#define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG +#endif + +#ifndef FUTEX2_NO_NODE +#define FUTEX_NO_NODE (-1) +#endif + +#ifndef FUTEX_32 +#define FUTEX_32 FUTEX2_SIZE_U32 +#endif + +struct futex32_numa { + futex_t futex; + futex_t numa; +}; + /** * futex_waitv - Wait at multiple futexes, wake on any * @waiters: Array of waiters @@ -20,3 +67,26 @@ static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned lon { return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); } + +/* + * futex_wait() - block on uaddr with optional timeout + * @val: Expected value + * @flags: FUTEX2 flags + * @timeout: Relative timeout + * @clockid: Clock id for the timeout + */ +static inline int futex2_wait(void *uaddr, long val, unsigned int flags, + struct timespec *timeout, clockid_t clockid) +{ + return syscall(__NR_futex_wait, uaddr, val, ~0U, flags, timeout, clockid); +} + +/* + * futex2_wake() - Wake a number of futexes + * @nr: Number of threads to wake at most + * @flags: FUTEX2 flags + */ +static inline int futex2_wake(void *uaddr, int nr, unsigned int flags) +{ + return syscall(__NR_futex_wake, uaddr, ~0U, nr, flags); +} diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index e0884390447d..7bfe315f7001 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := gpio-mockup.sh gpio-sim.sh +TEST_PROGS := gpio-mockup.sh gpio-sim.sh gpio-aggregator.sh TEST_FILES := gpio-mockup-sysfs.sh TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name CFLAGS += -O2 -g -Wall $(KHDR_INCLUDES) diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config index 409a8532facc..1287abeaac7e 100644 --- a/tools/testing/selftests/gpio/config +++ b/tools/testing/selftests/gpio/config @@ -2,3 +2,4 @@ CONFIG_GPIOLIB=y CONFIG_GPIO_CDEV=y CONFIG_GPIO_MOCKUP=m CONFIG_GPIO_SIM=m +CONFIG_GPIO_AGGREGATOR=m diff --git a/tools/testing/selftests/gpio/gpio-aggregator.sh b/tools/testing/selftests/gpio/gpio-aggregator.sh new file mode 100755 index 000000000000..9b6f80ad9f8a --- /dev/null +++ b/tools/testing/selftests/gpio/gpio-aggregator.sh @@ -0,0 +1,727 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 Bartosz Golaszewski <brgl@bgdev.pl> +# Copyright (C) 2025 Koichiro Den <koichiro.den@canonical.com> + +BASE_DIR=$(dirname "$0") +CONFIGFS_SIM_DIR="/sys/kernel/config/gpio-sim" +CONFIGFS_AGG_DIR="/sys/kernel/config/gpio-aggregator" +SYSFS_AGG_DIR="/sys/bus/platform/drivers/gpio-aggregator" +MODULE="gpio-aggregator" + +fail() { + echo "$*" >&2 + echo "GPIO $MODULE test FAIL" + exit 1 +} + +skip() { + echo "$*" >&2 + echo "GPIO $MODULE test SKIP" + exit 4 +} + +# gpio-sim +sim_enable_chip() { + local CHIP=$1 + + echo 1 > "$CONFIGFS_SIM_DIR/$CHIP/live" || fail "Unable to enable the chip" +} + +sim_disable_chip() { + local CHIP=$1 + + echo 0 > "$CONFIGFS_SIM_DIR/$CHIP/live" || fail "Unable to disable the chip" +} + +sim_configfs_cleanup() { + local NOCHECK=${1:-0} + + for CHIP_DIR in "$CONFIGFS_SIM_DIR"/*; do + [ -d "$CHIP_DIR" ] || continue + echo 0 > "$CHIP_DIR/live" + find "$CHIP_DIR" -depth -type d -exec rmdir {} \; + done + [ "$NOCHECK" -eq 1 ] && return; + remaining=$(find "$CONFIGFS_SIM_DIR" -mindepth 1 -type d 2> /dev/null) + if [ -n "$remaining" ]; then + fail "Directories remain in $CONFIGFS_SIM_DIR: $remaining" + fi +} + +sim_get_chip_label() { + local CHIP=$1 + local BANK=$2 + local CHIP_NAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name" 2> /dev/null) || \ + fail "Unable to read the chip name from configfs" + + $BASE_DIR/gpio-chip-info "/dev/$CHIP_NAME" label || \ + fail "Unable to read the chip label from the character device" +} + +# gpio-aggregator +agg_create_chip() { + local CHIP=$1 + + mkdir "$CONFIGFS_AGG_DIR/$CHIP" +} + +agg_remove_chip() { + local CHIP=$1 + + find "$CONFIGFS_AGG_DIR/$CHIP/" -depth -type d -exec rmdir {} \; || \ + fail "Unable to remove $CONFIGFS_AGG_DIR/$CHIP" +} + +agg_create_line() { + local CHIP=$1 + local LINE=$2 + + mkdir "$CONFIGFS_AGG_DIR/$CHIP/$LINE" +} + +agg_remove_line() { + local CHIP=$1 + local LINE=$2 + + rmdir "$CONFIGFS_AGG_DIR/$CHIP/$LINE" +} + +agg_set_key() { + local CHIP=$1 + local LINE=$2 + local KEY=$3 + + echo "$KEY" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/key" || fail "Unable to set the lookup key" +} + +agg_set_offset() { + local CHIP=$1 + local LINE=$2 + local OFFSET=$3 + + echo "$OFFSET" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/offset" || \ + fail "Unable to set the lookup offset" +} + +agg_set_line_name() { + local CHIP=$1 + local LINE=$2 + local NAME=$3 + + echo "$NAME" > "$CONFIGFS_AGG_DIR/$CHIP/$LINE/name" || fail "Unable to set the line name" +} + +agg_enable_chip() { + local CHIP=$1 + + echo 1 > "$CONFIGFS_AGG_DIR/$CHIP/live" || fail "Unable to enable the chip" +} + +agg_disable_chip() { + local CHIP=$1 + + echo 0 > "$CONFIGFS_AGG_DIR/$CHIP/live" || fail "Unable to disable the chip" +} + +agg_configfs_cleanup() { + local NOCHECK=${1:-0} + + for CHIP_DIR in "$CONFIGFS_AGG_DIR"/*; do + [ -d "$CHIP_DIR" ] || continue + echo 0 > "$CHIP_DIR/live" 2> /dev/null + find "$CHIP_DIR" -depth -type d -exec rmdir {} \; + done + [ "$NOCHECK" -eq 1 ] && return; + remaining=$(find "$CONFIGFS_AGG_DIR" -mindepth 1 -type d 2> /dev/null) + if [ -n "$remaining" ]; then + fail "Directories remain in $CONFIGFS_AGG_DIR: $remaining" + fi +} + +agg_configfs_dev_name() { + local CHIP=$1 + + cat "$CONFIGFS_AGG_DIR/$CHIP/dev_name" 2> /dev/null || \ + fail "Unable to read the device name from configfs" +} + +agg_configfs_chip_name() { + local CHIP=$1 + local DEV_NAME=$(agg_configfs_dev_name "$CHIP") + local CHIP_LIST=$(find "/sys/devices/platform/$DEV_NAME" \ + -maxdepth 1 -type d -name "gpiochip[0-9]*" 2> /dev/null) + local CHIP_COUNT=$(echo "$CHIP_LIST" | wc -l) + + if [ -z "$CHIP_LIST" ]; then + fail "No gpiochip in /sys/devices/platform/$DEV_NAME/" + elif [ "$CHIP_COUNT" -ne 1 ]; then + fail "Multiple gpiochips unexpectedly found: $CHIP_LIST" + fi + basename "$CHIP_LIST" +} + +agg_get_chip_num_lines() { + local CHIP=$1 + local N_DIR=$(ls -d $CONFIGFS_AGG_DIR/$CHIP/line[0-9]* 2> /dev/null | wc -l) + local N_LINES + + if [ "$(cat $CONFIGFS_AGG_DIR/$CHIP/live)" = 0 ]; then + echo "$N_DIR" + else + N_LINES=$( + $BASE_DIR/gpio-chip-info \ + "/dev/$(agg_configfs_chip_name "$CHIP")" num-lines + ) || fail "Unable to read the number of lines from the character device" + if [ $N_DIR != $N_LINES ]; then + fail "Discrepancy between two sources for the number of lines" + fi + echo "$N_LINES" + fi +} + +agg_get_chip_label() { + local CHIP=$1 + + $BASE_DIR/gpio-chip-info "/dev/$(agg_configfs_chip_name "$CHIP")" label || \ + fail "Unable to read the chip label from the character device" +} + +agg_get_line_name() { + local CHIP=$1 + local OFFSET=$2 + local NAME_CONFIGFS=$(cat "$CONFIGFS_AGG_DIR/$CHIP/line${OFFSET}/name") + local NAME_CDEV + + if [ "$(cat "$CONFIGFS_AGG_DIR/$CHIP/live")" = 0 ]; then + echo "$NAME_CONFIGFS" + else + NAME_CDEV=$( + $BASE_DIR/gpio-line-name \ + "/dev/$(agg_configfs_chip_name "$CHIP")" "$OFFSET" + ) || fail "Unable to read the line name from the character device" + if [ "$NAME_CONFIGFS" != "$NAME_CDEV" ]; then + fail "Discrepancy between two sources for the name of line" + fi + echo "$NAME_CDEV" + fi +} + + +# Load the modules. This will pull in configfs if needed too. +modprobe gpio-sim || skip "unable to load the gpio-sim module" +modprobe gpio-aggregator || skip "unable to load the gpio-aggregator module" + +# Make sure configfs is mounted at /sys/kernel/config. Wait a bit if needed. +for IDX in $(seq 5); do + if [ "$IDX" -eq "5" ]; then + skip "configfs not mounted at /sys/kernel/config" + fi + + mountpoint -q /sys/kernel/config && break + sleep 0.1 +done + +# If the module was already loaded: remove all previous chips +agg_configfs_cleanup +sim_configfs_cleanup + +trap "exit 1" SIGTERM SIGINT +trap "agg_configfs_cleanup 1; sim_configfs_cleanup 1" EXIT + +# Use gpio-sim chips as the test backend +for CHIP in $(seq -f "chip%g" 0 1); do + mkdir $CONFIGFS_SIM_DIR/$CHIP + for BANK in $(seq -f "bank%g" 0 1); do + mkdir -p "$CONFIGFS_SIM_DIR/$CHIP/$BANK" + echo "${CHIP}_${BANK}" > "$CONFIGFS_SIM_DIR/$CHIP/$BANK/label" || \ + fail "unable to set the chip label" + echo 16 > "$CONFIGFS_SIM_DIR/$CHIP/$BANK/num_lines" || \ + fail "unable to set the number of lines" + for IDX in $(seq 0 15); do + LINE_NAME="${CHIP}${BANK}_${IDX}" + LINE_DIR="$CONFIGFS_SIM_DIR/$CHIP/$BANK/line$IDX" + mkdir -p $LINE_DIR + echo "$LINE_NAME" > "$LINE_DIR/name" || fail "unable to set the line name" + done + done + sim_enable_chip "$CHIP" +done + +echo "1. GPIO aggregator creation/deletion" + +echo "1.1. Creation/deletion via configfs" + +echo "1.1.1. Minimum creation/deletion" +agg_create_chip agg0 +agg_create_line agg0 line0 +agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)" +agg_set_offset agg0 line0 5 +agg_set_line_name agg0 line0 test0 +agg_enable_chip agg0 +test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 1 || fail "chip unexpectedly dead" +test "$(agg_get_chip_label agg0)" = "$(agg_configfs_dev_name agg0)" || \ + fail "label is inconsistent" +test "$(agg_get_chip_num_lines agg0)" = "1" || fail "number of lines is not 1" +test "$(agg_get_line_name agg0 0)" = "test0" || fail "line name is unset" +agg_disable_chip agg0 +agg_remove_line agg0 line0 +agg_remove_chip agg0 + +echo "1.1.2. Complex creation/deletion" +agg_create_chip agg0 +agg_create_line agg0 line0 +agg_create_line agg0 line1 +agg_create_line agg0 line2 +agg_create_line agg0 line3 +agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)" +agg_set_key agg0 line1 "$(sim_get_chip_label chip0 bank1)" +agg_set_key agg0 line2 "$(sim_get_chip_label chip1 bank0)" +agg_set_key agg0 line3 "$(sim_get_chip_label chip1 bank1)" +agg_set_offset agg0 line0 1 +agg_set_offset agg0 line1 3 +agg_set_offset agg0 line2 5 +agg_set_offset agg0 line3 7 +agg_set_line_name agg0 line0 test0 +agg_set_line_name agg0 line1 test1 +agg_set_line_name agg0 line2 test2 +agg_set_line_name agg0 line3 test3 +agg_enable_chip agg0 +test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 1 || fail "chip unexpectedly dead" +test "$(agg_get_chip_label agg0)" = "$(agg_configfs_dev_name agg0)" || \ + fail "label is inconsistent" +test "$(agg_get_chip_num_lines agg0)" = "4" || fail "number of lines is not 1" +test "$(agg_get_line_name agg0 0)" = "test0" || fail "line name is unset" +test "$(agg_get_line_name agg0 1)" = "test1" || fail "line name is unset" +test "$(agg_get_line_name agg0 2)" = "test2" || fail "line name is unset" +test "$(agg_get_line_name agg0 3)" = "test3" || fail "line name is unset" +agg_disable_chip agg0 +agg_remove_line agg0 line0 +agg_remove_line agg0 line1 +agg_remove_line agg0 line2 +agg_remove_line agg0 line3 +agg_remove_chip agg0 + +echo "1.1.3. Can't instantiate a chip without any line" +agg_create_chip agg0 +echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled" +test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive" +agg_remove_chip agg0 + +echo "1.1.4. Can't instantiate a chip with invalid configuration" +agg_create_chip agg0 +agg_create_line agg0 line0 +agg_set_key agg0 line0 "chipX_bankX" +agg_set_offset agg0 line0 99 +agg_set_line_name agg0 line0 test0 +echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled" +test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive" +agg_remove_line agg0 line0 +agg_remove_chip agg0 + +echo "1.1.5. Can't instantiate a chip asynchronously via deferred probe" +agg_create_chip agg0 +agg_create_line agg0 line0 +agg_set_key agg0 line0 "chip0_bank0" +agg_set_offset agg0 line0 5 +agg_set_line_name agg0 line0 test0 +sim_disable_chip chip0 +echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled" +test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || fail "chip unexpectedly alive" +sim_enable_chip chip0 +sleep 1 +test "$(cat "$CONFIGFS_AGG_DIR/agg0/live")" = 0 || \ + fail "chip unexpectedly transitioned to 'live' state" +agg_remove_line agg0 line0 +agg_remove_chip agg0 + +echo "1.1.6. Can't instantiate a chip with _sysfs prefix" +mkdir "$CONFIGFS_AGG_DIR/_sysfs" 2> /dev/null && fail "chip _sysfs unexpectedly created" +mkdir "$CONFIGFS_AGG_DIR/_sysfs.foo" 2> /dev/null && fail "chip _sysfs.foo unexpectedly created" + +echo "1.2. Creation/deletion via sysfs" + +echo "1.2.1. Minimum creation/deletion" +echo "chip0_bank0 0" > "$SYSFS_AGG_DIR/new_device" +CHIPNAME=$(agg_configfs_chip_name _sysfs.0) +test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly dead" +test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \ + fail "label is inconsistent" +test "$(agg_get_chip_num_lines _sysfs.0)" = "1" || fail "number of lines is not 1" +test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name is unset" +echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device" +test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains" +test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains" + +echo "1.2.2. Complex creation/deletion" +echo "chip0bank0_0 chip1_bank1 10-11" > "$SYSFS_AGG_DIR/new_device" +CHIPNAME=$(agg_configfs_chip_name _sysfs.0) +test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly dead" +test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \ + fail "label is inconsistent" +test "$(agg_get_chip_num_lines _sysfs.0)" = "3" || fail "number of lines is not 3" +test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name is unset" +test "$(agg_get_line_name _sysfs.0 1)" = "" || fail "line name is unset" +test "$(agg_get_line_name _sysfs.0 2)" = "" || fail "line name is unset" +echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device" +test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains" +test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains" + +echo "1.2.3. Asynchronous creation with deferred probe" +sim_disable_chip chip0 +echo 'chip0_bank0 0' > $SYSFS_AGG_DIR/new_device +sleep 1 +test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 0 || fail "chip unexpectedly alive" +sim_enable_chip chip0 +sleep 1 +CHIPNAME=$(agg_configfs_chip_name _sysfs.0) +test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 1 || fail "chip unexpectedly remains dead" +test "$(agg_get_chip_label _sysfs.0)" = "$(agg_configfs_dev_name _sysfs.0)" || \ + fail "label is inconsistent" +test "$(agg_get_chip_num_lines _sysfs.0)" = "1" || fail "number of lines is not 1" +test "$(agg_get_line_name _sysfs.0 0)" = "" || fail "line name unexpectedly set" +echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device" +test -d $CONFIGFS_AGG_DIR/_sysfs.0 && fail "_sysfs.0 unexpectedly remains" +test -d /dev/${CHIPNAME} && fail "/dev/${CHIPNAME} unexpectedly remains" + +echo "1.2.4. Can't instantiate a chip with invalid configuration" +echo "xyz 0" > "$SYSFS_AGG_DIR/new_device" +test "$(cat $CONFIGFS_AGG_DIR/_sysfs.0/live)" = 0 || fail "chip unexpectedly alive" +echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device" + +echo "2. GPIO aggregator configuration" + +echo "2.1. Configuring aggregators instantiated via configfs" +setup_2_1() { + agg_create_chip agg0 + agg_create_line agg0 line0 + agg_create_line agg0 line1 + agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank0)" + agg_set_key agg0 line1 "$(sim_get_chip_label chip1 bank0)" + agg_set_offset agg0 line0 1 + agg_set_offset agg0 line1 3 + agg_set_line_name agg0 line0 test0 + agg_set_line_name agg0 line1 test1 + agg_enable_chip agg0 +} +teardown_2_1() { + agg_configfs_cleanup +} + +echo "2.1.1. While offline" + +echo "2.1.1.1. Line can be added/removed" +setup_2_1 +agg_disable_chip agg0 +agg_create_line agg0 line2 +agg_set_key agg0 line2 "$(sim_get_chip_label chip0 bank1)" +agg_set_offset agg0 line2 5 +agg_enable_chip agg0 +test "$(agg_get_chip_num_lines agg0)" = "3" || fail "number of lines is not 1" +teardown_2_1 + +echo "2.1.1.2. Line key can be modified" +setup_2_1 +agg_disable_chip agg0 +agg_set_key agg0 line0 "$(sim_get_chip_label chip0 bank1)" +agg_set_key agg0 line1 "$(sim_get_chip_label chip1 bank1)" +agg_enable_chip agg0 +teardown_2_1 + +echo "2.1.1.3. Line name can be modified" +setup_2_1 +agg_disable_chip agg0 +agg_set_line_name agg0 line0 new0 +agg_set_line_name agg0 line1 new1 +agg_enable_chip agg0 +test "$(agg_get_line_name agg0 0)" = "new0" || fail "line name is unset" +test "$(agg_get_line_name agg0 1)" = "new1" || fail "line name is unset" +teardown_2_1 + +echo "2.1.1.4. Line offset can be modified" +setup_2_1 +agg_disable_chip agg0 +agg_set_offset agg0 line0 5 +agg_set_offset agg0 line1 7 +agg_enable_chip agg0 +teardown_2_1 + +echo "2.1.1.5. Can re-enable a chip after valid reconfiguration" +setup_2_1 +agg_disable_chip agg0 +agg_set_key agg0 line0 "$(sim_get_chip_label chip1 bank1)" +agg_set_offset agg0 line0 15 +agg_set_key agg0 line1 "$(sim_get_chip_label chip0 bank1)" +agg_set_offset agg0 line0 14 +agg_create_line agg0 line2 +agg_set_key agg0 line2 "$(sim_get_chip_label chip0 bank1)" +agg_set_offset agg0 line2 13 +agg_enable_chip agg0 +test "$(agg_get_chip_num_lines agg0)" = "3" || fail "number of lines is not 1" +teardown_2_1 + +echo "2.1.1.7. Can't re-enable a chip with invalid reconfiguration" +setup_2_1 +agg_disable_chip agg0 +agg_set_key agg0 line0 invalidkey +echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled" +teardown_2_1 +setup_2_1 +agg_disable_chip agg0 +agg_set_offset agg0 line0 99 +echo 1 > "$CONFIGFS_AGG_DIR/agg0/live" 2> /dev/null && fail "chip unexpectedly enabled" +teardown_2_1 + +echo "2.1.2. While online" + +echo "2.1.2.1. Can't add/remove line" +setup_2_1 +mkdir "$CONFIGFS_AGG_DIR/agg0/line2" 2> /dev/null && fail "line unexpectedly added" +rmdir "$CONFIGFS_AGG_DIR/agg0/line1" 2> /dev/null && fail "line unexpectedly removed" +teardown_2_1 + +echo "2.1.2.2. Can't modify line key" +setup_2_1 +echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/agg0/line0/key" 2> /dev/null && \ + fail "lookup key unexpectedly updated" +teardown_2_1 + +echo "2.1.2.3. Can't modify line name" +setup_2_1 +echo "new0" > "$CONFIGFS_AGG_DIR/agg0/line0/name" 2> /dev/null && \ + fail "name unexpectedly updated" +teardown_2_1 + +echo "2.1.2.4. Can't modify line offset" +setup_2_1 +echo "5" > "$CONFIGFS_AGG_DIR/agg0/line0/offset" 2> /dev/null && \ + fail "offset unexpectedly updated" +teardown_2_1 + +echo "2.2. Configuring aggregators instantiated via sysfs" +setup_2_2() { + echo "chip0_bank0 1 chip1_bank0 3" > "$SYSFS_AGG_DIR/new_device" +} +teardown_2_2() { + echo "$(agg_configfs_dev_name _sysfs.0)" > "$SYSFS_AGG_DIR/delete_device" +} + +echo "2.2.1. While online" + +echo "2.2.1.1. Can toggle live" +setup_2_2 +agg_disable_chip _sysfs.0 +agg_enable_chip _sysfs.0 +teardown_2_2 + +echo "2.2.1.2. Can't add/remove line" +setup_2_2 +mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added" +rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed" +teardown_2_2 + +echo "2.2.1.3. Can't modify line key" +setup_2_2 +echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/key" 2> /dev/null && \ + fail "lookup key unexpectedly updated" +teardown_2_2 + +echo "2.2.1.4. Can't modify line name" +setup_2_2 +echo "new0" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/name" 2> /dev/null && \ + fail "name unexpectedly updated" +teardown_2_2 + +echo "2.2.1.5. Can't modify line offset" +setup_2_2 +echo "5" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/offset" 2> /dev/null && \ + fail "offset unexpectedly updated" +teardown_2_2 + +echo "2.2.2. While waiting for deferred probe" + +echo "2.2.2.1. Can't add/remove line despite live = 0" +sim_disable_chip chip0 +setup_2_2 +mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added" +rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed" +teardown_2_2 +sim_enable_chip chip0 + +echo "2.2.2.2. Can't modify line key" +sim_disable_chip chip0 +setup_2_2 +echo "chip1_bank1" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/key" 2> /dev/null && \ + fail "lookup key unexpectedly updated" +teardown_2_2 +sim_enable_chip chip0 + +echo "2.2.2.3. Can't modify line name" +sim_disable_chip chip0 +setup_2_2 +echo "new0" > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/name" 2> /dev/null && \ + fail "name unexpectedly updated" +teardown_2_2 +sim_enable_chip chip0 + +echo "2.2.2.4. Can't modify line offset" +sim_disable_chip chip0 +setup_2_2 +echo 5 > "$CONFIGFS_AGG_DIR/_sysfs.0/line0/offset" 2> /dev/null && \ + fail "offset unexpectedly updated" +teardown_2_2 +sim_enable_chip chip0 + +echo "2.2.2.5. Can't toggle live" +sim_disable_chip chip0 +setup_2_2 +test "$(cat "$CONFIGFS_AGG_DIR/_sysfs.0/live")" = 0 || fail "chip unexpectedly alive" +echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled" +teardown_2_2 +sim_enable_chip chip0 + +echo "2.2.3. While offline" + +echo "2.2.3.1. Can't add/remove line despite live = 0" +setup_2_2 +agg_disable_chip _sysfs.0 +mkdir "$CONFIGFS_AGG_DIR/_sysfs.0/line2" 2> /dev/null && fail "line unexpectedly added" +rmdir "$CONFIGFS_AGG_DIR/_sysfs.0/line1" 2> /dev/null && fail "line unexpectedly removed" +teardown_2_2 + +echo "2.2.3.2. Line key can be modified" +setup_2_2 +agg_disable_chip _sysfs.0 +agg_set_key _sysfs.0 line0 "$(sim_get_chip_label chip0 bank1)" +agg_set_key _sysfs.0 line1 "$(sim_get_chip_label chip1 bank1)" +agg_enable_chip _sysfs.0 +teardown_2_2 + +echo "2.2.3.3. Line name can be modified" +setup_2_2 +agg_disable_chip _sysfs.0 +agg_set_line_name _sysfs.0 line0 new0 +agg_set_line_name _sysfs.0 line1 new1 +agg_enable_chip _sysfs.0 +test "$(agg_get_line_name _sysfs.0 0)" = "new0" || fail "line name is unset" +test "$(agg_get_line_name _sysfs.0 1)" = "new1" || fail "line name is unset" +teardown_2_2 + +echo "2.2.3.4. Line offset can be modified" +setup_2_2 +agg_disable_chip _sysfs.0 +agg_set_offset _sysfs.0 line0 5 +agg_set_offset _sysfs.0 line1 7 +agg_enable_chip _sysfs.0 +teardown_2_2 + +echo "2.2.3.5. Can re-enable a chip with valid reconfiguration" +setup_2_2 +agg_disable_chip _sysfs.0 +agg_set_key _sysfs.0 line0 "$(sim_get_chip_label chip1 bank1)" +agg_set_offset _sysfs.0 line0 15 +agg_set_key _sysfs.0 line1 "$(sim_get_chip_label chip0 bank1)" +agg_set_offset _sysfs.0 line0 14 +agg_enable_chip _sysfs.0 +teardown_2_2 + +echo "2.2.3.6. Can't re-enable a chip with invalid reconfiguration" +setup_2_2 +agg_disable_chip _sysfs.0 +agg_set_key _sysfs.0 line0 invalidkey +echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled" +teardown_2_2 +setup_2_2 +agg_disable_chip _sysfs.0 +agg_set_offset _sysfs.0 line0 99 +echo 1 > "$CONFIGFS_AGG_DIR/_sysfs.0/live" 2> /dev/null && fail "chip unexpectedly enabled" +teardown_2_2 + +echo "3. Module unload" + +echo "3.1. Can't unload module if there is at least one device created via configfs" +agg_create_chip agg0 +modprobe -r gpio-aggregator 2> /dev/null +test -d /sys/module/gpio_aggregator || fail "module unexpectedly unloaded" +agg_remove_chip agg0 + +echo "3.2. Can unload module if there is no device created via configfs" +echo "chip0_bank0 1 chip1_bank0 3" > "$SYSFS_AGG_DIR/new_device" +modprobe -r gpio-aggregator 2> /dev/null +test -d /sys/module/gpio_aggregator && fail "module unexpectedly remains to be loaded" +modprobe gpio-aggregator 2> /dev/null + +echo "4. GPIO forwarder functional" +SETTINGS="chip0:bank0:2 chip0:bank1:4 chip1:bank0:6 chip1:bank1:8" +setup_4() { + local OFFSET=0 + agg_create_chip agg0 + for SETTING in $SETTINGS; do + CHIP=$(echo "$SETTING" | cut -d: -f1) + BANK=$(echo "$SETTING" | cut -d: -f2) + LINE=$(echo "$SETTING" | cut -d: -f3) + agg_create_line agg0 "line${OFFSET}" + agg_set_key agg0 "line${OFFSET}" "$(sim_get_chip_label "$CHIP" "$BANK")" + agg_set_offset agg0 "line${OFFSET}" "$LINE" + OFFSET=$(expr $OFFSET + 1) + done + agg_enable_chip agg0 +} +teardown_4() { + agg_configfs_cleanup +} + +echo "4.1. Forwarding set values" +setup_4 +OFFSET=0 +for SETTING in $SETTINGS; do + CHIP=$(echo "$SETTING" | cut -d: -f1) + BANK=$(echo "$SETTING" | cut -d: -f2) + LINE=$(echo "$SETTING" | cut -d: -f3) + DEVNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/dev_name") + CHIPNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name") + VAL_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio${LINE}/value" + test $(cat $VAL_PATH) = "0" || fail "incorrect value read from sysfs" + $BASE_DIR/gpio-mockup-cdev -s 1 "/dev/$(agg_configfs_chip_name agg0)" "$OFFSET" & + mock_pid=$! + sleep 0.1 # FIXME Any better way? + test "$(cat $VAL_PATH)" = "1" || fail "incorrect value read from sysfs" + kill "$mock_pid" + OFFSET=$(expr $OFFSET + 1) +done +teardown_4 + +echo "4.2. Forwarding set config" +setup_4 +OFFSET=0 +for SETTING in $SETTINGS; do + CHIP=$(echo "$SETTING" | cut -d: -f1) + BANK=$(echo "$SETTING" | cut -d: -f2) + LINE=$(echo "$SETTING" | cut -d: -f3) + DEVNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/dev_name") + CHIPNAME=$(cat "$CONFIGFS_SIM_DIR/$CHIP/$BANK/chip_name") + VAL_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio${LINE}/value" + $BASE_DIR/gpio-mockup-cdev -b pull-up "/dev/$(agg_configfs_chip_name agg0)" "$OFFSET" + test $(cat "$VAL_PATH") = "1" || fail "incorrect value read from sysfs" + OFFSET=$(expr $OFFSET + 1) +done +teardown_4 + +echo "5. Race condition verification" + +echo "5.1. Stress test of new_device/delete_device and module load/unload" +for _ in $(seq 1000); do + { + echo "dummy 0" > "$SYSFS_AGG_DIR/new_device" + cat "$CONFIGFS_AGG_DIR/_sysfs.0/dev_name" > "$SYSFS_AGG_DIR/delete_device" + } 2> /dev/null +done & +writer_pid=$! +while kill -0 "$writer_pid" 2> /dev/null; do + { + modprobe gpio-aggregator + modprobe -r gpio-aggregator + } 2> /dev/null +done + +echo "GPIO $MODULE test PASS" diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile index 67fe7a46cb62..e3000ccb9a5d 100644 --- a/tools/testing/selftests/kexec/Makefile +++ b/tools/testing/selftests/kexec/Makefile @@ -8,6 +8,13 @@ ifeq ($(ARCH_PROCESSED),$(filter $(ARCH_PROCESSED),x86 ppc64le)) TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh TEST_FILES := kexec_common_lib.sh +include ../../../scripts/Makefile.arch + +ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86) +TEST_PROGS += test_kexec_jump.sh +test_kexec_jump.sh: $(OUTPUT)/test_kexec_jump +endif + include ../lib.mk endif diff --git a/tools/testing/selftests/kexec/test_kexec_jump.c b/tools/testing/selftests/kexec/test_kexec_jump.c new file mode 100644 index 000000000000..fbce287866f5 --- /dev/null +++ b/tools/testing/selftests/kexec/test_kexec_jump.c @@ -0,0 +1,72 @@ +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <linux/kexec.h> +#include <linux/reboot.h> +#include <sys/reboot.h> +#include <sys/syscall.h> + +asm( + " .code64\n" + " .data\n" + "purgatory_start:\n" + + // Trigger kexec debug exception handling + " int3\n" + + // Set load address for next time + " leaq purgatory_start_b(%rip), %r11\n" + " movq %r11, 8(%rsp)\n" + + // Back to Linux + " ret\n" + + // Same again + "purgatory_start_b:\n" + + // Trigger kexec debug exception handling + " int3\n" + + // Set load address for next time + " leaq purgatory_start(%rip), %r11\n" + " movq %r11, 8(%rsp)\n" + + // Back to Linux + " ret\n" + + "purgatory_end:\n" + ".previous" +); +extern char purgatory_start[], purgatory_end[]; + +int main (void) +{ + struct kexec_segment segment = {}; + int ret; + + segment.buf = purgatory_start; + segment.bufsz = purgatory_end - purgatory_start; + segment.mem = (void *)0x400000; + segment.memsz = 0x1000; + ret = syscall(__NR_kexec_load, 0x400000, 1, &segment, KEXEC_PRESERVE_CONTEXT); + if (ret) { + perror("kexec_load"); + exit(1); + } + + ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC); + if (ret) { + perror("kexec reboot"); + exit(1); + } + + ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC); + if (ret) { + perror("kexec reboot"); + exit(1); + } + printf("Success\n"); + return 0; +} + diff --git a/tools/testing/selftests/kexec/test_kexec_jump.sh b/tools/testing/selftests/kexec/test_kexec_jump.sh new file mode 100755 index 000000000000..6ae977054ba2 --- /dev/null +++ b/tools/testing/selftests/kexec/test_kexec_jump.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Prevent loading a kernel image via the kexec_load syscall when +# signatures are required. (Dependent on CONFIG_IMA_ARCH_POLICY.) + +TEST="$0" +. ./kexec_common_lib.sh + +# kexec requires root privileges +require_root_privileges + +# get the kernel config +get_kconfig + +kconfig_enabled "CONFIG_KEXEC_JUMP=y" "kexec_jump is enabled" +if [ $? -eq 0 ]; then + log_skip "kexec_jump is not enabled" +fi + +kconfig_enabled "CONFIG_IMA_APPRAISE=y" "IMA enabled" +ima_appraise=$? + +kconfig_enabled "CONFIG_IMA_ARCH_POLICY=y" \ + "IMA architecture specific policy enabled" +arch_policy=$? + +get_secureboot_mode +secureboot=$? + +if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ]; then + log_skip "Secure boot and CONFIG_IMA_ARCH_POLICY are enabled" +fi + +./test_kexec_jump +if [ $? -eq 0 ]; then + log_pass "kexec_jump succeeded" +else + # The more likely failure mode if anything went wrong is that the + # kernel just crashes. But if we get back here, sure, whine anyway. + log_fail "kexec_jump failed" +fi diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 666c9fde76da..2925e47db995 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -56,6 +56,8 @@ #include <asm/types.h> #include <ctype.h> #include <errno.h> +#include <linux/unistd.h> +#include <poll.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> @@ -65,7 +67,6 @@ #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> -#include <setjmp.h> #include "kselftest.h" @@ -172,14 +173,11 @@ #define __TEST_IMPL(test_name, _signal) \ static void test_name(struct __test_metadata *_metadata); \ - static inline void wrapper_##test_name( \ + static void wrapper_##test_name( \ struct __test_metadata *_metadata, \ - struct __fixture_variant_metadata *variant) \ + struct __fixture_variant_metadata __attribute__((unused)) *variant) \ { \ - _metadata->setup_completed = true; \ - if (setjmp(_metadata->env) == 0) \ - test_name(_metadata); \ - __test_check_assert(_metadata); \ + test_name(_metadata); \ } \ static struct __test_metadata _##test_name##_object = \ { .name = #test_name, \ @@ -258,7 +256,7 @@ * A bare "return;" statement may be used to return early. */ #define FIXTURE_SETUP(fixture_name) \ - void fixture_name##_setup( \ + static void fixture_name##_setup( \ struct __test_metadata __attribute__((unused)) *_metadata, \ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ const FIXTURE_VARIANT(fixture_name) \ @@ -307,7 +305,7 @@ __FIXTURE_TEARDOWN(fixture_name) #define __FIXTURE_TEARDOWN(fixture_name) \ - void fixture_name##_teardown( \ + static void fixture_name##_teardown( \ struct __test_metadata __attribute__((unused)) *_metadata, \ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ const FIXTURE_VARIANT(fixture_name) \ @@ -401,7 +399,7 @@ struct __test_metadata *_metadata, \ FIXTURE_DATA(fixture_name) *self, \ const FIXTURE_VARIANT(fixture_name) *variant); \ - static inline void wrapper_##fixture_name##_##test_name( \ + static void wrapper_##fixture_name##_##test_name( \ struct __test_metadata *_metadata, \ struct __fixture_variant_metadata *variant) \ { \ @@ -410,9 +408,9 @@ pid_t child = 1; \ int status = 0; \ /* Makes sure there is only one teardown, even when child forks again. */ \ - bool *teardown = mmap(NULL, sizeof(*teardown), \ + _metadata->no_teardown = mmap(NULL, sizeof(*_metadata->no_teardown), \ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \ - *teardown = false; \ + *_metadata->no_teardown = true; \ if (sizeof(*self) > 0) { \ if (fixture_name##_teardown_parent) { \ self = mmap(NULL, sizeof(*self), PROT_READ | PROT_WRITE, \ @@ -422,31 +420,26 @@ self = &self_private; \ } \ } \ - if (setjmp(_metadata->env) == 0) { \ - /* _metadata and potentially self are shared with all forks. */ \ - child = fork(); \ - if (child == 0) { \ - fixture_name##_setup(_metadata, self, variant->data); \ - /* Let setup failure terminate early. */ \ - if (_metadata->exit_code) \ - _exit(0); \ - _metadata->setup_completed = true; \ - fixture_name##_##test_name(_metadata, self, variant->data); \ - } else if (child < 0 || child != waitpid(child, &status, 0)) { \ - ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \ - _metadata->exit_code = KSFT_FAIL; \ - } \ - } \ + _metadata->variant = variant->data; \ + _metadata->self = self; \ + /* _metadata and potentially self are shared with all forks. */ \ + child = fork(); \ if (child == 0) { \ - if (_metadata->setup_completed && !fixture_name##_teardown_parent && \ - __sync_bool_compare_and_swap(teardown, false, true)) \ - fixture_name##_teardown(_metadata, self, variant->data); \ + fixture_name##_setup(_metadata, self, variant->data); \ + /* Let setup failure terminate early. */ \ + if (_metadata->exit_code) \ + _exit(0); \ + *_metadata->no_teardown = false; \ + fixture_name##_##test_name(_metadata, self, variant->data); \ + _metadata->teardown_fn(false, _metadata, self, variant->data); \ _exit(0); \ + } else if (child < 0 || child != waitpid(child, &status, 0)) { \ + ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \ + _metadata->exit_code = KSFT_FAIL; \ } \ - if (_metadata->setup_completed && fixture_name##_teardown_parent && \ - __sync_bool_compare_and_swap(teardown, false, true)) \ - fixture_name##_teardown(_metadata, self, variant->data); \ - munmap(teardown, sizeof(*teardown)); \ + _metadata->teardown_fn(true, _metadata, self, variant->data); \ + munmap(_metadata->no_teardown, sizeof(*_metadata->no_teardown)); \ + _metadata->no_teardown = NULL; \ if (self && fixture_name##_teardown_parent) \ munmap(self, sizeof(*self)); \ if (WIFEXITED(status)) { \ @@ -456,7 +449,14 @@ /* Forward signal to __wait_for_test(). */ \ kill(getpid(), WTERMSIG(status)); \ } \ - __test_check_assert(_metadata); \ + } \ + static void wrapper_##fixture_name##_##test_name##_teardown( \ + bool in_parent, struct __test_metadata *_metadata, \ + void *self, const void *variant) \ + { \ + if (fixture_name##_teardown_parent == in_parent && \ + !__atomic_test_and_set(_metadata->no_teardown, __ATOMIC_RELAXED)) \ + fixture_name##_teardown(_metadata, self, variant); \ } \ static struct __test_metadata *_##fixture_name##_##test_name##_object; \ static void __attribute__((constructor)) \ @@ -467,6 +467,7 @@ object->name = #test_name; \ object->fn = &wrapper_##fixture_name##_##test_name; \ object->fixture = &_##fixture_name##_fixture_object; \ + object->teardown_fn = &wrapper_##fixture_name##_##test_name##_teardown; \ object->termsig = signal; \ object->timeout = tmout; \ _##fixture_name##_##test_name##_object = object; \ @@ -910,14 +911,16 @@ struct __test_metadata { struct __fixture_variant_metadata *); pid_t pid; /* pid of test when being run */ struct __fixture_metadata *fixture; + void (*teardown_fn)(bool in_parent, struct __test_metadata *_metadata, + void *self, const void *variant); int termsig; int exit_code; int trigger; /* extra handler after the evaluation */ int timeout; /* seconds to wait for test timeout */ - bool timed_out; /* did this test timeout instead of exiting? */ bool aborted; /* stopped test due to failed ASSERT */ - bool setup_completed; /* did setup finish? */ - jmp_buf env; /* for exiting out of test early */ + bool *no_teardown; /* fixture needs teardown */ + void *self; + const void *variant; struct __test_results *results; struct __test_metadata *prev, *next; }; @@ -951,88 +954,60 @@ static inline int __bail(int for_realz, struct __test_metadata *t) { /* if this is ASSERT, return immediately. */ if (for_realz) { - t->aborted = true; - longjmp(t->env, 1); + if (t->teardown_fn) + t->teardown_fn(false, t, t->self, t->variant); + abort(); } /* otherwise, end the for loop and continue. */ return 0; } -static inline void __test_check_assert(struct __test_metadata *t) -{ - if (t->aborted) - abort(); -} - -struct __test_metadata *__active_test; -static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) -{ - struct __test_metadata *t = __active_test; - - /* Sanity check handler execution environment. */ - if (!t) { - fprintf(TH_LOG_STREAM, - "# no active test in SIGALRM handler!?\n"); - abort(); - } - if (sig != SIGALRM || sig != info->si_signo) { - fprintf(TH_LOG_STREAM, - "# %s: SIGALRM handler caught signal %d!?\n", - t->name, sig != SIGALRM ? sig : info->si_signo); - abort(); - } - - t->timed_out = true; - // signal process group - kill(-(t->pid), SIGKILL); -} - -void __wait_for_test(struct __test_metadata *t) +static void __wait_for_test(struct __test_metadata *t) { - struct sigaction action = { - .sa_sigaction = __timeout_handler, - .sa_flags = SA_SIGINFO, - }; - struct sigaction saved_action; /* * Sets status so that WIFEXITED(status) returns true and * WEXITSTATUS(status) returns KSFT_FAIL. This safe default value * should never be evaluated because of the waitpid(2) check and - * SIGALRM handling. + * timeout handling. */ int status = KSFT_FAIL << 8; - int child; + struct pollfd poll_child; + int ret, child, childfd; + bool timed_out = false; - if (sigaction(SIGALRM, &action, &saved_action)) { + childfd = syscall(__NR_pidfd_open, t->pid, 0); + if (childfd == -1) { t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, - "# %s: unable to install SIGALRM handler\n", + "# %s: unable to open pidfd\n", t->name); return; } - __active_test = t; - t->timed_out = false; - alarm(t->timeout); - child = waitpid(t->pid, &status, 0); - if (child == -1 && errno != EINTR) { + + poll_child.fd = childfd; + poll_child.events = POLLIN; + ret = poll(&poll_child, 1, t->timeout * 1000); + if (ret == -1) { t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, - "# %s: Failed to wait for PID %d (errno: %d)\n", - t->name, t->pid, errno); + "# %s: unable to wait on child pidfd\n", + t->name); return; + } else if (ret == 0) { + timed_out = true; + /* signal process group */ + kill(-(t->pid), SIGKILL); } - - alarm(0); - if (sigaction(SIGALRM, &saved_action, NULL)) { + child = waitpid(t->pid, &status, WNOHANG); + if (child == -1 && errno != EINTR) { t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, - "# %s: unable to uninstall SIGALRM handler\n", - t->name); + "# %s: Failed to wait for PID %d (errno: %d)\n", + t->name, t->pid, errno); return; } - __active_test = NULL; - if (t->timed_out) { + if (timed_out) { t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: Test terminated by timeout\n", t->name); @@ -1205,9 +1180,9 @@ static bool test_enabled(int argc, char **argv, return !has_positive; } -void __run_test(struct __fixture_metadata *f, - struct __fixture_variant_metadata *variant, - struct __test_metadata *t) +static void __run_test(struct __fixture_metadata *f, + struct __fixture_variant_metadata *variant, + struct __test_metadata *t) { struct __test_xfail *xfail; char test_name[1024]; @@ -1218,8 +1193,7 @@ void __run_test(struct __fixture_metadata *f, t->exit_code = KSFT_PASS; t->trigger = 0; t->aborted = false; - t->setup_completed = false; - memset(t->env, 0, sizeof(t->env)); + t->no_teardown = NULL; memset(t->results->reason, 0, sizeof(t->results->reason)); snprintf(test_name, sizeof(test_name), "%s%s%s.%s", diff --git a/tools/testing/selftests/kselftest_harness/.gitignore b/tools/testing/selftests/kselftest_harness/.gitignore new file mode 100644 index 000000000000..e4e476a333c9 --- /dev/null +++ b/tools/testing/selftests/kselftest_harness/.gitignore @@ -0,0 +1,2 @@ +/harness-selftest +/harness-selftest.seen diff --git a/tools/testing/selftests/kselftest_harness/Makefile b/tools/testing/selftests/kselftest_harness/Makefile new file mode 100644 index 000000000000..0617535a6ce4 --- /dev/null +++ b/tools/testing/selftests/kselftest_harness/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_PROGS_EXTENDED := harness-selftest +TEST_PROGS := harness-selftest.sh +EXTRA_CLEAN := harness-selftest.seen + +include ../lib.mk diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.c b/tools/testing/selftests/kselftest_harness/harness-selftest.c new file mode 100644 index 000000000000..b555493bdb4d --- /dev/null +++ b/tools/testing/selftests/kselftest_harness/harness-selftest.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> + +#include <sys/resource.h> +#include <sys/prctl.h> + +/* Avoid any inconsistencies */ +#define TH_LOG_STREAM stdout + +#include "../kselftest_harness.h" + +static void test_helper(struct __test_metadata *_metadata) +{ + ASSERT_EQ(0, 0); +} + +TEST(standalone_pass) { + TH_LOG("before"); + ASSERT_EQ(0, 0); + EXPECT_EQ(0, 0); + test_helper(_metadata); + TH_LOG("after"); +} + +TEST(standalone_fail) { + TH_LOG("before"); + EXPECT_EQ(0, 0); + EXPECT_EQ(0, 1); + ASSERT_EQ(0, 1); + TH_LOG("after"); +} + +TEST_SIGNAL(signal_pass, SIGUSR1) { + TH_LOG("before"); + ASSERT_EQ(0, 0); + TH_LOG("after"); + kill(getpid(), SIGUSR1); +} + +TEST_SIGNAL(signal_fail, SIGUSR1) { + TH_LOG("before"); + ASSERT_EQ(0, 1); + TH_LOG("after"); + kill(getpid(), SIGUSR1); +} + +FIXTURE(fixture) { + pid_t testpid; +}; + +FIXTURE_SETUP(fixture) { + TH_LOG("setup"); + self->testpid = getpid(); +} + +FIXTURE_TEARDOWN(fixture) { + TH_LOG("teardown same-process=%d", self->testpid == getpid()); +} + +TEST_F(fixture, pass) { + TH_LOG("before"); + ASSERT_EQ(0, 0); + test_helper(_metadata); + standalone_pass(_metadata); + TH_LOG("after"); +} + +TEST_F(fixture, fail) { + TH_LOG("before"); + ASSERT_EQ(0, 1); + fixture_pass(_metadata, self, variant); + TH_LOG("after"); +} + +TEST_F_TIMEOUT(fixture, timeout, 1) { + TH_LOG("before"); + sleep(2); + TH_LOG("after"); +} + +FIXTURE(fixture_parent) { + pid_t testpid; +}; + +FIXTURE_SETUP(fixture_parent) { + TH_LOG("setup"); + self->testpid = getpid(); +} + +FIXTURE_TEARDOWN_PARENT(fixture_parent) { + TH_LOG("teardown same-process=%d", self->testpid == getpid()); +} + +TEST_F(fixture_parent, pass) { + TH_LOG("before"); + ASSERT_EQ(0, 0); + TH_LOG("after"); +} + +FIXTURE(fixture_setup_failure) { + pid_t testpid; +}; + +FIXTURE_SETUP(fixture_setup_failure) { + TH_LOG("setup"); + self->testpid = getpid(); + ASSERT_EQ(0, 1); +} + +FIXTURE_TEARDOWN(fixture_setup_failure) { + TH_LOG("teardown same-process=%d", self->testpid == getpid()); +} + +TEST_F(fixture_setup_failure, pass) { + TH_LOG("before"); + ASSERT_EQ(0, 0); + TH_LOG("after"); +} + +int main(int argc, char **argv) +{ + /* + * The harness uses abort() to signal assertion failures, which triggers coredumps. + * This may be useful to debug real failures but not for this selftest, disable them. + */ + struct rlimit rlimit = { + .rlim_cur = 0, + .rlim_max = 0, + }; + + prctl(PR_SET_DUMPABLE, 0, 0, 0, 0); + setrlimit(RLIMIT_CORE, &rlimit); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.expected b/tools/testing/selftests/kselftest_harness/harness-selftest.expected new file mode 100644 index 000000000000..97e1418c1c7e --- /dev/null +++ b/tools/testing/selftests/kselftest_harness/harness-selftest.expected @@ -0,0 +1,64 @@ +TAP version 13 +1..9 +# Starting 9 tests from 4 test cases. +# RUN global.standalone_pass ... +# harness-selftest.c:19:standalone_pass:before +# harness-selftest.c:23:standalone_pass:after +# OK global.standalone_pass +ok 1 global.standalone_pass +# RUN global.standalone_fail ... +# harness-selftest.c:27:standalone_fail:before +# harness-selftest.c:29:standalone_fail:Expected 0 (0) == 1 (1) +# harness-selftest.c:30:standalone_fail:Expected 0 (0) == 1 (1) +# standalone_fail: Test terminated by assertion +# FAIL global.standalone_fail +not ok 2 global.standalone_fail +# RUN global.signal_pass ... +# harness-selftest.c:35:signal_pass:before +# harness-selftest.c:37:signal_pass:after +# OK global.signal_pass +ok 3 global.signal_pass +# RUN global.signal_fail ... +# harness-selftest.c:42:signal_fail:before +# harness-selftest.c:43:signal_fail:Expected 0 (0) == 1 (1) +# signal_fail: Test terminated by assertion +# FAIL global.signal_fail +not ok 4 global.signal_fail +# RUN fixture.pass ... +# harness-selftest.c:53:pass:setup +# harness-selftest.c:62:pass:before +# harness-selftest.c:19:pass:before +# harness-selftest.c:23:pass:after +# harness-selftest.c:66:pass:after +# harness-selftest.c:58:pass:teardown same-process=1 +# OK fixture.pass +ok 5 fixture.pass +# RUN fixture.fail ... +# harness-selftest.c:53:fail:setup +# harness-selftest.c:70:fail:before +# harness-selftest.c:71:fail:Expected 0 (0) == 1 (1) +# harness-selftest.c:58:fail:teardown same-process=1 +# fail: Test terminated by assertion +# FAIL fixture.fail +not ok 6 fixture.fail +# RUN fixture.timeout ... +# harness-selftest.c:53:timeout:setup +# harness-selftest.c:77:timeout:before +# timeout: Test terminated by timeout +# FAIL fixture.timeout +not ok 7 fixture.timeout +# RUN fixture_parent.pass ... +# harness-selftest.c:87:pass:setup +# harness-selftest.c:96:pass:before +# harness-selftest.c:98:pass:after +# harness-selftest.c:92:pass:teardown same-process=0 +# OK fixture_parent.pass +ok 8 fixture_parent.pass +# RUN fixture_setup_failure.pass ... +# harness-selftest.c:106:pass:setup +# harness-selftest.c:108:pass:Expected 0 (0) == 1 (1) +# pass: Test terminated by assertion +# FAIL fixture_setup_failure.pass +not ok 9 fixture_setup_failure.pass +# FAILED: 4 / 9 tests passed. +# Totals: pass:4 fail:5 xfail:0 xpass:0 skip:0 error:0 diff --git a/tools/testing/selftests/kselftest_harness/harness-selftest.sh b/tools/testing/selftests/kselftest_harness/harness-selftest.sh new file mode 100755 index 000000000000..fe72d16370fe --- /dev/null +++ b/tools/testing/selftests/kselftest_harness/harness-selftest.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Selftest for kselftest_harness.h +# + +set -e + +DIR="$(dirname $(readlink -f "$0"))" + +"$DIR"/harness-selftest > harness-selftest.seen || true + +diff -u "$DIR"/harness-selftest.expected harness-selftest.seen diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 20af35a91d6f..d9fffe06d3ea 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -3,7 +3,7 @@ top_srcdir = ../../../.. include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) -ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64)) +ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch)) # Top-level selftests allows ARCH=x86_64 :-( ifeq ($(ARCH),x86_64) ARCH := x86 diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index f62b0a5aba35..3e786080473d 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -47,6 +47,10 @@ LIBKVM_riscv += lib/riscv/handlers.S LIBKVM_riscv += lib/riscv/processor.c LIBKVM_riscv += lib/riscv/ucall.c +LIBKVM_loongarch += lib/loongarch/processor.c +LIBKVM_loongarch += lib/loongarch/ucall.c +LIBKVM_loongarch += lib/loongarch/exception.S + # Non-compiled test targets TEST_PROGS_x86 += x86/nx_huge_pages_test.sh @@ -147,6 +151,7 @@ TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions +TEST_GEN_PROGS_arm64 += arm64/host_sve TEST_GEN_PROGS_arm64 += arm64/hypercalls TEST_GEN_PROGS_arm64 += arm64/mmio_abort TEST_GEN_PROGS_arm64 += arm64/page_fault_test @@ -190,6 +195,19 @@ TEST_GEN_PROGS_riscv += coalesced_io_test TEST_GEN_PROGS_riscv += get-reg-list TEST_GEN_PROGS_riscv += steal_time +TEST_GEN_PROGS_loongarch += coalesced_io_test +TEST_GEN_PROGS_loongarch += demand_paging_test +TEST_GEN_PROGS_loongarch += dirty_log_perf_test +TEST_GEN_PROGS_loongarch += dirty_log_test +TEST_GEN_PROGS_loongarch += guest_print_test +TEST_GEN_PROGS_loongarch += hardware_disable_test +TEST_GEN_PROGS_loongarch += kvm_binary_stats_test +TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus +TEST_GEN_PROGS_loongarch += kvm_page_table_test +TEST_GEN_PROGS_loongarch += memslot_modification_stress_test +TEST_GEN_PROGS_loongarch += memslot_perf_test +TEST_GEN_PROGS_loongarch += set_memory_region_test + SPLIT_TESTS += arch_timer SPLIT_TESTS += get-reg-list diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c new file mode 100644 index 000000000000..3826772fd470 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/host_sve.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls. + * + * Copyright 2025 Arm, Ltd + */ + +#include <errno.h> +#include <signal.h> +#include <sys/auxv.h> +#include <asm/kvm.h> +#include <kvm_util.h> + +#include "ucall_common.h" + +static void guest_code(void) +{ + for (int i = 0; i < 10; i++) { + GUEST_UCALL_NONE(); + } + + GUEST_DONE(); +} + +void handle_sigill(int sig, siginfo_t *info, void *ctx) +{ + ucontext_t *uctx = ctx; + + printf(" < host signal %d >\n", sig); + + /* + * Skip the UDF + */ + uctx->uc_mcontext.pc += 4; +} + +void register_sigill_handler(void) +{ + struct sigaction sa = { + .sa_sigaction = handle_sigill, + .sa_flags = SA_SIGINFO, + }; + sigaction(SIGILL, &sa, NULL); +} + +static void do_sve_roundtrip(void) +{ + unsigned long before, after; + + /* + * Set all bits in a predicate register, force a save/restore via a + * SIGILL (which handle_sigill() will recover from), then report + * whether the value has changed. + */ + asm volatile( + " .arch_extension sve\n" + " ptrue p0.B\n" + " cntp %[before], p0, p0.B\n" + " udf #0\n" + " cntp %[after], p0, p0.B\n" + : [before] "=r" (before), + [after] "=r" (after) + : + : "p0" + ); + + if (before != after) { + TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n", + before, after); + } else { + printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n", + before, after); + } +} + +static void test_run(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + register_sigill_handler(); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + do_sve_roundtrip(); + + while (!guest_done) { + + printf("Running VCPU...\n"); + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_NONE: + do_sve_roundtrip(); + do_sve_roundtrip(); + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); +} + +int main(void) +{ + /* + * This is testing the host environment, we don't care about + * guest SVE support. + */ + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + printf("SVE not supported\n"); + return KSFT_SKIP; + } + + test_run(); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 57708de2075d..8f422bfdfcb9 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -15,6 +15,8 @@ #include "test_util.h" #include <linux/bitfield.h> +bool have_cap_arm_mte; + enum ftr_type { FTR_EXACT, /* Use a predefined safe value */ FTR_LOWER_SAFE, /* Smaller value is safe */ @@ -543,6 +545,70 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n"); } +#define MTE_IDREG_TEST 1 +static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + uint64_t val; + uint64_t mte; + uint64_t mte_frac; + int idx, err; + + if (!have_cap_arm_mte) { + ksft_test_result_skip("MTE capability not supported, nothing to test\n"); + return; + } + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); + if ((masks[idx] & ID_AA64PFR1_EL1_MTE_frac_MASK) == ID_AA64PFR1_EL1_MTE_frac_MASK) { + ksft_test_result_skip("ID_AA64PFR1_EL1.MTE_frac is officially writable, nothing to test\n"); + return; + } + + /* + * When MTE is supported but MTE_ASYMM is not (ID_AA64PFR1_EL1.MTE == 2) + * ID_AA64PFR1_EL1.MTE_frac == 0xF indicates MTE_ASYNC is unsupported + * and MTE_frac == 0 indicates it is supported. + * + * As MTE_frac was previously unconditionally read as 0, check + * that the set to 0 succeeds but does not change MTE_frac + * from unsupported (0xF) to supported (0). + * + */ + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + + mte = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), val); + mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val); + if (mte != ID_AA64PFR1_EL1_MTE_MTE2 || + mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) { + ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n"); + return; + } + + /* Try to set MTE_frac=0. */ + val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_frac_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) { + ksft_test_result_fail("ID_AA64PFR1_EL1.MTE_frac=0 was not accepted\n"); + return; + } + + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val); + if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI) + ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n"); + else + ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n"); +} + static void test_guest_reg_read(struct kvm_vcpu *vcpu) { bool done = false; @@ -673,6 +739,14 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) ksft_test_result_pass("%s\n", __func__); } +void kvm_arch_vm_post_create(struct kvm_vm *vm) +{ + if (vm_check_cap(vm, KVM_CAP_ARM_MTE)) { + vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); + have_cap_arm_mte = true; + } +} + int main(void) { struct kvm_vcpu *vcpu; @@ -701,7 +775,7 @@ int main(void) ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 + - MPAM_IDREG_TEST; + MPAM_IDREG_TEST + MTE_IDREG_TEST; ksft_set_plan(test_cnt); @@ -709,6 +783,7 @@ int main(void) test_vcpu_ftr_id_regs(vcpu); test_vcpu_non_ftr_id_regs(vcpu); test_user_set_mpam_reg(vcpu); + test_user_set_mte_reg(vcpu); test_guest_reg_read(vcpu); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 373912464fb4..93013564428b 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -177,6 +177,7 @@ enum vm_guest_mode { VM_MODE_P36V48_4K, VM_MODE_P36V48_16K, VM_MODE_P36V48_64K, + VM_MODE_P47V47_16K, VM_MODE_P36V47_16K, NUM_VM_MODES, }; @@ -232,6 +233,11 @@ extern enum vm_guest_mode vm_mode_default; #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 8) +#elif defined(__loongarch__) +#define VM_MODE_DEFAULT VM_MODE_P47V47_16K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + #endif #define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT) diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h new file mode 100644 index 000000000000..e43a57d99b56 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +struct kvm_vm_arch {}; + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h new file mode 100644 index 000000000000..6427a3275e6a --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/processor.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +#ifndef __ASSEMBLER__ +#include "ucall_common.h" + +#else +/* general registers */ +#define zero $r0 +#define ra $r1 +#define tp $r2 +#define sp $r3 +#define a0 $r4 +#define a1 $r5 +#define a2 $r6 +#define a3 $r7 +#define a4 $r8 +#define a5 $r9 +#define a6 $r10 +#define a7 $r11 +#define t0 $r12 +#define t1 $r13 +#define t2 $r14 +#define t3 $r15 +#define t4 $r16 +#define t5 $r17 +#define t6 $r18 +#define t7 $r19 +#define t8 $r20 +#define u0 $r21 +#define fp $r22 +#define s0 $r23 +#define s1 $r24 +#define s2 $r25 +#define s3 $r26 +#define s4 $r27 +#define s5 $r28 +#define s6 $r29 +#define s7 $r30 +#define s8 $r31 +#endif + +/* + * LoongArch page table entry definition + * Original header file arch/loongarch/include/asm/loongarch.h + */ +#define _PAGE_VALID_SHIFT 0 +#define _PAGE_DIRTY_SHIFT 1 +#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */ +#define PLV_KERN 0 +#define PLV_USER 3 +#define PLV_MASK 0x3 +#define _CACHE_SHIFT 4 /* 4~5, two bits */ +#define _PAGE_PRESENT_SHIFT 7 +#define _PAGE_WRITE_SHIFT 8 + +#define _PAGE_VALID BIT_ULL(_PAGE_VALID_SHIFT) +#define _PAGE_PRESENT BIT_ULL(_PAGE_PRESENT_SHIFT) +#define _PAGE_WRITE BIT_ULL(_PAGE_WRITE_SHIFT) +#define _PAGE_DIRTY BIT_ULL(_PAGE_DIRTY_SHIFT) +#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT) +#define __READABLE (_PAGE_VALID) +#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE) +/* Coherent Cached */ +#define _CACHE_CC BIT_ULL(_CACHE_SHIFT) +#define PS_4K 0x0000000c +#define PS_16K 0x0000000e +#define PS_64K 0x00000010 +#define PS_DEFAULT_SIZE PS_16K + +/* LoongArch Basic CSR registers */ +#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */ +#define CSR_CRMD_PG_SHIFT 4 +#define CSR_CRMD_PG BIT_ULL(CSR_CRMD_PG_SHIFT) +#define CSR_CRMD_IE_SHIFT 2 +#define CSR_CRMD_IE BIT_ULL(CSR_CRMD_IE_SHIFT) +#define CSR_CRMD_PLV_SHIFT 0 +#define CSR_CRMD_PLV_WIDTH 2 +#define CSR_CRMD_PLV (0x3UL << CSR_CRMD_PLV_SHIFT) +#define PLV_MASK 0x3 +#define LOONGARCH_CSR_PRMD 0x1 +#define LOONGARCH_CSR_EUEN 0x2 +#define LOONGARCH_CSR_ECFG 0x4 +#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ +#define LOONGARCH_CSR_ERA 0x6 /* ERA */ +#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */ +#define LOONGARCH_CSR_EENTRY 0xc +#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize */ +#define CSR_TLBIDX_PS_SHIFT 24 +#define CSR_TLBIDX_PS_WIDTH 6 +#define CSR_TLBIDX_PS (0x3fUL << CSR_TLBIDX_PS_SHIFT) +#define CSR_TLBIDX_SIZEM 0x3f000000 +#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT +#define LOONGARCH_CSR_ASID 0x18 /* ASID */ +#define LOONGARCH_CSR_PGDL 0x19 +#define LOONGARCH_CSR_PGDH 0x1a +/* Page table base */ +#define LOONGARCH_CSR_PGD 0x1b +#define LOONGARCH_CSR_PWCTL0 0x1c +#define LOONGARCH_CSR_PWCTL1 0x1d +#define LOONGARCH_CSR_STLBPGSIZE 0x1e +#define LOONGARCH_CSR_CPUID 0x20 +#define LOONGARCH_CSR_KS0 0x30 +#define LOONGARCH_CSR_KS1 0x31 +#define LOONGARCH_CSR_TMID 0x40 +#define LOONGARCH_CSR_TCFG 0x41 +/* TLB refill exception entry */ +#define LOONGARCH_CSR_TLBRENTRY 0x88 +#define LOONGARCH_CSR_TLBRSAVE 0x8b +#define LOONGARCH_CSR_TLBREHI 0x8e +#define CSR_TLBREHI_PS_SHIFT 0 +#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT) + +#define EXREGS_GPRS (32) + +#ifndef __ASSEMBLER__ +void handle_tlb_refill(void); +void handle_exception(void); + +struct ex_regs { + unsigned long regs[EXREGS_GPRS]; + unsigned long pc; + unsigned long estat; + unsigned long badv; +}; + +#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc) +#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat) +#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv) +#define EXREGS_SIZE sizeof(struct ex_regs) + +#else +#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8) +#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8) +#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8) +#define EXREGS_SIZE ((EXREGS_GPRS + 3) * 8) +#endif + +#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h new file mode 100644 index 000000000000..4ec801f37f00 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util.h" + +#define UCALL_EXIT_REASON KVM_EXIT_MMIO + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +extern vm_vaddr_t *ucall_exit_mmio_addr; + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + WRITE_ONCE(*ucall_exit_mmio_addr, uc); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index 5f389166338c..162f303d9daa 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -11,6 +11,19 @@ #include <asm/csr.h> #include "kvm_util.h" +#define INSN_OPCODE_MASK 0x007c +#define INSN_OPCODE_SHIFT 2 +#define INSN_OPCODE_SYSTEM 28 + +#define INSN_MASK_FUNCT3 0x7000 +#define INSN_SHIFT_FUNCT3 12 + +#define INSN_CSR_MASK 0xfff00000 +#define INSN_CSR_SHIFT 20 + +#define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3) +#define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT) + static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype, uint64_t idx, uint64_t size) { @@ -60,7 +73,8 @@ static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext) return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext)); } -struct ex_regs { +struct pt_regs { + unsigned long epc; unsigned long ra; unsigned long sp; unsigned long gp; @@ -92,16 +106,19 @@ struct ex_regs { unsigned long t4; unsigned long t5; unsigned long t6; - unsigned long epc; + /* Supervisor/Machine CSRs */ unsigned long status; + unsigned long badaddr; unsigned long cause; + /* a0 value before the syscall */ + unsigned long orig_a0; }; #define NR_VECTORS 2 #define NR_EXCEPTIONS 32 #define EC_MASK (NR_EXCEPTIONS - 1) -typedef void(*exception_handler_fn)(struct ex_regs *); +typedef void(*exception_handler_fn)(struct pt_regs *); void vm_init_vector_tables(struct kvm_vm *vm); void vcpu_init_vector_tables(struct kvm_vcpu *vcpu); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 815bc45dd8dc..5649cf2f40e8 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -222,6 +222,7 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", + [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages", [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, @@ -248,6 +249,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, + [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 }, [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, @@ -319,6 +321,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape) case VM_MODE_P36V48_16K: vm->pgtable_levels = 4; break; + case VM_MODE_P47V47_16K: case VM_MODE_P36V47_16K: vm->pgtable_levels = 3; break; diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S new file mode 100644 index 000000000000..88bfa505c6f5 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "processor.h" + +/* address of refill exception should be 4K aligned */ +.balign 4096 +.global handle_tlb_refill +handle_tlb_refill: + csrwr t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, LOONGARCH_CSR_PGD + lddir t0, t0, 3 + lddir t0, t0, 1 + ldpte t0, 0 + ldpte t0, 1 + tlbfill + csrrd t0, LOONGARCH_CSR_TLBRSAVE + ertn + + /* + * save and restore all gprs except base register, + * and default value of base register is sp ($r3). + */ +.macro save_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + st.d $r\n, \base, 8 * \n + .endr +.endm + +.macro restore_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + ld.d $r\n, \base, 8 * \n + .endr +.endm + +/* address of general exception should be 4K aligned */ +.balign 4096 +.global handle_exception +handle_exception: + csrwr sp, LOONGARCH_CSR_KS0 + csrrd sp, LOONGARCH_CSR_KS1 + addi.d sp, sp, -EXREGS_SIZE + + save_gprs sp + /* save sp register to stack */ + csrrd t0, LOONGARCH_CSR_KS0 + st.d t0, sp, 3 * 8 + + csrrd t0, LOONGARCH_CSR_ERA + st.d t0, sp, PC_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_ESTAT + st.d t0, sp, ESTAT_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_BADV + st.d t0, sp, BADV_OFFSET_EXREGS + + or a0, sp, zero + bl route_exception + restore_gprs sp + csrrd sp, LOONGARCH_CSR_KS0 + ertn diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c new file mode 100644 index 000000000000..0ac1abcb71cb --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <assert.h> +#include <linux/compiler.h> + +#include "kvm_util.h" +#include "processor.h" +#include "ucall_common.h" + +#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000 +#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000 + +static vm_paddr_t invalid_pgtable[4]; + +static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) +{ + unsigned int shift; + uint64_t mask; + + shift = level * (vm->page_shift - 3) + vm->page_shift; + mask = (1UL << (vm->page_shift - 3)) - 1; + return (gva >> shift) & mask; +} + +static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +{ + return entry & ~((0x1UL << vm->page_shift) - 1); +} + +static uint64_t ptrs_per_pte(struct kvm_vm *vm) +{ + return 1 << (vm->page_shift - 3); +} + +static void virt_set_pgtable(struct kvm_vm *vm, vm_paddr_t table, vm_paddr_t child) +{ + uint64_t *ptep; + int i, ptrs_per_pte; + + ptep = addr_gpa2hva(vm, table); + ptrs_per_pte = 1 << (vm->page_shift - 3); + for (i = 0; i < ptrs_per_pte; i++) + WRITE_ONCE(*(ptep + i), child); +} + +void virt_arch_pgd_alloc(struct kvm_vm *vm) +{ + int i; + vm_paddr_t child, table; + + if (vm->pgd_created) + return; + + child = table = 0; + for (i = 0; i < vm->pgtable_levels; i++) { + invalid_pgtable[i] = child; + table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN, + vm->memslots[MEM_REGION_PT]); + TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i); + virt_set_pgtable(vm, table, child); + child = table; + } + vm->pgd = table; + vm->pgd_created = true; +} + +static int virt_pte_none(uint64_t *ptep, int level) +{ + return *ptep == invalid_pgtable[level]; +} + +static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc) +{ + int level; + uint64_t *ptep; + vm_paddr_t child; + + if (!vm->pgd_created) + goto unmapped_gva; + + child = vm->pgd; + level = vm->pgtable_levels - 1; + while (level > 0) { + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; + if (virt_pte_none(ptep, level)) { + if (alloc) { + child = vm_alloc_page_table(vm); + virt_set_pgtable(vm, child, invalid_pgtable[level - 1]); + WRITE_ONCE(*ptep, child); + } else + goto unmapped_gva; + + } else + child = pte_addr(vm, *ptep); + level--; + } + + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; + return ptep; + +unmapped_gva: + TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); + exit(EXIT_FAILURE); +} + +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep; + + ptep = virt_populate_pte(vm, gva, 0); + TEST_ASSERT(*ptep != 0, "Virtual address vaddr: 0x%lx not mapped\n", gva); + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); +} + +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + uint32_t prot_bits; + uint64_t *ptep; + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + "vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + "paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond maximum supported,\n" + "paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + ptep = virt_populate_pte(vm, vaddr, 1); + prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER; + WRITE_ONCE(*ptep, paddr | prot_bits); +} + +static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) +{ + uint64_t pte, *ptep; + static const char * const type[] = { "pte", "pmd", "pud", "pgd"}; + + if (level < 0) + return; + + for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { + ptep = addr_gpa2hva(vm, pte); + if (virt_pte_none(ptep, level)) + continue; + fprintf(stream, "%*s%s: %lx: %lx at %p\n", + indent, "", type[level], pte, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--); + } +} + +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + int level; + + if (!vm->pgd_created) + return; + + level = vm->pgtable_levels - 1; + pte_dump(stream, vm, indent, vm->pgd, level); +} + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +{ +} + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED) + return; + + TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)", + uc.args[0], uc.args[1], uc.args[2]); +} + +void route_exception(struct ex_regs *regs) +{ + unsigned long pc, estat, badv; + + pc = regs->pc; + badv = regs->badv; + estat = regs->estat; + ucall(UCALL_UNHANDLED, 3, pc, estat, badv); + while (1) ; +} + +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) +{ + int i; + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" + "num: %u\n", num); + + vcpu_regs_get(vcpu, ®s); + + va_start(ap, num); + for (i = 0; i < num; i++) + regs.gpr[i + 4] = va_arg(ap, uint64_t); + va_end(ap); + + vcpu_regs_set(vcpu, ®s); +} + +static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +{ + uint64_t csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_get_reg(vcpu, csrid, addr); +} + +static void loongarch_set_csr(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + uint64_t csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_set_reg(vcpu, csrid, val); +} + +static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + int width; + unsigned long val; + struct kvm_vm *vm = vcpu->vm; + + switch (vm->mode) { + case VM_MODE_P36V47_16K: + case VM_MODE_P47V47_16K: + break; + + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + /* user mode and page enable mode */ + val = PLV_USER | CSR_CRMD_PG; + loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1); + + val = 0; + width = vm->page_shift - 3; + + switch (vm->pgtable_levels) { + case 4: + /* pud page shift and width */ + val = (vm->page_shift + width * 2) << 20 | (width << 25); + /* fall throuth */ + case 3: + /* pmd page shift and width */ + val |= (vm->page_shift + width) << 10 | (width << 15); + /* pte page shift and width */ + val |= vm->page_shift | width << 5; + break; + default: + TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels); + } + + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val); + + /* PGD page shift and width */ + val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6; + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd); + + /* + * Refill exception runs on real mode + * Entry address should be physical address + */ + val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val); + + /* + * General exception runs on page-enabled mode + * Entry address should be virtual address + */ + val = (unsigned long)handle_exception; + loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val); + val &= ~CSR_TLBIDX_SIZEM; + val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE); + + /* LOONGARCH_CSR_KS1 is used for exception stack */ + val = __vm_vaddr_alloc(vm, vm->page_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + TEST_ASSERT(val != 0, "No memory for exception stack"); + val = val + vm->page_size; + loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val); + val &= ~CSR_TLBREHI_PS; + val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id); +} + +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +{ + size_t stack_size; + uint64_t stack_vaddr; + struct kvm_regs regs; + struct kvm_vcpu *vcpu; + + vcpu = __vm_vcpu_add(vm, vcpu_id); + stack_size = vm->page_size; + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + TEST_ASSERT(stack_vaddr != 0, "No memory for vm stack"); + + loongarch_vcpu_setup(vcpu); + /* Setup guest general purpose registers */ + vcpu_regs_get(vcpu, ®s); + regs.gpr[3] = stack_vaddr + stack_size; + vcpu_regs_set(vcpu, ®s); + + return vcpu; +} + +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + struct kvm_regs regs; + + /* Setup guest PC register */ + vcpu_regs_get(vcpu, ®s); + regs.pc = (uint64_t)guest_code; + vcpu_regs_set(vcpu, ®s); +} diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c new file mode 100644 index 000000000000..fc6cbb50573f --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + */ +#include "kvm_util.h" + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +vm_vaddr_t *ucall_exit_mmio_addr; + +void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ + vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + + virt_map(vm, mmio_gva, mmio_gpa, 1); + + vm->ucall_mmio_addr = mmio_gpa; + + write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); +} + +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + if (run->exit_reason == KVM_EXIT_MMIO && + run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), + "Unexpected ucall exit mmio address access"); + + return (void *)(*((uint64_t *)run->mmio.data)); + } + + return NULL; +} diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S index aa0abd3f35bb..b787b982e922 100644 --- a/tools/testing/selftests/kvm/lib/riscv/handlers.S +++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S @@ -10,85 +10,88 @@ #include <asm/csr.h> .macro save_context - addi sp, sp, (-8*34) - sd x1, 0(sp) - sd x2, 8(sp) - sd x3, 16(sp) - sd x4, 24(sp) - sd x5, 32(sp) - sd x6, 40(sp) - sd x7, 48(sp) - sd x8, 56(sp) - sd x9, 64(sp) - sd x10, 72(sp) - sd x11, 80(sp) - sd x12, 88(sp) - sd x13, 96(sp) - sd x14, 104(sp) - sd x15, 112(sp) - sd x16, 120(sp) - sd x17, 128(sp) - sd x18, 136(sp) - sd x19, 144(sp) - sd x20, 152(sp) - sd x21, 160(sp) - sd x22, 168(sp) - sd x23, 176(sp) - sd x24, 184(sp) - sd x25, 192(sp) - sd x26, 200(sp) - sd x27, 208(sp) - sd x28, 216(sp) - sd x29, 224(sp) - sd x30, 232(sp) - sd x31, 240(sp) + addi sp, sp, (-8*36) + sd x1, 8(sp) + sd x2, 16(sp) + sd x3, 24(sp) + sd x4, 32(sp) + sd x5, 40(sp) + sd x6, 48(sp) + sd x7, 56(sp) + sd x8, 64(sp) + sd x9, 72(sp) + sd x10, 80(sp) + sd x11, 88(sp) + sd x12, 96(sp) + sd x13, 104(sp) + sd x14, 112(sp) + sd x15, 120(sp) + sd x16, 128(sp) + sd x17, 136(sp) + sd x18, 144(sp) + sd x19, 152(sp) + sd x20, 160(sp) + sd x21, 168(sp) + sd x22, 176(sp) + sd x23, 184(sp) + sd x24, 192(sp) + sd x25, 200(sp) + sd x26, 208(sp) + sd x27, 216(sp) + sd x28, 224(sp) + sd x29, 232(sp) + sd x30, 240(sp) + sd x31, 248(sp) csrr s0, CSR_SEPC csrr s1, CSR_SSTATUS - csrr s2, CSR_SCAUSE - sd s0, 248(sp) + csrr s2, CSR_STVAL + csrr s3, CSR_SCAUSE + sd s0, 0(sp) sd s1, 256(sp) sd s2, 264(sp) + sd s3, 272(sp) .endm .macro restore_context + ld s3, 272(sp) ld s2, 264(sp) ld s1, 256(sp) - ld s0, 248(sp) - csrw CSR_SCAUSE, s2 + ld s0, 0(sp) + csrw CSR_SCAUSE, s3 csrw CSR_SSTATUS, s1 csrw CSR_SEPC, s0 - ld x31, 240(sp) - ld x30, 232(sp) - ld x29, 224(sp) - ld x28, 216(sp) - ld x27, 208(sp) - ld x26, 200(sp) - ld x25, 192(sp) - ld x24, 184(sp) - ld x23, 176(sp) - ld x22, 168(sp) - ld x21, 160(sp) - ld x20, 152(sp) - ld x19, 144(sp) - ld x18, 136(sp) - ld x17, 128(sp) - ld x16, 120(sp) - ld x15, 112(sp) - ld x14, 104(sp) - ld x13, 96(sp) - ld x12, 88(sp) - ld x11, 80(sp) - ld x10, 72(sp) - ld x9, 64(sp) - ld x8, 56(sp) - ld x7, 48(sp) - ld x6, 40(sp) - ld x5, 32(sp) - ld x4, 24(sp) - ld x3, 16(sp) - ld x2, 8(sp) - ld x1, 0(sp) - addi sp, sp, (8*34) + ld x31, 248(sp) + ld x30, 240(sp) + ld x29, 232(sp) + ld x28, 224(sp) + ld x27, 216(sp) + ld x26, 208(sp) + ld x25, 200(sp) + ld x24, 192(sp) + ld x23, 184(sp) + ld x22, 176(sp) + ld x21, 168(sp) + ld x20, 160(sp) + ld x19, 152(sp) + ld x18, 144(sp) + ld x17, 136(sp) + ld x16, 128(sp) + ld x15, 120(sp) + ld x14, 112(sp) + ld x13, 104(sp) + ld x12, 96(sp) + ld x11, 88(sp) + ld x10, 80(sp) + ld x9, 72(sp) + ld x8, 64(sp) + ld x7, 56(sp) + ld x6, 48(sp) + ld x5, 40(sp) + ld x4, 32(sp) + ld x3, 24(sp) + ld x2, 16(sp) + ld x1, 8(sp) + addi sp, sp, (8*36) .endm .balign 4 diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index dd663bcf0cc0..2eac7d4b59e9 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -402,7 +402,7 @@ struct handlers { exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS]; }; -void route_exception(struct ex_regs *regs) +void route_exception(struct pt_regs *regs) { struct handlers *handlers = (struct handlers *)exception_handlers; int vector = 0, ec; diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index 9e370800a6a2..f962fefc48fa 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -15,7 +15,7 @@ static int timer_irq = IRQ_S_TIMER; -static void guest_irq_handler(struct ex_regs *regs) +static void guest_irq_handler(struct pt_regs *regs) { uint64_t xcnt, xcnt_diff_us, cmp; unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG; diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index cfed6c727bfc..739d17befb5a 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -27,7 +27,7 @@ static void guest_code(void) GUEST_DONE(); } -static void guest_breakpoint_handler(struct ex_regs *regs) +static void guest_breakpoint_handler(struct pt_regs *regs) { WRITE_ONCE(sw_bp_addr, regs->epc); regs->epc += 4; diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 569f2d67c9b8..a0b7dabb5040 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -17,6 +17,15 @@ enum { VCPU_FEATURE_SBI_EXT, }; +enum { + KVM_RISC_V_REG_OFFSET_VSTART = 0, + KVM_RISC_V_REG_OFFSET_VL, + KVM_RISC_V_REG_OFFSET_VTYPE, + KVM_RISC_V_REG_OFFSET_VCSR, + KVM_RISC_V_REG_OFFSET_VLENB, + KVM_RISC_V_REG_OFFSET_MAX, +}; + static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX]; bool filter_reg(__u64 reg) @@ -143,6 +152,38 @@ bool check_reject_set(int err) return err == EINVAL; } +static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s, + uint64_t feature) +{ + unsigned long vlenb_reg = 0; + int rc; + u64 reg, size; + + /* Enable V extension so that we can get the vlenb register */ + rc = __vcpu_set_reg(vcpu, feature, 1); + if (rc) + return rc; + + vlenb_reg = vcpu_get_reg(vcpu, s->regs[KVM_RISC_V_REG_OFFSET_VLENB]); + if (!vlenb_reg) { + TEST_FAIL("Can't compute vector register size from zero vlenb\n"); + return -EPERM; + } + + size = __builtin_ctzl(vlenb_reg); + size <<= KVM_REG_SIZE_SHIFT; + + for (int i = 0; i < 32; i++) { + reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | KVM_REG_RISCV_VECTOR_REG(i); + s->regs[KVM_RISC_V_REG_OFFSET_MAX + i] = reg; + } + + /* We should assert if disabling failed here while enabling succeeded before */ + vcpu_set_reg(vcpu, feature, 0); + + return 0; +} + void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) { unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 }; @@ -172,6 +213,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) if (!s->feature) continue; + if (s->feature == KVM_RISCV_ISA_EXT_V) { + feature = RISCV_ISA_EXT_REG(s->feature); + rc = override_vector_reg_size(vcpu, s, feature); + if (rc) + goto skip; + } + switch (s->feature_type) { case VCPU_FEATURE_ISA_EXT: feature = RISCV_ISA_EXT_REG(s->feature); @@ -186,6 +234,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) /* Try to enable the desired extension */ __vcpu_set_reg(vcpu, feature, 1); +skip: /* Double check whether the desired extension was enabled */ __TEST_REQUIRE(__vcpu_has_ext(vcpu, feature), "%s not available, skipping tests", s->name); @@ -410,6 +459,35 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id) return strdup_printf("%lld /* UNKNOWN */", reg_off); } +static const char *vector_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct __riscv_v_ext_state */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_VECTOR); + int reg_index = 0; + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_VECTOR); + + if (reg_off >= KVM_REG_RISCV_VECTOR_REG(0)) + reg_index = reg_off - KVM_REG_RISCV_VECTOR_REG(0); + switch (reg_off) { + case KVM_REG_RISCV_VECTOR_REG(0) ... + KVM_REG_RISCV_VECTOR_REG(31): + return strdup_printf("KVM_REG_RISCV_VECTOR_REG(%d)", reg_index); + case KVM_REG_RISCV_VECTOR_CSR_REG(vstart): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vstart)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vl): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vl)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vtype): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vtype)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vcsr)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)"; + } + + return strdup_printf("%lld /* UNKNOWN */", reg_off); +} + #define KVM_ISA_EXT_ARR(ext) \ [KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext @@ -639,6 +717,9 @@ void print_reg(const char *prefix, __u64 id) case KVM_REG_SIZE_U128: reg_size = "KVM_REG_SIZE_U128"; break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; default: printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n", (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK); @@ -670,6 +751,10 @@ void print_reg(const char *prefix, __u64 id) printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n", reg_size, fp_d_id_to_str(prefix, id)); break; + case KVM_REG_RISCV_VECTOR: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_VECTOR | %s,\n", + reg_size, vector_id_to_str(prefix, id)); + break; case KVM_REG_RISCV_ISA_EXT: printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n", reg_size, isa_ext_id_to_str(prefix, id)); @@ -874,6 +959,48 @@ static __u64 fp_d_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D, }; +/* Define a default vector registers with length. This will be overwritten at runtime */ +static __u64 vector_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vcsr), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vlenb), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(0), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(1), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(2), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(3), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(4), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(5), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(6), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(7), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(8), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(9), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(10), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(11), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(12), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(13), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(14), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(15), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(16), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(17), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(18), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(19), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(20), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(21), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(22), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(23), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(24), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(25), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(26), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(27), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(28), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(29), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(30), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(31), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V, +}; + #define SUBLIST_BASE \ {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \ .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),} @@ -898,6 +1025,9 @@ static __u64 fp_d_regs[] = { {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \ .regs_n = ARRAY_SIZE(fp_d_regs),} +#define SUBLIST_V \ + {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),} + #define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \ static __u64 regs_##ext[] = { \ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \ @@ -966,6 +1096,7 @@ KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP); KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D); +KVM_ISA_EXT_SUBLIST_CONFIG(v, V); KVM_ISA_EXT_SIMPLE_CONFIG(h, H); KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM); KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN); @@ -1040,6 +1171,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_fp_f, &config_fp_d, &config_h, + &config_v, &config_smnpm, &config_smstateen, &config_sscofpmf, diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index 03406de4989d..924a335d2262 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -73,7 +73,6 @@ unsigned long pmu_csr_read_num(int csr_num) switch (csr_num) { switchcase_csr_read_32(CSR_CYCLE, ret) - switchcase_csr_read_32(CSR_CYCLEH, ret) default : break; } @@ -128,17 +127,36 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags) "Unable to stop counter %ld error %ld\n", counter, ret.error); } -static void guest_illegal_exception_handler(struct ex_regs *regs) +static void guest_illegal_exception_handler(struct pt_regs *regs) { + unsigned long insn; + int opcode, csr_num, funct3; + __GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL, "Unexpected exception handler %lx\n", regs->cause); + insn = regs->badaddr; + opcode = (insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT; + __GUEST_ASSERT(opcode == INSN_OPCODE_SYSTEM, + "Unexpected instruction with opcode 0x%x insn 0x%lx\n", opcode, insn); + + csr_num = GET_CSR_NUM(insn); + funct3 = GET_RM(insn); + /* Validate if it is a CSR read/write operation */ + __GUEST_ASSERT(funct3 <= 7 && (funct3 != 0 && funct3 != 4), + "Unexpected system opcode with funct3 0x%x csr_num 0x%x\n", + funct3, csr_num); + + /* Validate if it is a HPMCOUNTER CSR operation */ + __GUEST_ASSERT((csr_num >= CSR_CYCLE && csr_num <= CSR_HPMCOUNTER31), + "Unexpected csr_num 0x%x\n", csr_num); + illegal_handler_invoked = true; /* skip the trapping instruction */ regs->epc += 4; } -static void guest_irq_handler(struct ex_regs *regs) +static void guest_irq_handler(struct pt_regs *regs) { unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG; struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index bc440d5aba57..ce3ac0fd6dfb 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -350,7 +350,7 @@ static void test_invalid_memory_region_flags(void) struct kvm_vm *vm; int r, i; -#if defined __aarch64__ || defined __riscv || defined __x86_64__ +#if defined __aarch64__ || defined __riscv || defined __x86_64__ || defined __loongarch__ supported_flags |= KVM_MEM_READONLY; #endif diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile index 0c0d7b1234c1..4d4f810cdf2c 100644 --- a/tools/testing/selftests/mount_setattr/Makefile +++ b/tools/testing/selftests/mount_setattr/Makefile @@ -2,6 +2,8 @@ # Makefile for mount selftests. CFLAGS = -g $(KHDR_INCLUDES) -Wall -O2 -pthread +LOCAL_HDRS += ../filesystems/wrappers.h + TEST_GEN_PROGS := mount_setattr_test include ../lib.mk diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index 48a000cabc97..8b378c91debf 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -20,7 +20,7 @@ #include <stdarg.h> #include <linux/mount.h> -#include "../filesystems/overlayfs/wrappers.h" +#include "../filesystems/wrappers.h" #include "../kselftest_harness.h" #ifndef CLONE_NEWNS @@ -107,46 +107,6 @@ #endif #endif -#ifndef __NR_open_tree - #if defined __alpha__ - #define __NR_open_tree 538 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_open_tree 4428 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_open_tree 6428 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_open_tree 5428 - #endif - #elif defined __ia64__ - #define __NR_open_tree (428 + 1024) - #else - #define __NR_open_tree 428 - #endif -#endif - -#ifndef __NR_move_mount - #if defined __alpha__ - #define __NR_move_mount 539 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_move_mount 4429 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_move_mount 6429 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_move_mount 5429 - #endif - #elif defined __ia64__ - #define __NR_move_mount (428 + 1024) - #else - #define __NR_move_mount 429 - #endif -#endif - #ifndef MOUNT_ATTR_IDMAP #define MOUNT_ATTR_IDMAP 0x00100000 #endif @@ -161,23 +121,6 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); } -#ifndef OPEN_TREE_CLONE -#define OPEN_TREE_CLONE 1 -#endif - -#ifndef OPEN_TREE_CLOEXEC -#define OPEN_TREE_CLOEXEC O_CLOEXEC -#endif - -#ifndef AT_RECURSIVE -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ -#endif - -static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) -{ - return syscall(__NR_open_tree, dfd, filename, flags); -} - static ssize_t write_nointr(int fd, const void *buf, size_t count) { ssize_t ret; @@ -1076,7 +1019,7 @@ FIXTURE_SETUP(mount_setattr_idmapped) ASSERT_EQ(mkdir("/mnt/D", 0777), 0); img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600); ASSERT_GE(img_fd, 0); - ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0); + ASSERT_EQ(ftruncate(img_fd, 2147483648 /* 2 GB */), 0); ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0); ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0); ASSERT_EQ(close(img_fd), 0); diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c index 1562aa7d60b0..6dec59d64083 100644 --- a/tools/testing/selftests/nci/nci_dev.c +++ b/tools/testing/selftests/nci/nci_dev.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2021 Samsung Electrnoics + * Copyright (C) 2021 Samsung Electronics * Bongsu Jeon <bongsu.jeon@samsung.com> * * Test code for nci diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 70a38f485d4d..ea84b88bcb30 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -115,7 +115,7 @@ YNL_GEN_FILES := busy_poller netlink-dumps TEST_GEN_FILES += $(YNL_GEN_FILES) TEST_FILES := settings -TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh +TEST_FILES += in_netns.sh lib.sh setup_loopback.sh setup_veth.sh TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index d66336256580..8b015f16c03d 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -23,6 +23,7 @@ FIXTURE_VARIANT(scm_rights) int type; int flags; bool test_listener; + bool disabled; }; FIXTURE_VARIANT_ADD(scm_rights, dgram) @@ -31,6 +32,16 @@ FIXTURE_VARIANT_ADD(scm_rights, dgram) .type = SOCK_DGRAM, .flags = 0, .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, dgram_disabled) +{ + .name = "UNIX ", + .type = SOCK_DGRAM, + .flags = 0, + .test_listener = false, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream) @@ -39,6 +50,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream) .type = SOCK_STREAM, .flags = 0, .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = false, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream_oob) @@ -47,6 +68,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_oob) .type = SOCK_STREAM, .flags = MSG_OOB, .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_oob_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = false, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream_listener) @@ -55,6 +86,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener) .type = SOCK_STREAM, .flags = 0, .test_listener = true, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = true, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob) @@ -63,6 +104,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob) .type = SOCK_STREAM, .flags = MSG_OOB, .test_listener = true, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = true, + .disabled = true, }; static int count_sockets(struct __test_metadata *_metadata, @@ -105,6 +156,9 @@ FIXTURE_SETUP(scm_rights) ret = unshare(CLONE_NEWNET); ASSERT_EQ(0, ret); + if (variant->disabled) + return; + ret = count_sockets(_metadata, variant); ASSERT_EQ(0, ret); } @@ -113,6 +167,9 @@ FIXTURE_TEARDOWN(scm_rights) { int ret; + if (variant->disabled) + return; + sleep(1); ret = count_sockets(_metadata, variant); @@ -121,6 +178,7 @@ FIXTURE_TEARDOWN(scm_rights) static void create_listeners(struct __test_metadata *_metadata, FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, int n) { struct sockaddr_un addr = { @@ -140,6 +198,12 @@ static void create_listeners(struct __test_metadata *_metadata, ret = listen(self->fd[i], -1); ASSERT_EQ(0, ret); + if (variant->disabled) { + ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS, + &(int){0}, sizeof(int)); + ASSERT_EQ(0, ret); + } + addrlen = sizeof(addr); ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen); ASSERT_EQ(0, ret); @@ -164,6 +228,12 @@ static void create_socketpairs(struct __test_metadata *_metadata, for (i = 0; i < n * 2; i += 2) { ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i); ASSERT_EQ(0, ret); + + if (variant->disabled) { + ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS, + &(int){0}, sizeof(int)); + ASSERT_EQ(0, ret); + } } } @@ -175,7 +245,7 @@ static void __create_sockets(struct __test_metadata *_metadata, ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0])); if (variant->test_listener) - create_listeners(_metadata, self, n); + create_listeners(_metadata, self, variant, n); else create_socketpairs(_metadata, self, variant, n); } @@ -230,7 +300,13 @@ void __send_fd(struct __test_metadata *_metadata, int ret; ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags); - ASSERT_EQ(MSGLEN, ret); + + if (variant->disabled) { + ASSERT_EQ(-1, ret); + ASSERT_EQ(-EPERM, -errno); + } else { + ASSERT_EQ(MSGLEN, ret); + } } #define create_sockets(n) \ diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index f366cadbc5e8..4046131e7888 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -106,26 +106,16 @@ # | | # +-----------------------------------------------------------------------+ +. ./lib.sh + ERR=4 # Return 4 by default, which is the SKIP code for kselftest PING6="ping" PAUSE_ON_FAIL="no" -readonly NS0=$(mktemp -u ns0-XXXXXXXX) -readonly NS1=$(mktemp -u ns1-XXXXXXXX) -readonly NS2=$(mktemp -u ns2-XXXXXXXX) -readonly NS3=$(mktemp -u ns3-XXXXXXXX) - # Exit the script after having removed the network namespaces it created -# -# Parameters: -# -# * The list of network namespaces to delete before exiting. -# exit_cleanup() { - for ns in "$@"; do - ip netns delete "${ns}" 2>/dev/null || true - done + cleanup_all_ns if [ "${ERR}" -eq 4 ]; then echo "Error: Setting up the testing environment failed." >&2 @@ -140,17 +130,7 @@ exit_cleanup() # namespaces created by this script are deleted. create_namespaces() { - ip netns add "${NS0}" || exit_cleanup - ip netns add "${NS1}" || exit_cleanup "${NS0}" - ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}" - ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}" -} - -# The trap function handler -# -exit_cleanup_all() -{ - exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}" + setup_ns NS0 NS1 NS2 NS3 || exit_cleanup } # Configure a network interface using a host route @@ -188,10 +168,6 @@ iface_config() # setup_underlay() { - for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do - ip -netns "${ns}" link set dev lo up - done; - ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}" ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}" ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}" @@ -234,14 +210,6 @@ setup_overlay_ipv4() ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1 ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10 ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33 - - # The intermediate namespaces don't have routes for the reverse path, - # as it will be handled by tc. So we need to ensure that rp_filter is - # not going to block the traffic. - ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0 - ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0 } setup_overlay_ipv6() @@ -521,13 +489,10 @@ done check_features -# Create namespaces before setting up the exit trap. -# Otherwise, exit_cleanup_all() could delete namespaces that were not created -# by this script. -create_namespaces - set -e -trap exit_cleanup_all EXIT +trap exit_cleanup EXIT + +create_namespaces setup_underlay setup_overlay_ipv4 diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh index 7db292ec4884..7d2d40812074 100755 --- a/tools/testing/selftests/net/busy_poll_test.sh +++ b/tools/testing/selftests/net/busy_poll_test.sh @@ -1,6 +1,6 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -source net_helper.sh +source lib.sh NSIM_SV_ID=$((256 + RANDOM % 256)) NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID diff --git a/tools/testing/selftests/net/can/.gitignore b/tools/testing/selftests/net/can/.gitignore new file mode 100644 index 000000000000..764a53fc837f --- /dev/null +++ b/tools/testing/selftests/net/can/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +test_raw_filter diff --git a/tools/testing/selftests/net/can/Makefile b/tools/testing/selftests/net/can/Makefile new file mode 100644 index 000000000000..5b82e60a03e7 --- /dev/null +++ b/tools/testing/selftests/net/can/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) + +TEST_PROGS := test_raw_filter.sh + +TEST_GEN_FILES := test_raw_filter + +include ../../lib.mk diff --git a/tools/testing/selftests/net/can/test_raw_filter.c b/tools/testing/selftests/net/can/test_raw_filter.c new file mode 100644 index 000000000000..4101c36390fd --- /dev/null +++ b/tools/testing/selftests/net/can/test_raw_filter.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +/* + * Copyright (c) 2011 Volkswagen Group Electronic Research + * All rights reserved. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <net/if.h> +#include <linux/if.h> + +#include <linux/can.h> +#include <linux/can/raw.h> + +#include "../../kselftest_harness.h" + +#define ID 0x123 + +char CANIF[IFNAMSIZ]; + +static int send_can_frames(int sock, int testcase) +{ + struct can_frame frame; + + frame.can_dlc = 1; + frame.data[0] = testcase; + + frame.can_id = ID; + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_RTR_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_EFF_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_EFF_FLAG | CAN_RTR_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + return 0; + +write_err: + perror("write"); + return 1; +} + +FIXTURE(can_filters) { + int sock; +}; + +FIXTURE_SETUP(can_filters) +{ + struct sockaddr_can addr; + struct ifreq ifr; + int recv_own_msgs = 1; + int s, ret; + + s = socket(PF_CAN, SOCK_RAW, CAN_RAW); + ASSERT_GE(s, 0) + TH_LOG("failed to create CAN_RAW socket: %d", errno); + + strncpy(ifr.ifr_name, CANIF, sizeof(ifr.ifr_name)); + ret = ioctl(s, SIOCGIFINDEX, &ifr); + ASSERT_GE(ret, 0) + TH_LOG("failed SIOCGIFINDEX: %d", errno); + + addr.can_family = AF_CAN; + addr.can_ifindex = ifr.ifr_ifindex; + + setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS, + &recv_own_msgs, sizeof(recv_own_msgs)); + + ret = bind(s, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(ret, 0) + TH_LOG("failed bind socket: %d", errno); + + self->sock = s; +} + +FIXTURE_TEARDOWN(can_filters) +{ + close(self->sock); +} + +FIXTURE_VARIANT(can_filters) { + int testcase; + canid_t id; + canid_t mask; + int exp_num_rx; + canid_t exp_flags[]; +}; + +/* Receive all frames when filtering for the ID in standard frame format */ +FIXTURE_VARIANT_ADD(can_filters, base) { + .testcase = 1, + .id = ID, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore EFF flag in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_eff) { + .testcase = 2, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore RTR flag in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_rtr) { + .testcase = 3, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore EFF and RTR flags in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_effrtr) { + .testcase = 4, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF frames when expecting no EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff) { + .testcase = 5, + .id = ID, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF frames when filter id and filter mask include EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_eff) { + .testcase = 6, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF frames when expecting no EFF flag, ignoring RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_rtr) { + .testcase = 7, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF frames when filter id and filter mask include EFF flag, + * ignoring RTR flag + */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_effrtr) { + .testcase = 8, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive no remote frames when filtering for no RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr) { + .testcase = 9, + .id = ID, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_EFF_FLAG, + }, +}; + +/* Receive no remote frames when filtering for no RTR flag, ignoring EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_eff) { + .testcase = 10, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_EFF_FLAG, + }, +}; + +/* Receive only remote frames when filter includes RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_rtr) { + .testcase = 11, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_RTR_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only remote frames when filter includes RTR flag, ignoring EFF + * flag + */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_effrtr) { + .testcase = 12, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_RTR_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF data frame when filtering for no flags */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr) { + .testcase = 13, + .id = ID, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + 0, + }, +}; + +/* Receive only EFF data frame when filtering for EFF but no RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_eff) { + .testcase = 14, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG, + }, +}; + +/* Receive only SFF remote frame when filtering for RTR but no EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_rtr) { + .testcase = 15, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF remote frame when filtering for EFF and RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_effrtr) { + .testcase = 16, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF data frame when filtering for no EFF flag and no RTR flag + * but based on EFF mask + */ +FIXTURE_VARIANT_ADD(can_filters, eff) { + .testcase = 17, + .id = ID, + .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + 0, + }, +}; + +/* Receive only EFF data frame when filtering for EFF flag and no RTR flag but + * based on EFF mask + */ +FIXTURE_VARIANT_ADD(can_filters, eff_eff) { + .testcase = 18, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG, + }, +}; + +/* This test verifies that the raw CAN filters work, by checking if only frames + * with the expected set of flags are received. For each test case, the given + * filter (id and mask) is added and four CAN frames are sent with every + * combination of set/unset EFF/RTR flags. + */ +TEST_F(can_filters, test_filter) +{ + struct can_filter rfilter; + int ret; + + rfilter.can_id = variant->id; + rfilter.can_mask = variant->mask; + setsockopt(self->sock, SOL_CAN_RAW, CAN_RAW_FILTER, + &rfilter, sizeof(rfilter)); + + TH_LOG("filters: can_id = 0x%08X can_mask = 0x%08X", + rfilter.can_id, rfilter.can_mask); + + ret = send_can_frames(self->sock, variant->testcase); + ASSERT_EQ(ret, 0) + TH_LOG("failed to send CAN frames"); + + for (int i = 0; i <= variant->exp_num_rx; i++) { + struct can_frame frame; + struct timeval tv = { + .tv_sec = 0, + .tv_usec = 50000, /* 50ms timeout */ + }; + fd_set rdfs; + + FD_ZERO(&rdfs); + FD_SET(self->sock, &rdfs); + + ret = select(self->sock + 1, &rdfs, NULL, NULL, &tv); + ASSERT_GE(ret, 0) + TH_LOG("failed select for frame %d, err: %d)", i, errno); + + ret = FD_ISSET(self->sock, &rdfs); + if (i == variant->exp_num_rx) { + ASSERT_EQ(ret, 0) + TH_LOG("too many frames received"); + } else { + ASSERT_NE(ret, 0) + TH_LOG("too few frames received"); + + ret = read(self->sock, &frame, sizeof(frame)); + ASSERT_GE(ret, 0) + TH_LOG("failed to read frame %d, err: %d", i, errno); + + TH_LOG("rx: can_id = 0x%08X rx = %d", frame.can_id, i); + + ASSERT_EQ(ID, frame.can_id & CAN_SFF_MASK) + TH_LOG("received wrong can_id"); + ASSERT_EQ(variant->testcase, frame.data[0]) + TH_LOG("received wrong test case"); + + ASSERT_EQ(frame.can_id & ~CAN_ERR_MASK, + variant->exp_flags[i]) + TH_LOG("received unexpected flags"); + } + } +} + +int main(int argc, char **argv) +{ + char *ifname = getenv("CANIF"); + + if (!ifname) { + printf("CANIF environment variable must contain the test interface\n"); + return KSFT_FAIL; + } + + strncpy(CANIF, ifname, sizeof(CANIF) - 1); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/net/can/test_raw_filter.sh b/tools/testing/selftests/net/can/test_raw_filter.sh new file mode 100755 index 000000000000..276d6c06ac95 --- /dev/null +++ b/tools/testing/selftests/net/can/test_raw_filter.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + test_raw_filter +" + +net_dir=$(dirname $0)/.. +source $net_dir/lib.sh + +export CANIF=${CANIF:-"vcan0"} +BITRATE=${BITRATE:-500000} + +setup() +{ + if [[ $CANIF == vcan* ]]; then + ip link add name $CANIF type vcan || exit $ksft_skip + else + ip link set dev $CANIF type can bitrate $BITRATE || exit $ksft_skip + fi + ip link set dev $CANIF up + pwd +} + +cleanup() +{ + ip link set dev $CANIF down + if [[ $CANIF == vcan* ]]; then + ip link delete $CANIF + fi +} + +test_raw_filter() +{ + ./test_raw_filter + check_err $? + log_test "test_raw_filter" +} + +trap cleanup EXIT +setup + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 130d532b7e67..3cfef5153823 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -33,7 +33,6 @@ CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m CONFIG_IPV6_MROUTE=y CONFIG_IPV6_SIT=y -CONFIG_IP_DCCP=m CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index c7cea556b416..5fbdd2a0b537 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -516,10 +516,7 @@ fib_rule4_test() fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ "oif redirect to table" "oif no redirect to table" - # Enable forwarding and disable rp_filter as all the addresses are in - # the same subnet and egress device == ingress device. ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1 - ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" getnomatch="from $SRC_IP iif lo" fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 3ea6f886a210..a94b73a53f72 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -11,7 +11,7 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \ ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ - ipv4_mpath_list ipv6_mpath_list" + ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance" VERBOSE=0 PAUSE_ON_FAIL=no @@ -1085,6 +1085,35 @@ route_setup() set +e } +forwarding_cleanup() +{ + cleanup_ns $ns3 + + route_cleanup +} + +# extend route_setup with an ns3 reachable through ns2 over both devices +forwarding_setup() +{ + forwarding_cleanup + + route_setup + + setup_ns ns3 + + ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2 + ip -netns $ns3 link set veth5 up + ip -netns $ns2 link set veth6 up + + ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24 + ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24 + ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2 + + ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad + ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad + ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2 +} + # assumption is that basic add of a single path route works # otherwise just adding an address on an interface is broken ipv6_rt_add() @@ -2531,9 +2560,6 @@ ipv4_mpath_list_test() run_cmd "ip -n $ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') @@ -2600,6 +2626,93 @@ ipv6_mpath_list_test() route_cleanup } +tc_set_flower_counter__saddr_syn() { + tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2" +} + +ip_mpath_balance_dep_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v jq)" ]; then + echo "jq command not found. Skipping test" + return 1 + fi +} + +ip_mpath_balance() { + local -r ipver=$1 + local -r daddr=$2 + local -r num_conn=20 + + for i in $(seq 1 $num_conn); do + ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null & + sleep 0.02 + echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000 + done + + local -r syn0="$(tc_get_flower_counter $ns1 veth1)" + local -r syn1="$(tc_get_flower_counter $ns1 veth3)" + local -r syns=$((syn0+syn1)) + + [ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)" + + [[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]] +} + +ipv4_mpath_balance_test() +{ + echo + echo "IPv4 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 172.16.105.1 \ + nexthop via 172.16.101.2 \ + nexthop via 172.16.103.2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1 + tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1 + + ip_mpath_balance -4 172.16.105.1 + + log_test $? 0 "IPv4 multipath loadbalance" + + forwarding_cleanup +} + +ipv6_mpath_balance_test() +{ + echo + echo "IPv6 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 2001:db8:105::1\ + nexthop via 2001:db8:101::2 \ + nexthop via 2001:db8:103::2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv6.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1 + tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1 + + ip_mpath_balance -6 "[2001:db8:105::1]" + + log_test $? 0 "IPv6 multipath loadbalance" + + forwarding_cleanup +} + ################################################################################ # usage @@ -2683,6 +2796,8 @@ do fib6_gc_test|ipv6_gc) fib6_gc_test;; ipv4_mpath_list) ipv4_mpath_list_test;; ipv6_mpath_list) ipv6_mpath_list_test;; + ipv4_mpath_balance) ipv4_mpath_balance_test;; + ipv6_mpath_balance) ipv6_mpath_balance_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index e6a3e04fd83f..d4e7dd659354 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,10 +1,24 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ - v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ - v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \ - v3exc_timeout_test v3star_ex_auto_add_test" +ALL_TESTS=" + v2reportleave_test + v3include_test + v3inc_allow_test + v3inc_is_include_test + v3inc_is_exclude_test + v3inc_to_exclude_test + v3exc_allow_test + v3exc_is_include_test + v3exc_is_exclude_test + v3exc_to_exclude_test + v3inc_block_test + v3exc_block_test + v3exc_timeout_test + v3star_ex_auto_add_test + v2per_vlan_snooping_port_stp_test + v2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -554,6 +568,64 @@ v3star_ex_auto_add_test() v3cleanup $swp2 $TEST_GROUP } +v2per_vlan_snooping_stp_test() +{ + local is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_igmp_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan global set vid 1 dev br0 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]') + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]') + + RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? "No IGMP queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +v2per_vlan_snooping_port_stp_test() +{ + v2per_vlan_snooping_stp_test 1 +} + +v2per_vlan_snooping_vlan_stp_test() +{ + v2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index f84ab2e65754..4cacef5a813a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -1,10 +1,23 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ - mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ - mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ - mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test" +ALL_TESTS=" + mldv2include_test + mldv2inc_allow_test + mldv2inc_is_include_test + mldv2inc_is_exclude_test + mldv2inc_to_exclude_test + mldv2exc_allow_test + mldv2exc_is_include_test + mldv2exc_is_exclude_test + mldv2exc_to_exclude_test + mldv2inc_block_test + mldv2exc_block_test + mldv2exc_timeout_test + mldv2star_ex_auto_add_test + mldv2per_vlan_snooping_port_stp_test + mldv2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test() mldv2cleanup $swp2 } +mldv2per_vlan_snooping_stp_test() +{ + local is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan global set vid 1 dev br0 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + + RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? "No MLD queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 \ + mcast_mld_version 1 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +mldv2per_vlan_snooping_port_stp_test() +{ + mldv2per_vlan_snooping_stp_test 1 +} + +mldv2per_vlan_snooping_vlan_stp_test() +{ + mldv2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 8d7a1a004b7c..18fd69d8d937 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -1,6 +1,7 @@ CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_BRIDGE_IGMP_SNOOPING=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index d6f0e449c029..b13c89a99ecb 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -178,8 +178,6 @@ setup() else ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh index a6b2b1f9c641..c6866e42f95c 100755 --- a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh +++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh @@ -69,7 +69,6 @@ # which can affect the conditions needed to trigger a soft lockup. source lib.sh -source net_helper.sh TEST_DURATION=300 ROUTING_TABLE_REFRESH_PERIOD=0.01 diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 701905eeff66..006fdadcc4b9 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -217,6 +217,8 @@ setup_ns() return $ksft_skip fi ip -n "${!ns_name}" link set lo up + ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ns_list+=("${!ns_name}") done NS_LIST+=("${ns_list[@]}") @@ -270,6 +272,30 @@ tc_rule_handle_stats_get() .options.actions[0].stats$selector" } +# attach a qdisc with two children match/no-match and a flower filter to match +tc_set_flower_counter() { + local -r ns=$1 + local -r ipver=$2 + local -r dev=$3 + local -r flower_expr=$4 + + tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \ + priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + + tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo + tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo + + tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \ + flower $flower_expr classid 1:2 +} + +tc_get_flower_counter() { + local -r ns=$1 + local -r dev=$2 + + tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets +} + ret_set_ksft_status() { local ksft_status=$1; shift @@ -569,3 +595,24 @@ bridge_vlan_add() bridge vlan add "$@" defer bridge vlan del "$@" } + +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + local protocol="${3}" + local pattern + local i + + pattern=":$(printf "%04X" "${port}") " + + # for tcp protocol additionally check the socket state + [ ${protocol} = "tcp" ] && pattern="${pattern}0A" + for i in $(seq 10); do + if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \ + /proc/net/"${protocol}"* | grep -q "${pattern}"; then + break + fi + sleep 0.1 + done +} diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore index 1ebc6187f421..bbc97d6bf556 100644 --- a/tools/testing/selftests/net/lib/.gitignore +++ b/tools/testing/selftests/net/lib/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only csum +xdp_helper diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index c22623b9a2a5..88c4bc461459 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -10,6 +10,7 @@ TEST_FILES += ../../../../net/ynl TEST_GEN_FILES += csum TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) +TEST_GEN_FILES += xdp_helper TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h new file mode 100644 index 000000000000..17dc34a612c6 --- /dev/null +++ b/tools/testing/selftests/net/lib/ksft.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(__NET_KSFT_H__) +#define __NET_KSFT_H__ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +static inline void ksft_ready(void) +{ + const char msg[7] = "ready\n"; + char *env_str; + int fd; + + env_str = getenv("KSFT_READY_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n", + env_str); + return; + } + } else { + fd = STDOUT_FILENO; + } + + write(fd, msg, sizeof(msg)); + if (fd != STDOUT_FILENO) + close(fd); +} + +static inline void ksft_wait(void) +{ + char *env_str; + char byte; + int fd; + + env_str = getenv("KSFT_WAIT_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n", + env_str); + return; + } + } else { + /* Not running in KSFT env, wait for input from STDIN instead */ + fd = STDIN_FILENO; + } + + read(fd, &byte, sizeof(byte)); + if (fd != STDIN_FILENO) + close(fd); +} + +#endif diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 3cfad0fd4570..61287c203b6e 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -3,6 +3,7 @@ import builtins import functools import inspect +import signal import sys import time import traceback @@ -26,6 +27,10 @@ class KsftXfailEx(Exception): pass +class KsftTerminate(KeyboardInterrupt): + pass + + def ksft_pr(*objs, **kwargs): print("#", *objs, **kwargs) @@ -193,6 +198,17 @@ def ksft_setup(env): return env +def _ksft_intr(signum, frame): + # ksft runner.sh sends 2 SIGTERMs in a row on a timeout + # if we don't ignore the second one it will stop us from handling cleanup + global term_cnt + term_cnt += 1 + if term_cnt == 1: + raise KsftTerminate() + else: + ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...") + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases = cases or [] @@ -205,6 +221,10 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases.append(value) break + global term_cnt + term_cnt = 0 + prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr) + totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} print("TAP version 13") @@ -233,7 +253,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): for line in tb.strip().split('\n'): ksft_pr("Exception|", line) if stop: - ksft_pr("Stopping tests due to KeyboardInterrupt.") + ksft_pr(f"Stopping tests due to {type(e).__name__}.") KSFT_RESULT = False cnt_key = 'fail' @@ -248,6 +268,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): if stop: break + signal.signal(signal.SIGTERM, prev_sigterm) + print( f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" ) diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 8986c584cb37..6329ae805abf 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -39,12 +39,12 @@ class EthtoolFamily(YnlFamily): class RtnlFamily(YnlFamily): def __init__(self, recv_size=0): - super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), + super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(), schema='', recv_size=recv_size) class RtnlAddrFamily(YnlFamily): def __init__(self, recv_size=0): - super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(), + super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(), schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c index aeed25914104..eb025a9f35b1 100644 --- a/tools/testing/selftests/drivers/net/xdp_helper.c +++ b/tools/testing/selftests/net/lib/xdp_helper.c @@ -11,55 +11,16 @@ #include <net/if.h> #include <inttypes.h> +#include "ksft.h" + #define UMEM_SZ (1U << 16) #define NUM_DESC (UMEM_SZ / 2048) -/* Move this to a common header when reused! */ -static void ksft_ready(void) -{ - const char msg[7] = "ready\n"; - char *env_str; - int fd; - - env_str = getenv("KSFT_READY_FD"); - if (env_str) { - fd = atoi(env_str); - if (!fd) { - fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n", - env_str); - return; - } - } else { - fd = STDOUT_FILENO; - } - - write(fd, msg, sizeof(msg)); - if (fd != STDOUT_FILENO) - close(fd); -} -static void ksft_wait(void) +static void print_usage(const char *bin) { - char *env_str; - char byte; - int fd; - - env_str = getenv("KSFT_WAIT_FD"); - if (env_str) { - fd = atoi(env_str); - if (!fd) { - fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n", - env_str); - return; - } - } else { - /* Not running in KSFT env, wait for input from STDIN instead */ - fd = STDIN_FILENO; - } - - read(fd, &byte, sizeof(byte)); - if (fd != STDIN_FILENO) - close(fd); + fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n" + "where:\n\t-z: force zerocopy mode", bin); } /* this is a simple helper program that creates an XDP socket and does the @@ -77,12 +38,13 @@ int main(int argc, char **argv) struct sockaddr_xdp sxdp = { 0 }; int num_desc = NUM_DESC; void *umem_area; + int retry = 0; int ifindex; int sock_fd; int queue; - if (argc != 3) { - fprintf(stderr, "Usage: %s ifindex queue_id\n", argv[0]); + if (argc != 3 && argc != 4) { + print_usage(argv[0]); return 1; } @@ -132,11 +94,29 @@ int main(int argc, char **argv) sxdp.sxdp_queue_id = queue; sxdp.sxdp_flags = 0; - if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) { - munmap(umem_area, UMEM_SZ); - perror("bind failed"); - close(sock_fd); - return 1; + if (argc > 3) { + if (!strcmp(argv[3], "-z")) { + sxdp.sxdp_flags = XDP_ZEROCOPY; + } else { + print_usage(argv[0]); + return 1; + } + } + + while (1) { + if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0) + break; + + if (errno == EBUSY && retry < 3) { + retry++; + sleep(1); + continue; + } else { + perror("bind failed"); + munmap(umem_area, UMEM_SZ); + close(sock_fd); + return 1; + } } ksft_ready(); diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 340e1a777e16..e47788bfa671 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -11,7 +11,7 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag TEST_FILES := mptcp_lib.sh settings -TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) ../net_helper.sh +TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) EXTRA_CLEAN := *.pcap diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index e7a75341f0f3..7a3cb4c09e45 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -225,6 +225,37 @@ chk_dump_one() fi } +chk_dump_subflow() +{ + local inet_diag_token + local subflow_line + local ss_output + local ss_token + local msg + + ss_output=$(ss -tniN $ns) + + subflow_line=$(echo "$ss_output" | \ + grep -m1 -Eo '[0-9.]+:[0-9].+ +[0-9.]+:[0-9.]+') + + ss_token=$(echo "$ss_output" | grep -m1 -Eo 'token:[^ ]+') + + inet_diag_token=$(ip netns exec $ns ./mptcp_diag -s "$subflow_line" | \ + grep -Eo 'token:[^ ]+') + + msg="....chk dump_subflow" + + mptcp_lib_print_title "$msg" + if [ -n "$ss_token" ] && [ "$ss_token" = "$inet_diag_token" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "expected $ss_token found $inet_diag_token" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + msk_info_get_value() { local port="${1}" @@ -316,6 +347,7 @@ chk_msk_fallback_nr 0 "....chk no fallback" chk_msk_inuse 2 chk_msk_cestab 2 chk_dump_one +chk_dump_subflow flush_pids chk_msk_inuse 0 "2->0" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index c83a8b47bbdf..ac1349c4b9e5 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -180,13 +180,26 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen, } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { +again: int err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + /* glibc starts to support MPTCP since v2.42. + * For older versions, use IPPROTO_TCP to resolve, + * and use TCP/MPTCP to create socket. + * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82 + */ + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? service : "", errstr); @@ -292,7 +305,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -356,7 +369,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, int infd, struct wstate *winfo) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c index 284286c524cf..e084796e804d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_diag.c +++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c @@ -8,6 +8,7 @@ #include <sys/socket.h> #include <netinet/in.h> #include <linux/tcp.h> +#include <arpa/inet.h> #include <unistd.h> #include <stdlib.h> @@ -19,6 +20,15 @@ #define IPPROTO_MPTCP 262 #endif +#define parse_rtattr_nested(tb, max, rta) \ + (parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \ + NLA_F_NESTED)) + +struct params { + __u32 target_token; + char subflow_addrs[1024]; +}; + struct mptcp_info { __u8 mptcpi_subflows; __u8 mptcpi_add_addr_signal; @@ -46,6 +56,37 @@ struct mptcp_info { __u32 mptcpi_last_ack_recv; }; +enum { + MPTCP_SUBFLOW_ATTR_UNSPEC, + MPTCP_SUBFLOW_ATTR_TOKEN_REM, + MPTCP_SUBFLOW_ATTR_TOKEN_LOC, + MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ, + MPTCP_SUBFLOW_ATTR_MAP_SEQ, + MPTCP_SUBFLOW_ATTR_MAP_SFSEQ, + MPTCP_SUBFLOW_ATTR_SSN_OFFSET, + MPTCP_SUBFLOW_ATTR_MAP_DATALEN, + MPTCP_SUBFLOW_ATTR_FLAGS, + MPTCP_SUBFLOW_ATTR_ID_REM, + MPTCP_SUBFLOW_ATTR_ID_LOC, + MPTCP_SUBFLOW_ATTR_PAD, + + __MPTCP_SUBFLOW_ATTR_MAX +}; + +#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1) + +#define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0) +#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1) +#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2) +#define MPTCP_SUBFLOW_FLAG_JOIN_LOC _BITUL(3) +#define MPTCP_SUBFLOW_FLAG_BKUP_REM _BITUL(4) +#define MPTCP_SUBFLOW_FLAG_BKUP_LOC _BITUL(5) +#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED _BITUL(6) +#define MPTCP_SUBFLOW_FLAG_CONNECTED _BITUL(7) +#define MPTCP_SUBFLOW_FLAG_MAPVALID _BITUL(8) + +#define rta_getattr(type, value) (*(type *)RTA_DATA(value)) + static void die_perror(const char *msg) { perror(msg); @@ -54,11 +95,13 @@ static void die_perror(const char *msg) static void die_usage(int r) { - fprintf(stderr, "Usage: mptcp_diag -t\n"); + fprintf(stderr, "Usage:\n" + "mptcp_diag -t <token>\n" + "mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n"); exit(r); } -static void send_query(int fd, __u32 token) +static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto) { struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK @@ -72,31 +115,26 @@ static void send_query(int fd, __u32 token) .nlmsg_type = SOCK_DIAG_BY_FAMILY, .nlmsg_flags = NLM_F_REQUEST }, - .r = { - .sdiag_family = AF_INET, - /* Real proto is set via INET_DIAG_REQ_PROTOCOL */ - .sdiag_protocol = IPPROTO_TCP, - .id.idiag_cookie[0] = token, - } + .r = *r }; struct rtattr rta_proto; struct iovec iov[6]; - int iovlen = 1; - __u32 proto; - - req.r.idiag_ext |= (1 << (INET_DIAG_INFO - 1)); - proto = IPPROTO_MPTCP; - rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL; - rta_proto.rta_len = RTA_LENGTH(sizeof(proto)); + int iovlen = 0; - iov[0] = (struct iovec) { + iov[iovlen++] = (struct iovec) { .iov_base = &req, .iov_len = sizeof(req) }; - iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto)}; - iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto)}; - req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto)); - iovlen += 2; + + if (proto == IPPROTO_MPTCP) { + rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL; + rta_proto.rta_len = RTA_LENGTH(sizeof(proto)); + + iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)}; + iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)}; + req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto)); + } + struct msghdr msg = { .msg_name = &nladdr, .msg_namelen = sizeof(nladdr), @@ -160,7 +198,67 @@ static void print_info_msg(struct mptcp_info *info) printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked); } -static void parse_nlmsg(struct nlmsghdr *nlh) +/* + * 'print_subflow_info' is from 'mptcp_subflow_info' + * which is a function in 'misc/ss.c' of iproute2. + */ +static void print_subflow_info(struct rtattr *tb[]) +{ + u_int32_t flags = 0; + + printf("It's a mptcp subflow, the subflow info:\n"); + if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) { + char caps[32 + 1] = { 0 }, *cap = &caps[0]; + + flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]); + + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM) + *cap++ = 'M'; + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC) + *cap++ = 'm'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM) + *cap++ = 'J'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC) + *cap++ = 'j'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM) + *cap++ = 'B'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC) + *cap++ = 'b'; + if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED) + *cap++ = 'e'; + if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED) + *cap++ = 'c'; + if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID) + *cap++ = 'v'; + + if (flags) + printf(" flags:%s", caps); + } + if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] && + tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] && + tb[MPTCP_SUBFLOW_ATTR_ID_REM] && + tb[MPTCP_SUBFLOW_ATTR_ID_LOC]) + printf(" token:%04x(id:%u)/%04x(id:%u)", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]), + rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]), + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]), + rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]) + printf(" seq:%llu", + rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]) + printf(" sfseq:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]) + printf(" ssnoff:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]) + printf(" maplen:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])); + printf("\n"); +} + +static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto) { struct inet_diag_msg *r = NLMSG_DATA(nlh); struct rtattr *tb[INET_DIAG_MAX + 1]; @@ -169,7 +267,7 @@ static void parse_nlmsg(struct nlmsghdr *nlh) nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)), NLA_F_NESTED); - if (tb[INET_DIAG_INFO]) { + if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) { int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); struct mptcp_info *info; @@ -183,11 +281,28 @@ static void parse_nlmsg(struct nlmsghdr *nlh) } print_info_msg(info); } + if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) { + struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 }; + + parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX, + tb[INET_DIAG_ULP_INFO]); + + if (ulpinfo[INET_ULP_INFO_MPTCP]) { + struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 }; + + parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX, + ulpinfo[INET_ULP_INFO_MPTCP]); + print_subflow_info(sfinfo); + } else { + printf("It's a normal TCP!\n"); + } + } } -static void recv_nlmsg(int fd, struct nlmsghdr *nlh) +static void recv_nlmsg(int fd, __u32 proto) { char rcv_buff[8192]; + struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff; struct sockaddr_nl rcv_nladdr = { .nl_family = AF_NETLINK }; @@ -204,7 +319,6 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh) int len; len = recvmsg(fd, &rcv_msg, 0); - nlh = (struct nlmsghdr *)rcv_buff; while (NLMSG_OK(nlh, len)) { if (nlh->nlmsg_type == NLMSG_DONE) { @@ -218,40 +332,84 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh) -(err->error), strerror(-(err->error))); break; } - parse_nlmsg(nlh); + parse_nlmsg(nlh, proto); nlh = NLMSG_NEXT(nlh, len); } } static void get_mptcpinfo(__u32 token) { - struct nlmsghdr *nlh = NULL; + struct inet_diag_req_v2 r = { + .sdiag_family = AF_INET, + /* Real proto is set via INET_DIAG_REQ_PROTOCOL */ + .sdiag_protocol = IPPROTO_TCP, + .idiag_ext = 1 << (INET_DIAG_INFO - 1), + .id.idiag_cookie[0] = token, + }; + __u32 proto = IPPROTO_MPTCP; int fd; fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); if (fd < 0) die_perror("Netlink socket"); - send_query(fd, token); - recv_nlmsg(fd, nlh); + send_query(fd, &r, proto); + recv_nlmsg(fd, proto); close(fd); } -static void parse_opts(int argc, char **argv, __u32 *target_token) +static void get_subflow_info(char *subflow_addrs) +{ + struct inet_diag_req_v2 r = { + .sdiag_family = AF_INET, + .sdiag_protocol = IPPROTO_TCP, + .idiag_ext = 1 << (INET_DIAG_INFO - 1), + .id.idiag_cookie[0] = INET_DIAG_NOCOOKIE, + .id.idiag_cookie[1] = INET_DIAG_NOCOOKIE, + }; + char saddr[64], daddr[64]; + int sport, dport; + int ret; + int fd; + + ret = sscanf(subflow_addrs, "%[^:]:%d %[^:]:%d", saddr, &sport, daddr, &dport); + if (ret != 4) + die_perror("IP PORT Pairs has style problems!"); + + printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) + die_perror("Netlink socket"); + + r.id.idiag_sport = htons(sport); + r.id.idiag_dport = htons(dport); + + inet_pton(AF_INET, saddr, &r.id.idiag_src); + inet_pton(AF_INET, daddr, &r.id.idiag_dst); + send_query(fd, &r, IPPROTO_TCP); + recv_nlmsg(fd, IPPROTO_TCP); +} + +static void parse_opts(int argc, char **argv, struct params *p) { int c; if (argc < 2) die_usage(1); - while ((c = getopt(argc, argv, "ht:")) != -1) { + while ((c = getopt(argc, argv, "ht:s:")) != -1) { switch (c) { case 'h': die_usage(0); break; case 't': - sscanf(optarg, "%x", target_token); + sscanf(optarg, "%x", &p->target_token); + break; + case 's': + strncpy(p->subflow_addrs, optarg, + sizeof(p->subflow_addrs) - 1); break; default: die_usage(1); @@ -262,10 +420,15 @@ static void parse_opts(int argc, char **argv, __u32 *target_token) int main(int argc, char *argv[]) { - __u32 target_token; + struct params p = { 0 }; + + parse_opts(argc, argv, &p); + + if (p.target_token) + get_mptcpinfo(p.target_token); - parse_opts(argc, argv, &target_token); - get_mptcpinfo(target_token); + if (p.subflow_addrs[0] != '\0') + get_subflow_info(p.subflow_addrs); return 0; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 218aac467321..3cf1e2a612ce 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -72,13 +72,21 @@ static const char *getxinfo_strerr(int err) } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { +again: int err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? service : "", errstr); @@ -91,7 +99,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -136,7 +144,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index befa66f5a366..b8af65373b3a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -62,6 +62,7 @@ unset sflags unset fastclose unset fullmesh unset speed +unset join_syn_rej unset join_csum_ns1 unset join_csum_ns2 unset join_fail_nr @@ -1403,6 +1404,7 @@ chk_join_nr() local syn_nr=$1 local syn_ack_nr=$2 local ack_nr=$3 + local syn_rej=${join_syn_rej:-0} local csum_ns1=${join_csum_ns1:-0} local csum_ns2=${join_csum_ns2:-0} local fail_nr=${join_fail_nr:-0} @@ -1468,6 +1470,15 @@ chk_join_nr() fail_test "got $count JOIN[s] ack HMAC failure expected 0" fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$syn_rej" ]; then + rc=${KSFT_FAIL} + print_check "syn rejected" + fail_test "got $count JOIN[s] syn rejected expected $syn_rej" + fi + print_results "join Rx" ${rc} join_syn_tx="${join_syn_tx:-${syn_nr}}" \ @@ -1963,7 +1974,8 @@ subflows_tests() pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 fi # subflow @@ -1992,7 +2004,8 @@ subflows_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 2 2 1 + join_syn_rej=1 \ + chk_join_nr 2 2 1 fi # single subflow, dev @@ -3061,7 +3074,8 @@ syncookies_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 2 1 1 + join_syn_rej=1 \ + chk_join_nr 2 1 1 fi # test signal address with cookies @@ -3545,7 +3559,8 @@ userspace_tests() pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 fi # userspace pm type does not send join @@ -3568,7 +3583,8 @@ userspace_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 chk_prio_nr 0 0 0 0 fi diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 051e289d7967..09cd24b2ae46 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -2,7 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 . "$(dirname "${0}")/../lib.sh" -. "$(dirname "${0}")/../net_helper.sh" readonly KSFT_PASS=0 readonly KSFT_FAIL=1 @@ -331,12 +330,15 @@ mptcp_lib_result_print_all_tap() { # get the value of keyword $1 in the line marked by keyword $2 mptcp_lib_get_info_value() { - grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' + grep "${2}" 2>/dev/null | + sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' + # the ';q' at the end limits to the first matched entry. } # $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]] mptcp_lib_evts_get_info() { - grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1}," + grep "${4:-}" "${2}" 2>/dev/null | + mptcp_lib_get_info_value "${1}" "^type:${3:-1}," } # $1: PID @@ -476,8 +478,6 @@ mptcp_lib_ns_init() { local netns for netns in "${@}"; do ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1 - ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0 - ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0 done } diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 926b0be87c99..9934a68df237 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -159,13 +159,21 @@ static const char *getxinfo_strerr(int err) } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { +again: int err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? service : "", errstr); @@ -178,7 +186,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -223,7 +231,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh deleted file mode 100644 index 6596fe03c77f..000000000000 --- a/tools/testing/selftests/net/net_helper.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Helper functions - -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - local protocol="${3}" - local pattern - local i - - pattern=":$(printf "%04X" "${port}") " - - # for tcp protocol additionally check the socket state - [ ${protocol} = "tcp" ] && pattern="${pattern}0A" - for i in $(seq 10); do - if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \ - /proc/net/"${protocol}"* | grep -q "${pattern}"; then - break - fi - sleep 0.1 - done -} diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index ffe161fac8b5..e9b2f553588d 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -12,6 +12,7 @@ TEST_PROGS += conntrack_dump_flush.sh TEST_PROGS += conntrack_icmp_related.sh TEST_PROGS += conntrack_ipip_mtu.sh TEST_PROGS += conntrack_tcp_unreplied.sh +TEST_PROGS += conntrack_resize.sh TEST_PROGS += conntrack_sctp_collision.sh TEST_PROGS += conntrack_vrf.sh TEST_PROGS += conntrack_reverse_clash.sh @@ -23,6 +24,7 @@ TEST_PROGS += nft_concat_range.sh TEST_PROGS += nft_conntrack_helper.sh TEST_PROGS += nft_fib.sh TEST_PROGS += nft_flowtable.sh +TEST_PROGS += nft_interface_stress.sh TEST_PROGS += nft_meta.sh TEST_PROGS += nft_nat.sh TEST_PROGS += nft_nat_zones.sh diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh index 1559ba275105..011de8763094 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -60,9 +60,6 @@ bcast_ping() done } -ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0 - if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then echo "SKIP: Can't create veth device" exit $ksft_skip diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh index 2549b6590693..ea76f2bc2f59 100755 --- a/tools/testing/selftests/net/netfilter/bridge_brouter.sh +++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh @@ -22,8 +22,6 @@ trap cleanup EXIT setup_ns nsbr ns1 ns2 -ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0 -ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0 if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then echo "SKIP: Can't create veth device" exit $ksft_skip diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 43d8b500d391..363646f4fefe 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -46,6 +46,7 @@ CONFIG_NETFILTER_XT_MATCH_STATE=m CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_MARK=y diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh new file mode 100755 index 000000000000..9e033e80219e --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -0,0 +1,427 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +checktool "conntrack --version" "run test without conntrack" +checktool "nft --version" "run test without nft tool" + +init_net_max=0 +ct_buckets=0 +tmpfile="" +tmpfile_proc="" +tmpfile_uniq="" +ret=0 + +insert_count=2000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400 + +modprobe -q nf_conntrack +if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then + echo "SKIP: conntrack sysctls not available" + exit $KSFT_SKIP +fi + +init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1 +ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1 + +cleanup() { + cleanup_all_ns + + rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq" + + # restore original sysctl setting + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max + sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets +} +trap cleanup EXIT + +check_max_alias() +{ + local expected="$1" + # old name, expected to alias to the first, i.e. changing one + # changes the other as well. + local lv=$(sysctl -n net.nf_conntrack_max) + + if [ $expected -ne "$lv" ];then + echo "nf_conntrack_max sysctls should have identical values" + exit 1 + fi +} + +insert_ctnetlink() { + local ns="$1" + local count="$2" + local i=0 + local bulk=16 + + while [ $i -lt $count ] ;do + ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \ + if ! conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ + -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ + --protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \ + return;\ + fi & \ + done ; wait" 2>/dev/null + + i=$((i+bulk)) + done +} + +check_ctcount() { + local ns="$1" + local count="$2" + local msg="$3" + + local now=$(ip netns exec "$ns" conntrack -C) + + if [ $now -ne "$count" ] ;then + echo "expected $count entries in $ns, not $now: $msg" + exit 1 + fi + + echo "PASS: got $count connections: $msg" +} + +ctresize() { + local duration="$1" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM + now=$(date +%s) + done +} + +do_rsleep() { + local limit="$1" + local r=$RANDOM + + r=$((r%limit)) + sleep "$r" +} + +ct_flush_once() { + local ns="$1" + + ip netns exec "$ns" conntrack -F 2>/dev/null +} + +ctflush() { + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + do_rsleep "$duration" + + while [ $now -lt $end ]; do + ct_flush_once "$ns" + do_rsleep "$duration" + now=$(date +%s) + done +} + +ctflood() +{ + local ns="$1" + local duration="$2" + local msg="$3" + local now=$(date +%s) + local end=$((now + duration)) + local j=0 + local k=0 + + while [ $now -lt $end ]; do + j=$((j%256)) + k=$((k%256)) + + ip netns exec "$ns" bash -c \ + "j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1 + + j=$((j+1)) + + if [ $j -eq 256 ];then + k=$((k+1)) + fi + + now=$(date +%s) + done + + wait +} + +# dump to /dev/null. We don't want dumps to cause infinite loops +# or use-after-free even when conntrack table is altered while dumps +# are in progress. +ct_nulldump() +{ + local ns="$1" + + ip netns exec "$ns" conntrack -L > /dev/null 2>&1 & + + # Don't require /proc support in conntrack + if [ -r /proc/self/net/nf_conntrack ] ; then + ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null & + fi + + wait +} + +check_taint() +{ + local tainted_then="$1" + local msg="$2" + + local tainted_now=0 + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + + if [ "$tainted_now" -eq 0 ];then + echo "PASS: $msg" + else + echo "TAINT: $msg" + dmesg + exit 1 + fi +} + +insert_flood() +{ + local n="$1" + local r=0 + + r=$((RANDOM%$insert_count)) + + ctflood "$n" "$timeout" "floodresize" & + insert_ctnetlink "$n" "$r" & + ctflush "$n" "$timeout" & + ct_nulldump "$n" & + + wait +} + +test_floodresize_all() +{ + local timeout=20 + local n="" + local tainted_then="" + + read tainted_then < /proc/sys/kernel/tainted + + for n in "$nsclient1" "$nsclient2";do + insert_flood "$n" & + done + + # resize table constantly while flood/insert/dump/flushs + # are happening in parallel. + ctresize "$timeout" + + # wait for subshells to complete, everything is limited + # by $timeout. + wait + + check_taint "$tainted_then" "resize+flood" +} + +check_dump() +{ + local ns="$1" + local protoname="$2" + local c=0 + local proto=0 + local proc=0 + local unique="" + local lret=0 + + # NOTE: assumes timeouts are large enough to not have + # expirations in all following tests. + l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l) + c=$(ip netns exec "$ns" conntrack -C) + + if [ "$c" -eq 0 ]; then + echo "FAIL: conntrack count for $ns is 0" + lret=1 + fi + + if [ "$c" -ne "$l" ]; then + echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l" + lret=1 + fi + + # check the dump we retrieved is free of duplicated entries. + unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$unique" ]; then + echo "FAIL: listing contained redundant entries for $ns: $l != $unique" + diff -u "$tmpfile" "$tmpfile_uniq" + lret=1 + fi + + # we either inserted icmp or only udp, hence, --proto should return same entry count as without filter. + proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$proto" ]; then + echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto" + diff -u "$tmpfile" "$tmpfile_uniq" + lret=1 + fi + + if [ -r /proc/self/net/nf_conntrack ] ; then + proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l") + + if [ "$l" -ne "$proc" ]; then + echo "FAIL: proc inconsistency for $ns: $l != $proc" + lret=1 + fi + + proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$proc" ]; then + echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc" + diff -u "$tmpfile_proc" "$tmpfile_uniq" + lret=1 + fi + fi + + if [ $lret -eq 0 ];then + echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)" + else + echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)" + ret=1 + fi +} + +test_dump_all() +{ + local timeout=3 + local tainted_then="" + + read tainted_then < /proc/sys/kernel/tainted + + ct_flush_once "$nsclient1" + ct_flush_once "$nsclient2" + + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600 + + ctflood "$nsclient1" $timeout "dumpall" & + insert_ctnetlink "$nsclient2" $insert_count + + wait + + check_dump "$nsclient1" "icmp" + check_dump "$nsclient2" "udp" + + check_taint "$tainted_then" "test parallel conntrack dumps" +} + +check_sysctl_immutable() +{ + local ns="$1" + local name="$2" + local failhard="$3" + local o=0 + local n=0 + + o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) + n=$((o+1)) + + # return value isn't reliable, need to read it back + ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null + + n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) + + [ -z "$n" ] && return 1 + + if [ $o -ne $n ]; then + if [ $failhard -gt 0 ] ;then + echo "FAIL: net.$name should not be changeable from namespace (now $n)" + ret=1 + fi + return 0 + fi + + return 1 +} + +test_conntrack_max_limit() +{ + sysctl -q net.netfilter.nf_conntrack_max=100 + insert_ctnetlink "$nsclient1" 101 + + # check netns is clamped by init_net, i.e., either netns follows + # init_net value, or a higher pernet limit (compared to init_net) is ignored. + check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound" + + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max +} + +test_conntrack_disable() +{ + local timeout=2 + + # disable conntrack pickups + ip netns exec "$nsclient1" nft flush table ip test_ct + + ct_flush_once "$nsclient1" + ct_flush_once "$nsclient2" + + ctflood "$nsclient1" "$timeout" "conntrack disable" + ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1 + + # Disabled, should not have picked up any connection. + check_ctcount "$nsclient1" 0 "conntrack disabled" + + # This one is still active, expect 1 connection. + check_ctcount "$nsclient2" 1 "conntrack enabled" +} + +init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) + +check_max_alias $init_net_max + +sysctl -q net.netfilter.nf_conntrack_max="262000" +check_max_alias 262000 + +setup_ns nsclient1 nsclient2 + +# check this only works from init_net +for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max net.nf_conntrack_max;do + check_sysctl_immutable "$nsclient1" "net.$n" 1 +done + +# won't work on older kernels. If it works, check that the netns obeys the limit +if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then + # subtest: if pernet is changeable, check that reducing it in pernet + # limits the pernet entries. Inverse, pernet clamped by a lower init_net + # setting, is already checked by "test_conntrack_max_limit" test. + + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1 + insert_ctnetlink "$nsclient1" 2 + check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound" + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max +fi + +for n in "$nsclient1" "$nsclient2";do +# enable conntrack in both namespaces +ip netns exec "$n" nft -f - <<EOF +table ip test_ct { + chain input { + type filter hook input priority 0 + ct state new counter + } +} +EOF +done + +tmpfile=$(mktemp) +tmpfile_proc=$(mktemp) +tmpfile_uniq=$(mktemp) +test_conntrack_max_limit +test_dump_all +test_floodresize_all +test_conntrack_disable + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh index e95ecb37c2b1..207b79932d91 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh @@ -32,7 +32,6 @@ source lib.sh IP0=172.30.30.1 IP1=172.30.30.2 -DUMMYNET=10.9.9 PFXL=30 ret=0 @@ -52,11 +51,6 @@ trap cleanup EXIT setup_ns ns0 ns1 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1 - if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then echo "SKIP: Could not add veth device" exit $ksft_skip @@ -67,18 +61,13 @@ if ! ip -net "$ns0" li add tvrf type vrf table 9876; then exit $ksft_skip fi -ip -net "$ns0" link add dummy0 type dummy - ip -net "$ns0" li set veth0 master tvrf -ip -net "$ns0" li set dummy0 master tvrf ip -net "$ns0" li set tvrf up ip -net "$ns0" li set veth0 up -ip -net "$ns0" li set dummy0 up ip -net "$ns1" li set veth0 up ip -net "$ns0" addr add $IP0/$PFXL dev veth0 ip -net "$ns1" addr add $IP1/$PFXL dev veth0 -ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0 listener_ready() { @@ -219,35 +208,9 @@ EOF fi } -test_fib() -{ -ip netns exec "$ns0" nft -f - <<EOF -flush ruleset -table ip t { - counter fibcount { } - - chain prerouting { - type filter hook prerouting priority 0; - meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack - } -} -EOF - ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0 - ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null - - if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then - echo "PASS: fib lookup returned exepected output interface" - else - echo "FAIL: fib lookup did not return exepected output interface" - ret=1 - return - fi -} - test_ct_zone_in test_masquerade_vrf "default" test_masquerade_vrf "pfifo" test_masquerade_veth -test_fib exit $ret diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh index d3edb16cd4b3..6af2ea3ad6b8 100755 --- a/tools/testing/selftests/net/netfilter/ipvs.sh +++ b/tools/testing/selftests/net/netfilter/ipvs.sh @@ -129,9 +129,6 @@ test_dr() { # avoid incorrect arp response ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 - # avoid reverse route lookup - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0 ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 test_service @@ -167,9 +164,6 @@ test_tun() { ip netns exec "${ns2}" ip link set tunl0 up ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0 ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 test_service diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 1f5979c1510c..efea93cf23d4 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -15,10 +15,12 @@ source lib.sh # Available test groups: # - reported_issues: check for issues that were reported in the past # - correctness: check that packets match given entries, and only those +# - correctness_large: same but with additional non-matching entries # - concurrency: attempt races between insertion, deletion and lookup # - timeout: check that packets match entries until they expire # - performance: estimate matching rate, compare with rbtree and hash baselines -TESTS="reported_issues correctness concurrency timeout" +TESTS="reported_issues correctness correctness_large concurrency timeout" + [ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}" # Set types, defined by TYPE_ variables below @@ -1257,9 +1259,7 @@ send_nomatch() { # - add ranged element, check that packets match it # - check that packets outside range don't match it # - remove some elements, check that packets don't match anymore -test_correctness() { - setup veth send_"${proto}" set || return ${ksft_skip} - +test_correctness_main() { range_size=1 for i in $(seq "${start}" $((start + count))); do end=$((start + range_size)) @@ -1293,6 +1293,163 @@ test_correctness() { done } +test_correctness() { + setup veth send_"${proto}" set || return ${ksft_skip} + + test_correctness_main +} + +# Repeat the correctness tests, but add extra non-matching entries. +# This exercises the more compact '4 bit group' representation that +# gets picked when the default 8-bit representation exceed +# NFT_PIPAPO_LT_SIZE_HIGH bytes of memory. +# See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust(). +# +# The format() helper is way too slow when generating lots of +# entries so its not used here. +test_correctness_large() { + setup veth send_"${proto}" set || return ${ksft_skip} + # number of dummy (filler) entries to add. + local dcount=16385 + + ( + echo -n "add element inet filter test { " + + case "$type_spec" in + "ether_addr . ipv4_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_mac $((1000000 + i)) + printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "inet_proto . ipv6_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "%i . " $((RANDOM%256)) + format_addr6 $((1000000 + i)) + done + ;; + "inet_service . inet_proto") + # smaller key sizes, need more entries to hit the + # 4-bit threshold. + dcount=65536 + for i in $(seq 1 $dcount); do + local proto=$((RANDOM%256)) + + # Test uses UDP to match, as it also fails when matching + # an entry that doesn't exist, so skip 'udp' entries + # to not trigger a wrong failure. + [ $proto -eq 17 ] && proto=18 + [ $i -gt 1 ] && echo ", " + printf "%i . %i " $(((i%65534) + 1)) $((proto)) + done + ;; + "inet_service . ipv4_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "ipv4_addr . ether_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + format_mac $((1000000 + i)) + done + ;; + "ipv4_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) + done + ;; + "ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr") + dcount=65536 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) + format_mac $((1000000 + i)) + printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "ipv4_addr . inet_service . inet_proto") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) + done + ;; + "ipv4_addr . inet_service . inet_proto . ipv4_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256)) + done + ;; + "ipv4_addr . inet_service . ipv4_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) + done + ;; + "ipv6_addr . ether_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . " + format_mac $((1000000 + i)) + done + ;; + "ipv6_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1))" + done + ;; + "ipv6_addr . inet_service . ether_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_mac $((i + 1000000)) + done + ;; + "ipv6_addr . inet_service . ether_addr . inet_proto") + dcount=65536 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_mac $((i + 1000000)) + echo -n " . $((RANDOM%256))" + done + ;; + "ipv6_addr . inet_service . ipv6_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_addr6 $((i + 2123456)) + echo -n " . $((RANDOM%256))" + done + ;; + *) + "Unhandled $type_spec" + return 1 + esac + echo -n "}" + + ) | nft -f - || return 1 + + test_correctness_main +} + # Concurrency test template: # - add all the elements # - start a thread for each physical thread that: diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh index ce1451c275fd..9929a9ffef65 100755 --- a/tools/testing/selftests/net/netfilter/nft_fib.sh +++ b/tools/testing/selftests/net/netfilter/nft_fib.sh @@ -3,6 +3,10 @@ # This tests the fib expression. # # Kselftest framework requirement - SKIP code is 4. +# +# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99 +# dead:1::99 dead:1::1 dead:2::1 dead:2::99 +# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2 source lib.sh @@ -45,6 +49,19 @@ table inet filter { EOF } +load_input_ruleset() { + local netns=$1 + +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table inet filter { + chain input { + type filter hook input priority 0; policy accept; + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop + } +} +EOF +} + load_pbr_ruleset() { local netns=$1 @@ -59,6 +76,89 @@ table inet filter { EOF } +load_type_ruleset() { + local netns=$1 + + for family in ip ip6;do +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table $family filter { + chain type_match_in { + fib daddr type local counter comment "daddr configured on other iface" + fib daddr . iif type local counter comment "daddr configured on iif" + fib daddr type unicast counter comment "daddr not local" + fib daddr . iif type unicast counter comment "daddr not configured on iif" + } + + chain type_match_out { + fib daddr type unicast counter + fib daddr . oif type unicast counter + fib daddr type local counter + fib daddr . oif type local counter + } + + chain prerouting { + type filter hook prerouting priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain input { + type filter hook input priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain forward { + type filter hook forward priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain output { + type filter hook output priority 0; + icmp type echo-request counter jump type_match_out + icmpv6 type echo-request counter jump type_match_out + } + + chain postrouting { + type filter hook postrouting priority 0; + icmp type echo-request counter jump type_match_out + icmpv6 type echo-request counter jump type_match_out + } +} +EOF +done +} + +reload_type_ruleset() { + ip netns exec "$1" nft flush table ip filter + ip netns exec "$1" nft flush table ip6 filter + load_type_ruleset "$1" +} + +check_fib_type_counter_family() { + local family="$1" + local want="$2" + local ns="$3" + local chain="$4" + local what="$5" + local errmsg="$6" + + if ! ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep "$what" | grep -q "packets $want";then + echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2 + ip netns exec "$ns" nft list chain "$family" filter "$chain" + ret=1 + return 1 + fi + + return 0 +} + +check_fib_type_counter() { + check_fib_type_counter_family "ip" "$@" || return 1 + check_fib_type_counter_family "ip6" "$@" || return 1 +} + load_ruleset_count() { local netns=$1 @@ -77,6 +177,7 @@ check_drops() { if dmesg | grep -q ' nft_rpfilter: ';then dmesg | grep ' nft_rpfilter: ' echo "FAIL: rpfilter did drop packets" + ret=1 return 1 fi @@ -151,19 +252,506 @@ test_ping() { return 0 } +test_ping_unreachable() { + local daddr4=$1 + local daddr6=$2 + + if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr4" > /dev/null; then + echo "FAIL: ${ns1} could reach $daddr4" 1>&2 + return 1 + fi + + if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr6" > /dev/null; then + echo "FAIL: ${ns1} could reach $daddr6" 1>&2 + return 1 + fi + + return 0 +} + +test_fib_type() { + local notice="$1" + local errmsg="addr-on-if" + local lret=0 + + if ! load_type_ruleset "$nsrouter";then + echo "SKIP: Could not load fib type ruleset" + [ $ret -eq 0 ] && ret=$ksft_skip + return + fi + + # makes router receive packet for addresses configured on incoming + # interface. + test_ping 10.0.1.1 dead:1::1 || return 1 + + # expectation: triggers all 'local' in prerouting/input. + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1 + + reload_type_ruleset "$nsrouter" + # makes router receive packet for address configured on a different (but local) + # interface. + test_ping 10.0.2.1 dead:2::1 || return 1 + + # expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'. + errmsg="addr-on-host" + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1 + + reload_type_ruleset "$nsrouter" + test_ping 10.0.2.99 dead:2::99 || return 1 + errmsg="addr-on-otherhost" + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1 + + if [ $lret -eq 0 ];then + echo "PASS: fib expression address types match ($notice)" + else + echo "FAIL: fib expression address types match ($notice)" + ret=1 + fi +} + +test_fib_vrf_dev_add_dummy() +{ + if ! ip -net "$nsrouter" link add dummy0 type dummy ;then + echo "SKIP: VRF tests: dummy device type not supported" + return 1 + fi + + if ! ip -net "$nsrouter" link add tvrf type vrf table 9876;then + echo "SKIP: VRF tests: vrf device type not supported" + return 1 + fi + + ip -net "$nsrouter" link set dummy0 master tvrf + ip -net "$nsrouter" link set dummy0 up + ip -net "$nsrouter" link set tvrf up +} + +load_ruleset_vrf() +{ +# Due to the many different possible combinations using named counters +# or one-rule-per-expected-result is complex. +# +# Instead, add dynamic sets for the fib modes +# (fib address type, fib output interface lookup .. ), +# and then add the obtained fib results to them. +# +# The test is successful if the sets contain the expected results +# and no unexpected extra entries existed. +ip netns exec "$nsrouter" nft -f - <<EOF +flush ruleset +table inet t { + set fibif4 { + typeof meta iif . ip daddr . fib daddr oif + flags dynamic + counter + } + + set fibif4iif { + typeof meta iif . ip daddr . fib daddr . iif oif + flags dynamic + counter + } + + set fibif6 { + typeof meta iif . ip6 daddr . fib daddr oif + flags dynamic + counter + } + + set fibif6iif { + typeof meta iif . ip6 daddr . fib daddr . iif oif + flags dynamic + counter + } + + set fibtype4 { + typeof meta iif . ip daddr . fib daddr type + flags dynamic + counter + } + + set fibtype4iif { + typeof meta iif . ip daddr . fib daddr . iif type + flags dynamic + counter + } + + set fibtype6 { + typeof meta iif . ip6 daddr . fib daddr type + flags dynamic + counter + } + + set fibtype6iif { + typeof meta iif . ip6 daddr . fib daddr . iif type + flags dynamic + counter + } + + chain fib_test { + meta nfproto ipv4 jump { + add @fibif4 { meta iif . ip daddr . fib daddr oif } + add @fibif4iif { meta iif . ip daddr . fib daddr . iif oif } + add @fibtype4 { meta iif . ip daddr . fib daddr type } + add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type } + + add @fibif4 { meta iif . ip saddr . fib saddr oif } + add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif } + } + + meta nfproto ipv6 jump { + add @fibif6 { meta iif . ip6 daddr . fib daddr oif } + add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif } + add @fibtype6 { meta iif . ip6 daddr . fib daddr type } + add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type } + + add @fibif6 { meta iif . ip6 saddr . fib saddr oif } + add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif } + } + } + + chain prerouting { + type filter hook prerouting priority 0; + icmp type echo-request counter jump fib_test + + # neighbour discovery to be ignored. + icmpv6 type echo-request counter jump fib_test + } +} +EOF + +if [ $? -ne 0 ] ;then + echo "SKIP: Could not load ruleset for fib vrf test" + [ $ret -eq 0 ] && ret=$ksft_skip + return 1 +fi +} + +check_type() +{ + local setname="$1" + local iifname="$2" + local addr="$3" + local type="$4" + local count="$5" + + [ -z "$count" ] && count=1 + + if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then + echo "FAIL: did not find $iifname . $addr . $type in $setname" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + return 1 + fi + + # delete the entry, this allows to check if anything unexpected appeared + # at the end of the test run: all dynamic sets should be empty by then. + if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then + echo "FAIL: can't delete $iifname . $addr . $type in $setname" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + return 1 + fi + + return 0 +} + +check_local() +{ + check_type $@ "local" 1 +} + +check_unicast() +{ + check_type $@ "unicast" 1 +} + +check_rpf() +{ + check_type $@ +} + +check_fib_vrf_sets_empty() +{ + local setname="" + local lret=0 + + # A non-empty set means that we have seen unexpected packets OR + # that a fib lookup provided unexpected results. + for setname in "fibif4" "fibif4iif" "fibif6" "fibif6iif" \ + "fibtype4" "fibtype4iif" "fibtype6" "fibtype6iif";do + if ip netns exec "$nsrouter" nft list set inet t "$setname" | grep -q elements;then + echo "FAIL: $setname not empty" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + lret=1 + fi + done + + return $lret +} + +check_fib_vrf_type() +{ + local msg="$1" + + local addr + # the incoming interface is always veth0. As its not linked to a VRF, + # the 'tvrf' device should NOT show up anywhere. + local ifname="veth0" + local lret=0 + + # local_veth0, local_veth1 + for addr in "10.0.1.1" "10.0.2.1"; do + check_local fibtype4 "$ifname" "$addr" || lret=1 + check_type fibif4 "$ifname" "$addr" "0" || lret=1 + done + for addr in "dead:1::1" "dead:2::1";do + check_local fibtype6 "$ifname" "$addr" || lret=1 + check_type fibif6 "$ifname" "$addr" "0" || lret=1 + done + + # when restricted to the incoming interface, 10.0.1.1 should + # be 'local', but 10.0.2.1 unicast. + check_local fibtype4iif "$ifname" "10.0.1.1" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1 + + # same for the ipv6 addresses. + check_local fibtype6iif "$ifname" "dead:1::1" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1 + + # None of these addresses should find a valid route when restricting + # to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are + # reachable via 'lo'. + for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do + check_type fibif4iif "$ifname" "$addr" "0" || lret=1 + done + + # expect default route (veth1), dummy0 is part of VRF but iif isn't. + for addr in "10.9.9.1" "10.9.9.2";do + check_unicast fibtype4 "$ifname" "$addr" || lret=1 + check_unicast fibtype4iif "$ifname" "$addr" || lret=1 + check_type fibif4 "$ifname" "$addr" "veth1" || lret=1 + done + for addr in "dead:9::1" "dead:9::2";do + check_unicast fibtype6 "$ifname" "$addr" || lret=1 + check_unicast fibtype6iif "$ifname" "$addr" || lret=1 + check_type fibif6 "$ifname" "$addr" "veth1" || lret=1 + done + + # same for the IPv6 equivalent addresses. + for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do + check_type fibif6iif "$ifname" "$addr" "0" || lret=1 + done + + check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1 + + check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1 + check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1 + check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1 + check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1 + + check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 5 || lret=1 + check_rpf fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1 + check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 5 || lret=1 + check_rpf fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1 + + check_fib_vrf_sets_empty || lret=1 + + if [ $lret -eq 0 ];then + echo "PASS: $msg" + else + echo "FAIL: $msg" + ret=1 + fi +} + +check_fib_veth_vrf_type() +{ + local msg="$1" + + local addr + local ifname + local setname + local lret=0 + + # as veth0 is now part of tvrf interface, packets will be seen + # twice, once with iif veth0, then with iif tvrf. + + for ifname in "veth0" "tvrf"; do + for addr in "10.0.1.1" "10.9.9.1"; do + check_local fibtype4 "$ifname" "$addr" || lret=1 + # addr local, but nft_fib doesn't return routes with RTN_LOCAL. + check_type fibif4 "$ifname" "$addr" 0 || lret=1 + check_type fibif4iif "$ifname" "$addr" 0 || lret=1 + done + + for addr in "dead:1::1" "dead:9::1"; do + check_local fibtype6 "$ifname" "$addr" || lret=1 + # same, address is local but no route is returned for lo. + check_type fibif6 "$ifname" "$addr" 0 || lret=1 + check_type fibif6iif "$ifname" "$addr" 0 || lret=1 + done + + for t in fibtype4 fibtype4iif; do + check_unicast "$t" "$ifname" 10.9.9.2 || lret=1 + done + for t in fibtype6 fibtype6iif; do + check_unicast "$t" "$ifname" dead:9::2 || lret=1 + done + + check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1 + + check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1 + + check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1 + + check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1 + check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1 + check_type fibif4 "$ifname" "10.9.9.2" "dummy0" || lret=1 + check_type fibif6 "$ifname" "dead:9::2" "dummy0" || lret=1 + + # restricted to iif -- MUST NOT provide result, its != $ifname. + check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1 + check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1 + + check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1 + check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1 + check_rpf fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1 + check_rpf fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1 + done + + check_local fibtype4iif "veth0" "10.0.1.1" || lret=1 + check_local fibtype6iif "veth0" "dead:1::1" || lret=1 + + check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1 + check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1 + + # 10.9.9.2 should not provide a result for iif veth, but + # should when iif is tvrf. + # This is because its reachable via dummy0 which is part of + # tvrf. iif veth0 MUST conceal the dummy0 result (i.e. return oif 0). + check_type fibif4iif "veth0" "10.9.9.2" 0 || lret=1 + check_type fibif6iif "veth0" "dead:9::2" 0 || lret=1 + + check_type fibif4iif "tvrf" "10.9.9.2" "tvrf" || lret=1 + check_type fibif6iif "tvrf" "dead:9::2" "tvrf" || lret=1 + + check_fib_vrf_sets_empty || lret=1 + + if [ $lret -eq 0 ];then + echo "PASS: $msg" + else + echo "FAIL: $msg" + ret=1 + fi +} + +# Extends nsrouter config by adding dummy0+vrf. +# +# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99 +# dead:1::99 dead:1::1 dead:2::1 dead:2::99 +# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2 +# [dummy0] +# 10.9.9.1 +# dead:9::1 +# [tvrf] +test_fib_vrf() +{ + local cntname="" + + if ! test_fib_vrf_dev_add_dummy; then + [ $ret -eq 0 ] && ret=$ksft_skip + return + fi + + ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0 + ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad + + ip -net "$nsrouter" route add default via 10.0.2.99 + ip -net "$nsrouter" route add default via dead:2::99 + + load_ruleset_vrf || return + + # no echo reply for these addresses: The dummy interface is part of tvrf, + # but veth0 (incoming interface) isn't linked to it. + test_ping_unreachable "10.9.9.1" "dead:9::1" & + test_ping_unreachable "10.9.9.2" "dead:9::2" & + + # expect replies from these. + test_ping "10.0.1.1" "dead:1::1" + test_ping "10.0.2.1" "dead:2::1" + test_ping "10.0.2.99" "dead:2::99" + + wait + + check_fib_vrf_type "fib expression address types match (iif not in vrf)" + + # second round: this time, make veth0 (rx interface) part of the vrf. + # 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2 + # becomes unreachable. + ip -net "$nsrouter" link set veth0 master tvrf + ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + + # this reload should not be needed, but in case + # there is some error (missing or unexpected entry) this will prevent them + # from leaking into round 2. + load_ruleset_vrf || return + + test_ping "10.0.1.1" "dead:1::1" + test_ping "10.9.9.1" "dead:9::1" + + # ns2 should no longer be reachable (veth1 not in vrf) + test_ping_unreachable "10.0.2.99" "dead:2::99" & + + # vrf via dummy0, but host doesn't exist + test_ping_unreachable "10.9.9.2" "dead:9::2" & + + wait + + check_fib_veth_vrf_type "fib expression address types match (iif in vrf)" +} + ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null -ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null -ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null test_ping 10.0.2.1 dead:2::1 || exit 1 -check_drops || exit 1 +check_drops test_ping 10.0.2.99 dead:2::99 || exit 1 +check_drops + +[ $ret -eq 0 ] && echo "PASS: fib expression did not cause unwanted packet drops" + +load_input_ruleset "$ns1" + +test_ping 127.0.0.1 ::1 +check_drops + +test_ping 10.0.1.99 dead:1::99 +check_drops + +[ $ret -eq 0 ] && echo "PASS: fib expression did not discard loopback packets" + +load_input_ruleset "$ns1" + +test_ping 127.0.0.1 ::1 || exit 1 check_drops || exit 1 -echo "PASS: fib expression did not cause unwanted packet drops" +test_ping 10.0.1.99 dead:1::99 || exit 1 +check_drops || exit 1 + +echo "PASS: fib expression did not discard loopback packets" ip netns exec "$nsrouter" nft flush table inet filter @@ -213,7 +801,7 @@ ip -net "$nsrouter" addr del dead:2::1/64 dev veth0 # ... pbr ruleset for the router, check iif+oif. if ! load_pbr_ruleset "$nsrouter";then echo "SKIP: Could not load fib forward ruleset" - exit $ksft_skip + [ "$ret" -eq 0 ] && ret=$ksft_skip fi ip -net "$nsrouter" rule add from all table 128 @@ -224,11 +812,36 @@ ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1 # drop main ipv4 table ip -net "$nsrouter" -4 rule delete table main -if ! test_ping 10.0.2.99 dead:2::99;then - ip -net "$nsrouter" nft list ruleset - echo "FAIL: fib mismatch in pbr setup" - exit 1 +if test_ping 10.0.2.99 dead:2::99;then + echo "PASS: fib expression forward check with policy based routing" +else + echo "FAIL: fib expression forward check with policy based routing" + ret=1 fi -echo "PASS: fib expression forward check with policy based routing" -exit 0 +test_fib_type "policy routing" +ip netns exec "$nsrouter" nft delete table ip filter +ip netns exec "$nsrouter" nft delete table ip6 filter + +# Un-do policy routing changes +ip -net "$nsrouter" rule del from all table 128 +ip -net "$nsrouter" rule del from all iif veth0 table 129 + +ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0 +ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1 + +ip -net "$ns1" -4 route del default +ip -net "$ns1" -6 route del default + +ip -net "$ns1" -4 route add default via 10.0.1.1 +ip -net "$ns1" -6 route add default via dead:1::1 + +ip -net "$nsrouter" -4 rule add from all table main priority 32766 + +test_fib_type "default table" +ip netns exec "$nsrouter" nft delete table ip filter +ip netns exec "$nsrouter" nft delete table ip6 filter + +test_fib_vrf + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh new file mode 100755 index 000000000000..5ff7be9daeee --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh @@ -0,0 +1,154 @@ +#!/bin/bash -e +# +# SPDX-License-Identifier: GPL-2.0 +# +# Torture nftables' netdevice notifier callbacks and related code by frequent +# renaming of interfaces which netdev-family chains and flowtables hook into. + +source lib.sh + +checktool "nft --version" "run test without nft tool" +checktool "iperf3 --version" "run test without iperf3 tool" + +# how many seconds to torture the kernel? +# default to 80% of max run time but don't exceed 48s +TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10)) +[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48 + +trap "cleanup_all_ns" EXIT + +setup_ns nsc nsr nss + +ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr +ip -net $nsc addr add 10.0.0.1/24 dev cr0 +ip -net $nsc link set cr0 up +ip -net $nsc route add default via 10.0.0.2 + +ip -net $nss link add sr0 type veth peer name rs0 netns $nsr +ip -net $nss addr add 10.1.0.1/24 dev sr0 +ip -net $nss link set sr0 up +ip -net $nss route add default via 10.1.0.2 + +ip -net $nsr addr add 10.0.0.2/24 dev rc0 +ip -net $nsr link set rc0 up +ip -net $nsr addr add 10.1.0.2/24 dev rs0 +ip -net $nsr link set rs0 up +ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1 +ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1 + +{ + echo "table netdev t {" + for ((i = 0; i < 10; i++)); do + cat <<-EOF + chain chain_rc$i { + type filter hook ingress device rc$i priority 0 + counter + } + chain chain_rs$i { + type filter hook ingress device rs$i priority 0 + counter + } + EOF + done + echo "}" + echo "table ip t {" + for ((i = 0; i < 10; i++)); do + cat <<-EOF + flowtable ft_${i} { + hook ingress priority 0 + devices = { rc$i, rs$i } + } + EOF + done + echo "chain c {" + echo "type filter hook forward priority 0" + for ((i = 0; i < 10; i++)); do + echo -n "iifname rc$i oifname rs$i " + echo "ip protocol tcp counter flow add @ft_${i}" + done + echo "counter" + echo "}" + echo "}" +} | ip netns exec $nsr nft -f - || { + echo "SKIP: Could not load nft ruleset" + exit $ksft_skip +} + +for ((o=0, n=1; ; o=n, n++, n %= 10)); do + ip -net $nsr link set rc$o name rc$n + ip -net $nsr link set rs$o name rs$n +done & +rename_loop_pid=$! + +while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done & +nft_list_pid=$! + +ip netns exec $nsr nft monitor >/dev/null & +nft_monitor_pid=$! + +ip netns exec $nss iperf3 --server --daemon -1 +summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p' +rate=$(ip netns exec $nsc iperf3 \ + --format k -c 10.1.0.1 --time $TEST_RUNTIME \ + --length 56 --parallel 10 -i 0 | sed -n "$summary_expr") + +kill $nft_list_pid +kill $nft_monitor_pid +kill $rename_loop_pid +wait + +wildcard_prep() { + ip netns exec $nsr nft -f - <<EOF +table ip t { + flowtable ft_wild { + hook ingress priority 0 + devices = { wild* } + } +} +EOF +} + +if ! wildcard_prep; then + echo "SKIP wildcard tests: not supported by host's nft?" +else + for ((i = 0; i < 100; i++)); do + ip -net $nsr link add wild$i type dummy & + done + wait + for ((i = 80; i < 100; i++)); do + ip -net $nsr link del wild$i & + done + for ((i = 0; i < 80; i++)); do + ip -net $nsr link del wild$i & + done + wait + for ((i = 0; i < 100; i += 10)); do + ( + for ((j = 0; j < 10; j++)); do + ip -net $nsr link add wild$((i + j)) type dummy + done + for ((j = 0; j < 10; j++)); do + ip -net $nsr link del wild$((i + j)) + done + ) & + done + wait +fi + +[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || { + echo "FAIL: Kernel is tainted!" + exit $ksft_fail +} + +[[ $rate -gt 0 ]] || { + echo "FAIL: Zero throughput in iperf3" + exit $ksft_fail +} + +[[ -f /sys/kernel/debug/kmemleak && \ + -n $(</sys/kernel/debug/kmemleak) ]] && { + echo "FAIL: non-empty kmemleak report" + exit $ksft_fail +} + +exit $ksft_pass diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh index 3b81d88bdde3..9f200f80253a 100755 --- a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh @@ -88,7 +88,6 @@ for i in $(seq 1 "$maxclients");do echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2 echo netns exec "$gw" ip link set "veth$i" up echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2 - echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0 # clients have same IP addresses. echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0 @@ -178,7 +177,6 @@ fi ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null # useful for debugging: allows to use 'ping' from clients to gateway. ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 784d1b46912b..6136ceec45e0 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -10,6 +10,8 @@ source lib.sh ret=0 timeout=5 +SCTP_TEST_TIMEOUT=60 + cleanup() { ip netns pids "$ns1" | xargs kill 2>/dev/null @@ -40,7 +42,7 @@ TMPFILE3=$(mktemp) TMPINPUT=$(mktemp) COUNT=200 -[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=$((COUNT/8)) dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT" if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then @@ -275,9 +277,11 @@ test_tcp_forward() busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2 + local tthen=$(date +%s) + ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null - wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain" + wait_and_check_retval "$rpid" "tcp and nfqueue in forward chain" "$tthen" kill "$nfqpid" } @@ -288,13 +292,14 @@ test_tcp_localhost() ip netns exec "$nsrouter" ./nf_queue -q 3 & local nfqpid=$! + local tthen=$(date +%s) busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3 ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null - wait "$rpid" && echo "PASS: tcp via loopback" + wait_and_check_retval "$rpid" "tcp via loopback" "$tthen" kill "$nfqpid" } @@ -417,6 +422,23 @@ check_output_files() fi } +wait_and_check_retval() +{ + local rpid="$1" + local msg="$2" + local tthen="$3" + local tnow=$(date +%s) + + if wait "$rpid";then + echo -n "PASS: " + else + echo -n "FAIL: " + ret=1 + fi + + printf "%s (duration: %ds)\n" "$msg" $((tnow-tthen)) +} + test_sctp_forward() { ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF @@ -428,13 +450,14 @@ table inet sctpq { } } EOF - timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & + timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & local rpid=$! busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" ip netns exec "$nsrouter" ./nf_queue -q 10 -G & local nfqpid=$! + local tthen=$(date +%s) ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null @@ -443,7 +466,7 @@ EOF exit 1 fi - wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain" + wait_and_check_retval "$rpid" "sctp and nfqueue in forward chain" "$tthen" kill "$nfqpid" check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward" @@ -462,13 +485,14 @@ EOF # reduce test file size, software segmentation causes sk wmem increase. dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT" - timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & + timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & local rpid=$! busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" ip netns exec "$ns1" ./nf_queue -q 11 & local nfqpid=$! + local tthen=$(date +%s) ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null @@ -478,7 +502,7 @@ EOF fi # must wait before checking completeness of output file. - wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO" + wait_and_check_retval "$rpid" "sctp and nfqueue in output chain with GSO" "$tthen" kill "$nfqpid" check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output" diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh index 86ec4e68594d..24ad41d526d9 100755 --- a/tools/testing/selftests/net/netfilter/rpath.sh +++ b/tools/testing/selftests/net/netfilter/rpath.sh @@ -1,8 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# return code to signal skipped test -ksft_skip=4 +source lib.sh # search for legacy iptables (it uses the xtables extensions if iptables-legacy --version >/dev/null 2>&1; then @@ -32,17 +31,10 @@ if [ -z "$iptables$ip6tables$nft" ]; then exit $ksft_skip fi -sfx=$(mktemp -u "XXXXXXXX") -ns1="ns1-$sfx" -ns2="ns2-$sfx" -trap "ip netns del $ns1; ip netns del $ns2" EXIT - -# create two netns, disable rp_filter in ns2 and -# keep IPv6 address when moving into VRF -ip netns add "$ns1" -ip netns add "$ns2" -ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0 +trap cleanup_all_ns EXIT + +# create two netns, keep IPv6 address when moving into VRF +setup_ns ns1 ns2 ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1 # a standard connection between the netns, should not trigger rp filter diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore new file mode 100644 index 000000000000..ee44c081ca7c --- /dev/null +++ b/tools/testing/selftests/net/ovpn/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0+ +ovpn-cli diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile new file mode 100644 index 000000000000..e0926d76b4c8 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES) +VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null) +ifeq ($(VAR_CFLAGS),) +VAR_CFLAGS = -I/usr/include/libnl3 +endif +CFLAGS += $(VAR_CFLAGS) + + +LDLIBS = -lmbedtls -lmbedcrypto +VAR_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null) +ifeq ($(VAR_LDLIBS),) +VAR_LDLIBS = -lnl-genl-3 -lnl-3 +endif +LDLIBS += $(VAR_LDLIBS) + + +TEST_FILES = common.sh + +TEST_PROGS = test.sh \ + test-large-mtu.sh \ + test-chachapoly.sh \ + test-tcp.sh \ + test-float.sh \ + test-close-socket.sh \ + test-close-socket-tcp.sh + +TEST_GEN_FILES := ovpn-cli + +include ../../lib.mk diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh new file mode 100644 index 000000000000..88869c675d03 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/common.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt} +TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt} +OVPN_CLI=${OVPN_CLI:-./ovpn-cli} +ALG=${ALG:-aes} +PROTO=${PROTO:-UDP} +FLOAT=${FLOAT:-0} + +LAN_IP="11.11.11.11" + +create_ns() { + ip netns add peer${1} +} + +setup_ns() { + MODE="P2P" + + if [ ${1} -eq 0 ]; then + MODE="MP" + for p in $(seq 1 ${NUM_PEERS}); do + ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p} + + ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p} + ip -n peer0 addr add fd00:0:0:${p}::1/64 dev veth${p} + ip -n peer0 link set veth${p} up + + ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p} + ip -n peer${p} addr add fd00:0:0:${p}::2/64 dev veth${p} + ip -n peer${p} link set veth${p} up + done + fi + + ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE + ip -n peer${1} addr add ${2} dev tun${1} + # add a secondary IP to peer 1, to test a LAN behind a client + if [ ${1} -eq 1 -a -n "${LAN_IP}" ]; then + ip -n peer${1} addr add ${LAN_IP} dev tun${1} + ip -n peer0 route add ${LAN_IP} via $(echo ${2} |sed -e s'!/.*!!') dev tun0 + fi + if [ -n "${3}" ]; then + ip -n peer${1} link set mtu ${3} dev tun${1} + fi + ip -n peer${1} link set tun${1} up +} + +add_peer() { + if [ "${PROTO}" == "UDP" ]; then + if [ ${1} -eq 0 ]; then + ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE} + + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \ + data64.key + done + else + RADDR=$(awk "NR == ${1} {print \$2}" ${UDP_PEERS_FILE}) + RPORT=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE}) + LPORT=$(awk "NR == ${1} {print \$5}" ${UDP_PEERS_FILE}) + ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} ${LPORT} \ + ${RADDR} ${RPORT} + ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \ + data64.key + fi + else + if [ ${1} -eq 0 ]; then + (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${TCP_PEERS_FILE} && { + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \ + ${ALG} 0 data64.key + done + }) & + sleep 5 + else + ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \ + data64.key + fi + fi +} + +cleanup() { + # some ovpn-cli processes sleep in background so they need manual poking + killall $(basename ${OVPN_CLI}) 2>/dev/null || true + + # netns peer0 is deleted without erasing ifaces first + for p in $(seq 1 10); do + ip -n peer${p} link set tun${p} down 2>/dev/null || true + ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true + done + for p in $(seq 1 10); do + ip -n peer0 link del veth${p} 2>/dev/null || true + done + for p in $(seq 0 10); do + ip netns del peer${p} 2>/dev/null || true + done +} + +if [ "${PROTO}" == "UDP" ]; then + NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')} +else + NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')} +fi + + diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config new file mode 100644 index 000000000000..71946ba9fa17 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/config @@ -0,0 +1,10 @@ +CONFIG_NET=y +CONFIG_INET=y +CONFIG_STREAM_PARSER=y +CONFIG_NET_UDP_TUNNEL=y +CONFIG_DST_CACHE=y +CONFIG_CRYPTO=y +CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_GCM=y +CONFIG_CRYPTO_CHACHA20POLY1305=y +CONFIG_OVPN=m diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key new file mode 100644 index 000000000000..a99e88c4e290 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/data64.key @@ -0,0 +1,5 @@ +jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B +ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9 +uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6 +KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE +BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w== diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c new file mode 100644 index 000000000000..de9c26f98b2e --- /dev/null +++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c @@ -0,0 +1,2383 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel accelerator + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + */ + +#include <stdio.h> +#include <inttypes.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <time.h> + +#include <linux/ovpn.h> +#include <linux/types.h> +#include <linux/netlink.h> + +#include <netlink/socket.h> +#include <netlink/netlink.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/family.h> +#include <netlink/genl/ctrl.h> + +#include <mbedtls/base64.h> +#include <mbedtls/error.h> + +#include <sys/socket.h> + +/* defines to make checkpatch happy */ +#define strscpy strncpy +#define __always_unused __attribute__((__unused__)) + +/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we + * have to explicitly do it to prevent the kernel from failing upon + * parsing of the message + */ +#define nla_nest_start(_msg, _type) \ + nla_nest_start(_msg, (_type) | NLA_F_NESTED) + +/* libnl < 3.11.0 does not implement nla_get_uint() */ +uint64_t ovpn_nla_get_uint(struct nlattr *attr) +{ + if (nla_len(attr) == sizeof(uint32_t)) + return nla_get_u32(attr); + else + return nla_get_u64(attr); +} + +typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg); + +enum ovpn_key_direction { + KEY_DIR_IN = 0, + KEY_DIR_OUT, +}; + +#define KEY_LEN (256 / 8) +#define NONCE_LEN 8 + +#define PEER_ID_UNDEF 0x00FFFFFF +#define MAX_PEERS 10 + +struct nl_ctx { + struct nl_sock *nl_sock; + struct nl_msg *nl_msg; + struct nl_cb *nl_cb; + + int ovpn_dco_id; +}; + +enum ovpn_cmd { + CMD_INVALID, + CMD_NEW_IFACE, + CMD_DEL_IFACE, + CMD_LISTEN, + CMD_CONNECT, + CMD_NEW_PEER, + CMD_NEW_MULTI_PEER, + CMD_SET_PEER, + CMD_DEL_PEER, + CMD_GET_PEER, + CMD_NEW_KEY, + CMD_DEL_KEY, + CMD_GET_KEY, + CMD_SWAP_KEYS, + CMD_LISTEN_MCAST, +}; + +struct ovpn_ctx { + enum ovpn_cmd cmd; + + __u8 key_enc[KEY_LEN]; + __u8 key_dec[KEY_LEN]; + __u8 nonce[NONCE_LEN]; + + enum ovpn_cipher_alg cipher; + + sa_family_t sa_family; + + unsigned long peer_id; + unsigned long lport; + + union { + struct sockaddr_in in4; + struct sockaddr_in6 in6; + } remote; + + union { + struct sockaddr_in in4; + struct sockaddr_in6 in6; + } peer_ip; + + bool peer_ip_set; + + unsigned int ifindex; + char ifname[IFNAMSIZ]; + enum ovpn_mode mode; + bool mode_set; + + int socket; + int cli_sockets[MAX_PEERS]; + + __u32 keepalive_interval; + __u32 keepalive_timeout; + + enum ovpn_key_direction key_dir; + enum ovpn_key_slot key_slot; + int key_id; + + const char *peers_file; +}; + +static int ovpn_nl_recvmsgs(struct nl_ctx *ctx) +{ + int ret; + + ret = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb); + + switch (ret) { + case -NLE_INTR: + fprintf(stderr, + "netlink received interrupt due to signal - ignoring\n"); + break; + case -NLE_NOMEM: + fprintf(stderr, "netlink out of memory error\n"); + break; + case -NLE_AGAIN: + fprintf(stderr, + "netlink reports blocking read - aborting wait\n"); + break; + default: + if (ret) + fprintf(stderr, "netlink reports error (%d): %s\n", + ret, nl_geterror(-ret)); + break; + } + + return ret; +} + +static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd, + int flags) +{ + struct nl_ctx *ctx; + int err, ret; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return NULL; + + ctx->nl_sock = nl_socket_alloc(); + if (!ctx->nl_sock) { + fprintf(stderr, "cannot allocate netlink socket\n"); + goto err_free; + } + + nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192); + + ret = genl_connect(ctx->nl_sock); + if (ret) { + fprintf(stderr, "cannot connect to generic netlink: %s\n", + nl_geterror(ret)); + goto err_sock; + } + + /* enable Extended ACK for detailed error reporting */ + err = 1; + setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK, + &err, sizeof(err)); + + ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME); + if (ctx->ovpn_dco_id < 0) { + fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n", + ctx->ovpn_dco_id); + goto err_free; + } + + ctx->nl_msg = nlmsg_alloc(); + if (!ctx->nl_msg) { + fprintf(stderr, "cannot allocate netlink message\n"); + goto err_sock; + } + + ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!ctx->nl_cb) { + fprintf(stderr, "failed to allocate netlink callback\n"); + goto err_msg; + } + + nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb); + + genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd, 0); + + if (ovpn->ifindex > 0) + NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex); + + return ctx; +nla_put_failure: +err_msg: + nlmsg_free(ctx->nl_msg); +err_sock: + nl_socket_free(ctx->nl_sock); +err_free: + free(ctx); + return NULL; +} + +static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd) +{ + return nl_ctx_alloc_flags(ovpn, cmd, 0); +} + +static void nl_ctx_free(struct nl_ctx *ctx) +{ + if (!ctx) + return; + + nl_socket_free(ctx->nl_sock); + nlmsg_free(ctx->nl_msg); + nl_cb_put(ctx->nl_cb); + free(ctx); +} + +static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused, + struct nlmsgerr *err, void *arg) +{ + struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1; + struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1]; + int len = nlh->nlmsg_len; + struct nlattr *attrs; + int *ret = arg; + int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh); + + *ret = err->error; + + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return NL_STOP; + + if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + ack_len += err->msg.nlmsg_len - sizeof(*nlh); + + if (len <= ack_len) + return NL_STOP; + + attrs = (void *)((uint8_t *)nlh + ack_len); + len -= ack_len; + + nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL); + if (tb_msg[NLMSGERR_ATTR_MSG]) { + len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]), + nla_len(tb_msg[NLMSGERR_ATTR_MSG])); + fprintf(stderr, "kernel error: %*s\n", len, + (char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG])); + } + + if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) { + fprintf(stderr, "missing required nesting type %u\n", + nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST])); + } + + if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) { + fprintf(stderr, "missing required attribute type %u\n", + nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE])); + } + + return NL_STOP; +} + +static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused, + void *arg) +{ + int *status = arg; + + *status = 0; + return NL_SKIP; +} + +static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused, + void *arg) +{ + int *status = arg; + + *status = 0; + return NL_STOP; +} + +static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb) +{ + int status = 1; + + nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status); + nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish, + &status); + nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status); + + if (cb) + nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx); + + nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg); + + while (status == 1) + ovpn_nl_recvmsgs(ctx); + + if (status < 0) + fprintf(stderr, "failed to send netlink message: %s (%d)\n", + strerror(-status), status); + + return status; +} + +static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx) +{ + int idx_enc, idx_dec, ret = -1; + unsigned char *ckey = NULL; + __u8 *bkey = NULL; + size_t olen = 0; + long ckey_len; + FILE *fp; + + fp = fopen(file, "r"); + if (!fp) { + fprintf(stderr, "cannot open: %s\n", file); + return -1; + } + + /* get file size */ + fseek(fp, 0L, SEEK_END); + ckey_len = ftell(fp); + rewind(fp); + + /* if the file is longer, let's just read a portion */ + if (ckey_len > 256) + ckey_len = 256; + + ckey = malloc(ckey_len); + if (!ckey) + goto err; + + ret = fread(ckey, 1, ckey_len, fp); + if (ret != ckey_len) { + fprintf(stderr, + "couldn't read enough data from key file: %dbytes read\n", + ret); + goto err; + } + + olen = 0; + ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len); + if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) { + char buf[256]; + + mbedtls_strerror(ret, buf, sizeof(buf)); + fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf, + ret); + + goto err; + } + + bkey = malloc(olen); + if (!bkey) { + fprintf(stderr, "cannot allocate binary key buffer\n"); + goto err; + } + + ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len); + if (ret) { + char buf[256]; + + mbedtls_strerror(ret, buf, sizeof(buf)); + fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf, + ret); + + goto err; + } + + if (olen < 2 * KEY_LEN + NONCE_LEN) { + fprintf(stderr, + "not enough data in key file, found %zdB but needs %dB\n", + olen, 2 * KEY_LEN + NONCE_LEN); + goto err; + } + + switch (ctx->key_dir) { + case KEY_DIR_IN: + idx_enc = 0; + idx_dec = 1; + break; + case KEY_DIR_OUT: + idx_enc = 1; + idx_dec = 0; + break; + default: + goto err; + } + + memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN); + memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN); + memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN); + + ret = 0; + +err: + fclose(fp); + free(bkey); + free(ckey); + + return ret; +} + +static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx) +{ + if (strcmp(cipher, "aes") == 0) + ctx->cipher = OVPN_CIPHER_ALG_AES_GCM; + else if (strcmp(cipher, "chachapoly") == 0) + ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305; + else if (strcmp(cipher, "none") == 0) + ctx->cipher = OVPN_CIPHER_ALG_NONE; + else + return -ENOTSUP; + + return 0; +} + +static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx) +{ + int in_dir; + + in_dir = strtoll(dir, NULL, 10); + switch (in_dir) { + case KEY_DIR_IN: + case KEY_DIR_OUT: + ctx->key_dir = in_dir; + break; + default: + fprintf(stderr, + "invalid key direction provided. Can be 0 or 1 only\n"); + return -1; + } + + return 0; +} + +static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto) +{ + struct sockaddr_storage local_sock = { 0 }; + struct sockaddr_in6 *in6; + struct sockaddr_in *in; + int ret, s, sock_type; + size_t sock_len; + + if (proto == IPPROTO_UDP) + sock_type = SOCK_DGRAM; + else if (proto == IPPROTO_TCP) + sock_type = SOCK_STREAM; + else + return -EINVAL; + + s = socket(family, sock_type, 0); + if (s < 0) { + perror("cannot create socket"); + return -1; + } + + switch (family) { + case AF_INET: + in = (struct sockaddr_in *)&local_sock; + in->sin_family = family; + in->sin_port = htons(ctx->lport); + in->sin_addr.s_addr = htonl(INADDR_ANY); + sock_len = sizeof(*in); + break; + case AF_INET6: + in6 = (struct sockaddr_in6 *)&local_sock; + in6->sin6_family = family; + in6->sin6_port = htons(ctx->lport); + in6->sin6_addr = in6addr_any; + sock_len = sizeof(*in6); + break; + default: + return -1; + } + + int opt = 1; + + ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + + if (ret < 0) { + perror("setsockopt for SO_REUSEADDR"); + return ret; + } + + ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + if (ret < 0) { + perror("setsockopt for SO_REUSEPORT"); + return ret; + } + + if (family == AF_INET6) { + opt = 0; + if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt, + sizeof(opt))) { + perror("failed to set IPV6_V6ONLY"); + return -1; + } + } + + ret = bind(s, (struct sockaddr *)&local_sock, sock_len); + if (ret < 0) { + perror("cannot bind socket"); + goto err_socket; + } + + ctx->socket = s; + ctx->sa_family = family; + return 0; + +err_socket: + close(s); + return -1; +} + +static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family) +{ + return ovpn_socket(ctx, family, IPPROTO_UDP); +} + +static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family) +{ + int ret; + + ret = ovpn_socket(ctx, family, IPPROTO_TCP); + if (ret < 0) + return ret; + + ret = listen(ctx->socket, 10); + if (ret < 0) { + perror("listen"); + close(ctx->socket); + return -1; + } + + return 0; +} + +static int ovpn_accept(struct ovpn_ctx *ctx) +{ + socklen_t socklen; + int ret; + + socklen = sizeof(ctx->remote); + ret = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen); + if (ret < 0) { + perror("accept"); + goto err; + } + + fprintf(stderr, "Connection received!\n"); + + switch (socklen) { + case sizeof(struct sockaddr_in): + case sizeof(struct sockaddr_in6): + break; + default: + fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n"); + close(ret); + ret = -EINVAL; + goto err; + } + + return ret; +err: + close(ctx->socket); + return ret; +} + +static int ovpn_connect(struct ovpn_ctx *ovpn) +{ + socklen_t socklen; + int s, ret; + + s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0); + if (s < 0) { + perror("cannot create socket"); + return -1; + } + + switch (ovpn->remote.in4.sin_family) { + case AF_INET: + socklen = sizeof(struct sockaddr_in); + break; + case AF_INET6: + socklen = sizeof(struct sockaddr_in6); + break; + default: + return -EOPNOTSUPP; + } + + ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen); + if (ret < 0) { + perror("connect"); + goto err; + } + + fprintf(stderr, "connected\n"); + + ovpn->socket = s; + + return 0; +err: + close(s); + return ret; +} + +static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket); + + if (!is_tcp) { + switch (ovpn->remote.in4.sin_family) { + case AF_INET: + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4, + ovpn->remote.in4.sin_addr.s_addr); + NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT, + ovpn->remote.in4.sin_port); + break; + case AF_INET6: + NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6, + sizeof(ovpn->remote.in6.sin6_addr), + &ovpn->remote.in6.sin6_addr); + NLA_PUT_U32(ctx->nl_msg, + OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID, + ovpn->remote.in6.sin6_scope_id); + NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT, + ovpn->remote.in6.sin6_port); + break; + default: + fprintf(stderr, + "Invalid family for remote socket address\n"); + goto nla_put_failure; + } + } + + if (ovpn->peer_ip_set) { + switch (ovpn->peer_ip.in4.sin_family) { + case AF_INET: + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4, + ovpn->peer_ip.in4.sin_addr.s_addr); + break; + case AF_INET6: + NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6, + sizeof(struct in6_addr), + &ovpn->peer_ip.in6.sin6_addr); + break; + default: + fprintf(stderr, "Invalid family for peer address\n"); + goto nla_put_failure; + } + } + + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_set_peer(struct ovpn_ctx *ovpn) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL, + ovpn->keepalive_interval); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT, + ovpn->keepalive_timeout); + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_del_peer(struct ovpn_ctx *ovpn) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused) +{ + struct nlattr *pattrs[OVPN_A_PEER_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + __u16 rport = 0, lport = 0; + + nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!attrs[OVPN_A_PEER]) { + fprintf(stderr, "no packet content in netlink message\n"); + return NL_SKIP; + } + + nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]), + nla_len(attrs[OVPN_A_PEER]), NULL); + + if (pattrs[OVPN_A_PEER_ID]) + fprintf(stderr, "* Peer %u\n", + nla_get_u32(pattrs[OVPN_A_PEER_ID])); + + if (pattrs[OVPN_A_PEER_SOCKET_NETNSID]) + fprintf(stderr, "\tsocket NetNS ID: %d\n", + nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID])); + + if (pattrs[OVPN_A_PEER_VPN_IPV4]) { + char buf[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]), + buf, sizeof(buf)); + fprintf(stderr, "\tVPN IPv4: %s\n", buf); + } + + if (pattrs[OVPN_A_PEER_VPN_IPV6]) { + char buf[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]), + buf, sizeof(buf)); + fprintf(stderr, "\tVPN IPv6: %s\n", buf); + } + + if (pattrs[OVPN_A_PEER_LOCAL_PORT]) + lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT])); + + if (pattrs[OVPN_A_PEER_REMOTE_PORT]) + rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT])); + + if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) { + void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6]; + char buf[INET6_ADDRSTRLEN]; + int scope_id = -1; + + if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) { + void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]; + + scope_id = nla_get_u32(p); + } + + inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport, + scope_id); + + if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) { + void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6]; + + inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport); + } + } + + if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) { + void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4]; + char buf[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport); + + if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) { + void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4]; + + inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf)); + fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport); + } + } + + if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) { + void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]; + + fprintf(stderr, "\tKeepalive interval: %u sec\n", + nla_get_u32(p)); + } + + if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]) + fprintf(stderr, "\tKeepalive timeout: %u sec\n", + nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])); + + if (pattrs[OVPN_A_PEER_VPN_RX_BYTES]) + fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES])); + + if (pattrs[OVPN_A_PEER_VPN_TX_BYTES]) + fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES])); + + if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS]) + fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS])); + + if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS]) + fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS])); + + if (pattrs[OVPN_A_PEER_LINK_RX_BYTES]) + fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES])); + + if (pattrs[OVPN_A_PEER_LINK_TX_BYTES]) + fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES])); + + if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS]) + fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS])); + + if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS]) + fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS])); + + return NL_SKIP; +} + +static int ovpn_get_peer(struct ovpn_ctx *ovpn) +{ + int flags = 0, ret = -1; + struct nlattr *attr; + struct nl_ctx *ctx; + + if (ovpn->peer_id == PEER_ID_UNDEF) + flags = NLM_F_DUMP; + + ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags); + if (!ctx) + return -ENOMEM; + + if (ovpn->peer_id != PEER_ID_UNDEF) { + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, attr); + } + + ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_new_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf, *key_dir; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher); + + key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce); + nla_nest_end(ctx->nl_msg, key_dir); + + key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce); + nla_nest_end(ctx->nl_msg, key_dir); + + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_del_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused) +{ + struct nlattr *kattrs[OVPN_A_KEYCONF_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + + nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!attrs[OVPN_A_KEYCONF]) { + fprintf(stderr, "no packet content in netlink message\n"); + return NL_SKIP; + } + + nla_parse(kattrs, OVPN_A_KEYCONF_MAX, nla_data(attrs[OVPN_A_KEYCONF]), + nla_len(attrs[OVPN_A_KEYCONF]), NULL); + + if (kattrs[OVPN_A_KEYCONF_PEER_ID]) + fprintf(stderr, "* Peer %u\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_PEER_ID])); + if (kattrs[OVPN_A_KEYCONF_SLOT]) { + fprintf(stderr, "\t- Slot: "); + switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])) { + case OVPN_KEY_SLOT_PRIMARY: + fprintf(stderr, "primary\n"); + break; + case OVPN_KEY_SLOT_SECONDARY: + fprintf(stderr, "secondary\n"); + break; + default: + fprintf(stderr, "invalid (%u)\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])); + break; + } + } + if (kattrs[OVPN_A_KEYCONF_KEY_ID]) + fprintf(stderr, "\t- Key ID: %u\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_KEY_ID])); + if (kattrs[OVPN_A_KEYCONF_CIPHER_ALG]) { + fprintf(stderr, "\t- Cipher: "); + switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])) { + case OVPN_CIPHER_ALG_NONE: + fprintf(stderr, "none\n"); + break; + case OVPN_CIPHER_ALG_AES_GCM: + fprintf(stderr, "aes-gcm\n"); + break; + case OVPN_CIPHER_ALG_CHACHA20_POLY1305: + fprintf(stderr, "chacha20poly1305\n"); + break; + default: + fprintf(stderr, "invalid (%u)\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])); + break; + } + } + + return NL_SKIP; +} + +static int ovpn_get_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, ovpn_handle_key); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_swap_keys(struct ovpn_ctx *ovpn) +{ + struct nl_ctx *ctx; + struct nlattr *kc; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP); + if (!ctx) + return -ENOMEM; + + kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, kc); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +/* Helper function used to easily add attributes to a rtnl message */ +static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type, + const void *data, int alen) +{ + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + if ((int)(NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len)) > maxlen) { + fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n", + __func__, maxlen); + return -EMSGSIZE; + } + + rta = nlmsg_tail(n); + rta->rta_type = type; + rta->rta_len = len; + + if (!data) + memset(RTA_DATA(rta), 0, alen); + else + memcpy(RTA_DATA(rta), data, alen); + + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + + return 0; +} + +static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size, + int attr) +{ + struct rtattr *nest = nlmsg_tail(msg); + + if (ovpn_addattr(msg, max_size, attr, NULL, 0) < 0) + return NULL; + + return nest; +} + +static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest) +{ + nest->rta_len = (uint8_t *)nlmsg_tail(msg) - (uint8_t *)nest; +} + +#define RT_SNDBUF_SIZE (1024 * 2) +#define RT_RCVBUF_SIZE (1024 * 4) + +/* Open RTNL socket */ +static int ovpn_rt_socket(void) +{ + int sndbuf = RT_SNDBUF_SIZE, rcvbuf = RT_RCVBUF_SIZE, fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd < 0) { + fprintf(stderr, "%s: cannot open netlink socket\n", __func__); + return fd; + } + + if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, + sizeof(sndbuf)) < 0) { + fprintf(stderr, "%s: SO_SNDBUF\n", __func__); + close(fd); + return -1; + } + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, + sizeof(rcvbuf)) < 0) { + fprintf(stderr, "%s: SO_RCVBUF\n", __func__); + close(fd); + return -1; + } + + return fd; +} + +/* Bind socket to Netlink subsystem */ +static int ovpn_rt_bind(int fd, uint32_t groups) +{ + struct sockaddr_nl local = { 0 }; + socklen_t addr_len; + + local.nl_family = AF_NETLINK; + local.nl_groups = groups; + + if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) { + fprintf(stderr, "%s: cannot bind netlink socket: %d\n", + __func__, errno); + return -errno; + } + + addr_len = sizeof(local); + if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) { + fprintf(stderr, "%s: cannot getsockname: %d\n", __func__, + errno); + return -errno; + } + + if (addr_len != sizeof(local)) { + fprintf(stderr, "%s: wrong address length %d\n", __func__, + addr_len); + return -EINVAL; + } + + if (local.nl_family != AF_NETLINK) { + fprintf(stderr, "%s: wrong address family %d\n", __func__, + local.nl_family); + return -EINVAL; + } + + return 0; +} + +typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg); + +/* Send Netlink message and run callback on reply (if specified) */ +static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer, + unsigned int groups, ovpn_parse_reply_cb cb, + void *arg_cb) +{ + int len, rem_len, fd, ret, rcv_len; + struct sockaddr_nl nladdr = { 0 }; + struct nlmsgerr *err; + struct nlmsghdr *h; + char buf[1024 * 16]; + struct iovec iov = { + .iov_base = payload, + .iov_len = payload->nlmsg_len, + }; + struct msghdr nlmsg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + nladdr.nl_family = AF_NETLINK; + nladdr.nl_pid = peer; + nladdr.nl_groups = groups; + + payload->nlmsg_seq = time(NULL); + + /* no need to send reply */ + if (!cb) + payload->nlmsg_flags |= NLM_F_ACK; + + fd = ovpn_rt_socket(); + if (fd < 0) { + fprintf(stderr, "%s: can't open rtnl socket\n", __func__); + return -errno; + } + + ret = ovpn_rt_bind(fd, 0); + if (ret < 0) { + fprintf(stderr, "%s: can't bind rtnl socket\n", __func__); + ret = -errno; + goto out; + } + + ret = sendmsg(fd, &nlmsg, 0); + if (ret < 0) { + fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__); + ret = -errno; + goto out; + } + + /* prepare buffer to store RTNL replies */ + memset(buf, 0, sizeof(buf)); + iov.iov_base = buf; + + while (1) { + /* + * iov_len is modified by recvmsg(), therefore has to be initialized before + * using it again + */ + iov.iov_len = sizeof(buf); + rcv_len = recvmsg(fd, &nlmsg, 0); + if (rcv_len < 0) { + if (errno == EINTR || errno == EAGAIN) { + fprintf(stderr, "%s: interrupted call\n", + __func__); + continue; + } + fprintf(stderr, "%s: rtnl: error on recvmsg()\n", + __func__); + ret = -errno; + goto out; + } + + if (rcv_len == 0) { + fprintf(stderr, + "%s: rtnl: socket reached unexpected EOF\n", + __func__); + ret = -EIO; + goto out; + } + + if (nlmsg.msg_namelen != sizeof(nladdr)) { + fprintf(stderr, + "%s: sender address length: %u (expected %zu)\n", + __func__, nlmsg.msg_namelen, sizeof(nladdr)); + ret = -EIO; + goto out; + } + + h = (struct nlmsghdr *)buf; + while (rcv_len >= (int)sizeof(*h)) { + len = h->nlmsg_len; + rem_len = len - sizeof(*h); + + if (rem_len < 0 || len > rcv_len) { + if (nlmsg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "%s: truncated message\n", + __func__); + ret = -EIO; + goto out; + } + fprintf(stderr, "%s: malformed message: len=%d\n", + __func__, len); + ret = -EIO; + goto out; + } + + if (h->nlmsg_type == NLMSG_DONE) { + ret = 0; + goto out; + } + + if (h->nlmsg_type == NLMSG_ERROR) { + err = (struct nlmsgerr *)NLMSG_DATA(h); + if (rem_len < (int)sizeof(struct nlmsgerr)) { + fprintf(stderr, "%s: ERROR truncated\n", + __func__); + ret = -EIO; + goto out; + } + + if (err->error) { + fprintf(stderr, "%s: (%d) %s\n", + __func__, err->error, + strerror(-err->error)); + ret = err->error; + goto out; + } + + ret = 0; + if (cb) { + int r = cb(h, arg_cb); + + if (r <= 0) + ret = r; + } + goto out; + } + + if (cb) { + int r = cb(h, arg_cb); + + if (r <= 0) { + ret = r; + goto out; + } + } else { + fprintf(stderr, "%s: RTNL: unexpected reply\n", + __func__); + } + + rcv_len -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((uint8_t *)h + + NLMSG_ALIGN(len)); + } + + if (nlmsg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "%s: message truncated\n", __func__); + continue; + } + + if (rcv_len) { + fprintf(stderr, "%s: rtnl: %d not parsed bytes\n", + __func__, rcv_len); + ret = -1; + goto out; + } + } +out: + close(fd); + + return ret; +} + +struct ovpn_link_req { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[256]; +}; + +static int ovpn_new_iface(struct ovpn_ctx *ovpn) +{ + struct rtattr *linkinfo, *data; + struct ovpn_link_req req = { 0 }; + int ret = -1; + + fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname, + ovpn->mode); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.n.nlmsg_type = RTM_NEWLINK; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname, + strlen(ovpn->ifname) + 1) < 0) + goto err; + + linkinfo = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO); + if (!linkinfo) + goto err; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME, + strlen(OVPN_FAMILY_NAME) + 1) < 0) + goto err; + + if (ovpn->mode_set) { + data = ovpn_nest_start(&req.n, sizeof(req), IFLA_INFO_DATA); + if (!data) + goto err; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE, + &ovpn->mode, sizeof(uint8_t)) < 0) + goto err; + + ovpn_nest_end(&req.n, data); + } + + ovpn_nest_end(&req.n, linkinfo); + + req.i.ifi_family = AF_PACKET; + + ret = ovpn_rt_send(&req.n, 0, 0, NULL, NULL); +err: + return ret; +} + +static int ovpn_del_iface(struct ovpn_ctx *ovpn) +{ + struct ovpn_link_req req = { 0 }; + + fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname, + ovpn->ifindex); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_DELLINK; + + req.i.ifi_family = AF_PACKET; + req.i.ifi_index = ovpn->ifindex; + + return ovpn_rt_send(&req.n, 0, 0, NULL, NULL); +} + +static int nl_seq_check(struct nl_msg (*msg)__always_unused, + void (*arg)__always_unused) +{ + return NL_OK; +} + +struct mcast_handler_args { + const char *group; + int id; +}; + +static int mcast_family_handler(struct nl_msg *msg, void *arg) +{ + struct mcast_handler_args *grp = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int rem_mcgrp; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return NL_SKIP; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) { + struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1]; + + nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX, + nla_data(mcgrp), nla_len(mcgrp), NULL); + + if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] || + !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]) + continue; + if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]), + grp->group, nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]))) + continue; + grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]); + break; + } + + return NL_SKIP; +} + +static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused, + struct nlmsgerr *err, void *arg) +{ + int *ret = arg; + + *ret = err->error; + return NL_STOP; +} + +static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg) +{ + int *ret = arg; + + *ret = 0; + return NL_STOP; +} + +static int ovpn_handle_msg(struct nl_msg *msg, void *arg) +{ + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + struct nlmsghdr *nlh = nlmsg_hdr(msg); + char ifname[IF_NAMESIZE]; + int *ret = arg; + __u32 ifindex; + + fprintf(stderr, "received message from ovpn-dco\n"); + + *ret = -1; + + if (!genlmsg_valid_hdr(nlh, 0)) { + fprintf(stderr, "invalid header\n"); + return NL_STOP; + } + + if (nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL)) { + fprintf(stderr, "received bogus data from ovpn-dco\n"); + return NL_STOP; + } + + if (!attrs[OVPN_A_IFINDEX]) { + fprintf(stderr, "no ifindex in this message\n"); + return NL_STOP; + } + + ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]); + if (!if_indextoname(ifindex, ifname)) { + fprintf(stderr, "cannot resolve ifname for ifindex: %u\n", + ifindex); + return NL_STOP; + } + + switch (gnlh->cmd) { + case OVPN_CMD_PEER_DEL_NTF: + fprintf(stdout, "received CMD_PEER_DEL_NTF\n"); + break; + case OVPN_CMD_KEY_SWAP_NTF: + fprintf(stdout, "received CMD_KEY_SWAP_NTF\n"); + break; + default: + fprintf(stderr, "received unknown command: %d\n", gnlh->cmd); + return NL_STOP; + } + + *ret = 0; + return NL_OK; +} + +static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family, + const char *group) +{ + struct nl_msg *msg; + struct nl_cb *cb; + int ret, ctrlid; + struct mcast_handler_args grp = { + .group = group, + .id = -ENOENT, + }; + + msg = nlmsg_alloc(); + if (!msg) + return -ENOMEM; + + cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!cb) { + ret = -ENOMEM; + goto out_fail_cb; + } + + ctrlid = genl_ctrl_resolve(sock, "nlctrl"); + + genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + + ret = -ENOBUFS; + NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = nl_send_auto_complete(sock, msg); + if (ret < 0) + goto nla_put_failure; + + ret = 1; + + nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret); + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp); + + while (ret > 0) + nl_recvmsgs(sock, cb); + + if (ret == 0) + ret = grp.id; + nla_put_failure: + nl_cb_put(cb); + out_fail_cb: + nlmsg_free(msg); + return ret; +} + +static int ovpn_listen_mcast(void) +{ + struct nl_sock *sock; + struct nl_cb *cb; + int mcid, ret; + + sock = nl_socket_alloc(); + if (!sock) { + fprintf(stderr, "cannot allocate netlink socket\n"); + goto err_free; + } + + nl_socket_set_buffer_size(sock, 8192, 8192); + + ret = genl_connect(sock); + if (ret < 0) { + fprintf(stderr, "cannot connect to generic netlink: %s\n", + nl_geterror(ret)); + goto err_free; + } + + mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS); + if (mcid < 0) { + fprintf(stderr, "cannot get mcast group: %s\n", + nl_geterror(mcid)); + goto err_free; + } + + ret = nl_socket_add_membership(sock, mcid); + if (ret) { + fprintf(stderr, "failed to join mcast group: %d\n", ret); + goto err_free; + } + + ret = 1; + cb = nl_cb_alloc(NL_CB_DEFAULT); + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret); + nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret); + + while (ret == 1) { + int err = nl_recvmsgs(sock, cb); + + if (err < 0) { + fprintf(stderr, + "cannot receive netlink message: (%d) %s\n", + err, nl_geterror(-err)); + ret = -1; + break; + } + } + + nl_cb_put(cb); +err_free: + nl_socket_free(sock); + return ret; +} + +static void usage(const char *cmd) +{ + fprintf(stderr, + "Usage %s <command> <iface> [arguments..]\n", + cmd); + fprintf(stderr, "where <command> can be one of the following\n\n"); + + fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tmode:\n"); + fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n"); + fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n"); + + fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + + fprintf(stderr, + "* listen <iface> <lport> <peers_file> [ipv6]: listen for incoming peer TCP connections\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: TCP port to listen to\n"); + fprintf(stderr, + "\tpeers_file: file containing one peer per line: Line format:\n"); + fprintf(stderr, "\t\t<peer_id> <vpnaddr>\n"); + fprintf(stderr, + "\tipv6: whether the socket should listen to the IPv6 wildcard address\n"); + + fprintf(stderr, + "* connect <iface> <peer_id> <raddr> <rport> [key_file]: start connecting peer of TCP-based VPN session\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n"); + fprintf(stderr, "\traddr: peer IP address to connect to\n"); + fprintf(stderr, "\trport: peer TCP port to connect to\n"); + fprintf(stderr, + "\tkey_file: file containing the symmetric key for encryption\n"); + + fprintf(stderr, + "* new_peer <iface> <peer_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: local UDP port to bind to\n"); + fprintf(stderr, + "\tpeer_id: peer ID to be used in data packets to/from this peer\n"); + fprintf(stderr, "\traddr: peer IP address\n"); + fprintf(stderr, "\trport: peer UDP port\n"); + fprintf(stderr, "\tvpnaddr: peer VPN IP\n"); + + fprintf(stderr, + "* new_multi_peer <iface> <lport> <peers_file>: add multiple peers as listed in the file\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: local UDP port to bind to\n"); + fprintf(stderr, + "\tpeers_file: text file containing one peer per line. Line format:\n"); + fprintf(stderr, "\t\t<peer_id> <raddr> <rport> <vpnaddr>\n"); + + fprintf(stderr, + "* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + fprintf(stderr, + "\tkeepalive_interval: interval for sending ping messages\n"); + fprintf(stderr, + "\tkeepalive_timeout: time after which a peer is timed out\n"); + + fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n"); + + fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID of the peer to query. All peers are returned if omitted\n"); + + fprintf(stderr, + "* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID of the peer to configure the key for\n"); + fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n"); + fprintf(stderr, "\tkey_id: an ID from 0 to 7\n"); + fprintf(stderr, + "\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n"); + fprintf(stderr, + "\tkey_dir: key direction, must 0 on one host and 1 on the other\n"); + fprintf(stderr, "\tkey_file: file containing the pre-shared key\n"); + + fprintf(stderr, + "* del_key <iface> <peer_id> [slot]: erase existing data channel key\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n"); + + fprintf(stderr, + "* get_key <iface> <peer_id> <slot>: retrieve non sensible key data\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n"); + fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n"); + + fprintf(stderr, + "* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + + fprintf(stderr, + "* listen_mcast: listen to ovpn netlink multicast messages\n"); +} + +static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host, + const char *service, const char *vpnip) +{ + int ret; + struct addrinfo *result; + struct addrinfo hints = { + .ai_family = ovpn->sa_family, + .ai_socktype = SOCK_DGRAM, + .ai_protocol = IPPROTO_UDP + }; + + if (host) { + ret = getaddrinfo(host, service, &hints, &result); + if (ret) { + fprintf(stderr, "getaddrinfo on remote error: %s\n", + gai_strerror(ret)); + return -1; + } + + if (!(result->ai_family == AF_INET && + result->ai_addrlen == sizeof(struct sockaddr_in)) && + !(result->ai_family == AF_INET6 && + result->ai_addrlen == sizeof(struct sockaddr_in6))) { + ret = -EINVAL; + goto out; + } + + memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen); + } + + if (vpnip) { + ret = getaddrinfo(vpnip, NULL, &hints, &result); + if (ret) { + fprintf(stderr, "getaddrinfo on vpnip error: %s\n", + gai_strerror(ret)); + return -1; + } + + if (!(result->ai_family == AF_INET && + result->ai_addrlen == sizeof(struct sockaddr_in)) && + !(result->ai_family == AF_INET6 && + result->ai_addrlen == sizeof(struct sockaddr_in6))) { + ret = -EINVAL; + goto out; + } + + memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen); + ovpn->sa_family = result->ai_family; + + ovpn->peer_ip_set = true; + } + + ret = 0; +out: + freeaddrinfo(result); + return ret; +} + +static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id, + const char *raddr, const char *rport, + const char *vpnip) +{ + ovpn->peer_id = strtoul(peer_id, NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + return ovpn_parse_remote(ovpn, raddr, rport, vpnip); +} + +static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn) +{ + int slot = strtoul(arg, NULL, 10); + + if (errno == ERANGE || slot < 1 || slot > 2) { + fprintf(stderr, "key slot out of range\n"); + return -1; + } + + switch (slot) { + case 1: + ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY; + break; + case 2: + ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY; + break; + } + + return 0; +} + +static int ovpn_send_tcp_data(int socket) +{ + uint16_t len = htons(1000); + uint8_t buf[1002]; + int ret; + + memcpy(buf, &len, sizeof(len)); + memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len)); + + ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL); + + fprintf(stdout, "Sent %u bytes over TCP socket\n", ret); + + return ret > 0 ? 0 : ret; +} + +static int ovpn_recv_tcp_data(int socket) +{ + uint8_t buf[1002]; + uint16_t len; + int ret; + + ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL); + + if (ret < 2) { + fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret); + return ret; + } + + memcpy(&len, buf, sizeof(len)); + len = ntohs(len); + + fprintf(stdout, ">>>> Received %u bytes over TCP socket, header: %u\n", + ret, len); + + return 0; +} + +static enum ovpn_cmd ovpn_parse_cmd(const char *cmd) +{ + if (!strcmp(cmd, "new_iface")) + return CMD_NEW_IFACE; + + if (!strcmp(cmd, "del_iface")) + return CMD_DEL_IFACE; + + if (!strcmp(cmd, "listen")) + return CMD_LISTEN; + + if (!strcmp(cmd, "connect")) + return CMD_CONNECT; + + if (!strcmp(cmd, "new_peer")) + return CMD_NEW_PEER; + + if (!strcmp(cmd, "new_multi_peer")) + return CMD_NEW_MULTI_PEER; + + if (!strcmp(cmd, "set_peer")) + return CMD_SET_PEER; + + if (!strcmp(cmd, "del_peer")) + return CMD_DEL_PEER; + + if (!strcmp(cmd, "get_peer")) + return CMD_GET_PEER; + + if (!strcmp(cmd, "new_key")) + return CMD_NEW_KEY; + + if (!strcmp(cmd, "del_key")) + return CMD_DEL_KEY; + + if (!strcmp(cmd, "get_key")) + return CMD_GET_KEY; + + if (!strcmp(cmd, "swap_keys")) + return CMD_SWAP_KEYS; + + if (!strcmp(cmd, "listen_mcast")) + return CMD_LISTEN_MCAST; + + return CMD_INVALID; +} + +/* Send process to background and waits for signal. + * + * This helper is called at the end of commands + * creating sockets, so that the latter stay alive + * along with the process that created them. + * + * A signal is expected to be delivered in order to + * terminate the waiting processes + */ +static void ovpn_waitbg(void) +{ + daemon(1, 1); + pause(); +} + +static int ovpn_run_cmd(struct ovpn_ctx *ovpn) +{ + char peer_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128], lport[10]; + char raddr[128], rport[10]; + int n, ret; + FILE *fp; + + switch (ovpn->cmd) { + case CMD_NEW_IFACE: + ret = ovpn_new_iface(ovpn); + break; + case CMD_DEL_IFACE: + ret = ovpn_del_iface(ovpn); + break; + case CMD_LISTEN: + ret = ovpn_listen(ovpn, ovpn->sa_family); + if (ret < 0) { + fprintf(stderr, "cannot listen on TCP socket\n"); + return ret; + } + + fp = fopen(ovpn->peers_file, "r"); + if (!fp) { + fprintf(stderr, "cannot open file: %s\n", + ovpn->peers_file); + return -1; + } + + int num_peers = 0; + + while ((n = fscanf(fp, "%s %s\n", peer_id, vpnip)) == 2) { + struct ovpn_ctx peer_ctx = { 0 }; + + if (num_peers == MAX_PEERS) { + fprintf(stderr, "max peers reached!\n"); + return -E2BIG; + } + + peer_ctx.ifindex = ovpn->ifindex; + peer_ctx.sa_family = ovpn->sa_family; + + peer_ctx.socket = ovpn_accept(ovpn); + if (peer_ctx.socket < 0) { + fprintf(stderr, "cannot accept connection!\n"); + return -1; + } + + /* store peer sockets to test TCP I/O */ + ovpn->cli_sockets[num_peers] = peer_ctx.socket; + + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL, + NULL, vpnip); + if (ret < 0) { + fprintf(stderr, "error while parsing line\n"); + return -1; + } + + ret = ovpn_new_peer(&peer_ctx, true); + if (ret < 0) { + fprintf(stderr, + "cannot add peer to VPN: %s %s\n", + peer_id, vpnip); + return ret; + } + num_peers++; + } + + for (int i = 0; i < num_peers; i++) { + ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]); + if (ret < 0) + break; + } + ovpn_waitbg(); + break; + case CMD_CONNECT: + ret = ovpn_connect(ovpn); + if (ret < 0) { + fprintf(stderr, "cannot connect TCP socket\n"); + return ret; + } + + ret = ovpn_new_peer(ovpn, true); + if (ret < 0) { + fprintf(stderr, "cannot add peer to VPN\n"); + close(ovpn->socket); + return ret; + } + + if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) { + ret = ovpn_new_key(ovpn); + if (ret < 0) { + fprintf(stderr, "cannot set key\n"); + return ret; + } + } + + ret = ovpn_send_tcp_data(ovpn->socket); + ovpn_waitbg(); + break; + case CMD_NEW_PEER: + ret = ovpn_udp_socket(ovpn, AF_INET6); + if (ret < 0) + return ret; + + ret = ovpn_new_peer(ovpn, false); + ovpn_waitbg(); + break; + case CMD_NEW_MULTI_PEER: + ret = ovpn_udp_socket(ovpn, AF_INET6); + if (ret < 0) + return ret; + + fp = fopen(ovpn->peers_file, "r"); + if (!fp) { + fprintf(stderr, "cannot open file: %s\n", + ovpn->peers_file); + return -1; + } + + while ((n = fscanf(fp, "%s %s %s %s %s %s\n", peer_id, laddr, + lport, raddr, rport, vpnip)) == 6) { + struct ovpn_ctx peer_ctx = { 0 }; + + peer_ctx.ifindex = ovpn->ifindex; + peer_ctx.socket = ovpn->socket; + peer_ctx.sa_family = AF_UNSPEC; + + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr, + rport, vpnip); + if (ret < 0) { + fprintf(stderr, "error while parsing line\n"); + return -1; + } + + ret = ovpn_new_peer(&peer_ctx, false); + if (ret < 0) { + fprintf(stderr, + "cannot add peer to VPN: %s %s %s %s\n", + peer_id, raddr, rport, vpnip); + return ret; + } + } + ovpn_waitbg(); + break; + case CMD_SET_PEER: + ret = ovpn_set_peer(ovpn); + break; + case CMD_DEL_PEER: + ret = ovpn_del_peer(ovpn); + break; + case CMD_GET_PEER: + if (ovpn->peer_id == PEER_ID_UNDEF) + fprintf(stderr, "List of peers connected to: %s\n", + ovpn->ifname); + + ret = ovpn_get_peer(ovpn); + break; + case CMD_NEW_KEY: + ret = ovpn_new_key(ovpn); + break; + case CMD_DEL_KEY: + ret = ovpn_del_key(ovpn); + break; + case CMD_GET_KEY: + ret = ovpn_get_key(ovpn); + break; + case CMD_SWAP_KEYS: + ret = ovpn_swap_keys(ovpn); + break; + case CMD_LISTEN_MCAST: + ret = ovpn_listen_mcast(); + break; + case CMD_INVALID: + break; + } + + return ret; +} + +static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[]) +{ + int ret; + + /* no args required for LISTEN_MCAST */ + if (ovpn->cmd == CMD_LISTEN_MCAST) + return 0; + + /* all commands need an ifname */ + if (argc < 3) + return -EINVAL; + + strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1); + ovpn->ifname[IFNAMSIZ - 1] = '\0'; + + /* all commands, except NEW_IFNAME, needs an ifindex */ + if (ovpn->cmd != CMD_NEW_IFACE) { + ovpn->ifindex = if_nametoindex(ovpn->ifname); + if (!ovpn->ifindex) { + fprintf(stderr, "cannot find interface: %s\n", + strerror(errno)); + return -1; + } + } + + switch (ovpn->cmd) { + case CMD_NEW_IFACE: + if (argc < 4) + break; + + if (!strcmp(argv[3], "P2P")) { + ovpn->mode = OVPN_MODE_P2P; + } else if (!strcmp(argv[3], "MP")) { + ovpn->mode = OVPN_MODE_MP; + } else { + fprintf(stderr, "Cannot parse iface mode: %s\n", + argv[3]); + return -1; + } + ovpn->mode_set = true; + break; + case CMD_DEL_IFACE: + break; + case CMD_LISTEN: + if (argc < 5) + return -EINVAL; + + ovpn->lport = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + ovpn->peers_file = argv[4]; + + if (argc > 5 && !strcmp(argv[5], "ipv6")) + ovpn->sa_family = AF_INET6; + break; + case CMD_CONNECT: + if (argc < 6) + return -EINVAL; + + ovpn->sa_family = AF_INET; + + ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5], + NULL); + if (ret < 0) { + fprintf(stderr, "Cannot parse remote peer data\n"); + return -1; + } + + if (argc > 6) { + ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY; + ovpn->key_id = 0; + ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM; + ovpn->key_dir = KEY_DIR_OUT; + + ret = ovpn_parse_key(argv[6], ovpn); + if (ret) + return -1; + } + break; + case CMD_NEW_PEER: + if (argc < 7) + return -EINVAL; + + ovpn->lport = strtoul(argv[4], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + const char *vpnip = (argc > 7) ? argv[7] : NULL; + + ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6], + vpnip); + if (ret < 0) + return -1; + break; + case CMD_NEW_MULTI_PEER: + if (argc < 5) + return -EINVAL; + + ovpn->lport = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + ovpn->peers_file = argv[4]; + break; + case CMD_SET_PEER: + if (argc < 6) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ovpn->keepalive_interval = strtoul(argv[4], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, + "keepalive interval value out of range\n"); + return -1; + } + + ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, + "keepalive interval value out of range\n"); + return -1; + } + break; + case CMD_DEL_PEER: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + break; + case CMD_GET_PEER: + ovpn->peer_id = PEER_ID_UNDEF; + if (argc > 3) { + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + } + break; + case CMD_NEW_KEY: + if (argc < 9) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return -1; + + ovpn->key_id = strtoul(argv[5], NULL, 10); + if (errno == ERANGE || ovpn->key_id > 2) { + fprintf(stderr, "key ID out of range\n"); + return -1; + } + + ret = ovpn_parse_cipher(argv[6], ovpn); + if (ret < 0) + return -1; + + ret = ovpn_parse_key_direction(argv[7], ovpn); + if (ret < 0) + return -1; + + ret = ovpn_parse_key(argv[8], ovpn); + if (ret) + return -1; + break; + case CMD_DEL_KEY: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return ret; + break; + case CMD_GET_KEY: + if (argc < 5) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return ret; + break; + case CMD_SWAP_KEYS: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + break; + case CMD_LISTEN_MCAST: + break; + case CMD_INVALID: + break; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct ovpn_ctx ovpn; + int ret; + + if (argc < 2) { + usage(argv[0]); + return -1; + } + + memset(&ovpn, 0, sizeof(ovpn)); + ovpn.sa_family = AF_UNSPEC; + ovpn.cipher = OVPN_CIPHER_ALG_NONE; + + ovpn.cmd = ovpn_parse_cmd(argv[1]); + if (ovpn.cmd == CMD_INVALID) { + fprintf(stderr, "Error: unknown command.\n\n"); + usage(argv[0]); + return -1; + } + + ret = ovpn_parse_cmd_args(&ovpn, argc, argv); + if (ret < 0) { + fprintf(stderr, "Error: invalid arguments.\n\n"); + if (ret == -EINVAL) + usage(argv[0]); + return ret; + } + + ret = ovpn_run_cmd(&ovpn); + if (ret) + fprintf(stderr, "Cannot execute command: %s (%d)\n", + strerror(-ret), ret); + + return ret; +} diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt new file mode 100644 index 000000000000..d753eebe8716 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt @@ -0,0 +1,5 @@ +1 5.5.5.2 +2 5.5.5.3 +3 5.5.5.4 +4 5.5.5.5 +5 5.5.5.6 diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh new file mode 100755 index 000000000000..32504079a2b8 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +ALG="chachapoly" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh new file mode 100755 index 000000000000..093d44772ffd --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +PROTO="TCP" + +source test-close-socket.sh diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh new file mode 100755 index 000000000000..5e48a8b67928 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +#set -x +set -e + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 ${NUM_PEERS}); do + create_ns ${p} +done + +for p in $(seq 0 ${NUM_PEERS}); do + setup_ns ${p} 5.5.5.$((${p} + 1))/24 +done + +for p in $(seq 0 ${NUM_PEERS}); do + add_peer ${p} +done + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120 +done + +sleep 1 + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) +done + +ip netns exec peer0 iperf3 -1 -s & +sleep 1 +ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 + +cleanup + +modprobe -r ovpn || true diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh new file mode 100755 index 000000000000..ba5d725e18b0 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-float.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +FLOAT="1" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh new file mode 100755 index 000000000000..ba3f1f315a34 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-tcp.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +PROTO="TCP" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh new file mode 100755 index 000000000000..e8acdc303307 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +#set -x +set -e + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 ${NUM_PEERS}); do + create_ns ${p} +done + +for p in $(seq 0 ${NUM_PEERS}); do + setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU} +done + +for p in $(seq 0 ${NUM_PEERS}); do + add_peer ${p} +done + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120 +done + +sleep 1 + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) + ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1)) +done + +# ping LAN behind client 1 +ip netns exec peer0 ping -qfc 500 -w 3 ${LAN_IP} + +if [ "$FLOAT" == "1" ]; then + # make clients float.. + for p in $(seq 1 ${NUM_PEERS}); do + ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p} + ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p} + done + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1 + done +fi + +ip netns exec peer0 iperf3 -1 -s & +sleep 1 +ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 + +echo "Adding secondary key and then swap:" +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key + ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key + ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p} +done + +sleep 1 + +echo "Querying all peers:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 +ip netns exec peer1 ${OVPN_CLI} get_peer tun1 + +echo "Querying peer 1:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1 + +echo "Querying non-existent peer 10:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true + +echo "Deleting peer 1:" +ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1 +ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1 + +echo "Querying keys:" +for p in $(seq 2 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1 + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2 +done + +echo "Deleting peer while sending traffic:" +(ip netns exec peer2 ping -qf -w 4 5.5.5.1)& +sleep 2 +ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2 +# following command fails in TCP mode +# (both ends get conn reset when one peer disconnects) +ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true + +echo "Deleting keys:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1 + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2 +done + +echo "Setting timeout to 3s MP:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0 +done +# wait for peers to timeout +sleep 5 + +echo "Setting timeout to 3s P2P:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3 +done +sleep 5 + +cleanup + +modprobe -r ovpn || true diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt new file mode 100644 index 000000000000..e9773ddf875c --- /dev/null +++ b/tools/testing/selftests/net/ovpn/udp_peers.txt @@ -0,0 +1,6 @@ +1 10.10.1.1 1 10.10.1.2 1 5.5.5.2 +2 10.10.2.1 1 10.10.2.2 1 5.5.5.3 +3 10.10.3.1 1 10.10.3.2 1 5.5.5.4 +4 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5 +5 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6 +6 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 66be7699c72c..88e914c4eef9 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -205,7 +205,6 @@ # Check that PMTU exceptions are created for both paths. source lib.sh -source net_helper.sh PAUSE_ON_FAIL=no VERBOSE=0 diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c index b8475cb29be7..1c43401a1c80 100644 --- a/tools/testing/selftests/net/reuseport_addr_any.c +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -9,7 +9,6 @@ #include <arpa/inet.h> #include <errno.h> #include <error.h> -#include <linux/dccp.h> #include <linux/in.h> #include <linux/unistd.h> #include <stdbool.h> @@ -21,10 +20,6 @@ #include <sys/socket.h> #include <unistd.h> -#ifndef SOL_DCCP -#define SOL_DCCP 269 -#endif - static const char *IP4_ADDR = "127.0.0.1"; static const char *IP6_ADDR = "::1"; static const char *IP4_MAPPED6 = "::ffff:127.0.0.1"; @@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count, if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) error(1, errno, "tcp: failed to listen on receive port"); - else if (proto == SOCK_DCCP) { - if (setsockopt(rcv_fds[i], SOL_DCCP, - DCCP_SOCKOPT_SERVICE, - &(int) {htonl(42)}, sizeof(int))) - error(1, errno, "failed to setsockopt"); - - if (listen(rcv_fds[i], 10)) - error(1, errno, "dccp: failed to listen on receive port"); - } } } @@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto) if (fd < 0) error(1, errno, "failed to create send socket"); - if (proto == SOCK_DCCP && - setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, - &(int){htonl(42)}, sizeof(int))) - error(1, errno, "failed to setsockopt"); - if (bind(fd, saddr, sz)) error(1, errno, "failed to bind send socket"); @@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto) if (i < 0) error(1, errno, "epoll_wait failed"); - if (proto == SOCK_STREAM || proto == SOCK_DCCP) { + if (proto == SOCK_STREAM) { fd = accept(ev.data.fd, NULL, NULL); if (fd < 0) error(1, errno, "failed to accept"); @@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto, static void test_proto(int proto, const char *proto_str) { - if (proto == SOCK_DCCP) { - int test_fd; - - test_fd = socket(AF_INET, proto, 0); - if (test_fd < 0) { - if (errno == ESOCKTNOSUPPORT) { - fprintf(stderr, "DCCP not supported: skipping DCCP tests\n"); - return; - } else - error(1, errno, "failed to create a DCCP socket"); - } - close(test_fd); - } - fprintf(stderr, "%s IPv4 ... ", proto_str); run_one_test(AF_INET, AF_INET, proto, IP4_ADDR); @@ -271,7 +238,6 @@ int main(void) { test_proto(SOCK_DGRAM, "UDP"); test_proto(SOCK_STREAM, "TCP"); - test_proto(SOCK_DCCP, "DCCP"); fprintf(stderr, "SUCCESS\n"); return 0; diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh index 02d617040793..a5e959a080bb 100755 --- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -285,11 +285,6 @@ setup_hs() ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh index 79fb81e63c59..a649dba3cb77 100755 --- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh @@ -250,11 +250,6 @@ setup_hs() eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh index 50563443a4ad..318487eda671 100755 --- a/tools/testing/selftests/net/srv6_end_flavors_test.sh +++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh @@ -399,7 +399,7 @@ __get_srv6_rtcfg_id() # Given the description of a router <id:op> as an input, the function returns # the <op> token which represents the operation (e.g. End behavior with or -# withouth flavors) configured for the node. +# without flavors) configured for the node. # Note that when the operation represents an End behavior with a list of # flavors, the output is the ordered version of that list. @@ -480,7 +480,7 @@ setup_rt_local_sids() # all SIDs start with a common locator. Routes and SRv6 Endpoint - # behavior instaces are grouped together in the 'localsid' table. + # behavior instances are grouped together in the 'localsid' table. ip -netns "${nsname}" -6 rule \ add to "${LOCATOR_SERVICE}::/16" \ lookup "${LOCALSID_TABLE_ID}" prio 999 diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh index 87e414cc417c..ba730655a7bf 100755 --- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh @@ -245,10 +245,8 @@ # that adopted in the use cases already examined (of course, it is necessary to # consider the different SIDs/C-SIDs). -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" @@ -376,32 +374,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -410,8 +394,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -420,28 +403,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, @@ -462,10 +429,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -497,7 +464,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -518,9 +485,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -596,7 +560,7 @@ setup_rt_local_sids() local lcnode_func_prefix local lcblock_prefix - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -668,8 +632,8 @@ __setup_l3vpn() local rtsrc_nsname local rtdst_nsname - rtsrc_nsname="$(get_rtname "${src}")" - rtdst_nsname="$(get_rtname "${dst}")" + eval rtsrc_nsname=\${$(get_rtname "${src}")} + eval rtdst_nsname=\${$(get_rtname "${dst}")} container="${LCBLOCK_ADDR}" @@ -744,8 +708,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -791,11 +755,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -880,7 +839,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -903,7 +862,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -915,7 +874,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 @@ -1025,7 +984,7 @@ rt_x_nextcsid_end_behavior_test() local nsname local ret - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} __nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}" ret="$?" diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh index c79cb8ede17f..4b86040c58c6 100755 --- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -287,10 +287,8 @@ # packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46) # and sends it to the host hs-1. -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" @@ -418,32 +416,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -452,15 +436,12 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" + setup_ns "${nsname}" - __create_namespace "${nsname}" - + eval nsname=\${$(get_rtname "${rtid}")} ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -470,29 +451,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, # the selftest is considered as "skipped". @@ -512,10 +476,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -547,7 +511,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -631,7 +595,7 @@ set_end_x_nextcsid() local rt="$1" local adj="$2" - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} net_prefix="$(get_network_prefix "${rt}" "${adj}")" lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" @@ -650,7 +614,7 @@ set_underlay_sids_reachability() local rt="$1" local rt_neighs="$2" - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -685,7 +649,7 @@ setup_rt_local_sids() local lcnode_func_prefix local lcblock_prefix - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} set_underlay_sids_reachability "${rt}" "${rt_neighs}" @@ -728,8 +692,8 @@ __setup_l3vpn() local rtsrc_nsname local rtdst_nsname - rtsrc_nsname="$(get_rtname "${src}")" - rtdst_nsname="$(get_rtname "${dst}")" + eval rtsrc_nsname=\${$(get_rtname "${src}")} + eval rtdst_nsname=\${$(get_rtname "${dst}")} container="${LCBLOCK_ADDR}" @@ -804,8 +768,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -851,11 +815,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -947,7 +906,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -970,7 +929,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -982,7 +941,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 @@ -1093,7 +1052,7 @@ rt_x_nextcsid_end_x_behavior_test() local nsname local ret - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} __nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}" ret="$?" diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh index 28a775654b92..3efce1718c5f 100755 --- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -166,10 +166,8 @@ # hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d) # -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" readonly RT2HS_DEVNAME="veth-t${VRF_TID}" @@ -248,32 +246,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -282,8 +266,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -292,29 +275,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, # the selftest is considered as "skipped". @@ -334,10 +300,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -369,7 +335,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -387,9 +353,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -403,7 +366,7 @@ setup_rt_local_sids() local nsname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -469,7 +432,7 @@ __setup_rt_policy() local policy='' local n - nsname="$(get_rtname "${encap_rt}")" + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," @@ -516,8 +479,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -555,11 +518,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -656,7 +614,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -679,7 +637,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -691,7 +649,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh index cb4177d41b21..cabc70538ffe 100755 --- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -116,10 +116,8 @@ # hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b) # -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly RT2HS_DEVNAME="veth-hs" readonly HS_VETH_NAME="veth0" @@ -199,32 +197,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -233,8 +217,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -243,28 +226,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, @@ -285,10 +252,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -320,7 +287,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -341,9 +308,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -357,7 +321,7 @@ setup_rt_local_sids() local nsname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -407,7 +371,7 @@ __setup_rt_policy() local policy='' local n - nsname="$(get_rtname "${encap_rt}")" + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," @@ -446,7 +410,7 @@ setup_decap() local rt="$1" local nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} # Local End.DX2 behavior ip -netns "${nsname}" -6 route \ @@ -463,8 +427,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -486,11 +450,6 @@ setup_hs() add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up - - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 } # set an auto-generated mac address @@ -508,7 +467,7 @@ set_mac_address() local ifname="$4" local nsname - nsname=$(get_nodename "${nodename}") + eval nsname=\${${nodename}} ip -netns "${nsname}" link set dev "${ifname}" down @@ -532,7 +491,7 @@ set_host_l2peer() local hssrc_name local ipaddr - hssrc_name="$(get_hsname "${hssrc}")" + eval hssrc_name=\${$(get_hsname "${hssrc}")} if [ "${proto}" -eq 6 ]; then ipaddr="${ipprefix}::${hsdst}" @@ -562,7 +521,7 @@ setup_l2vpn() local rtdst="${hsdst}" # set fixed mac for source node and the neigh MAC address - set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" + set_mac_address "hs_${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6 set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4 @@ -570,7 +529,7 @@ setup_l2vpn() # to the mac address of the remote peer (L2 VPN destination host). # Otherwise, traffic coming from the source host is dropped at the # ingress router. - set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" + set_mac_address "rt_${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" # set the SRv6 Policies at the ingress router setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ @@ -647,7 +606,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -670,7 +629,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -682,7 +641,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh index 02b986c9c247..9067197c9055 100755 --- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh +++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh @@ -51,7 +51,9 @@ ret=0 # All tests in this script. Can be overridden with -t option. TESTS=" neigh_suppress_arp + neigh_suppress_uc_arp neigh_suppress_ns + neigh_suppress_uc_ns neigh_vlan_suppress_arp neigh_vlan_suppress_ns " @@ -388,6 +390,52 @@ neigh_suppress_arp() neigh_suppress_arp_common $vid $sip $tip } +neigh_suppress_uc_arp_common() +{ + local vid=$1; shift + local sip=$1; shift + local tip=$1; shift + local tmac + + echo + echo "Unicast ARP, per-port ARP suppression - VLAN $vid" + echo "-----------------------------------------------" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid" + + run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass" + + run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass" + + run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q" + tc_check_packets $h1 "dev eth0.$vid ingress" 101 1 + log_test $? 0 "Unicast ARP, suppression on, h1 filter" + tc_check_packets $h2 "dev eth0.$vid egress" 101 1 + log_test $? 0 "Unicast ARP, suppression on, h2 filter" +} + +neigh_suppress_uc_arp() +{ + local vid=10 + local sip=192.0.2.1 + local tip=192.0.2.2 + + neigh_suppress_uc_arp_common $vid $sip $tip + + vid=20 + sip=192.0.2.17 + tip=192.0.2.18 + neigh_suppress_uc_arp_common $vid $sip $tip +} + neigh_suppress_ns_common() { local vid=$1; shift @@ -494,6 +542,78 @@ neigh_suppress_ns() neigh_suppress_ns_common $vid $saddr $daddr $maddr } +icmpv6_header_get() +{ + local csum=$1; shift + local tip=$1; shift + local type + local p + + # Type 135 (Neighbor Solicitation), hex format + type="87" + p=$(: + )"$type:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"$csum:"$( : ICMPv6.checksum + )"00:00:00:00:"$( : Reserved + )"$tip:"$( : Target Address + ) + echo $p +} + +neigh_suppress_uc_ns_common() +{ + local vid=$1; shift + local sip=$1; shift + local dip=$1; shift + local full_dip=$1; shift + local csum=$1; shift + local tmac + + echo + echo "Unicast NS, per-port NS suppression - VLAN $vid" + echo "---------------------------------------------" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid" + + run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass" + + run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q" + tc_check_packets $h1 "dev eth0.$vid ingress" 101 1 + log_test $? 0 "Unicast NS, suppression on, h1 filter" + tc_check_packets $h2 "dev eth0.$vid egress" 101 1 + log_test $? 0 "Unicast NS, suppression on, h2 filter" +} + +neigh_suppress_uc_ns() +{ + local vid=10 + local saddr=2001:db8:1::1 + local daddr=2001:db8:1::2 + local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02 + local csum="ef:79" + + neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum + + vid=20 + saddr=2001:db8:2::1 + daddr=2001:db8:2::2 + full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02 + csum="ef:76" + + neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum +} + neigh_vlan_suppress_arp() { local vid1=10 @@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then exit $ksft_skip fi +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + bridge link help 2>&1 | grep -q "neigh_vlan_suppress" if [ $? -ne 0 ]; then echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support" diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index d5ffd8c9172e..1dc337c709f8 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -3,7 +3,7 @@ # # Run a series of udpgro functional tests. -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 815fad8c53a8..54fa4821bc5e 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -3,7 +3,7 @@ # # Run a series of udpgro benchmarks -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 5f3d1a110d11..9a2cfec1153e 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -3,7 +3,7 @@ # # Run a series of udpgro benchmarks -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index f22f6c66997e..a39fdc4aa2ff 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -source net_helper.sh +source lib.sh BPF_FILE="lib/xdp_dummy.bpf.o" readonly BASE="ns-$(mktemp -u XXXXXX)" diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 58bcbbd029bc..94176ffe4646 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -56,6 +56,8 @@ ARCH_mips32be = mips ARCH_riscv32 = riscv ARCH_riscv64 = riscv ARCH_s390x = s390 +ARCH_sparc32 = sparc +ARCH_sparc64 = sparc ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) # kernel image names by architecture @@ -76,6 +78,9 @@ IMAGE_riscv64 = arch/riscv/boot/Image IMAGE_s390x = arch/s390/boot/bzImage IMAGE_s390 = arch/s390/boot/bzImage IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi +IMAGE_sparc32 = arch/sparc/boot/image +IMAGE_sparc64 = arch/sparc/boot/image +IMAGE_m68k = vmlinux IMAGE = $(objtree)/$(IMAGE_$(XARCH)) IMAGE_NAME = $(notdir $(IMAGE)) @@ -97,9 +102,15 @@ DEFCONFIG_riscv64 = defconfig DEFCONFIG_s390x = defconfig DEFCONFIG_s390 = defconfig compat.config DEFCONFIG_loongarch = defconfig +DEFCONFIG_sparc32 = sparc32_defconfig +DEFCONFIG_sparc64 = sparc64_defconfig +DEFCONFIG_m68k = virt_defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) +EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) +EXTRACONFIG_arm = -e CONFIG_NAMESPACES +EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES # optional tests to run (default = all) TEST = @@ -122,6 +133,9 @@ QEMU_ARCH_riscv64 = riscv64 QEMU_ARCH_s390x = s390x QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 +QEMU_ARCH_sparc32 = sparc +QEMU_ARCH_sparc64 = sparc64 +QEMU_ARCH_m68k = m68k QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) QEMU_ARCH_USER_ppc64le = ppc64le @@ -152,6 +166,9 @@ QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_T QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) # OUTPUT is only set when run from the main makefile, otherwise @@ -174,10 +191,15 @@ CFLAGS_s390x = -m64 CFLAGS_s390 = -m31 CFLAGS_mips32le = -EL -mabi=32 -fPIC CFLAGS_mips32be = -EB -mabi=32 +CFLAGS_sparc32 = $(call cc-option,-m32) +ifeq ($(origin XARCH),command line) +CFLAGS_XARCH = $(CFLAGS_$(XARCH)) +endif CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) +CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \ - $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA) + $(CFLAGS_XARCH) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_SANITIZER) $(CFLAGS_EXTRA) LDFLAGS := LIBGCC := -lgcc @@ -232,11 +254,11 @@ all: run sysroot: sysroot/$(ARCH)/include -sysroot/$(ARCH)/include: | defconfig +sysroot/$(ARCH)/include: $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot $(QUIET_MKDIR)mkdir -p sysroot $(Q)$(MAKE) -C $(srctree) outputmakefile - $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone + $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check $(Q)mv sysroot/sysroot sysroot/$(ARCH) ifneq ($(NOLIBC_SYSROOT),0) diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c index a7ca8325863f..0636d1b6e808 100644 --- a/tools/testing/selftests/nolibc/nolibc-test-linkage.c +++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c @@ -2,9 +2,7 @@ #include "nolibc-test-linkage.h" -#ifndef NOLIBC #include <errno.h> -#endif void *linkage_test_errno_addr(void) { diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 5884a891c491..dbe13000fb1a 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -9,24 +9,22 @@ * $(CC) -nostdlib -I/path/to/nolibc/sysroot => _NOLIBC_* guards are present * $(CC) with default libc => NOLIBC* never defined */ -#ifndef NOLIBC #include <stdio.h> #include <stdlib.h> #include <string.h> -#ifndef _NOLIBC_STDIO_H -/* standard libcs need more includes */ #include <sys/auxv.h> -#include <sys/io.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <sys/mount.h> #include <sys/prctl.h> +#include <sys/random.h> #include <sys/reboot.h> #include <sys/resource.h> #include <sys/stat.h> #include <sys/syscall.h> #include <sys/sysmacros.h> #include <sys/time.h> +#include <sys/timerfd.h> #include <sys/utsname.h> #include <sys/wait.h> #include <dirent.h> @@ -38,10 +36,10 @@ #include <stdarg.h> #include <stddef.h> #include <stdint.h> +#include <time.h> #include <unistd.h> #include <limits.h> -#endif -#endif +#include <ctype.h> #pragma GCC diagnostic ignored "-Wmissing-prototypes" @@ -807,6 +805,26 @@ static int test_dirent(void) return 0; } +int test_getrandom(void) +{ + uint64_t rng = 0; + ssize_t ret; + + ret = getrandom(&rng, sizeof(rng), GRND_NONBLOCK); + if (ret == -1 && errno == EAGAIN) + return 0; /* No entropy available yet */ + + if (ret != sizeof(rng)) + return ret; + + if (!rng) { + errno = EINVAL; + return -1; + } + + return 0; +} + int test_getpagesize(void) { int x = getpagesize(); @@ -836,6 +854,29 @@ int test_getpagesize(void) return !c; } +int test_file_stream(void) +{ + FILE *f; + int r; + + f = fopen("/dev/null", "r"); + if (!f) + return -1; + + errno = 0; + r = fwrite("foo", 1, 3, f); + if (r != 0 || errno != EBADF) { + fclose(f); + return -1; + } + + r = fclose(f); + if (r == EOF) + return -1; + + return 0; +} + int test_fork(void) { int status; @@ -883,6 +924,102 @@ int test_stat_timestamps(void) return 0; } +int test_timer(void) +{ + struct itimerspec timerspec; + struct sigevent evp; + timer_t timer; + int ret; + + evp.sigev_notify = SIGEV_NONE; + + ret = timer_create(CLOCK_MONOTONIC, &evp, &timer); + if (ret) + return ret; + + timerspec = (struct itimerspec) { + .it_value.tv_sec = 1000000, + }; + ret = timer_settime(timer, 0, &timerspec, NULL); + if (ret) + goto err; + + timerspec = (struct itimerspec) { + .it_value.tv_sec = -1, + .it_value.tv_nsec = -1, + .it_interval.tv_sec = -1, + .it_interval.tv_nsec = -1, + }; + ret = timer_gettime(timer, &timerspec); + if (ret) + goto err; + + errno = EINVAL; + ret = -1; + + if (timerspec.it_interval.tv_sec || timerspec.it_interval.tv_nsec) + goto err; + + if (timerspec.it_value.tv_sec > 1000000) + goto err; + + ret = timer_delete(timer); + if (ret) + return ret; + + return 0; + +err: + timer_delete(timer); + return ret; +} + +int test_timerfd(void) +{ + struct itimerspec timerspec; + int timer, ret; + + timer = timerfd_create(CLOCK_MONOTONIC, 0); + if (timer == -1) + return -1; + + timerspec = (struct itimerspec) { + .it_value.tv_sec = 1000000, + }; + ret = timerfd_settime(timer, 0, &timerspec, NULL); + if (ret) + goto err; + + timerspec = (struct itimerspec) { + .it_value.tv_sec = -1, + .it_value.tv_nsec = -1, + .it_interval.tv_sec = -1, + .it_interval.tv_nsec = -1, + }; + ret = timerfd_gettime(timer, &timerspec); + if (ret) + goto err; + + errno = EINVAL; + ret = -1; + + if (timerspec.it_interval.tv_sec || timerspec.it_interval.tv_nsec) + goto err; + + if (timerspec.it_value.tv_sec > 1000000) + goto err; + + ret = close(timer); + if (ret) + return ret; + + return 0; + +err: + close(timer); + return ret; +} + int test_uname(void) { struct utsname buf; @@ -926,7 +1063,7 @@ int test_mmap_munmap(void) { int ret, fd, i, page_size; void *mem; - size_t file_size, length; + size_t file_size, length, mem_length; off_t offset, pa_offset; struct stat stat_buf; const char * const files[] = { @@ -966,14 +1103,22 @@ int test_mmap_munmap(void) offset = 0; length = file_size - offset; pa_offset = offset & ~(page_size - 1); + mem_length = length + offset - pa_offset; - mem = mmap(NULL, length + offset - pa_offset, PROT_READ, MAP_SHARED, fd, pa_offset); + mem = mmap(NULL, mem_length, PROT_READ, MAP_SHARED, fd, pa_offset); if (mem == MAP_FAILED) { ret = 1; goto end; } - ret = munmap(mem, length + offset - pa_offset); + mem = mremap(mem, mem_length, mem_length * 2, MREMAP_MAYMOVE, 0); + if (mem == MAP_FAILED) { + munmap(mem, mem_length); + ret = 1; + goto end; + } + + ret = munmap(mem, mem_length * 2); end: close(fd); @@ -1045,6 +1190,72 @@ int test_openat(void) return 0; } +int test_namespace(void) +{ + int original_ns, new_ns, ret; + ino_t original_ns_ino; + struct stat stat_buf; + + original_ns = open("/proc/self/ns/uts", O_RDONLY); + if (original_ns == -1) + return -1; + + ret = fstat(original_ns, &stat_buf); + if (ret) + goto out; + + original_ns_ino = stat_buf.st_ino; + + ret = unshare(CLONE_NEWUTS); + if (ret) + goto out; + + new_ns = open("/proc/self/ns/uts", O_RDONLY); + if (new_ns == -1) { + ret = new_ns; + goto out; + } + + ret = fstat(new_ns, &stat_buf); + close(new_ns); + if (ret) + goto out; + + if (stat_buf.st_ino == original_ns_ino) { + errno = EINVAL; + ret = -1; + goto out; + } + + ret = setns(original_ns, CLONE_NEWUTS); + if (ret) + goto out; + + new_ns = open("/proc/self/ns/uts", O_RDONLY); + if (new_ns == -1) { + ret = new_ns; + goto out; + } + + ret = fstat(new_ns, &stat_buf); + if (ret) + goto out; + + close(new_ns); + + if (stat_buf.st_ino != original_ns_ino) { + errno = EINVAL; + ret = -1; + goto out; + } + + ret = 0; + +out: + close(original_ns); + return ret; +} + /* Run syscall tests between IDs <min> and <max>. * Return 0 on success, non-zero on failure. */ @@ -1052,6 +1263,7 @@ int run_syscall(int min, int max) { struct timeval tv; struct timezone tz; + struct timespec ts; struct stat stat_buf; int euid0; int proc; @@ -1083,6 +1295,11 @@ int run_syscall(int min, int max) * test numbers. */ switch (test + __LINE__ + 1) { + CASE_TEST(access); EXPECT_SYSZR(proc, access("/proc/self", R_OK)); break; + CASE_TEST(access_bad); EXPECT_SYSER(proc, access("/proc/self", W_OK), -1, EPERM); break; + CASE_TEST(clock_getres); EXPECT_SYSZR(1, clock_getres(CLOCK_MONOTONIC, &ts)); break; + CASE_TEST(clock_gettime); EXPECT_SYSZR(1, clock_gettime(CLOCK_MONOTONIC, &ts)); break; + CASE_TEST(clock_settime); EXPECT_SYSER(1, clock_settime(CLOCK_MONOTONIC, &ts), -1, EINVAL); break; CASE_TEST(getpid); EXPECT_SYSNE(1, getpid(), -1); break; CASE_TEST(getppid); EXPECT_SYSNE(1, getppid(), -1); break; CASE_TEST(gettid); EXPECT_SYSNE(has_gettid, gettid(), -1); break; @@ -1112,10 +1329,12 @@ int run_syscall(int min, int max) CASE_TEST(dup3_0); tmp = dup3(0, 100, 0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break; CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break; + CASE_TEST(file_stream); EXPECT_SYSZR(1, test_file_stream()); break; CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break; CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break; CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break; CASE_TEST(directories); EXPECT_SYSZR(proc, test_dirent()); break; + CASE_TEST(getrandom); EXPECT_SYSZR(1, test_getrandom()); break; CASE_TEST(gettimeofday_tv); EXPECT_SYSZR(1, gettimeofday(&tv, NULL)); break; CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break; CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break; @@ -1149,6 +1368,8 @@ int run_syscall(int min, int max) CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break; CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break; CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break; + CASE_TEST(timer); EXPECT_SYSZR(1, test_timer()); break; + CASE_TEST(timerfd); EXPECT_SYSZR(1, test_timerfd()); break; CASE_TEST(uname); EXPECT_SYSZR(proc, test_uname()); break; CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break; CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break; @@ -1160,6 +1381,7 @@ int run_syscall(int min, int max) CASE_TEST(write_zero); EXPECT_SYSZR(1, write(1, &tmp, 0)); break; CASE_TEST(syscall_noargs); EXPECT_SYSEQ(1, syscall(__NR_getpid), getpid()); break; CASE_TEST(syscall_args); EXPECT_SYSER(1, syscall(__NR_statx, 0, NULL, 0, 0, NULL), -1, EFAULT); break; + CASE_TEST(namespace); EXPECT_SYSZR(euid0 && proc, test_namespace()); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ @@ -1168,6 +1390,17 @@ int run_syscall(int min, int max) return ret; } +int test_difftime(void) +{ + if (difftime(200., 100.) != 100.) + return 1; + + if (difftime(100., 200.) != -100.) + return 1; + + return 0; +} + int run_stdlib(int min, int max) { int test; @@ -1211,6 +1444,9 @@ int run_stdlib(int min, int max) CASE_TEST(strlcpy_2); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 2), buf, 3, "b"); break; CASE_TEST(strlcpy_3); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 3), buf, 3, "ba"); break; CASE_TEST(strlcpy_4); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 4), buf, 3, "bar"); break; + CASE_TEST(strstr_foobar_foo); EXPECT_STREQ(1, strstr("foobar", "foo"), "foobar"); break; + CASE_TEST(strstr_foobar_bar); EXPECT_STREQ(1, strstr("foobar", "bar"), "bar"); break; + CASE_TEST(strstr_foobar_baz); EXPECT_PTREQ(1, strstr("foobar", "baz"), NULL); break; CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break; CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break; CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break; @@ -1281,6 +1517,13 @@ int run_stdlib(int min, int max) CASE_TEST(strerror_EINVAL); EXPECT_STREQ(is_nolibc, strerror(EINVAL), "errno=22"); break; CASE_TEST(strerror_int_max); EXPECT_STREQ(is_nolibc, strerror(INT_MAX), "errno=2147483647"); break; CASE_TEST(strerror_int_min); EXPECT_STREQ(is_nolibc, strerror(INT_MIN), "errno=-2147483648"); break; + CASE_TEST(tolower); EXPECT_EQ(1, tolower('A'), 'a'); break; + CASE_TEST(tolower_noop); EXPECT_EQ(1, tolower('a'), 'a'); break; + CASE_TEST(toupper); EXPECT_EQ(1, toupper('a'), 'A'); break; + CASE_TEST(toupper_noop); EXPECT_EQ(1, toupper('A'), 'A'); break; + CASE_TEST(abs); EXPECT_EQ(1, abs(-10), 10); break; + CASE_TEST(abs_noop); EXPECT_EQ(1, abs(10), 10); break; + CASE_TEST(difftime); EXPECT_ZR(1, test_difftime()); break; case __LINE__: return ret; /* must be last */ @@ -1295,27 +1538,15 @@ int run_stdlib(int min, int max) static int expect_vfprintf(int llen, int c, const char *expected, const char *fmt, ...) { - int ret, pipefd[2]; - ssize_t w, r; char buf[100]; - FILE *memfile; va_list args; + ssize_t w; + int ret; - ret = pipe(pipefd); - if (ret == -1) { - llen += printf(" pipe() != %s", strerror(errno)); - result(llen, FAIL); - return 1; - } - - memfile = fdopen(pipefd[1], "w"); - if (!memfile) { - result(llen, FAIL); - return 1; - } va_start(args, fmt); - w = vfprintf(memfile, fmt, args); + /* Only allow writing 21 bytes, to test truncation */ + w = vsnprintf(buf, 21, fmt, args); va_end(args); if (w != c) { @@ -1324,17 +1555,6 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm return 1; } - fclose(memfile); - - r = read(pipefd[0], buf, sizeof(buf) - 1); - - if (r != w) { - llen += printf(" written(%d) != read(%d)", (int)w, (int)r); - result(llen, FAIL); - return 1; - } - - buf[r] = '\0'; llen += printf(" \"%s\" = \"%s\"", expected, buf); ret = strncmp(expected, buf, c); @@ -1409,7 +1629,24 @@ static int test_scanf(void) return 0; } -static int run_vfprintf(int min, int max) +int test_strerror(void) +{ + char buf[100]; + ssize_t ret; + + memset(buf, 'A', sizeof(buf)); + + errno = EINVAL; + ret = snprintf(buf, sizeof(buf), "%m"); + if (is_nolibc) { + if (ret < 6 || memcmp(buf, "errno=", 6)) + return 1; + } + + return 0; +} + +static int run_printf(int min, int max) { int test; int ret = 0; @@ -1430,7 +1667,14 @@ static int run_vfprintf(int min, int max) CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break; CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break; CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break; + CASE_TEST(uintmax_t); EXPECT_VFPRINTF(20, "18446744073709551615", "%ju", 0xffffffffffffffffULL); break; + CASE_TEST(intmax_t); EXPECT_VFPRINTF(20, "-9223372036854775807", "%jd", 0x8000000000000001LL); break; + CASE_TEST(truncation); EXPECT_VFPRINTF(25, "01234567890123456789", "%s", "0123456789012345678901234"); break; + CASE_TEST(string_width); EXPECT_VFPRINTF(10, " 1", "%10s", "1"); break; + CASE_TEST(number_width); EXPECT_VFPRINTF(10, " 1", "%10d", 1); break; + CASE_TEST(width_trunc); EXPECT_VFPRINTF(25, " ", "%25d", 1); break; CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break; + CASE_TEST(strerror); EXPECT_ZR(1, test_strerror()); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ @@ -1439,6 +1683,7 @@ static int run_vfprintf(int min, int max) return ret; } +__attribute__((no_sanitize("undefined"))) static int smash_stack(void) { char buf[100]; @@ -1455,8 +1700,7 @@ static int run_protection(int min __attribute__((unused)), int max __attribute__((unused))) { pid_t pid; - int llen = 0, ret; - siginfo_t siginfo = {}; + int llen = 0, status; struct rlimit rlimit = { 0, 0 }; llen += printf("0 -fstackprotector "); @@ -1494,11 +1738,10 @@ static int run_protection(int min __attribute__((unused)), return 1; default: - ret = waitid(P_PID, pid, &siginfo, WEXITED); + pid = waitpid(pid, &status, 0); - if (ret != 0 || siginfo.si_signo != SIGCHLD || - siginfo.si_code != CLD_KILLED || siginfo.si_status != SIGABRT) { - llen += printf("waitid()"); + if (pid == -1 || !WIFSIGNALED(status) || WTERMSIG(status) != SIGABRT) { + llen += printf("waitpid()"); result(llen, FAIL); return 1; } @@ -1570,7 +1813,7 @@ static const struct test test_names[] = { { .name = "startup", .func = run_startup }, { .name = "syscall", .func = run_syscall }, { .name = "stdlib", .func = run_stdlib }, - { .name = "vfprintf", .func = run_vfprintf }, + { .name = "printf", .func = run_printf }, { .name = "protection", .func = run_protection }, { 0 } }; diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 0299a0912d40..8277599e6441 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -25,6 +25,8 @@ all_archs=( riscv32 riscv64 s390x s390 loongarch + sparc32 sparc64 + m68k ) archs="${all_archs[@]}" @@ -111,6 +113,7 @@ crosstool_arch() { loongarch) echo loongarch64;; mips*) echo mips;; s390*) echo s390;; + sparc*) echo sparc64;; *) echo "$1";; esac } @@ -184,6 +187,10 @@ test_arch() { echo "Unsupported configuration" return fi + if [ "$arch" = "m68k" ] && [ "$llvm" = "1" ]; then + echo "Unsupported configuration" + return + fi mkdir -p "$build_dir" swallow_output "${MAKE[@]}" defconfig diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c index 49dc1e831174..e03fe1b9bba2 100644 --- a/tools/testing/selftests/perf_events/watermark_signal.c +++ b/tools/testing/selftests/perf_events/watermark_signal.c @@ -75,7 +75,7 @@ TEST(watermark_signal) if (waitpid(child, &child_status, WSTOPPED) != child || !(WIFSTOPPED(child_status) && WSTOPSIG(child_status) == SIGSTOP)) { fprintf(stderr, - "failed to sycnhronize with child errno=%d status=%x\n", + "failed to synchronize with child errno=%d status=%x\n", errno, child_status); goto cleanup; diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c index 51c414faabb0..96f274f0582b 100644 --- a/tools/testing/selftests/pid_namespace/pid_max.c +++ b/tools/testing/selftests/pid_namespace/pid_max.c @@ -10,6 +10,7 @@ #include <stdlib.h> #include <string.h> #include <syscall.h> +#include <sys/mount.h> #include <sys/wait.h> #include "../kselftest_harness.h" diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 55bcf81a2b9a..efd74063126e 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -131,6 +131,26 @@ #define PIDFD_INFO_EXIT (1UL << 3) /* Always returned if available, even if not requested */ #endif +#ifndef PIDFD_INFO_COREDUMP +#define PIDFD_INFO_COREDUMP (1UL << 4) +#endif + +#ifndef PIDFD_COREDUMPED +#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */ +#endif + +#ifndef PIDFD_COREDUMP_SKIP +#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */ +#endif + +#ifndef PIDFD_COREDUMP_USER +#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */ +#endif + +#ifndef PIDFD_COREDUMP_ROOT +#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */ +#endif + #ifndef PIDFD_THREAD #define PIDFD_THREAD O_EXCL #endif @@ -150,6 +170,8 @@ struct pidfd_info { __u32 fsuid; __u32 fsgid; __s32 exit_code; + __u32 coredump_mask; + __u32 __spare1; }; /* diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c index 7822dd080258..c094aeb1c620 100644 --- a/tools/testing/selftests/pidfd/pidfd_bind_mount.c +++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c @@ -15,79 +15,7 @@ #include "pidfd.h" #include "../kselftest_harness.h" - -#ifndef __NR_open_tree - #if defined __alpha__ - #define __NR_open_tree 538 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_open_tree 4428 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_open_tree 6428 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_open_tree 5428 - #endif - #elif defined __ia64__ - #define __NR_open_tree (428 + 1024) - #else - #define __NR_open_tree 428 - #endif -#endif - -#ifndef __NR_move_mount - #if defined __alpha__ - #define __NR_move_mount 539 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_move_mount 4429 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_move_mount 6429 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_move_mount 5429 - #endif - #elif defined __ia64__ - #define __NR_move_mount (428 + 1024) - #else - #define __NR_move_mount 429 - #endif -#endif - -#ifndef MOVE_MOUNT_F_EMPTY_PATH -#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ -#endif - -#ifndef MOVE_MOUNT_F_EMPTY_PATH -#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ -#endif - -static inline int sys_move_mount(int from_dfd, const char *from_pathname, - int to_dfd, const char *to_pathname, - unsigned int flags) -{ - return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, - to_pathname, flags); -} - -#ifndef OPEN_TREE_CLONE -#define OPEN_TREE_CLONE 1 -#endif - -#ifndef OPEN_TREE_CLOEXEC -#define OPEN_TREE_CLOEXEC O_CLOEXEC -#endif - -#ifndef AT_RECURSIVE -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ -#endif - -static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) -{ - return syscall(__NR_open_tree, dfd, filename, flags); -} +#include "../filesystems/wrappers.h" FIXTURE(pidfd_bind_mount) { char template[PATH_MAX]; diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c index 1758a1b0457b..a0eb6e81eaa2 100644 --- a/tools/testing/selftests/pidfd/pidfd_info_test.c +++ b/tools/testing/selftests/pidfd/pidfd_info_test.c @@ -299,6 +299,7 @@ TEST_F(pidfd_info, thread_group) /* Opening a thread as a thread-group leader must fail. */ pidfd_thread = sys_pidfd_open(pid_thread, 0); ASSERT_LT(pidfd_thread, 0); + ASSERT_EQ(errno, ENOENT); /* Opening a thread as a PIDFD_THREAD must succeed. */ pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD); @@ -362,9 +363,9 @@ TEST_F(pidfd_info, thread_group) ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0); ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); - /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */ - ASSERT_TRUE(WIFEXITED(info.exit_code)); - ASSERT_EQ(WEXITSTATUS(info.exit_code), 0); + /* Even though the thread-group exited successfully it will still report the group exit code. */ + ASSERT_TRUE(WIFSIGNALED(info.exit_code)); + ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL); /* * Retrieve exit information for the thread-group leader via the @@ -375,9 +376,9 @@ TEST_F(pidfd_info, thread_group) ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_CREDS)); ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_EXIT)); - /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */ - ASSERT_TRUE(WIFEXITED(info2.exit_code)); - ASSERT_EQ(WEXITSTATUS(info2.exit_code), 0); + /* Even though the thread-group exited successfully it will still report the group exit code. */ + ASSERT_TRUE(WIFSIGNALED(info2.exit_code)); + ASSERT_EQ(WTERMSIG(info2.exit_code), SIGKILL); /* Retrieve exit information for the thread. */ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh index aad51e7c0183..991fb11306eb 100755 --- a/tools/testing/selftests/rcutorture/bin/console-badness.sh +++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh @@ -10,7 +10,7 @@ # # Authors: Paul E. McKenney <paulmck@kernel.org> -grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | +grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Call trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | grep -v 'ODEBUG: ' | grep -v 'This means that this is a DEBUG kernel and it is' | grep -v 'Warning: unable to open an initial console' | diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index ad79784e552d..957800c9ffba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -73,7 +73,7 @@ config_override_param "$config_dir/CFcommon.$(uname -m)" KcList \ cp $T/KcList $resdir/ConfigFragment base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` -if test "$base_resdir" != "$resdir" && test -f $base_resdir/bzImage && test -f $base_resdir/vmlinux +if test "$base_resdir" != "$resdir" && (test -f $base_resdir/bzImage || test -f $base_resdir/Image) && test -f $base_resdir/vmlinux then # Rerunning previous test, so use that test's kernel. QEMU="`identify_qemu $base_resdir/vmlinux`" diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index b07c11cf6929..21e6ba3615f6 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -148,7 +148,7 @@ then summary="$summary KCSAN: $n_kcsan" fi fi - n_calltrace=`grep -c 'Call Trace:' $file` + n_calltrace=`grep -Ec 'Call Trace:|Call trace:' $file` if test "$n_calltrace" -ne 0 then summary="$summary Call Traces: $n_calltrace" diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh index 2db12c5cad9c..208be7d09a61 100755 --- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh +++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh @@ -39,8 +39,9 @@ do shift done -err= nerrs=0 + +# Test lockdep's handling of deadlocks. for d in 0 1 do for t in 0 1 2 @@ -52,6 +53,12 @@ do tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 ret=$? mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val" + if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config + then + echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario" + nerrs=$((nerrs+1)) + err=1 + fi if test "$d" -ne 0 && test "$ret" -eq 0 then err=1 @@ -71,6 +78,39 @@ do done done done + +# Test lockdep-enabled testing of mixed SRCU readers. +for val in 0x1 0xf +do + err= + tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.reader_flavor=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 + ret=$? + mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val" + if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config + then + echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario" + nerrs=$((nerrs+1)) + err=1 + fi + if test "$val" -eq 0xf && test "$ret" -eq 0 + then + err=1 + echo -n Unexpected success for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + fi + if test "$val" -eq 0x1 && test "$ret" -ne 0 + then + err=1 + echo -n Unexpected failure for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + fi + if test -n "$err" + then + grep "rcu_torture_init_srcu_lockdep: test_srcu_lockdep = " "$RCUTORTURE/res/$ds/$val/SRCU-P/console.log" | sed -e 's/^.*rcu_torture_init_srcu_lockdep://' >> "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + cat "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + nerrs=$((nerrs+1)) + fi +done + +# Set up exit code. if test "$nerrs" -ne 0 then exit 1 diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 0447c4a00cc4..e03fdaca89b3 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -51,12 +51,15 @@ do_scftorture=yes do_rcuscale=yes do_refscale=yes do_kvfree=yes +do_normal=yes +explicit_normal=no do_kasan=yes do_kcsan=no do_clocksourcewd=yes do_rt=yes do_rcutasksflavors=yes do_srcu_lockdep=yes +do_rcu_rust=no # doyesno - Helper function for yes/no arguments function doyesno () { @@ -87,6 +90,7 @@ usage () { echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture" echo " --do-refscale / --do-no-refscale / --no-refscale" echo " --do-rt / --do-no-rt / --no-rt" + echo " --do-rcu-rust / --do-no-rcu-rust / --no-rcu-rust" echo " --do-scftorture / --do-no-scftorture / --no-scftorture" echo " --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep" echo " --duration [ <minutes> | <hours>h | <days>d ]" @@ -128,6 +132,8 @@ do do_refscale=yes do_rt=yes do_kvfree=yes + do_normal=yes + explicit_normal=no do_kasan=yes do_kcsan=yes do_clocksourcewd=yes @@ -161,11 +167,17 @@ do do_refscale=no do_rt=no do_kvfree=no + do_normal=no + explicit_normal=no do_kasan=no do_kcsan=no do_clocksourcewd=no do_srcu_lockdep=no ;; + --do-normal|--do-no-normal|--no-normal) + do_normal=`doyesno "$1" --do-normal` + explicit_normal=yes + ;; --do-rcuscale|--do-no-rcuscale|--no-rcuscale) do_rcuscale=`doyesno "$1" --do-rcuscale` ;; @@ -181,6 +193,9 @@ do --do-rt|--do-no-rt|--no-rt) do_rt=`doyesno "$1" --do-rt` ;; + --do-rcu-rust|--do-no-rcu-rust|--no-rcu-rust) + do_rcu_rust=`doyesno "$1" --do-rcu-rust` + ;; --do-scftorture|--do-no-scftorture|--no-scftorture) do_scftorture=`doyesno "$1" --do-scftorture` ;; @@ -242,6 +257,17 @@ trap 'rm -rf $T' 0 2 echo " --- " $scriptname $args | tee -a $T/log echo " --- Results directory: " $ds | tee -a $T/log +if test "$do_normal" = "no" && test "$do_kasan" = "no" && test "$do_kcsan" = "no" +then + # Match old scripts so that "--do-none --do-rcutorture" does + # normal rcutorture testing, but no KASAN or KCSAN testing. + if test $explicit_normal = yes + then + echo " --- Everything disabled, so explicit --do-normal overridden" | tee -a $T/log + fi + do_normal=yes +fi + # Calculate rcutorture defaults and apportion time if test -z "$configs_rcutorture" then @@ -332,9 +358,12 @@ function torture_set { local kcsan_kmake_tag= local flavor=$1 shift - curflavor=$flavor - torture_one "$@" - mv $T/last-resdir $T/last-resdir-nodebug || : + if test "$do_normal" = "yes" + then + curflavor=$flavor + torture_one "$@" + mv $T/last-resdir $T/last-resdir-nodebug || : + fi if test "$do_kasan" = "yes" then curflavor=${flavor}-kasan @@ -448,13 +477,57 @@ fi if test "$do_rt" = "yes" then - # With all post-boot grace periods forced to normal. - torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1" - torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make + # In both runs, disable testing of RCU priority boosting because + # -rt doesn't like its interaction with testing of callback + # flooding. + + # With all post-boot grace periods forced to normal (default for PREEMPT_RT). + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcutorture.preempt_duration=0" + torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_RCU_NOCB_CPU=y" --trust-make # With all post-boot grace periods forced to expedited. - torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1" - torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcupdate.rcu_normal_after_boot=0 rcupdate.rcu_expedited=1 rcutorture.preempt_duration=0" + torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_FULL=y CONFIG_RCU_NOCB_CPU=y" --trust-make +fi + +if test "$do_rcu_rust" = "yes" +then + echo " --- do-rcu-rust:" Start `date` | tee -a $T/log + rrdir="tools/testing/selftests/rcutorture/res/$ds/results-rcu-rust" + mkdir -p "$rrdir" + echo " --- make LLVM=1 rustavailable " | tee -a $rrdir/log > $rrdir/rustavailable.out + make LLVM=1 rustavailable > $T/rustavailable.out 2>&1 + retcode=$? + echo $retcode > $rrdir/rustavailable.exitcode + cat $T/rustavailable.out | tee -a $rrdir/log >> $rrdir/rustavailable.out 2>&1 + buildphase=rustavailable + if test "$retcode" -eq 0 + then + echo " --- Running 'make mrproper' in order to run kunit." | tee -a $rrdir/log > $rrdir/mrproper.out + make mrproper > $rrdir/mrproper.out 2>&1 + retcode=$? + echo $retcode > $rrdir/mrproper.exitcode + buildphase=mrproper + fi + if test "$retcode" -eq 0 + then + echo " --- Running rust_doctests_kernel." | tee -a $rrdir/log > $rrdir/rust_doctests_kernel.out + ./tools/testing/kunit/kunit.py run --make_options LLVM=1 --make_options CLIPPY=1 --arch arm64 --kconfig_add CONFIG_SMP=y --kconfig_add CONFIG_WERROR=y --kconfig_add CONFIG_RUST=y rust_doctests_kernel >> $rrdir/rust_doctests_kernel.out 2>&1 + # @@@ Remove "--arch arm64" in order to test on native architecture? + # @@@ Analyze $rrdir/rust_doctests_kernel.out contents? + retcode=$? + echo $retcode > $rrdir/rust_doctests_kernel.exitcode + buildphase=rust_doctests_kernel + fi + if test "$retcode" -eq 0 + then + echo "rcu-rust($retcode)" $rrdir >> $T/successes + echo Success >> $rrdir/log + else + echo "rcu-rust($retcode)" $rrdir >> $T/failures + echo " --- rcu-rust Test summary:" >> $rrdir/log + echo " --- Summary: Exit code $retcode from $buildphase, see $rrdir/$buildphase.out" >> $rrdir/log + fi fi if test "$do_srcu_lockdep" = "yes" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index 8ae41d5f81a3..54b1600c7eb5 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -8,8 +8,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y -CONFIG_MAXSMP=y -CONFIG_CPUMASK_OFFSTACK=y CONFIG_RCU_NOCB_CPU=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot index 40af3df0f397..1cc5b47dde28 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot @@ -1,4 +1,4 @@ -maxcpus=8 nr_cpus=43 +maxcpus=8 nr_cpus=17 rcutree.gp_preinit_delay=3 rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index 50e03eefe7ac..0443beacf362 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -3,7 +3,14 @@ # # Run installed kselftest tests. # -BASE_DIR=$(realpath $(dirname $0)) + +# Fallback to readlink if realpath is not available +if which realpath > /dev/null; then + BASE_DIR=$(realpath $(dirname $0)) +else + BASE_DIR=$(readlink -f $(dirname $0)) +fi + cd $BASE_DIR TESTS="$BASE_DIR"/kselftest-list.txt if [ ! -r "$TESTS" ] ; then diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index f4531327b8e7..9d9d6b4c38b0 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -162,10 +162,10 @@ all_test_bpfprogs := $(foreach prog,$(wildcard *.bpf.c),$(INCLUDE_DIR)/$(patsubs auto-test-targets := \ create_dsq \ enq_last_no_enq_fails \ - enq_select_cpu_fails \ ddsp_bogus_dsq_fail \ ddsp_vtimelocal_fail \ dsp_local_on \ + enq_select_cpu \ exit \ hotplug \ init_enable_count \ @@ -173,6 +173,7 @@ auto-test-targets := \ maybe_null \ minimal \ numa \ + allowed_cpus \ prog_run \ reload_loop \ select_cpu_dfl \ diff --git a/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c b/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c new file mode 100644 index 000000000000..35923e74a2ec --- /dev/null +++ b/tools/testing/selftests/sched_ext/allowed_cpus.bpf.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A scheduler that validates the behavior of scx_bpf_select_cpu_and() by + * selecting idle CPUs strictly within a subset of allowed CPUs. + * + * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com> + */ + +#include <scx/common.bpf.h> + +char _license[] SEC("license") = "GPL"; + +UEI_DEFINE(uei); + +private(PREF_CPUS) struct bpf_cpumask __kptr * allowed_cpumask; + +static void +validate_idle_cpu(const struct task_struct *p, const struct cpumask *allowed, s32 cpu) +{ + if (scx_bpf_test_and_clear_cpu_idle(cpu)) + scx_bpf_error("CPU %d should be marked as busy", cpu); + + if (bpf_cpumask_subset(allowed, p->cpus_ptr) && + !bpf_cpumask_test_cpu(cpu, allowed)) + scx_bpf_error("CPU %d not in the allowed domain for %d (%s)", + cpu, p->pid, p->comm); +} + +s32 BPF_STRUCT_OPS(allowed_cpus_select_cpu, + struct task_struct *p, s32 prev_cpu, u64 wake_flags) +{ + const struct cpumask *allowed; + s32 cpu; + + allowed = cast_mask(allowed_cpumask); + if (!allowed) { + scx_bpf_error("allowed domain not initialized"); + return -EINVAL; + } + + /* + * Select an idle CPU strictly within the allowed domain. + */ + cpu = scx_bpf_select_cpu_and(p, prev_cpu, wake_flags, allowed, 0); + if (cpu >= 0) { + validate_idle_cpu(p, allowed, cpu); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); + + return cpu; + } + + return prev_cpu; +} + +void BPF_STRUCT_OPS(allowed_cpus_enqueue, struct task_struct *p, u64 enq_flags) +{ + const struct cpumask *allowed; + s32 prev_cpu = scx_bpf_task_cpu(p), cpu; + + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); + + allowed = cast_mask(allowed_cpumask); + if (!allowed) { + scx_bpf_error("allowed domain not initialized"); + return; + } + + /* + * Use scx_bpf_select_cpu_and() to proactively kick an idle CPU + * within @allowed_cpumask, usable by @p. + */ + cpu = scx_bpf_select_cpu_and(p, prev_cpu, 0, allowed, 0); + if (cpu >= 0) { + validate_idle_cpu(p, allowed, cpu); + scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE); + } +} + +s32 BPF_STRUCT_OPS_SLEEPABLE(allowed_cpus_init) +{ + struct bpf_cpumask *mask; + + mask = bpf_cpumask_create(); + if (!mask) + return -ENOMEM; + + mask = bpf_kptr_xchg(&allowed_cpumask, mask); + if (mask) + bpf_cpumask_release(mask); + + bpf_rcu_read_lock(); + + /* + * Assign the first online CPU to the allowed domain. + */ + mask = allowed_cpumask; + if (mask) { + const struct cpumask *online = scx_bpf_get_online_cpumask(); + + bpf_cpumask_set_cpu(bpf_cpumask_first(online), mask); + scx_bpf_put_cpumask(online); + } + + bpf_rcu_read_unlock(); + + return 0; +} + +void BPF_STRUCT_OPS(allowed_cpus_exit, struct scx_exit_info *ei) +{ + UEI_RECORD(uei, ei); +} + +struct task_cpu_arg { + pid_t pid; +}; + +SEC("syscall") +int select_cpu_from_user(struct task_cpu_arg *input) +{ + struct task_struct *p; + int cpu; + + p = bpf_task_from_pid(input->pid); + if (!p) + return -EINVAL; + + bpf_rcu_read_lock(); + cpu = scx_bpf_select_cpu_and(p, bpf_get_smp_processor_id(), 0, p->cpus_ptr, 0); + bpf_rcu_read_unlock(); + + bpf_task_release(p); + + return cpu; +} + +SEC(".struct_ops.link") +struct sched_ext_ops allowed_cpus_ops = { + .select_cpu = (void *)allowed_cpus_select_cpu, + .enqueue = (void *)allowed_cpus_enqueue, + .init = (void *)allowed_cpus_init, + .exit = (void *)allowed_cpus_exit, + .name = "allowed_cpus", +}; diff --git a/tools/testing/selftests/sched_ext/allowed_cpus.c b/tools/testing/selftests/sched_ext/allowed_cpus.c new file mode 100644 index 000000000000..093f285ab4ba --- /dev/null +++ b/tools/testing/selftests/sched_ext/allowed_cpus.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com> + */ +#include <bpf/bpf.h> +#include <scx/common.h> +#include <sys/wait.h> +#include <unistd.h> +#include "allowed_cpus.bpf.skel.h" +#include "scx_test.h" + +static enum scx_test_status setup(void **ctx) +{ + struct allowed_cpus *skel; + + skel = allowed_cpus__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(allowed_cpus__load(skel), "Failed to load skel"); + + *ctx = skel; + + return SCX_TEST_PASS; +} + +static int test_select_cpu_from_user(const struct allowed_cpus *skel) +{ + int fd, ret; + __u64 args[1]; + + LIBBPF_OPTS(bpf_test_run_opts, attr, + .ctx_in = args, + .ctx_size_in = sizeof(args), + ); + + args[0] = getpid(); + fd = bpf_program__fd(skel->progs.select_cpu_from_user); + if (fd < 0) + return fd; + + ret = bpf_prog_test_run_opts(fd, &attr); + if (ret < 0) + return ret; + + fprintf(stderr, "%s: CPU %d\n", __func__, attr.retval); + + return 0; +} + +static enum scx_test_status run(void *ctx) +{ + struct allowed_cpus *skel = ctx; + struct bpf_link *link; + + link = bpf_map__attach_struct_ops(skel->maps.allowed_cpus_ops); + SCX_FAIL_IF(!link, "Failed to attach scheduler"); + + /* Pick an idle CPU from user-space */ + SCX_FAIL_IF(test_select_cpu_from_user(skel), "Failed to pick idle CPU"); + + /* Just sleeping is fine, plenty of scheduling events happening */ + sleep(1); + + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE)); + bpf_link__destroy(link); + + return SCX_TEST_PASS; +} + +static void cleanup(void *ctx) +{ + struct allowed_cpus *skel = ctx; + + allowed_cpus__destroy(skel); +} + +struct scx_test allowed_cpus = { + .name = "allowed_cpus", + .description = "Verify scx_bpf_select_cpu_and()", + .setup = setup, + .run = run, + .cleanup = cleanup, +}; +REGISTER_SCX_TEST(&allowed_cpus) diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c new file mode 100644 index 000000000000..ee2c9b89716e --- /dev/null +++ b/tools/testing/selftests/sched_ext/enq_select_cpu.bpf.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 David Vernet <dvernet@meta.com> + * Copyright (c) 2023 Tejun Heo <tj@kernel.org> + */ + +#include <scx/common.bpf.h> + +char _license[] SEC("license") = "GPL"; + +UEI_DEFINE(uei); + +s32 BPF_STRUCT_OPS(enq_select_cpu_select_cpu, struct task_struct *p, + s32 prev_cpu, u64 wake_flags) +{ + /* Bounce all tasks to ops.enqueue() */ + return prev_cpu; +} + +void BPF_STRUCT_OPS(enq_select_cpu_enqueue, struct task_struct *p, + u64 enq_flags) +{ + s32 cpu, prev_cpu = scx_bpf_task_cpu(p); + bool found = false; + + cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, 0, &found); + if (found) { + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_DFL, enq_flags); + return; + } + + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); +} + +void BPF_STRUCT_OPS(enq_select_cpu_exit, struct scx_exit_info *ei) +{ + UEI_RECORD(uei, ei); +} + +struct task_cpu_arg { + pid_t pid; +}; + +SEC("syscall") +int select_cpu_from_user(struct task_cpu_arg *input) +{ + struct task_struct *p; + bool found = false; + s32 cpu; + + p = bpf_task_from_pid(input->pid); + if (!p) + return -EINVAL; + + bpf_rcu_read_lock(); + cpu = scx_bpf_select_cpu_dfl(p, bpf_get_smp_processor_id(), 0, &found); + if (!found) + cpu = -EBUSY; + bpf_rcu_read_unlock(); + + bpf_task_release(p); + + return cpu; +} + +SEC(".struct_ops.link") +struct sched_ext_ops enq_select_cpu_ops = { + .select_cpu = (void *)enq_select_cpu_select_cpu, + .enqueue = (void *)enq_select_cpu_enqueue, + .exit = (void *)enq_select_cpu_exit, + .name = "enq_select_cpu", + .timeout_ms = 1000U, +}; diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu.c b/tools/testing/selftests/sched_ext/enq_select_cpu.c new file mode 100644 index 000000000000..340c6f8b86da --- /dev/null +++ b/tools/testing/selftests/sched_ext/enq_select_cpu.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 David Vernet <dvernet@meta.com> + * Copyright (c) 2023 Tejun Heo <tj@kernel.org> + */ +#include <bpf/bpf.h> +#include <scx/common.h> +#include <sys/wait.h> +#include <unistd.h> +#include "enq_select_cpu.bpf.skel.h" +#include "scx_test.h" + +static enum scx_test_status setup(void **ctx) +{ + struct enq_select_cpu *skel; + + skel = enq_select_cpu__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(enq_select_cpu__load(skel), "Failed to load skel"); + + *ctx = skel; + + return SCX_TEST_PASS; +} + +static int test_select_cpu_from_user(const struct enq_select_cpu *skel) +{ + int fd, ret; + __u64 args[1]; + + LIBBPF_OPTS(bpf_test_run_opts, attr, + .ctx_in = args, + .ctx_size_in = sizeof(args), + ); + + args[0] = getpid(); + fd = bpf_program__fd(skel->progs.select_cpu_from_user); + if (fd < 0) + return fd; + + ret = bpf_prog_test_run_opts(fd, &attr); + if (ret < 0) + return ret; + + fprintf(stderr, "%s: CPU %d\n", __func__, attr.retval); + + return 0; +} + +static enum scx_test_status run(void *ctx) +{ + struct enq_select_cpu *skel = ctx; + struct bpf_link *link; + + link = bpf_map__attach_struct_ops(skel->maps.enq_select_cpu_ops); + if (!link) { + SCX_ERR("Failed to attach scheduler"); + return SCX_TEST_FAIL; + } + + /* Pick an idle CPU from user-space */ + SCX_FAIL_IF(test_select_cpu_from_user(skel), "Failed to pick idle CPU"); + + sleep(1); + + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE)); + bpf_link__destroy(link); + + return SCX_TEST_PASS; +} + +static void cleanup(void *ctx) +{ + struct enq_select_cpu *skel = ctx; + + enq_select_cpu__destroy(skel); +} + +struct scx_test enq_select_cpu = { + .name = "enq_select_cpu", + .description = "Verify scx_bpf_select_cpu_dfl() from multiple contexts", + .setup = setup, + .run = run, + .cleanup = cleanup, +}; +REGISTER_SCX_TEST(&enq_select_cpu) diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c deleted file mode 100644 index a7cf868d5e31..000000000000 --- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. - * Copyright (c) 2023 David Vernet <dvernet@meta.com> - * Copyright (c) 2023 Tejun Heo <tj@kernel.org> - */ - -#include <scx/common.bpf.h> - -char _license[] SEC("license") = "GPL"; - -/* Manually specify the signature until the kfunc is added to the scx repo. */ -s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, - bool *found) __ksym; - -s32 BPF_STRUCT_OPS(enq_select_cpu_fails_select_cpu, struct task_struct *p, - s32 prev_cpu, u64 wake_flags) -{ - return prev_cpu; -} - -void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p, - u64 enq_flags) -{ - /* - * Need to initialize the variable or the verifier will fail to load. - * Improving these semantics is actively being worked on. - */ - bool found = false; - - /* Can only call from ops.select_cpu() */ - scx_bpf_select_cpu_dfl(p, 0, 0, &found); - - scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); -} - -SEC(".struct_ops.link") -struct sched_ext_ops enq_select_cpu_fails_ops = { - .select_cpu = (void *) enq_select_cpu_fails_select_cpu, - .enqueue = (void *) enq_select_cpu_fails_enqueue, - .name = "enq_select_cpu_fails", - .timeout_ms = 1000U, -}; diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c deleted file mode 100644 index a80e3a3b3698..000000000000 --- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c +++ /dev/null @@ -1,61 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. - * Copyright (c) 2023 David Vernet <dvernet@meta.com> - * Copyright (c) 2023 Tejun Heo <tj@kernel.org> - */ -#include <bpf/bpf.h> -#include <scx/common.h> -#include <sys/wait.h> -#include <unistd.h> -#include "enq_select_cpu_fails.bpf.skel.h" -#include "scx_test.h" - -static enum scx_test_status setup(void **ctx) -{ - struct enq_select_cpu_fails *skel; - - skel = enq_select_cpu_fails__open(); - SCX_FAIL_IF(!skel, "Failed to open"); - SCX_ENUM_INIT(skel); - SCX_FAIL_IF(enq_select_cpu_fails__load(skel), "Failed to load skel"); - - *ctx = skel; - - return SCX_TEST_PASS; -} - -static enum scx_test_status run(void *ctx) -{ - struct enq_select_cpu_fails *skel = ctx; - struct bpf_link *link; - - link = bpf_map__attach_struct_ops(skel->maps.enq_select_cpu_fails_ops); - if (!link) { - SCX_ERR("Failed to attach scheduler"); - return SCX_TEST_FAIL; - } - - sleep(1); - - bpf_link__destroy(link); - - return SCX_TEST_PASS; -} - -static void cleanup(void *ctx) -{ - struct enq_select_cpu_fails *skel = ctx; - - enq_select_cpu_fails__destroy(skel); -} - -struct scx_test enq_select_cpu_fails = { - .name = "enq_select_cpu_fails", - .description = "Verify we fail to call scx_bpf_select_cpu_dfl() " - "from ops.enqueue()", - .setup = setup, - .run = run, - .cleanup = cleanup, -}; -REGISTER_SCX_TEST(&enq_select_cpu_fails) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 94886c82ae60..5822e25e0217 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -1,6 +1,6 @@ /* * Strictly speaking, this is not a test. But it can report during test - * runs so relative performace can be measured. + * runs so relative performance can be measured. */ #define _GNU_SOURCE #include <assert.h> diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index b2f76a52215a..61acbd45ffaa 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1629,14 +1629,8 @@ void teardown_trace_fixture(struct __test_metadata *_metadata, { if (tracer) { int status; - /* - * Extract the exit code from the other process and - * adopt it for ourselves in case its asserts failed. - */ ASSERT_EQ(0, kill(tracer, SIGUSR1)); ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); - if (WEXITSTATUS(status)) - _metadata->exit_code = KSFT_FAIL; } } @@ -3166,12 +3160,15 @@ TEST(syscall_restart) ret = get_syscall(_metadata, child_pid); #if defined(__arm__) /* - * FIXME: * - native ARM registers do NOT expose true syscall. * - compat ARM registers on ARM64 DO expose true syscall. + * - values of utsbuf.machine include 'armv8l' or 'armb8b' + * for ARM64 running in compat mode. */ ASSERT_EQ(0, uname(&utsbuf)); - if (strncmp(utsbuf.machine, "arm", 3) == 0) { + if ((strncmp(utsbuf.machine, "arm", 3) == 0) && + (strncmp(utsbuf.machine, "armv8l", 6) != 0) && + (strncmp(utsbuf.machine, "armv8b", 6) != 0)) { EXPECT_EQ(__NR_nanosleep, ret); } else #endif diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index db1616857d89..a10350c8a46e 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -36,6 +36,7 @@ ALL_TESTS="$ALL_TESTS 0008:1:1:match_int:1" ALL_TESTS="$ALL_TESTS 0009:1:1:unregister_error:0" ALL_TESTS="$ALL_TESTS 0010:1:1:mnt/mnt_error:0" ALL_TESTS="$ALL_TESTS 0011:1:1:empty_add:0" +ALL_TESTS="$ALL_TESTS 0012:1:1:u8_valid:0" function allow_user_defaults() { @@ -851,6 +852,34 @@ sysctl_test_0011() return 0 } +sysctl_test_0012() +{ + TARGET="${SYSCTL}/$(get_test_target 0012)" + echo -n "Testing u8 range check in sysctl table check in ${TARGET} ... " + if [ ! -f ${TARGET} ]; then + echo -e "FAIL\nCould not create ${TARGET}" >&2 + rc=1 + test_rc + fi + + local u8over_msg=$(dmesg | grep "u8_over range value" | wc -l) + if [ ! ${u8over_msg} -eq 1 ]; then + echo -e "FAIL\nu8 overflow not detected" >&2 + rc=1 + test_rc + fi + + local u8under_msg=$(dmesg | grep "u8_under range value" | wc -l) + if [ ! ${u8under_msg} -eq 1 ]; then + echo -e "FAIL\nu8 underflow not detected" >&2 + rc=1 + test_rc + fi + + echo "OK" + return 0 +} + list_tests() { echo "Test ID list:" @@ -870,6 +899,7 @@ list_tests() echo "0009 x $(get_test_count 0009) - tests sysct unregister" echo "0010 x $(get_test_count 0010) - tests sysct mount point" echo "0011 x $(get_test_count 0011) - tests empty directories" + echo "0012 x $(get_test_count 0012) - tests range check for u8 proc_handler" } usage() diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index a951c0d33cd2..9aa44d8176d9 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -573,5 +573,67 @@ "teardown": [ "$TC qdisc del dev $DEV1 handle 1: root" ] + }, + { + "id": "831d", + "name": "Test HFSC qlen accounting with DRR/NETEM/BLACKHOLE chain", + "category": ["qdisc", "hfsc", "drr", "netem", "blackhole"], + "plugins": { "requires": ["nsPlugin", "scapyPlugin"] }, + "setup": [ + "$IP link set dev $DEV1 up || true", + "$TC qdisc add dev $DEV1 root handle 1: drr", + "$TC filter add dev $DEV1 parent 1: basic classid 1:1", + "$TC class add dev $DEV1 parent 1: classid 1:1 drr", + "$TC qdisc add dev $DEV1 parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DEV1 parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DEV1 parent 2:1 handle 3: netem", + "$TC qdisc add dev $DEV1 parent 3:1 handle 4: blackhole" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1", + "matchPattern": "qdisc hfsc", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 root handle 1: drr"] + }, + { + "id": "309e", + "name": "Test HFSC eltree double add with reentrant enqueue behaviour on netem", + "category": [ + "qdisc", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 1s", + "$TC qdisc add dev $DUMMY parent 1:0 handle 2:0 hfsc", + "ping -I $DUMMY -f -c10 -s48 -W0.001 10.10.11.1 || true", + "$TC class add dev $DUMMY parent 2:0 classid 2:1 hfsc rt m2 20Kbit", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%", + "$TC class add dev $DUMMY parent 2:0 classid 2:2 hfsc rt m2 20Kbit", + "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip dst 10.10.11.2/32 flowid 2:1", + "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 2 u32 match ip dst 10.10.11.3/32 flowid 2:2", + "ping -c 1 10.10.11.2 -I$DUMMY > /dev/null || true", + "$TC filter del dev $DUMMY parent 2:0 protocol ip prio 1", + "$TC class del dev $DUMMY classid 2:1", + "ping -c 1 10.10.11.3 -I$DUMMY > /dev/null || true" + ], + "cmdUnderTest": "$TC class change dev $DUMMY parent 2:0 classid 2:2 hfsc sc m2 20Kbit", + "expExitCode": "0", + "verifyCmd": "$TC -j class ls dev $DUMMY classid 2:1", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json index e9469ee71e6f..6d515d0e5ed6 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json @@ -189,5 +189,29 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "deb1", + "name": "CODEL test qdisc limit trimming", + "category": ["qdisc", "codel"], + "plugins": { + "requires": ["nsPlugin", "scapyPlugin"] + }, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root codel limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root codel limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1p target 5ms interval 100ms", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json index 3a537b2ec4c9..24faf4e12dfa 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json @@ -377,5 +377,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "9479", + "name": "FQ test qdisc limit trimming", + "category": ["qdisc", "fq"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json index 9774b1e8801b..4ce62b857fd7 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json @@ -294,5 +294,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "0436", + "name": "FQ_CODEL test qdisc limit trimming", + "category": ["qdisc", "fq_codel"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq_codel limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_codel limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json index d012d88d67fe..229fe1bf4a90 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json @@ -18,5 +18,27 @@ "matchCount": "1", "teardown": [ ] + }, + { + "id": "83bf", + "name": "FQ_PIE test qdisc limit trimming", + "category": ["qdisc", "fq_pie"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq_pie limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_pie limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq_pie 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json index dbef5474b26b..0ca19fac54a5 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json @@ -188,5 +188,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "385f", + "name": "HHF test qdisc limit trimming", + "category": ["qdisc", "hhf"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root hhf limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root hhf limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json new file mode 100644 index 000000000000..1a98b66e8030 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json @@ -0,0 +1,24 @@ +[ + { + "id": "6158", + "name": "PIE test qdisc limit trimming", + "category": ["qdisc", "pie"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root pie limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root pie limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc pie 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] + } +] diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index cddff1772e10..589b18ed758a 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -31,6 +31,10 @@ try_modprobe act_skbedit try_modprobe act_skbmod try_modprobe act_tunnel_key try_modprobe act_vlan +try_modprobe act_ife +try_modprobe act_meta_mark +try_modprobe act_meta_skbtcindex +try_modprobe act_meta_skbprio try_modprobe cls_basic try_modprobe cls_bpf try_modprobe cls_cgroup diff --git a/tools/testing/selftests/timens/clock_nanosleep.c b/tools/testing/selftests/timens/clock_nanosleep.c index 72d41b955fb2..5cc0010e85ff 100644 --- a/tools/testing/selftests/timens/clock_nanosleep.c +++ b/tools/testing/selftests/timens/clock_nanosleep.c @@ -38,7 +38,7 @@ void *call_nanosleep(void *_args) return NULL; } -int run_test(int clockid, int abs) +static int run_test(int clockid, int abs) { struct timespec now = {}, rem; struct thread_args args = { .now = &now, .rem = &rem, .clockid = clockid}; @@ -115,6 +115,8 @@ int main(int argc, char *argv[]) { int ret, nsfd; + ksft_print_header(); + nscheck(); ksft_set_plan(4); diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c index d12ff955de0d..a644162d56fd 100644 --- a/tools/testing/selftests/timens/exec.c +++ b/tools/testing/selftests/timens/exec.c @@ -36,6 +36,8 @@ int main(int argc, char *argv[]) return 0; } + ksft_print_header(); + nscheck(); ksft_set_plan(1); diff --git a/tools/testing/selftests/timens/futex.c b/tools/testing/selftests/timens/futex.c index 6b2b9264e851..339633ae037a 100644 --- a/tools/testing/selftests/timens/futex.c +++ b/tools/testing/selftests/timens/futex.c @@ -66,6 +66,8 @@ int main(int argc, char *argv[]) pid_t pid; struct timespec mtime_now; + ksft_print_header(); + nscheck(); ksft_set_plan(2); diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c index 6b13dc277724..d6658b7b7548 100644 --- a/tools/testing/selftests/timens/gettime_perf.c +++ b/tools/testing/selftests/timens/gettime_perf.c @@ -67,6 +67,8 @@ int main(int argc, char *argv[]) time_t offset = 10; int nsfd; + ksft_print_header(); + ksft_set_plan(8); fill_function_pointers(); diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c index 1833ca97eb24..0a9ff90ee69a 100644 --- a/tools/testing/selftests/timens/procfs.c +++ b/tools/testing/selftests/timens/procfs.c @@ -180,6 +180,8 @@ int main(int argc, char *argv[]) { int ret = 0; + ksft_print_header(); + nscheck(); ksft_set_plan(2); diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c index 387220791a05..a9c0534ef8f6 100644 --- a/tools/testing/selftests/timens/timens.c +++ b/tools/testing/selftests/timens/timens.c @@ -151,6 +151,8 @@ int main(int argc, char *argv[]) time_t offset; int ret = 0; + ksft_print_header(); + nscheck(); check_supported_timers(); diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c index 5b939f59dfa4..79543ceb2c0f 100644 --- a/tools/testing/selftests/timens/timer.c +++ b/tools/testing/selftests/timens/timer.c @@ -15,7 +15,7 @@ #include "log.h" #include "timens.h" -int run_test(int clockid, struct timespec now) +static int run_test(int clockid, struct timespec now) { struct itimerspec new_value; long long elapsed; @@ -75,6 +75,8 @@ int main(int argc, char *argv[]) pid_t pid; struct timespec btime_now, mtime_now; + ksft_print_header(); + nscheck(); check_supported_timers(); diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c index a4196bbd6e33..402e2e415545 100644 --- a/tools/testing/selftests/timens/timerfd.c +++ b/tools/testing/selftests/timens/timerfd.c @@ -15,14 +15,14 @@ #include "log.h" #include "timens.h" -static int tclock_gettime(clock_t clockid, struct timespec *now) +static int tclock_gettime(clockid_t clockid, struct timespec *now) { if (clockid == CLOCK_BOOTTIME_ALARM) clockid = CLOCK_BOOTTIME; return clock_gettime(clockid, now); } -int run_test(int clockid, struct timespec now) +static int run_test(int clockid, struct timespec now) { struct itimerspec new_value; long long elapsed; @@ -82,6 +82,8 @@ int main(int argc, char *argv[]) pid_t pid; struct timespec btime_now, mtime_now; + ksft_print_header(); + nscheck(); check_supported_timers(); diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c index 5b8907bf451d..b957e1a65124 100644 --- a/tools/testing/selftests/timens/vfork_exec.c +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -91,6 +91,8 @@ int main(int argc, char *argv[]) return check("child after exec", &now); } + ksft_print_header(); + nscheck(); ksft_set_plan(4); diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index f34ac0bac696..4dde8838261d 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir) +CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)/usr/include +ifneq ($(WERROR),0) + CFLAGS += -Werror +endif + LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh @@ -11,6 +15,11 @@ TEST_PROGS += test_generic_05.sh TEST_PROGS += test_generic_06.sh TEST_PROGS += test_generic_07.sh +TEST_PROGS += test_generic_08.sh +TEST_PROGS += test_generic_09.sh +TEST_PROGS += test_generic_10.sh +TEST_PROGS += test_generic_11.sh + TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh TEST_PROGS += test_loop_01.sh diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c index 94a8e729ba4c..5421774d7867 100644 --- a/tools/testing/selftests/ublk/fault_inject.c +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -16,6 +16,11 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; unsigned long dev_size = 250UL << 30; + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + dev->tgt.dev_size = dev_size; dev->tgt.params = (struct ublk_params) { .types = UBLK_PARAM_TYPE_BASIC, diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index 6f34eabfae97..509842df9bee 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -29,19 +29,23 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) { unsigned ublk_op = ublksrv_get_op(iod); - int zc = ublk_queue_use_zc(q); - enum io_uring_op op = ublk_to_uring_op(iod, zc); + unsigned zc = ublk_queue_use_zc(q); + unsigned auto_zc = ublk_queue_use_auto_zc(q); + enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc); struct io_uring_sqe *sqe[3]; + void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr; - if (!zc) { + if (!zc || auto_zc) { ublk_queue_alloc_sqes(q, sqe, 1); if (!sqe[0]) return -ENOMEM; io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/, - (void *)iod->addr, + addr, iod->nr_sectors << 9, iod->start_sector << 9); + if (auto_zc) + sqe[0]->buf_index = tag; io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); /* bit63 marks us as tgt io */ sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1); @@ -145,6 +149,11 @@ static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) }, }; + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + ret = backing_file_tgt_init(dev); if (ret) return ret; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 842b40736a9b..b5131a000795 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -216,6 +216,30 @@ static int ublk_ctrl_get_features(struct ublk_dev *dev, return __ublk_ctrl_cmd(dev, &data); } +static int ublk_ctrl_update_size(struct ublk_dev *dev, + __u64 nr_sects) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_UPDATE_SIZE, + .flags = CTRL_CMD_HAS_DATA, + }; + + data.data[0] = nr_sects; + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev, + unsigned int timeout_ms) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_QUIESCE_DEV, + .flags = CTRL_CMD_HAS_DATA, + }; + + data.data[0] = timeout_ms; + return __ublk_ctrl_cmd(dev, &data); +} + static const char *ublk_dev_state_desc(struct ublk_dev *dev) { switch (dev->dev_info.state) { @@ -405,7 +429,7 @@ static void ublk_queue_deinit(struct ublk_queue *q) free(q->ios[i].buf_addr); } -static int ublk_queue_init(struct ublk_queue *q) +static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) { struct ublk_dev *dev = q->dev; int depth = dev->dev_info.queue_depth; @@ -420,10 +444,14 @@ static int ublk_queue_init(struct ublk_queue *q) q->cmd_inflight = 0; q->tid = gettid(); - if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) { + if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) { q->state |= UBLKSRV_NO_BUF; - q->state |= UBLKSRV_ZC; + if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) + q->state |= UBLKSRV_ZC; + if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG) + q->state |= UBLKSRV_AUTO_BUF_REG; } + q->state |= extra_flags; cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); @@ -461,7 +489,7 @@ static int ublk_queue_init(struct ublk_queue *q) goto fail; } - if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) { + if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) { ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth); if (ret) { ublk_err("ublk dev %d queue %d register spare buffers failed %d", @@ -525,6 +553,23 @@ static void ublk_dev_unprep(struct ublk_dev *dev) close(dev->fds[0]); } +static void ublk_set_auto_buf_reg(const struct ublk_queue *q, + struct io_uring_sqe *sqe, + unsigned short tag) +{ + struct ublk_auto_buf_reg buf = {}; + + if (q->tgt_ops->buf_index) + buf.index = q->tgt_ops->buf_index(q, tag); + else + buf.index = tag; + + if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + buf.flags = UBLK_AUTO_BUF_REG_FALLBACK; + + sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf); +} + int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) { struct ublksrv_io_cmd *cmd; @@ -579,6 +624,9 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) else cmd->addr = 0; + if (q->state & UBLKSRV_AUTO_BUF_REG) + ublk_set_auto_buf_reg(q, sqe[0], tag); + user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0); io_uring_sqe_set_data64(sqe[0], user_data); @@ -729,6 +777,7 @@ struct ublk_queue_info { struct ublk_queue *q; sem_t *queue_sem; cpu_set_t *affinity; + unsigned char auto_zc_fallback; }; static void *ublk_io_handler_fn(void *data) @@ -736,9 +785,13 @@ static void *ublk_io_handler_fn(void *data) struct ublk_queue_info *info = data; struct ublk_queue *q = info->q; int dev_id = q->dev->dev_info.dev_id; + unsigned extra_flags = 0; int ret; - ret = ublk_queue_init(q); + if (info->auto_zc_fallback) + extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK; + + ret = ublk_queue_init(q, extra_flags); if (ret) { ublk_err("ublk dev %d queue %d init queue failed\n", dev_id, q->q_id); @@ -831,6 +884,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) qinfo[i].q = &dev->q[i]; qinfo[i].queue_sem = &queue_sem; qinfo[i].affinity = &affinity_buf[i]; + qinfo[i].auto_zc_fallback = ctx->auto_zc_fallback; pthread_create(&dev->q[i].thread, NULL, ublk_io_handler_fn, &qinfo[i]); @@ -1011,6 +1065,9 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) info->nr_hw_queues = nr_queues; info->queue_depth = depth; info->flags = ctx->flags; + if ((features & UBLK_F_QUIESCE) && + (info->flags & UBLK_F_USER_RECOVERY)) + info->flags |= UBLK_F_QUIESCE; dev->tgt.ops = ops; dev->tgt.sq_depth = depth; dev->tgt.cq_depth = depth; @@ -1206,6 +1263,9 @@ static int cmd_dev_get_features(void) [const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY", [const_ilog2(UBLK_F_ZONED)] = "ZONED", [const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO", + [const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE", + [const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG", + [const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE", }; struct ublk_dev *dev; __u64 features = 0; @@ -1239,13 +1299,66 @@ static int cmd_dev_get_features(void) return ret; } +static int cmd_dev_update_size(struct dev_ctx *ctx) +{ + struct ublk_dev *dev = ublk_ctrl_init(); + struct ublk_params p; + int ret = -EINVAL; + + if (!dev) + return -ENODEV; + + if (ctx->dev_id < 0) { + fprintf(stderr, "device id isn't provided\n"); + goto out; + } + + dev->dev_info.dev_id = ctx->dev_id; + ret = ublk_ctrl_get_params(dev, &p); + if (ret < 0) { + ublk_err("failed to get params %d %s\n", ret, strerror(-ret)); + goto out; + } + + if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) { + ublk_err("size isn't aligned with logical block size\n"); + ret = -EINVAL; + goto out; + } + + ret = ublk_ctrl_update_size(dev, ctx->size >> 9); +out: + ublk_ctrl_deinit(dev); + return ret; +} + +static int cmd_dev_quiesce(struct dev_ctx *ctx) +{ + struct ublk_dev *dev = ublk_ctrl_init(); + int ret = -EINVAL; + + if (!dev) + return -ENODEV; + + if (ctx->dev_id < 0) { + fprintf(stderr, "device id isn't provided for quiesce\n"); + goto out; + } + dev->dev_info.dev_id = ctx->dev_id; + ret = ublk_ctrl_quiesce_dev(dev, 10000); + +out: + ublk_ctrl_deinit(dev); + return ret; +} + static void __cmd_create_help(char *exe, bool recovery) { int i; printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", exe, recovery ? "recover" : "add"); - printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n"); + printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n"); printf("\t[-e 0|1 ] [-i 0|1]\n"); printf("\t[target options] [backfile1] [backfile2] ...\n"); printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); @@ -1281,6 +1394,8 @@ static int cmd_dev_help(char *exe) printf("%s list [-n dev_id] -a \n", exe); printf("\t -a list all devices, -n list specified device, default -a \n\n"); printf("%s features\n", exe); + printf("%s update_size -n dev_id -s|--size size_in_bytes \n", exe); + printf("%s quiesce -n dev_id\n", exe); return 0; } @@ -1300,6 +1415,9 @@ int main(int argc, char *argv[]) { "recovery_fail_io", 1, NULL, 'e'}, { "recovery_reissue", 1, NULL, 'i'}, { "get_data", 1, NULL, 'g'}, + { "auto_zc", 0, NULL, 0 }, + { "auto_zc_fallback", 0, NULL, 0 }, + { "size", 1, NULL, 's'}, { 0, 0, 0, 0 } }; const struct ublk_tgt_ops *ops = NULL; @@ -1321,7 +1439,7 @@ int main(int argc, char *argv[]) opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1361,6 +1479,9 @@ int main(int argc, char *argv[]) case 'g': ctx.flags |= UBLK_F_NEED_GET_DATA; break; + case 's': + ctx.size = strtoull(optarg, NULL, 10); + break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) ublk_dbg_mask = strtol(optarg, NULL, 16); @@ -1368,6 +1489,10 @@ int main(int argc, char *argv[]) ublk_dbg_mask = 0; if (!strcmp(longopts[option_idx].name, "foreground")) ctx.fg = 1; + if (!strcmp(longopts[option_idx].name, "auto_zc")) + ctx.flags |= UBLK_F_AUTO_BUF_REG; + if (!strcmp(longopts[option_idx].name, "auto_zc_fallback")) + ctx.auto_zc_fallback = 1; break; case '?': /* @@ -1391,6 +1516,16 @@ int main(int argc, char *argv[]) } } + /* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */ + if (ctx.auto_zc_fallback && + !((ctx.flags & UBLK_F_AUTO_BUF_REG) && + (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) { + ublk_err("%s: auto_zc_fallback is set but neither " + "F_AUTO_BUF_REG nor F_SUPPORT_ZERO_COPY is enabled\n", + __func__); + return -EINVAL; + } + i = optind; while (i < argc && ctx.nr_files < MAX_BACK_FILES) { ctx.files[ctx.nr_files++] = argv[i++]; @@ -1423,6 +1558,10 @@ int main(int argc, char *argv[]) ret = cmd_dev_help(argv[0]); else if (!strcmp(cmd, "features")) ret = cmd_dev_get_features(); + else if (!strcmp(cmd, "update_size")) + ret = cmd_dev_update_size(&ctx); + else if (!strcmp(cmd, "quiesce")) + ret = cmd_dev_quiesce(&ctx); else cmd_dev_help(argv[0]); diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 44ee1e4ac55b..e34508bf5798 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -19,7 +19,6 @@ #include <sys/inotify.h> #include <sys/wait.h> #include <sys/eventfd.h> -#include <sys/uio.h> #include <sys/ipc.h> #include <sys/shm.h> #include <linux/io_uring.h> @@ -85,6 +84,7 @@ struct dev_ctx { unsigned int all:1; unsigned int fg:1; unsigned int recovery:1; + unsigned int auto_zc_fallback:1; int _evtfd; int _shmid; @@ -92,6 +92,9 @@ struct dev_ctx { /* built from shmem, only for ublk_dump_dev() */ struct ublk_dev *shadow_dev; + /* for 'update_size' command */ + unsigned long long size; + union { struct stripe_ctx stripe; struct fault_inject_ctx fault_inject; @@ -116,6 +119,7 @@ struct ublk_io { #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) #define UBLKSRV_IO_FREE (1UL << 2) #define UBLKSRV_NEED_GET_DATA (1UL << 3) +#define UBLKSRV_NEED_REG_BUF (1UL << 4) unsigned short flags; unsigned short refs; /* used by target code only */ @@ -141,6 +145,9 @@ struct ublk_tgt_ops { */ void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); void (*usage)(const struct ublk_tgt_ops *ops); + + /* return buffer index for UBLK_F_AUTO_BUF_REG */ + unsigned short (*buf_index)(const struct ublk_queue *, int tag); }; struct ublk_tgt { @@ -169,6 +176,8 @@ struct ublk_queue { #define UBLKSRV_QUEUE_IDLE (1U << 1) #define UBLKSRV_NO_BUF (1U << 2) #define UBLKSRV_ZC (1U << 3) +#define UBLKSRV_AUTO_BUF_REG (1U << 4) +#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5) unsigned state; pid_t tid; pthread_t thread; @@ -204,6 +213,12 @@ struct ublk_dev { extern unsigned int ublk_dbg_mask; extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag); + +static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod) +{ + return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF); +} + static inline int is_target_io(__u64 user_data) { return (user_data & (1ULL << 63)) != 0; @@ -388,6 +403,11 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q) return q->state & UBLKSRV_ZC; } +static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q) +{ + return q->state & UBLKSRV_AUTO_BUF_REG; +} + extern const struct ublk_tgt_ops null_tgt_ops; extern const struct ublk_tgt_ops loop_tgt_ops; extern const struct ublk_tgt_ops stripe_tgt_ops; diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index 91fec3690d4b..44aca31cf2b0 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -42,10 +42,22 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) return 0; } +static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod, + struct io_uring_sqe *sqe) +{ + unsigned ublk_op = ublksrv_get_op(iod); + + io_uring_prep_nop(sqe); + sqe->buf_index = tag; + sqe->flags |= IOSQE_FIXED_FILE; + sqe->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT; + sqe->len = iod->nr_sectors << 9; /* injected result */ + sqe->user_data = build_user_data(tag, ublk_op, 0, 1); +} + static int null_queue_zc_io(struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); - unsigned ublk_op = ublksrv_get_op(iod); struct io_uring_sqe *sqe[3]; ublk_queue_alloc_sqes(q, sqe, 3); @@ -55,12 +67,8 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag) ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; - io_uring_prep_nop(sqe[1]); - sqe[1]->buf_index = tag; - sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; - sqe[1]->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT; - sqe[1]->len = iod->nr_sectors << 9; /* injected result */ - sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1); + __setup_nop_io(tag, iod, sqe[1]); + sqe[1]->flags |= IOSQE_IO_HARDLINK; io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1); @@ -69,6 +77,16 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag) return 2; } +static int null_queue_auto_zc_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + struct io_uring_sqe *sqe[1]; + + ublk_queue_alloc_sqes(q, sqe, 1); + __setup_nop_io(tag, iod, sqe[0]); + return 1; +} + static void ublk_null_io_done(struct ublk_queue *q, int tag, const struct io_uring_cqe *cqe) { @@ -94,22 +112,37 @@ static void ublk_null_io_done(struct ublk_queue *q, int tag, static int ublk_null_queue_io(struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); - int zc = ublk_queue_use_zc(q); + unsigned auto_zc = ublk_queue_use_auto_zc(q); + unsigned zc = ublk_queue_use_zc(q); int queued; - if (!zc) { + if (auto_zc && !ublk_io_auto_zc_fallback(iod)) + queued = null_queue_auto_zc_io(q, tag); + else if (zc) + queued = null_queue_zc_io(q, tag); + else { ublk_complete_io(q, tag, iod->nr_sectors << 9); return 0; } - - queued = null_queue_zc_io(q, tag); ublk_queued_tgt_io(q, tag, queued); return 0; } +/* + * return invalid buffer index for triggering auto buffer register failure, + * then UBLK_IO_RES_NEED_REG_BUF handling is covered + */ +static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag) +{ + if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + return (unsigned short)-1; + return tag; +} + const struct ublk_tgt_ops null_tgt_ops = { .name = "null", .init_tgt = ublk_null_tgt_init, .queue_io = ublk_null_queue_io, .tgt_io_done = ublk_null_io_done, + .buf_index = ublk_null_buf_index, }; diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 5dbd6392d83d..404a143bf3d6 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -70,7 +70,7 @@ static void free_stripe_array(struct stripe_array *s) } static void calculate_stripe_array(const struct stripe_conf *conf, - const struct ublksrv_io_desc *iod, struct stripe_array *s) + const struct ublksrv_io_desc *iod, struct stripe_array *s, void *base) { const unsigned shift = conf->shift - 9; const unsigned chunk_sects = 1 << shift; @@ -102,7 +102,7 @@ static void calculate_stripe_array(const struct stripe_conf *conf, } assert(this->nr_vec < this->cap); - this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done); + this->vec[this->nr_vec].iov_base = (void *)(base + done); this->vec[this->nr_vec++].iov_len = nr_sects << 9; start += nr_sects; @@ -126,15 +126,17 @@ static inline enum io_uring_op stripe_to_uring_op( static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) { const struct stripe_conf *conf = get_chunk_shift(q); - int zc = !!(ublk_queue_use_zc(q) != 0); - enum io_uring_op op = stripe_to_uring_op(iod, zc); + unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0); + unsigned zc = (ublk_queue_use_zc(q) != 0); + enum io_uring_op op = stripe_to_uring_op(iod, zc | auto_zc); struct io_uring_sqe *sqe[NR_STRIPE]; struct stripe_array *s = alloc_stripe_array(conf, iod); struct ublk_io *io = ublk_get_io(q, tag); int i, extra = zc ? 2 : 0; + void *base = (zc | auto_zc) ? NULL : (void *)iod->addr; io->private_data = s; - calculate_stripe_array(conf, iod, s); + calculate_stripe_array(conf, iod, s, base); ublk_queue_alloc_sqes(q, sqe, s->nr + extra); @@ -153,12 +155,11 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_ (void *)t->vec, t->nr_vec, t->start << 9); - if (zc) { + io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + if (auto_zc || zc) { sqe[i]->buf_index = tag; - io_uring_sqe_set_flags(sqe[i], - IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK); - } else { - io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + if (zc) + sqe[i]->flags |= IOSQE_IO_HARDLINK; } /* bit63 marks us as tgt io */ sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1); @@ -287,6 +288,11 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) loff_t bytes = 0; int ret, i, mul = 1; + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + if ((chunk_size & (chunk_size - 1)) || !chunk_size) { ublk_err("invalid chunk size %u\n", chunk_size); return -EINVAL; diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index a81210ca3e99..0145569ee7e9 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -23,6 +23,11 @@ _get_disk_dev_t() { echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) } +_get_disk_size() +{ + lsblk -b -o SIZE -n "$1" +} + _run_fio_verify_io() { fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \ --bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \ @@ -215,6 +220,26 @@ _recover_ublk_dev() { echo "$state" } +# quiesce device and return ublk device state +__ublk_quiesce_dev() +{ + local dev_id=$1 + local exp_state=$2 + local state + + if ! ${UBLK_PROG} quiesce -n "${dev_id}"; then + state=$(_get_ublk_dev_state "${dev_id}") + return "$state" + fi + + for ((j=0;j<50;j++)); do + state=$(_get_ublk_dev_state "${dev_id}") + [ "$state" == "$exp_state" ] && break + sleep 1 + done + echo "$state" +} + # kill the ublk daemon and return ublk device state __ublk_kill_daemon() { @@ -251,7 +276,7 @@ __run_io_and_remove() local kill_server=$3 fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ - --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ + --rw=randrw --norandommap --iodepth=256 --size="${size}" --numjobs="$(nproc)" \ --runtime=20 --time_based > /dev/null 2>&1 & sleep 2 if [ "${kill_server}" = "yes" ]; then @@ -303,20 +328,26 @@ run_io_and_kill_daemon() run_io_and_recover() { + local action=$1 local state local dev_id + shift 1 dev_id=$(_add_ublk_dev "$@") _check_add_dev "$TID" $? fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ - --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \ + --rw=randread --iodepth=256 --size="${size}" --numjobs=4 \ --runtime=20 --time_based > /dev/null 2>&1 & sleep 4 - state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") + if [ "$action" == "kill_daemon" ]; then + state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") + elif [ "$action" == "quiesce_dev" ]; then + state=$(__ublk_quiesce_dev "${dev_id}" "QUIESCED") + fi if [ "$state" != "QUIESCED" ]; then - echo "device isn't quiesced($state) after killing daemon" + echo "device isn't quiesced($state) after $action" return 255 fi diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh index 8a3bc080c577..8b533217d4a1 100755 --- a/tools/testing/selftests/ublk/test_generic_04.sh +++ b/tools/testing/selftests/ublk/test_generic_04.sh @@ -8,7 +8,7 @@ ERR_CODE=0 ublk_run_recover_test() { - run_io_and_recover "$@" + run_io_and_recover "kill_daemon" "$@" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then echo "$TID failure: $*" diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh index 3bb00a347402..398e9e2b58e1 100755 --- a/tools/testing/selftests/ublk/test_generic_05.sh +++ b/tools/testing/selftests/ublk/test_generic_05.sh @@ -8,7 +8,7 @@ ERR_CODE=0 ublk_run_recover_test() { - run_io_and_recover "$@" + run_io_and_recover "kill_daemon" "$@" ERR_CODE=$? if [ ${ERR_CODE} -ne 0 ]; then echo "$TID failure: $*" diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh index b67230c42c84..fd42062b7b76 100755 --- a/tools/testing/selftests/ublk/test_generic_06.sh +++ b/tools/testing/selftests/ublk/test_generic_06.sh @@ -17,7 +17,7 @@ STARTTIME=${SECONDS} dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 & dd_pid=$! -__ublk_kill_daemon ${dev_id} "DEAD" +__ublk_kill_daemon ${dev_id} "DEAD" >/dev/null wait $dd_pid dd_exitcode=$? diff --git a/tools/testing/selftests/ublk/test_generic_08.sh b/tools/testing/selftests/ublk/test_generic_08.sh new file mode 100755 index 000000000000..b222f3a77e12 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_08.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_08" +ERR_CODE=0 + +if ! _have_feature "AUTO_BUF_REG"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_AUTO_BUF_REG" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t loop -q 2 --auto_zc "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +if ! _mkfs_mount_test /dev/ublkb"${dev_id}"; then + _cleanup_test "generic" + _show_result $TID 255 +fi + +dev_id=$(_add_ublk_dev -t stripe --auto_zc "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "generic" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_09.sh b/tools/testing/selftests/ublk/test_generic_09.sh new file mode 100755 index 000000000000..bb6f77ca5522 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_09.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_09" +ERR_CODE=0 + +if ! _have_feature "AUTO_BUF_REG"; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "basic IO test" + +dev_id=$(_add_ublk_dev -t null -z --auto_zc --auto_zc_fallback) +_check_add_dev $TID $? + +# run fio over the two disks +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test "null" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_10.sh b/tools/testing/selftests/ublk/test_generic_10.sh new file mode 100755 index 000000000000..abc11c3d416b --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_10.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_10" +ERR_CODE=0 + +if ! _have_feature "UPDATE_SIZE"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "check update size" + +dev_id=$(_add_ublk_dev -t null) +_check_add_dev $TID $? + +size=$(_get_disk_size /dev/ublkb"${dev_id}") +size=$(( size / 2 )) +if ! "$UBLK_PROG" update_size -n "$dev_id" -s "$size"; then + ERR_CODE=255 +fi + +new_size=$(_get_disk_size /dev/ublkb"${dev_id}") +if [ "$new_size" != "$size" ]; then + ERR_CODE=255 +fi + +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_11.sh b/tools/testing/selftests/ublk/test_generic_11.sh new file mode 100755 index 000000000000..a00357a5ec6b --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_11.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_11" +ERR_CODE=0 + +ublk_run_quiesce_recover() +{ + run_io_and_recover "quiesce_dev" "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_feature "QUIESCE"; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "quiesce" "basic quiesce & recover function verification" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_quiesce_recover -t null -q 2 -r 1 & +ublk_run_quiesce_recover -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" & +ublk_run_quiesce_recover -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_quiesce_recover -t null -q 2 -r 1 -i 1 & +ublk_run_quiesce_recover -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_quiesce_recover -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "quiesce" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 1a9065125ae1..4bdd921081e5 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -25,10 +25,12 @@ _create_backfile 0 256M _create_backfile 1 128M _create_backfile 2 128M -ublk_io_and_kill_daemon 8G -t null -q 4 & -ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & -ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & -wait +for nr_queue in 1 4; do + ublk_io_and_kill_daemon 8G -t null -q "$nr_queue" & + ublk_io_and_kill_daemon 256M -t loop -q "$nr_queue" "${UBLK_BACKFILES[0]}" & + ublk_io_and_kill_daemon 256M -t stripe -q "$nr_queue" "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + wait +done _cleanup_test "stress" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh index e0854f71d35b..7d728ce50774 100755 --- a/tools/testing/selftests/ublk/test_stress_03.sh +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -32,6 +32,13 @@ _create_backfile 2 128M ublk_io_and_remove 8G -t null -q 4 -z & ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + +if _have_feature "AUTO_BUF_REG"; then + ublk_io_and_remove 8G -t null -q 4 --auto_zc & + ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & + ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & +fi wait _cleanup_test "stress" diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh index 1798a98387e8..9bcfa64ea1f0 100755 --- a/tools/testing/selftests/ublk/test_stress_04.sh +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -31,6 +31,13 @@ _create_backfile 2 128M ublk_io_and_kill_daemon 8G -t null -q 4 -z & ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + +if _have_feature "AUTO_BUF_REG"; then + ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc & + ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & + ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & +fi wait _cleanup_test "stress" diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh index 88601b48f1cd..bcfc904cefc6 100755 --- a/tools/testing/selftests/ublk/test_stress_05.sh +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -60,5 +60,14 @@ if _have_feature "ZERO_COPY"; then done fi +if _have_feature "AUTO_BUF_REG"; then + for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g --auto_zc -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + ublk_io_and_remove 8G -t null -q 4 -g -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" & + wait + done +fi + _cleanup_test "stress" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 55500f901fbc..a8f550aecb35 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -611,6 +611,35 @@ n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips" } < <(n0 wg show wg0 allowed-ips) ip0 link del wg0 +allowedips=( ) +for i in {1..197}; do + allowedips+=( 192.168.0.$i ) + allowedips+=( abcd::$i ) +done +saved_ifs="$IFS" +IFS=, +allowedips="${allowedips[*]}" +IFS="$saved_ifs" +ip0 link add wg0 type wireguard +n0 wg set wg0 peer "$pub1" allowed-ips "$allowedips" +n0 wg set wg0 peer "$pub1" allowed-ips -192.168.0.1/32,-192.168.0.20/32,-192.168.0.100/32,-abcd::1/128,-abcd::20/128,-abcd::100/128 +{ + read -r pub allowedips + [[ $pub == "$pub1" ]] + i=0 + for ip in $allowedips; do + [[ $ip != "192.168.0.1" ]] + [[ $ip != "192.168.0.20" ]] + [[ $ip != "192.168.0.100" ]] + [[ $ip != "abcd::1" ]] + [[ $ip != "abcd::20" ]] + [[ $ip != "abcd::100" ]] + ((++i)) + done + ((i == 388)) +} < <(n0 wg show wg0 allowed-ips) +ip0 link del wg0 + ! n0 wg show doesnotexist || false ip0 link add wg0 type wireguard diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 35856b11c143..791d21b736a5 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -43,7 +43,7 @@ $(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.o $(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0)) $(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117)) $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) -$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac)) +$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20250521,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,b6f2628b85b1b23cc06517ec9c74f82d52c4cdbd020f3dd2f00c972a1782950e)) export CFLAGS := -O3 -pipe ifeq ($(HOST_ARCH),$(ARCH)) @@ -401,6 +401,7 @@ $(BASH_PATH)/.installed: $(BASH_TAR) flock -s $<.lock tar -C $(BUILD_PATH) -xf $< touch $@ +$(BASH_PATH)/bash: export CFLAGS_FOR_BUILD += -std=gnu17 $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble $(MAKE) -C $(BASH_PATH) diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index c305d2f613f0..5d39f43dd667 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -22,7 +22,6 @@ CONFIG_HAVE_ARCH_KASAN=y CONFIG_KASAN=y CONFIG_KASAN_INLINE=y CONFIG_UBSAN=y -CONFIG_UBSAN_SANITIZE_ALL=y CONFIG_DEBUG_KMEMLEAK=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_SHIRQ=y diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 28422c32cc8f..f703fcfe9f7c 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -19,7 +19,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \ - corrupt_xstate_header amx lam test_shadow_stack avx + corrupt_xstate_header amx lam test_shadow_stack avx apx # Some selftests require 32bit support enabled also on 64bit systems TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall @@ -136,3 +136,4 @@ $(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack $(OUTPUT)/avx_64: CFLAGS += -mno-avx -mno-avx512f $(OUTPUT)/amx_64: EXTRA_FILES += xstate.c $(OUTPUT)/avx_64: EXTRA_FILES += xstate.c +$(OUTPUT)/apx_64: EXTRA_FILES += xstate.c diff --git a/tools/testing/selftests/x86/apx.c b/tools/testing/selftests/x86/apx.c new file mode 100644 index 000000000000..d9c8d41b8c5a --- /dev/null +++ b/tools/testing/selftests/x86/apx.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include "xstate.h" + +int main(void) +{ + test_xstate(XFEATURE_APX); +} diff --git a/tools/testing/selftests/x86/bugs/Makefile b/tools/testing/selftests/x86/bugs/Makefile new file mode 100644 index 000000000000..8ff2d7226c7f --- /dev/null +++ b/tools/testing/selftests/x86/bugs/Makefile @@ -0,0 +1,3 @@ +TEST_PROGS := its_sysfs.py its_permutations.py its_indirect_alignment.py its_ret_alignment.py +TEST_FILES := common.py +include ../../lib.mk diff --git a/tools/testing/selftests/x86/bugs/common.py b/tools/testing/selftests/x86/bugs/common.py new file mode 100755 index 000000000000..2f9664a80617 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/common.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# This contains kselftest framework adapted common functions for testing +# mitigation for x86 bugs. + +import os, sys, re, shutil + +sys.path.insert(0, '../../kselftest') +import ksft + +def read_file(path): + if not os.path.exists(path): + return None + with open(path, 'r') as file: + return file.read().strip() + +def cpuinfo_has(arg): + cpuinfo = read_file('/proc/cpuinfo') + if arg in cpuinfo: + return True + return False + +def cmdline_has(arg): + cmdline = read_file('/proc/cmdline') + if arg in cmdline: + return True + return False + +def cmdline_has_either(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg in cmdline: + return True + return False + +def cmdline_has_none(args): + return not cmdline_has_either(args) + +def cmdline_has_all(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg not in cmdline: + return False + return True + +def get_sysfs(bug): + return read_file("/sys/devices/system/cpu/vulnerabilities/" + bug) + +def sysfs_has(bug, mitigation): + status = get_sysfs(bug) + if mitigation in status: + return True + return False + +def sysfs_has_either(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if sysfs_has(bug, mitigation): + return True + return False + +def sysfs_has_none(bugs, mitigations): + return not sysfs_has_either(bugs, mitigations) + +def sysfs_has_all(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if not sysfs_has(bug, mitigation): + return False + return True + +def bug_check_pass(bug, found): + ksft.print_msg(f"\nFound: {found}") + # ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_pass(f'{bug}: {found}') + +def bug_check_fail(bug, found, expected): + ksft.print_msg(f'\nFound:\t {found}') + ksft.print_msg(f'Expected:\t {expected}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def bug_status_unknown(bug, found): + ksft.print_msg(f'\nUnknown status: {found}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def basic_checks_sufficient(bug, mitigation): + if not mitigation: + bug_status_unknown(bug, "None") + return True + elif mitigation == "Not affected": + ksft.test_result_pass(bug) + return True + elif mitigation == "Vulnerable": + if cmdline_has_either([f'{bug}=off', 'mitigations=off']): + bug_check_pass(bug, mitigation) + return True + return False + +def get_section_info(vmlinux, section_name): + from elftools.elf.elffile import ELFFile + with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + section = elffile.get_section_by_name(section_name) + if section is None: + ksft.print_msg("Available sections in vmlinux:") + for sec in elffile.iter_sections(): + ksft.print_msg(sec.name) + raise ValueError(f"Section {section_name} not found in {vmlinux}") + return section['sh_addr'], section['sh_offset'], section['sh_size'] + +def get_patch_sites(vmlinux, offset, size): + import struct + output = [] + with open(vmlinux, 'rb') as f: + f.seek(offset) + i = 0 + while i < size: + data = f.read(4) # s32 + if not data: + break + sym_offset = struct.unpack('<i', data)[0] + i + i += 4 + output.append(sym_offset) + return output + +def get_instruction_from_vmlinux(elffile, section, virtual_address, target_address): + from capstone import Cs, CS_ARCH_X86, CS_MODE_64 + section_start = section['sh_addr'] + section_end = section_start + section['sh_size'] + + if not (section_start <= target_address < section_end): + return None + + offset = target_address - section_start + code = section.data()[offset:offset + 16] + + cap = init_capstone() + for instruction in cap.disasm(code, target_address): + if instruction.address == target_address: + return instruction + return None + +def init_capstone(): + from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_OPT_SYNTAX_ATT + cap = Cs(CS_ARCH_X86, CS_MODE_64) + cap.syntax = CS_OPT_SYNTAX_ATT + return cap + +def get_runtime_kernel(): + import drgn + return drgn.program_from_kernel() + +def check_dependencies_or_skip(modules, script_name="unknown test"): + for mod in modules: + try: + __import__(mod) + except ImportError: + ksft.test_result_skip(f"Skipping {script_name}: missing module '{mod}'") + ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_indirect_alignment.py b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py new file mode 100755 index 000000000000..cdc33ae6a91c --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) mitigation. +# +# Test if indirect CALL/JMP are correctly patched by evaluating +# the vmlinux .retpoline_sites in /proc/kcore. + +# Install dependencies +# add-apt-repository ppa:michel-slm/kernel-utils +# apt update +# apt install -y python3-drgn python3-pyelftools python3-capstone +# +# Best to copy the vmlinux at a standard location: +# mkdir -p /usr/lib/debug/lib/modules/$(uname -r) +# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux +# +# Usage: ./its_indirect_alignment.py [vmlinux] + +import os, sys, argparse +from pathlib import Path + +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" + +mitigation = c.get_sysfs(bug) +if not mitigation or "Aligned branch/return thunks" not in mitigation: + ksft.test_result_skip("Skipping its_indirect_alignment.py: Aligned branch/return thunks not enabled") + ksft.finished() + +if c.sysfs_has("spectre_v2", "Retpolines"): + ksft.test_result_skip("Skipping its_indirect_alignment.py: Retpolines deployed") + ksft.finished() + +c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_indirect_alignment.py") + +from elftools.elf.elffile import ELFFile +from drgn.helpers.common.memory import identify_address + +cap = c.init_capstone() + +if len(os.sys.argv) > 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at argument path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +retpolines_start_vmlinux, retpolines_sec_offset, size = c.get_section_info(vmlinux, '.retpoline_sites') +ksft.print_msg(f"vmlinux: Section .retpoline_sites (0x{retpolines_start_vmlinux:x}) found at 0x{retpolines_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, retpolines_sec_offset, size) +total_retpoline_tests = len(sites_offset) +ksft.print_msg(f"Found {total_retpoline_tests} retpoline sites") + +prog = c.get_runtime_kernel() +retpolines_start_kcore = prog.symbol('__retpoline_sites').address +ksft.print_msg(f'kcore: __retpoline_sites: 0x{retpolines_start_kcore:x}') + +x86_indirect_its_thunk_r15 = prog.symbol('__x86_indirect_its_thunk_r15').address +ksft.print_msg(f'kcore: __x86_indirect_its_thunk_r15: 0x{x86_indirect_its_thunk_r15:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(0, len(sites_offset)): + site = retpolines_start_kcore + sites_offset[i] + vmlinux_site = retpolines_start_vmlinux + sites_offset[i] + passed = unknown = failed = False + try: + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + operand = kcore_insn.op_str + insn_end = site + kcore_insn.size - 1 # TODO handle Jcc.32 __x86_indirect_thunk_\reg + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {identify_address(prog, site)} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if (site & 0x20) ^ (insn_end & 0x20): + ksft.print_msg(f"\tSite at safe/unsafe boundary: {str(kcore_insn.bytes)} {kcore_insn.mnemonic} {operand}") + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if operand.startswith('0xffffffff'): + thunk = int(operand, 16) + if thunk > x86_indirect_its_thunk_r15: + insn_at_thunk = list(cap.disasm(prog.read(thunk, 16), thunk))[0] + operand += ' -> ' + insn_at_thunk.mnemonic + ' ' + insn_at_thunk.op_str + ' <dynamic-thunk?>' + if 'jmp' in insn_at_thunk.mnemonic and thunk & 0x20: + ksft.print_msg(f"\tPASSED: Found {operand} at safe address") + passed = True + if not passed: + if kcore_insn.operands[0].type == capstone.CS_OP_IMM: + operand += ' <' + prog.symbol(int(operand, 16)) + '>' + if '__x86_indirect_its_thunk_' in operand: + ksft.print_msg(f"\tPASSED: Found {operand}") + else: + ksft.print_msg(f"\tPASSED: Found direct branch: {kcore_insn}, ITS thunk not required.") + passed = True + else: + unknown = True + if passed: + tests_passed += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: unexpected operand: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.bytes} {kcore_insn.mnemonic} {operand}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASS: \t{tests_passed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"FAIL: \t{tests_failed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_retpoline_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed") +else: + ksft.test_result_fail(f"{tests_failed} ITS return thunk sites failed") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_permutations.py b/tools/testing/selftests/x86/bugs/its_permutations.py new file mode 100755 index 000000000000..3204f4728c62 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_permutations.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) cmdline permutations with other bugs +# like spectre_v2 and retbleed. + +import os, sys, subprocess, itertools, re, shutil + +test_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, test_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) + +if not mitigation or "Not affected" in mitigation: + ksft.test_result_skip("Skipping its_permutations.py: not applicable") + ksft.finished() + +if shutil.which('vng') is None: + ksft.test_result_skip("Skipping its_permutations.py: virtme-ng ('vng') not found in PATH.") + ksft.finished() + +TEST = f"{test_dir}/its_sysfs.py" +default_kparam = ['clearcpuid=hypervisor', 'panic=5', 'panic_on_warn=1', 'oops=panic', 'nmi_watchdog=1', 'hung_task_panic=1'] + +DEBUG = " -v " + +# Install dependencies +# https://github.com/arighi/virtme-ng +# apt install virtme-ng +BOOT_CMD = f"vng --run {test_dir}/../../../../../arch/x86/boot/bzImage " +#BOOT_CMD += DEBUG + +bug = "indirect_target_selection" + +input_options = { + 'indirect_target_selection' : ['off', 'on', 'stuff', 'vmexit'], + 'retbleed' : ['off', 'stuff', 'auto'], + 'spectre_v2' : ['off', 'on', 'eibrs', 'retpoline', 'ibrs', 'eibrs,retpoline'], +} + +def pretty_print(output): + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + + # Define patterns and their corresponding colors + patterns = { + r"^ok \d+": OKGREEN, + r"^not ok \d+": FAIL, + r"^# Testing .*": OKBLUE, + r"^# Found: .*": WARNING, + r"^# Totals: .*": BOLD, + r"pass:([1-9]\d*)": OKGREEN, + r"fail:([1-9]\d*)": FAIL, + r"skip:([1-9]\d*)": WARNING, + } + + # Apply colors based on patterns + for pattern, color in patterns.items(): + output = re.sub(pattern, lambda match: f"{color}{match.group(0)}{ENDC}", output, flags=re.MULTILINE) + + print(output) + +combinations = list(itertools.product(*input_options.values())) +ksft.print_header() +ksft.set_plan(len(combinations)) + +logs = "" + +for combination in combinations: + append = "" + log = "" + for p in default_kparam: + append += f' --append={p}' + command = BOOT_CMD + append + test_params = "" + for i, key in enumerate(input_options.keys()): + param = f'{key}={combination[i]}' + test_params += f' {param}' + command += f" --append={param}" + command += f" -- {TEST}" + test_name = f"{bug} {test_params}" + pretty_print(f'# Testing {test_name}') + t = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + t.wait() + output, _ = t.communicate() + if t.returncode == 0: + ksft.test_result_pass(test_name) + else: + ksft.test_result_fail(test_name) + output = output.decode() + log += f" {output}" + pretty_print(log) + logs += output + "\n" + +# Optionally use tappy to parse the output +# apt install python3-tappy +with open("logs.txt", "w") as f: + f.write(logs) + +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_ret_alignment.py b/tools/testing/selftests/x86/bugs/its_ret_alignment.py new file mode 100755 index 000000000000..f40078d9f6ff --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_ret_alignment.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) mitigation. +# +# Tests if the RETs are correctly patched by evaluating the +# vmlinux .return_sites in /proc/kcore. +# +# Install dependencies +# add-apt-repository ppa:michel-slm/kernel-utils +# apt update +# apt install -y python3-drgn python3-pyelftools python3-capstone +# +# Run on target machine +# mkdir -p /usr/lib/debug/lib/modules/$(uname -r) +# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux +# +# Usage: ./its_ret_alignment.py + +import os, sys, argparse +from pathlib import Path + +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) +if not mitigation or "Aligned branch/return thunks" not in mitigation: + ksft.test_result_skip("Skipping its_ret_alignment.py: Aligned branch/return thunks not enabled") + ksft.finished() + +c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_ret_alignment.py") + +from elftools.elf.elffile import ELFFile +from drgn.helpers.common.memory import identify_address + +cap = c.init_capstone() + +if len(os.sys.argv) > 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at user-supplied path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +rethunks_start_vmlinux, rethunks_sec_offset, size = c.get_section_info(vmlinux, '.return_sites') +ksft.print_msg(f"vmlinux: Section .return_sites (0x{rethunks_start_vmlinux:x}) found at 0x{rethunks_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, rethunks_sec_offset, size) +total_rethunk_tests = len(sites_offset) +ksft.print_msg(f"Found {total_rethunk_tests} rethunk sites") + +prog = c.get_runtime_kernel() +rethunks_start_kcore = prog.symbol('__return_sites').address +ksft.print_msg(f'kcore: __rethunk_sites: 0x{rethunks_start_kcore:x}') + +its_return_thunk = prog.symbol('its_return_thunk').address +ksft.print_msg(f'kcore: its_return_thunk: 0x{its_return_thunk:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 +tests_skipped = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(len(sites_offset)): + site = rethunks_start_kcore + sites_offset[i] + vmlinux_site = rethunks_start_vmlinux + sites_offset[i] + try: + passed = unknown = failed = skipped = False + + symbol = identify_address(prog, site) + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + + insn_end = site + kcore_insn.size - 1 + + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {symbol} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if "jmp" in kcore_insn.mnemonic: + passed = True + elif "ret" not in kcore_insn.mnemonic: + skipped = True + + if passed: + ksft.print_msg(f"\tPASSED: Found {kcore_insn.mnemonic} {kcore_insn.op_str}") + tests_passed += 1 + elif skipped: + ksft.print_msg(f"\tSKIPPED: Found '{kcore_insn.mnemonic}'") + tests_skipped += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: An unknown instruction: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.mnemonic} {kcore_insn.op_str}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASSED: \t{tests_passed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"FAILED: \t{tests_failed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"SKIPPED: \t{tests_skipped} \t/ {total_rethunk_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_rethunk_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed.") +else: + ksft.test_result_fail(f"{tests_failed} failed sites need ITS return thunks.") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_sysfs.py b/tools/testing/selftests/x86/bugs/its_sysfs.py new file mode 100755 index 000000000000..7bca81f2f606 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_sysfs.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for Indirect Target Selection(ITS) mitigation sysfs status. + +import sys, os, re +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft + +from common import * + +bug = "indirect_target_selection" +mitigation = get_sysfs(bug) + +ITS_MITIGATION_ALIGNED_THUNKS = "Mitigation: Aligned branch/return thunks" +ITS_MITIGATION_RETPOLINE_STUFF = "Mitigation: Retpolines, Stuffing RSB" +ITS_MITIGATION_VMEXIT_ONLY = "Mitigation: Vulnerable, KVM: Not affected" +ITS_MITIGATION_VULNERABLE = "Vulnerable" + +def check_mitigation(): + if mitigation == ITS_MITIGATION_ALIGNED_THUNKS: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF) + return + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_pass(bug, ITS_MITIGATION_ALIGNED_THUNKS) + return + + if mitigation == ITS_MITIGATION_RETPOLINE_STUFF: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + if sysfs_has('retbleed', 'Stuffing'): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + bug_check_fail(bug, ITS_MITIGATION_RETPOLINE_STUFF, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VMEXIT_ONLY: + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_pass(bug, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_fail(bug, ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VULNERABLE: + if sysfs_has("spectre_v2", "Vulnerable"): + bug_check_pass(bug, ITS_MITIGATION_VULNERABLE) + else: + bug_check_fail(bug, "Mitigation", ITS_MITIGATION_VULNERABLE) + + bug_status_unknown(bug, mitigation) + return + +ksft.print_header() +ksft.set_plan(1) +ksft.print_msg(f'{bug}: {mitigation} ...') + +if not basic_checks_sufficient(bug, mitigation): + check_mitigation() + +ksft.finished() diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 18d736640ece..0873b0e5f48b 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -682,7 +682,7 @@ int do_uring(unsigned long lam) return 1; if (fstat(file_fd, &st) < 0) - return 1; + goto cleanup; off_t file_sz = st.st_size; @@ -690,7 +690,7 @@ int do_uring(unsigned long lam) fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks); if (!fi) - return 1; + goto cleanup; fi->file_sz = file_sz; fi->file_fd = file_fd; @@ -698,7 +698,7 @@ int do_uring(unsigned long lam) ring = malloc(sizeof(*ring)); if (!ring) { free(fi); - return 1; + goto cleanup; } memset(ring, 0, sizeof(struct io_ring)); @@ -729,6 +729,8 @@ out: } free(fi); +cleanup: + close(file_fd); return ret; } @@ -1189,6 +1191,7 @@ void *allocate_dsa_pasid(void) wq = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); + close(fd); if (wq == MAP_FAILED) perror("mmap"); diff --git a/tools/testing/selftests/x86/xstate.c b/tools/testing/selftests/x86/xstate.c index 23c1d6c964ea..97fe4bd8bc77 100644 --- a/tools/testing/selftests/x86/xstate.c +++ b/tools/testing/selftests/x86/xstate.c @@ -31,7 +31,8 @@ (1 << XFEATURE_OPMASK) | \ (1 << XFEATURE_ZMM_Hi256) | \ (1 << XFEATURE_Hi16_ZMM) | \ - (1 << XFEATURE_XTILEDATA)) + (1 << XFEATURE_XTILEDATA) | \ + (1 << XFEATURE_APX)) static inline uint64_t xgetbv(uint32_t index) { diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h index 42af36ec852f..e91e3092b5d2 100644 --- a/tools/testing/selftests/x86/xstate.h +++ b/tools/testing/selftests/x86/xstate.h @@ -33,6 +33,7 @@ enum xfeature { XFEATURE_RSRVD_COMP_16, XFEATURE_XTILECFG, XFEATURE_XTILEDATA, + XFEATURE_APX, XFEATURE_MAX, }; @@ -59,6 +60,7 @@ static const char *xfeature_names[] = "unknown xstate feature", "AMX Tile config", "AMX Tile data", + "APX registers", "unknown xstate feature", }; diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c index 44aee49b6cee..1453d38e08bb 100644 --- a/tools/testing/vsock/timeout.c +++ b/tools/testing/vsock/timeout.c @@ -21,6 +21,7 @@ #include <stdbool.h> #include <unistd.h> #include <stdio.h> +#include <time.h> #include "timeout.h" static volatile bool timeout; @@ -28,6 +29,8 @@ static volatile bool timeout; /* SIGALRM handler function. Do not use sleep(2), alarm(2), or * setitimer(2) while using this API - they may interfere with each * other. + * + * If you need to sleep, please use timeout_sleep() provided by this API. */ void sigalrm(int signo) { @@ -58,3 +61,18 @@ void timeout_end(void) alarm(0); timeout = false; } + +/* Sleep in a timeout section. + * + * nanosleep(2) can be used with this API since POSIX.1 explicitly + * specifies that it does not interact with signals. + */ +int timeout_usleep(useconds_t usec) +{ + struct timespec ts = { + .tv_sec = usec / 1000000, + .tv_nsec = (usec % 1000000) * 1000, + }; + + return nanosleep(&ts, NULL); +} diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h index ecb7c840e65a..1c3fcad87a49 100644 --- a/tools/testing/vsock/timeout.h +++ b/tools/testing/vsock/timeout.h @@ -11,5 +11,6 @@ void sigalrm(int signo); void timeout_begin(unsigned int seconds); void timeout_check(const char *operation); void timeout_end(void); +int timeout_usleep(useconds_t usec); #endif /* TIMEOUT_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index de25892f865f..0c7e9cbcbc85 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -17,6 +17,7 @@ #include <assert.h> #include <sys/epoll.h> #include <sys/mman.h> +#include <linux/sockios.h> #include "timeout.h" #include "control.h" @@ -96,6 +97,30 @@ void vsock_wait_remote_close(int fd) close(epollfd); } +/* Wait until transport reports no data left to be sent. + * Return false if transport does not implement the unsent_bytes() callback. + */ +bool vsock_wait_sent(int fd) +{ + int ret, sock_bytes_unsent; + + timeout_begin(TIMEOUT); + do { + ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + if (ret < 0) { + if (errno == EOPNOTSUPP) + break; + + perror("ioctl(SIOCOUTQ)"); + exit(EXIT_FAILURE); + } + timeout_check("SIOCOUTQ"); + } while (sock_bytes_unsent != 0); + timeout_end(); + + return !ret; +} + /* Create socket <type>, bind to <cid, port> and return the file descriptor. */ int vsock_bind(unsigned int cid, unsigned int port, int type) { @@ -798,3 +823,16 @@ void enable_so_zerocopy_check(int fd) setsockopt_int_check(fd, SOL_SOCKET, SO_ZEROCOPY, 1, "setsockopt SO_ZEROCOPY"); } + +void enable_so_linger(int fd, int timeout) +{ + struct linger optval = { + .l_onoff = 1, + .l_linger = timeout + }; + + if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) { + perror("setsockopt(SO_LINGER)"); + exit(EXIT_FAILURE); + } +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index d1f765ce3eee..5e2db67072d5 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -54,6 +54,7 @@ int vsock_stream_listen(unsigned int cid, unsigned int port); int vsock_seqpacket_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp); void vsock_wait_remote_close(int fd); +bool vsock_wait_sent(int fd); void send_buf(int fd, const void *buf, size_t len, int flags, ssize_t expected_ret); void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret); @@ -79,4 +80,5 @@ void setsockopt_int_check(int fd, int level, int optname, int val, void setsockopt_timeval_check(int fd, int level, int optname, struct timeval val, char const *errmsg); void enable_so_zerocopy_check(int fd); +void enable_so_linger(int fd, int timeout); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index d0f6d253ac72..f669baaa0dca 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -21,7 +21,6 @@ #include <poll.h> #include <signal.h> #include <sys/ioctl.h> -#include <linux/sockios.h> #include <linux/time64.h> #include "vsock_test_zerocopy.h" @@ -1058,18 +1057,39 @@ static void sigpipe(int signo) have_sigpipe = 1; } +#define SEND_SLEEP_USEC (10 * 1000) + static void test_stream_check_sigpipe(int fd) { ssize_t res; have_sigpipe = 0; - res = send(fd, "A", 1, 0); - if (res != -1) { - fprintf(stderr, "expected send(2) failure, got %zi\n", res); - exit(EXIT_FAILURE); + /* When the other peer calls shutdown(SHUT_RD), there is a chance that + * the send() call could occur before the message carrying the close + * information arrives over the transport. In such cases, the send() + * might still succeed. To avoid this race, let's retry the send() call + * a few times, ensuring the test is more reliable. + */ + timeout_begin(TIMEOUT); + while(1) { + res = send(fd, "A", 1, 0); + if (res == -1 && errno != EINTR) + break; + + /* Sleep a little before trying again to avoid flooding the + * other peer and filling its receive buffer, causing + * false-negative. + */ + timeout_usleep(SEND_SLEEP_USEC); + timeout_check("send"); } + timeout_end(); + if (errno != EPIPE) { + fprintf(stderr, "unexpected send(2) errno %d\n", errno); + exit(EXIT_FAILURE); + } if (!have_sigpipe) { fprintf(stderr, "SIGPIPE expected\n"); exit(EXIT_FAILURE); @@ -1077,12 +1097,21 @@ static void test_stream_check_sigpipe(int fd) have_sigpipe = 0; - res = send(fd, "A", 1, MSG_NOSIGNAL); - if (res != -1) { - fprintf(stderr, "expected send(2) failure, got %zi\n", res); - exit(EXIT_FAILURE); + timeout_begin(TIMEOUT); + while(1) { + res = send(fd, "A", 1, MSG_NOSIGNAL); + if (res == -1 && errno != EINTR) + break; + + timeout_usleep(SEND_SLEEP_USEC); + timeout_check("send"); } + timeout_end(); + if (errno != EPIPE) { + fprintf(stderr, "unexpected send(2) errno %d\n", errno); + exit(EXIT_FAILURE); + } if (have_sigpipe) { fprintf(stderr, "SIGPIPE not expected\n"); exit(EXIT_FAILURE); @@ -1250,7 +1279,7 @@ static void test_unsent_bytes_server(const struct test_opts *opts, int type) static void test_unsent_bytes_client(const struct test_opts *opts, int type) { unsigned char buf[MSG_BUF_IOCTL_LEN]; - int ret, fd, sock_bytes_unsent; + int fd; fd = vsock_connect(opts->peer_cid, opts->peer_port, type); if (fd < 0) { @@ -1264,20 +1293,14 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type) send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); control_expectln("RECEIVED"); - ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); - if (ret < 0) { - if (errno == EOPNOTSUPP) { - fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); - } else { - perror("ioctl"); - exit(EXIT_FAILURE); - } - } else if (ret == 0 && sock_bytes_unsent != 0) { - fprintf(stderr, - "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n", - sock_bytes_unsent); - exit(EXIT_FAILURE); - } + /* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though + * the "RECEIVED" message means that the other side has received the + * data, there can be a delay in our kernel before updating the "unsent + * bytes" counter. vsock_wait_sent() will repeat SIOCOUTQ until it + * returns 0. + */ + if (!vsock_wait_sent(fd)) + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); close(fd); } @@ -1790,10 +1813,6 @@ static void test_stream_connect_retry_server(const struct test_opts *opts) static void test_stream_linger_client(const struct test_opts *opts) { - struct linger optval = { - .l_onoff = 1, - .l_linger = 1 - }; int fd; fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); @@ -1802,15 +1821,58 @@ static void test_stream_linger_client(const struct test_opts *opts) exit(EXIT_FAILURE); } - if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) { - perror("setsockopt(SO_LINGER)"); + enable_so_linger(fd, 1); + close(fd); +} + +static void test_stream_linger_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); exit(EXIT_FAILURE); } + vsock_wait_remote_close(fd); close(fd); } -static void test_stream_linger_server(const struct test_opts *opts) +/* Half of the default to not risk timing out the control channel */ +#define LINGER_TIMEOUT (TIMEOUT / 2) + +static void test_stream_nolinger_client(const struct test_opts *opts) +{ + bool waited; + time_t ns; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_linger(fd, LINGER_TIMEOUT); + send_byte(fd, 1, 0); /* Left unread to expose incorrect behaviour. */ + waited = vsock_wait_sent(fd); + + ns = current_nsec(); + close(fd); + ns = current_nsec() - ns; + + if (!waited) { + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + } else if (DIV_ROUND_UP(ns, NSEC_PER_SEC) >= LINGER_TIMEOUT) { + fprintf(stderr, "Unexpected lingering\n"); + exit(EXIT_FAILURE); + } + + control_writeln("DONE"); +} + +static void test_stream_nolinger_server(const struct test_opts *opts) { int fd; @@ -1820,7 +1882,7 @@ static void test_stream_linger_server(const struct test_opts *opts) exit(EXIT_FAILURE); } - vsock_wait_remote_close(fd); + control_expectln("DONE"); close(fd); } @@ -1984,6 +2046,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_linger_client, .run_server = test_stream_linger_server, }, + { + .name = "SOCK_STREAM SO_LINGER close() on unread", + .run_client = test_stream_nolinger_client, + .run_server = test_stream_nolinger_server, + }, {}, }; |