diff options
Diffstat (limited to 'tools/testing/selftests')
521 files changed, 27377 insertions, 5100 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 030da61dbff3..c46ebdb9b8ef 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -36,6 +36,7 @@ TARGETS += filesystems/fat TARGETS += filesystems/overlayfs TARGETS += filesystems/statmount TARGETS += filesystems/mount-notify +TARGETS += filesystems/fuse TARGETS += firmware TARGETS += fpu TARGETS += ftrace @@ -124,6 +125,7 @@ TARGETS += uevent TARGETS += user_events TARGETS += vDSO TARGETS += mm +TARGETS += vfio TARGETS += x86 TARGETS += x86/bugs TARGETS += zram @@ -314,7 +316,7 @@ gen_tar: install @echo "Created ${TAR_PATH}" clean: - @for TARGET in $(TARGETS); do \ + @for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index 2a4b2662035e..e113dafa5c24 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -53,10 +53,10 @@ struct ctl_data { struct ctl_data *next; }; -int num_cards = 0; -int num_controls = 0; -struct card_data *card_list = NULL; -struct ctl_data *ctl_list = NULL; +int num_cards; +int num_controls; +struct card_data *card_list; +struct ctl_data *ctl_list; static void find_controls(void) { diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c index dbd7c222ce93..ce92548670c8 100644 --- a/tools/testing/selftests/alsa/pcm-test.c +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -30,7 +30,7 @@ struct card_data { struct card_data *next; }; -struct card_data *card_list = NULL; +struct card_data *card_list; struct pcm_data { snd_pcm_t *handle; @@ -43,10 +43,10 @@ struct pcm_data { struct pcm_data *next; }; -struct pcm_data *pcm_list = NULL; +struct pcm_data *pcm_list; -int num_missing = 0; -struct pcm_data *pcm_missing = NULL; +int num_missing; +struct pcm_data *pcm_missing; snd_config_t *default_pcm_config; diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 002ec38a8bbb..3b96d090c5eb 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -17,6 +17,8 @@ #include <asm/sigcontext.h> #include <asm/unistd.h> +#include <linux/auxvec.h> + #include "../../kselftest.h" #define TESTS_PER_HWCAP 3 @@ -55,7 +57,6 @@ static void cmpbr_sigill(void) /* Not implemented, too complicated and unreliable anyway */ } - static void crc32_sigill(void) { /* CRC32W W0, W0, W1 */ @@ -169,6 +170,18 @@ static void lse128_sigill(void) : "cc", "memory"); } +static void lsfe_sigill(void) +{ + float __attribute__ ((aligned (16))) mem; + register float *memp asm ("x0") = &mem; + + /* STFADD H0, [X0] */ + asm volatile(".inst 0x7c20801f" + : "+r" (memp) + : + : "memory"); +} + static void lut_sigill(void) { /* LUTI2 V0.16B, { V0.16B }, V[0] */ @@ -763,6 +776,13 @@ static const struct hwcap_data { .sigill_fn = lse128_sigill, }, { + .name = "LSFE", + .at_hwcap = AT_HWCAP3, + .hwcap_bit = HWCAP3_LSFE, + .cpuinfo = "lsfe", + .sigill_fn = lsfe_sigill, + }, + { .name = "LUT", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_LUT, diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index f58a9f89b952..1703543fb7c7 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -182,16 +182,16 @@ static int write_clone_read(void) } for (;;) { - waiting = wait4(ret, &status, __WCLONE, NULL); + waiting = waitpid(ret, &status, __WCLONE); if (waiting < 0) { if (errno == EINTR) continue; - ksft_print_msg("wait4() failed: %d\n", errno); + ksft_print_msg("waitpid() failed: %d\n", errno); return 0; } if (waiting != ret) { - ksft_print_msg("wait4() returned wrong PID %d\n", + ksft_print_msg("waitpid() returned wrong PID %d\n", waiting); return 0; } @@ -227,10 +227,10 @@ int main(int argc, char **argv) ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); if (ret >= 0) { ksft_test_result(default_value(), "default_value\n"); - ksft_test_result(write_read, "write_read\n"); - ksft_test_result(write_sleep_read, "write_sleep_read\n"); - ksft_test_result(write_fork_read, "write_fork_read\n"); - ksft_test_result(write_clone_read, "write_clone_read\n"); + ksft_test_result(write_read(), "write_read\n"); + ksft_test_result(write_sleep_read(), "write_sleep_read\n"); + ksft_test_result(write_fork_read(), "write_fork_read\n"); + ksft_test_result(write_clone_read(), "write_clone_read\n"); } else { ksft_print_msg("SME support not present\n"); diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h index 04e7b72880ef..141cdcbf0b8f 100644 --- a/tools/testing/selftests/arm64/bti/assembler.h +++ b/tools/testing/selftests/arm64/bti/assembler.h @@ -14,7 +14,6 @@ #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) #define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) - .macro startfn name:req .globl \name \name: diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index cdd7a45c045d..a85c19e9524e 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1568,7 +1568,6 @@ static void run_sve_tests(void) &test_config); } } - } static void run_sme_tests(void) diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index 74e23208b94c..9349aa630c84 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -105,8 +105,8 @@ static void child_start(struct child_data *child, const char *program) /* * Read from the startup pipe, there should be no data - * and we should block until it is closed. We just - * carry on on error since this isn't super critical. + * and we should block until it is closed. We just + * carry-on on error since this isn't super critical. */ ret = read(3, &i, sizeof(i)); if (ret < 0) @@ -549,7 +549,7 @@ int main(int argc, char **argv) evs = calloc(tests, sizeof(*evs)); if (!evs) - ksft_exit_fail_msg("Failed to allocated %d epoll events\n", + ksft_exit_fail_msg("Failed to allocate %d epoll events\n", tests); for (i = 0; i < cpus; i++) { diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c index e3cec3723ffa..0c40007d1282 100644 --- a/tools/testing/selftests/arm64/fp/kernel-test.c +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -188,13 +188,13 @@ static bool create_socket(void) ref = malloc(digest_len); if (!ref) { - printf("Failed to allocated %d byte reference\n", digest_len); + printf("Failed to allocate %d byte reference\n", digest_len); return false; } digest = malloc(digest_len); if (!digest) { - printf("Failed to allocated %d byte digest\n", digest_len); + printf("Failed to allocate %d byte digest\n", digest_len); return false; } diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index b22303778fb0..e0fc3a001e28 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -66,7 +66,7 @@ static const struct vec_type vec_types[] = { }; #define VL_TESTS (((TEST_VQ_MAX - SVE_VQ_MIN) + 1) * 4) -#define FLAG_TESTS 2 +#define FLAG_TESTS 4 #define FPSIMD_TESTS 2 #define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types)) @@ -95,19 +95,27 @@ static int do_child(void) static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) { struct iovec iov; + int ret; iov.iov_base = fpsimd; iov.iov_len = sizeof(*fpsimd); - return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov); + ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov); + if (ret == -1) + ksft_perror("ptrace(PTRACE_GETREGSET)"); + return ret; } static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) { struct iovec iov; + int ret; iov.iov_base = fpsimd; iov.iov_len = sizeof(*fpsimd); - return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov); + ret = ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov); + if (ret == -1) + ksft_perror("ptrace(PTRACE_SETREGSET)"); + return ret; } static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, @@ -115,8 +123,9 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, { struct user_sve_header *sve; void *p; - size_t sz = sizeof *sve; + size_t sz = sizeof(*sve); struct iovec iov; + int ret; while (1) { if (*size < sz) { @@ -132,8 +141,11 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, iov.iov_base = *buf; iov.iov_len = sz; - if (ptrace(PTRACE_GETREGSET, pid, type->regset, &iov)) + ret = ptrace(PTRACE_GETREGSET, pid, type->regset, &iov); + if (ret) { + ksft_perror("ptrace(PTRACE_GETREGSET)"); goto error; + } sve = *buf; if (sve->size <= sz) @@ -152,10 +164,46 @@ static int set_sve(pid_t pid, const struct vec_type *type, const struct user_sve_header *sve) { struct iovec iov; + int ret; iov.iov_base = (void *)sve; iov.iov_len = sve->size; - return ptrace(PTRACE_SETREGSET, pid, type->regset, &iov); + ret = ptrace(PTRACE_SETREGSET, pid, type->regset, &iov); + if (ret == -1) + ksft_perror("ptrace(PTRACE_SETREGSET)"); + return ret; +} + +/* A read operation fails */ +static void read_fails(pid_t child, const struct vec_type *type) +{ + struct user_sve_header *new_sve = NULL; + size_t new_sve_size = 0; + void *ret; + + ret = get_sve(child, type, (void **)&new_sve, &new_sve_size); + + ksft_test_result(ret == NULL, "%s unsupported read fails\n", + type->name); + + free(new_sve); +} + +/* A write operation fails */ +static void write_fails(pid_t child, const struct vec_type *type) +{ + struct user_sve_header sve; + int ret; + + /* Just the header, no data */ + memset(&sve, 0, sizeof(sve)); + sve.size = sizeof(sve); + sve.flags = SVE_PT_REGS_SVE; + sve.vl = SVE_VL_MIN; + ret = set_sve(child, type, &sve); + + ksft_test_result(ret != 0, "%s unsupported write fails\n", + type->name); } /* Validate setting and getting the inherit flag */ @@ -270,6 +318,25 @@ static void check_u32(unsigned int vl, const char *reg, } } +/* Set out of range VLs */ +static void ptrace_set_vl_ranges(pid_t child, const struct vec_type *type) +{ + struct user_sve_header sve; + int ret; + + memset(&sve, 0, sizeof(sve)); + sve.flags = SVE_PT_REGS_SVE; + sve.size = sizeof(sve); + + ret = set_sve(child, type, &sve); + ksft_test_result(ret != 0, "%s Set invalid VL 0\n", type->name); + + sve.vl = SVE_VL_MAX + SVE_VQ_BYTES; + ret = set_sve(child, type, &sve); + ksft_test_result(ret != 0, "%s Set invalid VL %d\n", type->name, + SVE_VL_MAX + SVE_VQ_BYTES); +} + /* Access the FPSIMD registers via the SVE regset */ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) { @@ -683,6 +750,20 @@ static int do_parent(pid_t child) } for (i = 0; i < ARRAY_SIZE(vec_types); i++) { + /* + * If the vector type isn't supported reads and writes + * should fail. + */ + if (!(getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap)) { + read_fails(child, &vec_types[i]); + write_fails(child, &vec_types[i]); + } else { + ksft_test_result_skip("%s unsupported read fails\n", + vec_types[i].name); + ksft_test_result_skip("%s unsupported write fails\n", + vec_types[i].name); + } + /* FPSIMD via SVE regset */ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { ptrace_sve_fpsimd(child, &vec_types[i]); @@ -703,6 +784,17 @@ static int do_parent(pid_t child) vec_types[i].name); } + /* Setting out of bounds VLs should fail */ + if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { + ptrace_set_vl_ranges(child, &vec_types[i]); + } else { + ksft_test_result_skip("%s Set invalid VL 0\n", + vec_types[i].name); + ksft_test_result_skip("%s Set invalid VL %d\n", + vec_types[i].name, + SVE_VL_MAX + SVE_VQ_BYTES); + } + /* Step through every possible VQ */ for (vq = SVE_VQ_MIN; vq <= TEST_VQ_MAX; vq++) { vl = sve_vl_from_vq(vq); diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index ea9c7d47790f..2d75d342eeb9 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -690,7 +690,6 @@ static inline void smstop(void) asm volatile("msr S0_3_C4_C6_3, xzr"); } - /* * Verify we can change the SVE vector length while SME is active and * continue to use SME afterwards. diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c index 584b8d59b7ea..a7f34040fbf1 100644 --- a/tools/testing/selftests/arm64/fp/zt-ptrace.c +++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c @@ -108,7 +108,6 @@ static int get_zt(pid_t pid, char zt[ZT_SIG_REG_BYTES]) return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov); } - static int set_zt(pid_t pid, const char zt[ZT_SIG_REG_BYTES]) { struct iovec iov; diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile index d2f3497a9103..1fbbf0ca1f02 100644 --- a/tools/testing/selftests/arm64/gcs/Makefile +++ b/tools/testing/selftests/arm64/gcs/Makefile @@ -14,11 +14,11 @@ LDLIBS+=-lpthread include ../../lib.mk $(OUTPUT)/basic-gcs: basic-gcs.c - $(CC) -g -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ - -static -include ../../../../include/nolibc/nolibc.h \ + $(CC) $(CFLAGS) -fno-asynchronous-unwind-tables -fno-ident -s -nostdlib -nostdinc \ + -static -I../../../../include/nolibc -include ../../../../include/nolibc/nolibc.h \ -I../../../../../usr/include \ -std=gnu99 -I../.. -g \ - -ffreestanding -Wall $^ -o $@ -lgcc + -ffreestanding $^ -o $@ -lgcc $(OUTPUT)/gcs-stress-thread: gcs-stress-thread.S $(CC) -nostdlib $^ -o $@ diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c index 54f9c888249d..250977abc398 100644 --- a/tools/testing/selftests/arm64/gcs/basic-gcs.c +++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c @@ -10,6 +10,7 @@ #include <sys/mman.h> #include <asm/mman.h> +#include <asm/hwcap.h> #include <linux/sched.h> #include "kselftest.h" @@ -386,14 +387,13 @@ int main(void) ksft_print_header(); - /* - * We don't have getauxval() with nolibc so treat a failure to - * read GCS state as a lack of support and skip. - */ + if (!(getauxval(AT_HWCAP) & HWCAP_GCS)) + ksft_exit_skip("SKIP GCS not supported\n"); + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &gcs_mode, 0, 0, 0); if (ret != 0) - ksft_exit_skip("Failed to read GCS state: %d\n", ret); + ksft_exit_fail_msg("Failed to read GCS state: %d\n", ret); if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) { gcs_mode = PR_SHADOW_STACK_ENABLE; @@ -410,7 +410,7 @@ int main(void) } /* One last test: disable GCS, we can do this one time */ - my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0); + ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0); if (ret != 0) ksft_print_msg("Failed to disable GCS: %d\n", ret); diff --git a/tools/testing/selftests/arm64/gcs/gcs-locking.c b/tools/testing/selftests/arm64/gcs/gcs-locking.c index 989f75a491b7..1e6abb136ffd 100644 --- a/tools/testing/selftests/arm64/gcs/gcs-locking.c +++ b/tools/testing/selftests/arm64/gcs/gcs-locking.c @@ -165,7 +165,6 @@ TEST_F(valid_modes, lock_enable_disable_others) ASSERT_EQ(ret, 0); ASSERT_EQ(mode, PR_SHADOW_STACK_ALL_MODES); - ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, variant->mode); ASSERT_EQ(ret, 0); diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c index bbc7f4950c13..cf316d78ea97 100644 --- a/tools/testing/selftests/arm64/gcs/gcs-stress.c +++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c @@ -433,7 +433,7 @@ int main(int argc, char **argv) evs = calloc(tests, sizeof(*evs)); if (!evs) - ksft_exit_fail_msg("Failed to allocated %d epoll events\n", + ksft_exit_fail_msg("Failed to allocate %d epoll events\n", tests); for (i = 0; i < gcs_threads; i++) diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c index 4435600ca400..e597861b26d6 100644 --- a/tools/testing/selftests/arm64/pauth/exec_target.c +++ b/tools/testing/selftests/arm64/pauth/exec_target.c @@ -13,7 +13,12 @@ int main(void) unsigned long hwcaps; size_t val; - fread(&val, sizeof(size_t), 1, stdin); + size_t size = fread(&val, sizeof(size_t), 1, stdin); + + if (size != 1) { + fprintf(stderr, "Could not read input from stdin\n"); + return EXIT_FAILURE; + } /* don't try to execute illegal (unimplemented) instructions) caller * should have checked this and keep worker simple diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 3d8378972d26..be1ee7ba7ce0 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -44,3 +44,4 @@ xdp_redirect_multi xdp_synproxy xdp_hw_metadata xdp_features +verification_cert.h diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 3ebd77206f98..a17baf8c6fd7 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -2,4 +2,3 @@ # Alphabetical order get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) -verifier_iterating_callbacks diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4863106034df..f00587d4ede6 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -120,7 +120,7 @@ TEST_PROGS_EXTENDED := \ test_bpftool.py TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ - bpf_test_modorder_y.ko + bpf_test_modorder_y.ko bpf_test_rqspinlock.ko TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS)) # Compile but not part of 'make run_tests' @@ -137,7 +137,7 @@ TEST_GEN_PROGS_EXTENDED = \ xdping \ xskxceiver -TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi +TEST_GEN_FILES += $(TEST_KMODS) liburandom_read.so urandom_read sign-file uprobe_multi ifneq ($(V),1) submake_extras := feature_display=0 @@ -398,7 +398,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers endif -# vmlinux.h is first dumped to a temprorary file and then compared to +# vmlinux.h is first dumped to a temporary file and then compared to # the previous version. This helps to avoid unnecessary re-builds of # $(TRUNNER_BPF_OBJS) $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR) @@ -496,15 +496,16 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ test_subskeleton.skel.h test_subskeleton_lib.skel.h \ test_usdt.skel.h -LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ - trace_printk.c trace_vprintk.c map_ptr_kern.c \ +LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c +LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c + # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ kfunc_call_test_subprog.c -SKEL_BLACKLIST += $$(LSKELS) +SKEL_BLACKLIST += $$(LSKELS) $$(LSKELS_SIGNED) test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o linked_funcs.skel.h-deps := linked_funcs1.bpf.o linked_funcs2.bpf.o @@ -535,6 +536,7 @@ HEADERS_FOR_BPF_OBJS := $(wildcard $(BPFDIR)/*.bpf.h) \ # $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc) define DEFINE_TEST_RUNNER +LSKEL_SIGN := -S -k $(PRIVATE_KEY) -i $(VERIFICATION_CERT) TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2 TRUNNER_BINARY := $1$(if $2,-)$2 TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \ @@ -550,6 +552,7 @@ TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ $$(TRUNNER_BPF_SRCS))) TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA)) TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS)) +TRUNNER_BPF_LSKELS_SIGNED := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS_SIGNED)) TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS) # Evaluate rules now with extra TRUNNER_XXX variables above already defined @@ -604,6 +607,15 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) +$(TRUNNER_BPF_LSKELS_SIGNED): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) + $$(call msg,GEN-SKEL,$(TRUNNER_BINARY) (signed),$$@) + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$< + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o) + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o) + $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) + $(Q)$$(BPFTOOL) gen skeleton $(LSKEL_SIGN) $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@ + $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) + $(LINKED_BPF_OBJS): %: $(TRUNNER_OUTPUT)/% # .SECONDEXPANSION here allows to correctly expand %-deps variables as prerequisites @@ -653,6 +665,7 @@ $(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \ $(TRUNNER_EXTRA_HDRS) \ $(TRUNNER_BPF_SKELS) \ $(TRUNNER_BPF_LSKELS) \ + $(TRUNNER_BPF_LSKELS_SIGNED) \ $(TRUNNER_BPF_SKELS_LINKED) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) @@ -667,6 +680,7 @@ $(foreach N,$(patsubst $(TRUNNER_OUTPUT)/%.o,%,$(TRUNNER_EXTRA_OBJS)), \ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ %.c \ $(TRUNNER_EXTRA_HDRS) \ + $(VERIFY_SIG_HDR) \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) @@ -697,6 +711,18 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ endef +VERIFY_SIG_SETUP := $(CURDIR)/verify_sig_setup.sh +VERIFY_SIG_HDR := verification_cert.h +VERIFICATION_CERT := $(BUILD_DIR)/signing_key.der +PRIVATE_KEY := $(BUILD_DIR)/signing_key.pem + +$(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP) + $(Q)mkdir -p $(BUILD_DIR) + $(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR) + +$(VERIFY_SIG_HDR): $(VERIFICATION_CERT) + $(Q)xxd -i -n test_progs_verification_cert $< > $@ + # Define test_progs test runner. TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs @@ -716,6 +742,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ disasm.c \ disasm_helpers.c \ json_writer.c \ + $(VERIFY_SIG_HDR) \ flow_dissector_load.h \ ip_check_defrag_frags.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ @@ -725,7 +752,7 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(OUTPUT)/uprobe_multi \ $(TEST_KMOD_TARGETS) \ ima_setup.sh \ - verify_sig_setup.sh \ + $(VERIFY_SIG_SETUP) \ $(wildcard progs/btf_dump_test_case_*.c) \ $(wildcard progs/*.bpf.o) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE @@ -816,6 +843,7 @@ $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h $(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h $(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h $(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h +$(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -837,6 +865,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_htab_mem.o \ $(OUTPUT)/bench_bpf_crypto.o \ $(OUTPUT)/bench_sockmap.o \ + $(OUTPUT)/bench_lpm_trie_map.o \ # $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index ddd73d06a1eb..bd29bb2e6cb5 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -284,6 +284,7 @@ extern struct argp bench_htab_mem_argp; extern struct argp bench_trigger_batch_argp; extern struct argp bench_crypto_argp; extern struct argp bench_sockmap_argp; +extern struct argp bench_lpm_trie_map_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, @@ -299,6 +300,7 @@ static const struct argp_child bench_parsers[] = { { &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 }, { &bench_crypto_argp, 0, "bpf crypto benchmark", 0 }, { &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 }, + { &bench_lpm_trie_map_argp, 0, "LPM trie map benchmark", 0 }, {}, }; @@ -499,7 +501,7 @@ extern const struct bench bench_rename_rawtp; extern const struct bench bench_rename_fentry; extern const struct bench bench_rename_fexit; -/* pure counting benchmarks to establish theoretical lmits */ +/* pure counting benchmarks to establish theoretical limits */ extern const struct bench bench_trig_usermode_count; extern const struct bench bench_trig_syscall_count; extern const struct bench bench_trig_kernel_count; @@ -510,6 +512,8 @@ extern const struct bench bench_trig_kretprobe; extern const struct bench bench_trig_kprobe_multi; extern const struct bench bench_trig_kretprobe_multi; extern const struct bench bench_trig_fentry; +extern const struct bench bench_trig_kprobe_multi_all; +extern const struct bench bench_trig_kretprobe_multi_all; extern const struct bench bench_trig_fexit; extern const struct bench bench_trig_fmodret; extern const struct bench bench_trig_tp; @@ -558,6 +562,13 @@ extern const struct bench bench_htab_mem; extern const struct bench bench_crypto_encrypt; extern const struct bench bench_crypto_decrypt; extern const struct bench bench_sockmap; +extern const struct bench bench_lpm_trie_noop; +extern const struct bench bench_lpm_trie_baseline; +extern const struct bench bench_lpm_trie_lookup; +extern const struct bench bench_lpm_trie_insert; +extern const struct bench bench_lpm_trie_update; +extern const struct bench bench_lpm_trie_delete; +extern const struct bench bench_lpm_trie_free; static const struct bench *benchs[] = { &bench_count_global, @@ -578,6 +589,8 @@ static const struct bench *benchs[] = { &bench_trig_kprobe_multi, &bench_trig_kretprobe_multi, &bench_trig_fentry, + &bench_trig_kprobe_multi_all, + &bench_trig_kretprobe_multi_all, &bench_trig_fexit, &bench_trig_fmodret, &bench_trig_tp, @@ -625,6 +638,13 @@ static const struct bench *benchs[] = { &bench_crypto_encrypt, &bench_crypto_decrypt, &bench_sockmap, + &bench_lpm_trie_noop, + &bench_lpm_trie_baseline, + &bench_lpm_trie_lookup, + &bench_lpm_trie_insert, + &bench_lpm_trie_update, + &bench_lpm_trie_delete, + &bench_lpm_trie_free, }; static void find_benchmark(void) diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index 005c401b3e22..bea323820ffb 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -46,6 +46,7 @@ struct bench_res { unsigned long gp_ns; unsigned long gp_ct; unsigned int stime; + unsigned long duration_ns; }; struct bench { diff --git a/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c new file mode 100644 index 000000000000..246f6cb3387d --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c @@ -0,0 +1,555 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Cloudflare */ + +/* + * All of these benchmarks operate on tries with keys in the range + * [0, args.nr_entries), i.e. there are no gaps or partially filled + * branches of the trie for any key < args.nr_entries. + * + * This gives an idea of worst-case behaviour. + */ + +#include <argp.h> +#include <linux/time64.h> +#include <linux/if_ether.h> +#include "lpm_trie_bench.skel.h" +#include "lpm_trie_map.skel.h" +#include "bench.h" +#include "testing_helpers.h" +#include "progs/lpm_trie.h" + +static struct ctx { + struct lpm_trie_bench *bench; +} ctx; + +static struct { + __u32 nr_entries; + __u32 prefixlen; + bool random; +} args = { + .nr_entries = 0, + .prefixlen = 32, + .random = false, +}; + +enum { + ARG_NR_ENTRIES = 9000, + ARG_PREFIX_LEN, + ARG_RANDOM, +}; + +static const struct argp_option opts[] = { + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "Number of unique entries in the LPM trie" }, + { "prefix_len", ARG_PREFIX_LEN, "PREFIX_LEN", 0, + "Number of prefix bits to use in the LPM trie" }, + { "random", ARG_RANDOM, NULL, 0, "Access random keys during op" }, + {}, +}; + +static error_t lpm_parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "Invalid nr_entries count."); + argp_usage(state); + } + args.nr_entries = ret; + break; + case ARG_PREFIX_LEN: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "Invalid prefix_len value."); + argp_usage(state); + } + args.prefixlen = ret; + break; + case ARG_RANDOM: + args.random = true; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +const struct argp bench_lpm_trie_map_argp = { + .options = opts, + .parser = lpm_parse_arg, +}; + +static void validate_common(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer\n"); + exit(1); + } + + if (args.nr_entries == 0) { + fprintf(stderr, "Missing --nr_entries parameter\n"); + exit(1); + } + + if ((1UL << args.prefixlen) < args.nr_entries) { + fprintf(stderr, "prefix_len value too small for nr_entries\n"); + exit(1); + } +} + +static void lpm_insert_validate(void) +{ + validate_common(); + + if (env.producer_cnt != 1) { + fprintf(stderr, "lpm-trie-insert requires a single producer\n"); + exit(1); + } + + if (args.random) { + fprintf(stderr, "lpm-trie-insert does not support --random\n"); + exit(1); + } +} + +static void lpm_delete_validate(void) +{ + validate_common(); + + if (env.producer_cnt != 1) { + fprintf(stderr, "lpm-trie-delete requires a single producer\n"); + exit(1); + } + + if (args.random) { + fprintf(stderr, "lpm-trie-delete does not support --random\n"); + exit(1); + } +} + +static void lpm_free_validate(void) +{ + validate_common(); + + if (env.producer_cnt != 1) { + fprintf(stderr, "lpm-trie-free requires a single producer\n"); + exit(1); + } + + if (args.random) { + fprintf(stderr, "lpm-trie-free does not support --random\n"); + exit(1); + } +} + +static struct trie_key *keys; +static __u32 *vals; + +static void fill_map(int map_fd) +{ + int err; + + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + err = bpf_map_update_batch(map_fd, keys, vals, &args.nr_entries, &opts); + if (err) { + fprintf(stderr, "failed to batch update keys to map: %d\n", + -err); + exit(1); + } +} + +static void empty_map(int map_fd) +{ + int err; + + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + err = bpf_map_delete_batch(map_fd, keys, &args.nr_entries, &opts); + if (err) { + fprintf(stderr, "failed to batch delete keys for map: %d\n", + -err); + exit(1); + } +} + +static void attach_prog(void) +{ + int i; + + ctx.bench = lpm_trie_bench__open_and_load(); + if (!ctx.bench) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + ctx.bench->bss->nr_entries = args.nr_entries; + ctx.bench->bss->prefixlen = args.prefixlen; + ctx.bench->bss->random = args.random; + + if (lpm_trie_bench__attach(ctx.bench)) { + fprintf(stderr, "failed to attach skeleton\n"); + exit(1); + } + + keys = calloc(args.nr_entries, sizeof(*keys)); + vals = calloc(args.nr_entries, sizeof(*vals)); + + for (i = 0; i < args.nr_entries; i++) { + struct trie_key *k = &keys[i]; + __u32 *v = &vals[i]; + + k->prefixlen = args.prefixlen; + k->data = i; + *v = 1; + } +} + +static void attach_prog_and_fill_map(void) +{ + int fd; + + attach_prog(); + + fd = bpf_map__fd(ctx.bench->maps.trie_map); + fill_map(fd); +} + +static void lpm_noop_setup(void) +{ + attach_prog(); + ctx.bench->bss->op = LPM_OP_NOOP; +} + +static void lpm_baseline_setup(void) +{ + attach_prog(); + ctx.bench->bss->op = LPM_OP_BASELINE; +} + +static void lpm_lookup_setup(void) +{ + attach_prog_and_fill_map(); + ctx.bench->bss->op = LPM_OP_LOOKUP; +} + +static void lpm_insert_setup(void) +{ + attach_prog(); + ctx.bench->bss->op = LPM_OP_INSERT; +} + +static void lpm_update_setup(void) +{ + attach_prog_and_fill_map(); + ctx.bench->bss->op = LPM_OP_UPDATE; +} + +static void lpm_delete_setup(void) +{ + attach_prog_and_fill_map(); + ctx.bench->bss->op = LPM_OP_DELETE; +} + +static void lpm_free_setup(void) +{ + attach_prog(); + ctx.bench->bss->op = LPM_OP_FREE; +} + +static void lpm_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.bench->bss->hits, 0); + res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0); +} + +static void bench_reinit_map(void) +{ + int fd = bpf_map__fd(ctx.bench->maps.trie_map); + + switch (ctx.bench->bss->op) { + case LPM_OP_INSERT: + /* trie_map needs to be emptied */ + empty_map(fd); + break; + case LPM_OP_DELETE: + /* trie_map needs to be refilled */ + fill_map(fd); + break; + default: + fprintf(stderr, "Unexpected REINIT return code for op %d\n", + ctx.bench->bss->op); + exit(1); + } +} + +/* For NOOP, BASELINE, LOOKUP, INSERT, UPDATE, and DELETE */ +static void *lpm_producer(void *unused __always_unused) +{ + int err; + char in[ETH_HLEN]; /* unused */ + + LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = in, + .data_size_in = sizeof(in), .repeat = 1, ); + + while (true) { + int fd = bpf_program__fd(ctx.bench->progs.run_bench); + err = bpf_prog_test_run_opts(fd, &opts); + if (err) { + fprintf(stderr, "failed to run BPF prog: %d\n", err); + exit(1); + } + + /* Check for kernel error code */ + if ((int)opts.retval < 0) { + fprintf(stderr, "BPF prog returned error: %d\n", + opts.retval); + exit(1); + } + + switch (opts.retval) { + case LPM_BENCH_SUCCESS: + break; + case LPM_BENCH_REINIT_MAP: + bench_reinit_map(); + break; + default: + fprintf(stderr, "Unexpected BPF prog return code %d for op %d\n", + opts.retval, ctx.bench->bss->op); + exit(1); + } + } + + return NULL; +} + +static void *lpm_free_producer(void *unused __always_unused) +{ + while (true) { + struct lpm_trie_map *skel; + + skel = lpm_trie_map__open_and_load(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + fill_map(bpf_map__fd(skel->maps.trie_free_map)); + lpm_trie_map__destroy(skel); + } + + return NULL; +} + +/* + * The standard bench op_report_*() functions assume measurements are + * taken over a 1-second interval but operations that modify the map + * (INSERT, DELETE, and FREE) cannot run indefinitely without + * "resetting" the map to the initial state. Depending on the size of + * the map, this likely needs to happen before the 1-second timer fires. + * + * Calculate the fraction of a second over which the op measurement was + * taken (to ignore any time spent doing the reset) and report the + * throughput results per second. + */ +static void frac_second_report_progress(int iter, struct bench_res *res, + long delta_ns, double rate_divisor, + char rate) +{ + double hits_per_sec, hits_per_prod; + + hits_per_sec = res->hits / rate_divisor / + (res->duration_ns / (double)NSEC_PER_SEC); + hits_per_prod = hits_per_sec / env.producer_cnt; + + printf("Iter %3d (%7.3lfus): ", iter, + (delta_ns - NSEC_PER_SEC) / 1000.0); + printf("hits %8.3lf%c/s (%7.3lf%c/prod)\n", hits_per_sec, rate, + hits_per_prod, rate); +} + +static void frac_second_report_final(struct bench_res res[], int res_cnt, + double lat_divisor, double rate_divisor, + char rate, const char *unit) +{ + double hits_mean = 0.0, hits_stddev = 0.0; + double latency = 0.0; + int i; + + for (i = 0; i < res_cnt; i++) { + double val = res[i].hits / rate_divisor / + (res[i].duration_ns / (double)NSEC_PER_SEC); + hits_mean += val / (0.0 + res_cnt); + latency += res[i].duration_ns / res[i].hits / (0.0 + res_cnt); + } + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) { + double val = + res[i].hits / rate_divisor / + (res[i].duration_ns / (double)NSEC_PER_SEC); + hits_stddev += (hits_mean - val) * (hits_mean - val) / + (res_cnt - 1.0); + } + + hits_stddev = sqrt(hits_stddev); + } + printf("Summary: throughput %8.3lf \u00B1 %5.3lf %c ops/s (%7.3lf%c ops/prod), ", + hits_mean, hits_stddev, rate, hits_mean / env.producer_cnt, + rate); + printf("latency %8.3lf %s/op\n", + latency / lat_divisor / env.producer_cnt, unit); +} + +static void insert_ops_report_progress(int iter, struct bench_res *res, + long delta_ns) +{ + double rate_divisor = 1000000.0; + char rate = 'M'; + + frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate); +} + +static void delete_ops_report_progress(int iter, struct bench_res *res, + long delta_ns) +{ + double rate_divisor = 1000000.0; + char rate = 'M'; + + frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate); +} + +static void free_ops_report_progress(int iter, struct bench_res *res, + long delta_ns) +{ + double rate_divisor = 1000.0; + char rate = 'K'; + + frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate); +} + +static void insert_ops_report_final(struct bench_res res[], int res_cnt) +{ + double lat_divisor = 1.0; + double rate_divisor = 1000000.0; + const char *unit = "ns"; + char rate = 'M'; + + frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate, + unit); +} + +static void delete_ops_report_final(struct bench_res res[], int res_cnt) +{ + double lat_divisor = 1.0; + double rate_divisor = 1000000.0; + const char *unit = "ns"; + char rate = 'M'; + + frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate, + unit); +} + +static void free_ops_report_final(struct bench_res res[], int res_cnt) +{ + double lat_divisor = 1000000.0; + double rate_divisor = 1000.0; + const char *unit = "ms"; + char rate = 'K'; + + frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate, + unit); +} + +/* noop bench measures harness-overhead */ +const struct bench bench_lpm_trie_noop = { + .name = "lpm-trie-noop", + .argp = &bench_lpm_trie_map_argp, + .validate = validate_common, + .setup = lpm_noop_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; + +/* baseline overhead for lookup and update */ +const struct bench bench_lpm_trie_baseline = { + .name = "lpm-trie-baseline", + .argp = &bench_lpm_trie_map_argp, + .validate = validate_common, + .setup = lpm_baseline_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; + +/* measure cost of doing a lookup on existing entries in a full trie */ +const struct bench bench_lpm_trie_lookup = { + .name = "lpm-trie-lookup", + .argp = &bench_lpm_trie_map_argp, + .validate = validate_common, + .setup = lpm_lookup_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; + +/* measure cost of inserting new entries into an empty trie */ +const struct bench bench_lpm_trie_insert = { + .name = "lpm-trie-insert", + .argp = &bench_lpm_trie_map_argp, + .validate = lpm_insert_validate, + .setup = lpm_insert_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = insert_ops_report_progress, + .report_final = insert_ops_report_final, +}; + +/* measure cost of updating existing entries in a full trie */ +const struct bench bench_lpm_trie_update = { + .name = "lpm-trie-update", + .argp = &bench_lpm_trie_map_argp, + .validate = validate_common, + .setup = lpm_update_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; + +/* measure cost of deleting existing entries from a full trie */ +const struct bench bench_lpm_trie_delete = { + .name = "lpm-trie-delete", + .argp = &bench_lpm_trie_map_argp, + .validate = lpm_delete_validate, + .setup = lpm_delete_setup, + .producer_thread = lpm_producer, + .measure = lpm_measure, + .report_progress = delete_ops_report_progress, + .report_final = delete_ops_report_final, +}; + +/* measure cost of freeing a full trie */ +const struct bench bench_lpm_trie_free = { + .name = "lpm-trie-free", + .argp = &bench_lpm_trie_map_argp, + .validate = lpm_free_validate, + .setup = lpm_free_setup, + .producer_thread = lpm_free_producer, + .measure = lpm_measure, + .report_progress = free_ops_report_progress, + .report_final = free_ops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c index 8ebf563a67a2..cfc072aa7fff 100644 --- a/tools/testing/selftests/bpf/benchs/bench_sockmap.c +++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c @@ -10,6 +10,7 @@ #include <argp.h> #include "bench.h" #include "bench_sockmap_prog.skel.h" +#include "bpf_util.h" #define FILE_SIZE (128 * 1024) #define DATA_REPEAT_SIZE 10 @@ -124,8 +125,8 @@ static void bench_sockmap_prog_destroy(void) { int i; - for (i = 0; i < sizeof(ctx.fds); i++) { - if (ctx.fds[0] > 0) + for (i = 0; i < ARRAY_SIZE(ctx.fds); i++) { + if (ctx.fds[i] > 0) close(ctx.fds[i]); } diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 82327657846e..1e2aff007c2a 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -226,6 +226,65 @@ static void trigger_fentry_setup(void) attach_bpf(ctx.skel->progs.bench_trigger_fentry); } +static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + char **syms = NULL; + size_t cnt = 0; + + /* Some recursive functions will be skipped in + * bpf_get_ksyms -> skip_entry, as they can introduce sufficient + * overhead. However, it's difficut to skip all the recursive + * functions for a debug kernel. + * + * So, don't run the kprobe-multi-all and kretprobe-multi-all on + * a debug kernel. + */ + if (bpf_get_ksyms(&syms, &cnt, true)) { + fprintf(stderr, "failed to get ksyms\n"); + exit(1); + } + + opts.syms = (const char **) syms; + opts.cnt = cnt; + opts.retprobe = kretprobe; + /* attach empty to all the kernel functions except bpf_get_numa_node_id. */ + if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) { + fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n"); + exit(1); + } +} + +static void trigger_kprobe_multi_all_setup(void) +{ + struct bpf_program *prog, *empty; + + setup_ctx(); + empty = ctx.skel->progs.bench_kprobe_multi_empty; + prog = ctx.skel->progs.bench_trigger_kprobe_multi; + bpf_program__set_autoload(empty, true); + bpf_program__set_autoload(prog, true); + load_ctx(); + + attach_ksyms_all(empty, false); + attach_bpf(prog); +} + +static void trigger_kretprobe_multi_all_setup(void) +{ + struct bpf_program *prog, *empty; + + setup_ctx(); + empty = ctx.skel->progs.bench_kretprobe_multi_empty; + prog = ctx.skel->progs.bench_trigger_kretprobe_multi; + bpf_program__set_autoload(empty, true); + bpf_program__set_autoload(prog, true); + load_ctx(); + + attach_ksyms_all(empty, true); + attach_bpf(prog); +} + static void trigger_fexit_setup(void) { setup_ctx(); @@ -512,6 +571,8 @@ BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); BENCH_TRIG_KERNEL(fentry, "fentry"); +BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all"); +BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all"); BENCH_TRIG_KERNEL(fexit, "fexit"); BENCH_TRIG_KERNEL(fmodret, "fmodret"); BENCH_TRIG_KERNEL(tp, "tp"); diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh index a690f5a68b6b..f7573708a0c3 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh @@ -6,8 +6,8 @@ def_tests=( \ usermode-count kernel-count syscall-count \ fentry fexit fmodret \ rawtp tp \ - kprobe kprobe-multi \ - kretprobe kretprobe-multi \ + kprobe kprobe-multi kprobe-multi-all \ + kretprobe kretprobe-multi kretprobe-multi-all \ ) tests=("$@") diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index da7e230f2781..2cd9165c7348 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -219,7 +219,7 @@ extern void bpf_put_file(struct file *file) __ksym; * including the NULL termination character, stored in the supplied * buffer. On error, a negative integer is returned. */ -extern int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) __ksym; +extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym; /* This macro must be used to mark the exception callback corresponding to the * main program. For example: @@ -599,4 +599,58 @@ extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym; extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str, struct bpf_dynptr *value_p) __weak __ksym; +#define PREEMPT_BITS 8 +#define SOFTIRQ_BITS 8 +#define HARDIRQ_BITS 4 +#define NMI_BITS 4 + +#define PREEMPT_SHIFT 0 +#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) +#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) +#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) + +#define __IRQ_MASK(x) ((1UL << (x))-1) + +#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) +#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) +#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) + +extern bool CONFIG_PREEMPT_RT __kconfig __weak; +#ifdef bpf_target_x86 +extern const int __preempt_count __ksym; +#endif + +struct task_struct___preempt_rt { + int softirq_disable_cnt; +} __attribute__((preserve_access_index)); + +static inline int get_preempt_count(void) +{ +#if defined(bpf_target_x86) + return *(int *) bpf_this_cpu_ptr(&__preempt_count); +#elif defined(bpf_target_arm64) + return bpf_get_current_task_btf()->thread_info.preempt.count; +#endif + return 0; +} + +/* Description + * Report whether it is in interrupt context. Only works on the following archs: + * * x86 + * * arm64 + */ +static inline int bpf_in_interrupt(void) +{ + struct task_struct___preempt_rt *tsk; + int pcnt; + + pcnt = get_preempt_count(); + if (!CONFIG_PREEMPT_RT) + return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK); + + tsk = (void *) bpf_get_current_task_btf(); + return (pcnt & (NMI_MASK | HARDIRQ_MASK)) | + (tsk->softirq_disable_cnt & SOFTIRQ_MASK); +} + #endif diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 9386dfe8b884..794d44d19c88 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -19,6 +19,9 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym __weak; +extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym __weak; + /* Description * Obtain a read-only pointer to the dynptr's data * Returns diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 5f6963a320d7..4bc2d25f33e1 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -67,6 +67,9 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz) #define sys_gettid() syscall(SYS_gettid) #endif +/* and poison usage to ensure it does not creep back in. */ +#pragma GCC poison gettid + #ifndef ENOTSUPP #define ENOTSUPP 524 #endif diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 15f626014872..20cede4db3ce 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -412,6 +412,26 @@ void remove_cgroup(const char *relative_path) log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); } +/* + * remove_cgroup_pid() - Remove a cgroup setup by process identified by PID + * @relative_path: The cgroup path, relative to the workdir, to remove + * @pid: PID to be used to find cgroup_path + * + * This function expects a cgroup to already be created, relative to the cgroup + * work dir. It also expects the cgroup doesn't have any children or live + * processes and it removes the cgroup. + * + * On failure, it will print an error to stderr. + */ +void remove_cgroup_pid(const char *relative_path, int pid) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path_pid(cgroup_path, relative_path, pid); + if (rmdir(cgroup_path)) + log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path); +} + /** * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD * @relative_path: The cgroup path, relative to the workdir, to join diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 182e1ac36c95..3857304be874 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -19,6 +19,7 @@ int cgroup_setup_and_join(const char *relative_path); int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); void remove_cgroup(const char *relative_path); +void remove_cgroup_pid(const char *relative_path, int pid); unsigned long long get_cgroup_id(const char *relative_path); int get_cgroup1_hierarchy_id(const char *subsys_name); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 8916ab814a3e..70b28c1e653e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -61,6 +61,7 @@ CONFIG_MPLS_IPTUNNEL=y CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y CONFIG_NET_ACT_GACT=y +CONFIG_NET_ACT_MIRRED=y CONFIG_NET_ACT_SKBMOD=y CONFIG_NET_CLS=y CONFIG_NET_CLS_ACT=y diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index e1495a4bbc99..7efad36ceb26 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -31,10 +31,7 @@ CONFIG_COMPAT=y CONFIG_CPUSETS=y CONFIG_CRASH_DUMP=y CONFIG_CRYPTO_USER_API_RNG=y -CONFIG_CRYPTO_USER_API_SKCIPHER=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_INFO_BTF=y -CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_INFO_REDUCED=n CONFIG_DEBUG_LIST=y CONFIG_DEBUG_LOCKDEP=y @@ -46,7 +43,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_DEVTMPFS=y CONFIG_DRM=y -CONFIG_DUMMY=y CONFIG_EXPERT=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -70,13 +66,11 @@ CONFIG_HZ_100=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_IKHEADERS=y CONFIG_INET6_ESP=y -CONFIG_INET_ESP=y CONFIG_INET=y CONFIG_INPUT_EVDEV=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTICAST=y CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_NF_IPTABLES=y CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_IPVLAN=y CONFIG_JUMP_LABEL=y @@ -97,22 +91,18 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y CONFIG_NET_ACT_BPF=y -CONFIG_NET_ACT_GACT=y CONFIG_NETDEVICES=y CONFIG_NETFILTER_XT_MATCH_BPF=y CONFIG_NETFILTER_XT_TARGET_MARK=y CONFIG_NET_KEY=y -CONFIG_NET_SCH_FQ=y CONFIG_NET_VRF=y CONFIG_NET=y -CONFIG_NF_TABLES=y CONFIG_NLMON=y CONFIG_NO_HZ_IDLE=y CONFIG_NR_CPUS=256 CONFIG_NUMA=y CONFIG_OVERLAY_FS=y CONFIG_PACKET_DIAG=y -CONFIG_PACKET=y CONFIG_PANIC_ON_OOPS=y CONFIG_PARTITION_ADVANCED=y CONFIG_PCI_HOST_GENERIC=y @@ -149,7 +139,6 @@ CONFIG_TASK_XACCT=y CONFIG_TCG_TIS=y CONFIG_TCG_TPM=y CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_DCTCP=y CONFIG_TLS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS=y @@ -161,6 +150,5 @@ CONFIG_UPROBES=y CONFIG_USER_NS=y CONFIG_VETH=y CONFIG_VLAN_8021Q=y -CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el index 9acf389dc4ce..b53afb5e0b71 100644 --- a/tools/testing/selftests/bpf/config.ppc64el +++ b/tools/testing/selftests/bpf/config.ppc64el @@ -54,7 +54,6 @@ CONFIG_NET=y CONFIG_NO_HZ_IDLE=y CONFIG_NONPORTABLE=y CONFIG_NR_CPUS=256 -CONFIG_PACKET=y CONFIG_PANIC_ON_OOPS=y CONFIG_PARTITION_ADVANCED=y CONFIG_PCI_HOST_GENERIC=y diff --git a/tools/testing/selftests/bpf/config.riscv64 b/tools/testing/selftests/bpf/config.riscv64 index bb7043a80e1a..7bee24a79a71 100644 --- a/tools/testing/selftests/bpf/config.riscv64 +++ b/tools/testing/selftests/bpf/config.riscv64 @@ -48,7 +48,6 @@ CONFIG_NET_VRF=y CONFIG_NONPORTABLE=y CONFIG_NO_HZ_IDLE=y CONFIG_NR_CPUS=256 -CONFIG_PACKET=y CONFIG_PANIC_ON_OOPS=y CONFIG_PARTITION_ADVANCED=y CONFIG_PCI=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 26c3bc2ce11d..db61878148e4 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -22,10 +22,7 @@ CONFIG_CHECKPOINT_RESTORE=y CONFIG_CPUSETS=y CONFIG_CRASH_DUMP=y CONFIG_CRYPTO_USER_API_RNG=y -CONFIG_CRYPTO_USER_API_SKCIPHER=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_INFO_BTF=y -CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_NOTIFIERS=y @@ -56,11 +53,9 @@ CONFIG_IDLE_PAGE_TRACKING=y CONFIG_IKHEADERS=y CONFIG_INET6_ESP=y CONFIG_INET=y -CONFIG_INET_ESP=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTICAST=y CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_NF_IPTABLES=y CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_IPVLAN=y CONFIG_JUMP_LABEL=y @@ -83,18 +78,14 @@ CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y CONFIG_NET=y CONFIG_NET_ACT_BPF=y -CONFIG_NET_ACT_GACT=y CONFIG_NET_KEY=y -CONFIG_NET_SCH_FQ=y CONFIG_NET_VRF=y CONFIG_NETDEVICES=y CONFIG_NETFILTER_XT_MATCH_BPF=y CONFIG_NETFILTER_XT_TARGET_MARK=y -CONFIG_NF_TABLES=y CONFIG_NO_HZ_IDLE=y CONFIG_NR_CPUS=256 CONFIG_NUMA=y -CONFIG_PACKET=y CONFIG_PANIC_ON_OOPS=y CONFIG_PARTITION_ADVANCED=y CONFIG_PCI=y @@ -119,7 +110,6 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_TASK_XACCT=y CONFIG_TASKSTATS=y CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_DCTCP=y CONFIG_TLS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y @@ -131,6 +121,5 @@ CONFIG_UPROBES=y CONFIG_USER_NS=y CONFIG_VETH=y CONFIG_VLAN_8021Q=y -CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 5e713ef7caa3..42ad817b00ae 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -44,7 +44,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEFAULT_FQ_CODEL=y @@ -104,12 +103,10 @@ CONFIG_HZ_1000=y CONFIG_INET=y CONFIG_INPUT_EVDEV=y CONFIG_INTEL_POWERCLAMP=y -CONFIG_IP6_NF_IPTABLES=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MROUTE=y CONFIG_IP_MULTICAST=y CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_NF_IPTABLES=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y CONFIG_IP_ROUTE_MULTIPATH=y @@ -162,7 +159,6 @@ CONFIG_NUMA=y CONFIG_NUMA_BALANCING=y CONFIG_NVMEM=y CONFIG_OSF_PARTITION=y -CONFIG_PACKET=y CONFIG_PANIC_ON_OOPS=y CONFIG_PARTITION_ADVANCED=y CONFIG_PCI=y @@ -220,7 +216,6 @@ CONFIG_VALIDATE_FS_PARSER=y CONFIG_VETH=y CONFIG_VIRT_DRIVERS=y CONFIG_VLAN_8021Q=y -CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y CONFIG_X86_ACPI_CPUFREQ=y CONFIG_X86_CPUID=y diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 72b5c174ab3b..cdf7b6641444 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -457,7 +457,7 @@ int append_tid(char *str, size_t sz) if (end + 8 > sz) return -1; - sprintf(&str[end], "%07d", gettid()); + sprintf(&str[end], "%07ld", sys_gettid()); str[end + 7] = '\0'; return 0; diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 1d53a8561ee2..24c509ce4e5b 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -42,11 +42,11 @@ static struct bpf_align_test tests[] = { .matches = { {0, "R1", "ctx()"}, {0, "R10", "fp0"}, - {0, "R3_w", "2"}, - {1, "R3_w", "4"}, - {2, "R3_w", "8"}, - {3, "R3_w", "16"}, - {4, "R3_w", "32"}, + {0, "R3", "2"}, + {1, "R3", "4"}, + {2, "R3", "8"}, + {3, "R3", "16"}, + {4, "R3", "32"}, }, }, { @@ -70,17 +70,17 @@ static struct bpf_align_test tests[] = { .matches = { {0, "R1", "ctx()"}, {0, "R10", "fp0"}, - {0, "R3_w", "1"}, - {1, "R3_w", "2"}, - {2, "R3_w", "4"}, - {3, "R3_w", "8"}, - {4, "R3_w", "16"}, - {5, "R3_w", "1"}, - {6, "R4_w", "32"}, - {7, "R4_w", "16"}, - {8, "R4_w", "8"}, - {9, "R4_w", "4"}, - {10, "R4_w", "2"}, + {0, "R3", "1"}, + {1, "R3", "2"}, + {2, "R3", "4"}, + {3, "R3", "8"}, + {4, "R3", "16"}, + {5, "R3", "1"}, + {6, "R4", "32"}, + {7, "R4", "16"}, + {8, "R4", "8"}, + {9, "R4", "4"}, + {10, "R4", "2"}, }, }, { @@ -99,12 +99,12 @@ static struct bpf_align_test tests[] = { .matches = { {0, "R1", "ctx()"}, {0, "R10", "fp0"}, - {0, "R3_w", "4"}, - {1, "R3_w", "8"}, - {2, "R3_w", "10"}, - {3, "R4_w", "8"}, - {4, "R4_w", "12"}, - {5, "R4_w", "14"}, + {0, "R3", "4"}, + {1, "R3", "8"}, + {2, "R3", "10"}, + {3, "R4", "8"}, + {4, "R4", "12"}, + {5, "R4", "14"}, }, }, { @@ -121,10 +121,10 @@ static struct bpf_align_test tests[] = { .matches = { {0, "R1", "ctx()"}, {0, "R10", "fp0"}, - {0, "R3_w", "7"}, - {1, "R3_w", "7"}, - {2, "R3_w", "14"}, - {3, "R3_w", "56"}, + {0, "R3", "7"}, + {1, "R3", "7"}, + {2, "R3", "14"}, + {3, "R3", "56"}, }, }, @@ -162,19 +162,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w", "pkt(off=8,r=8)"}, - {6, "R3_w", "var_off=(0x0; 0xff)"}, - {7, "R3_w", "var_off=(0x0; 0x1fe)"}, - {8, "R3_w", "var_off=(0x0; 0x3fc)"}, - {9, "R3_w", "var_off=(0x0; 0x7f8)"}, - {10, "R3_w", "var_off=(0x0; 0xff0)"}, - {12, "R3_w", "pkt_end()"}, - {17, "R4_w", "var_off=(0x0; 0xff)"}, - {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, - {19, "R4_w", "var_off=(0x0; 0xff0)"}, - {20, "R4_w", "var_off=(0x0; 0x7f8)"}, - {21, "R4_w", "var_off=(0x0; 0x3fc)"}, - {22, "R4_w", "var_off=(0x0; 0x1fe)"}, + {6, "R0", "pkt(off=8,r=8)"}, + {6, "R3", "var_off=(0x0; 0xff)"}, + {7, "R3", "var_off=(0x0; 0x1fe)"}, + {8, "R3", "var_off=(0x0; 0x3fc)"}, + {9, "R3", "var_off=(0x0; 0x7f8)"}, + {10, "R3", "var_off=(0x0; 0xff0)"}, + {12, "R3", "pkt_end()"}, + {17, "R4", "var_off=(0x0; 0xff)"}, + {18, "R4", "var_off=(0x0; 0x1fe0)"}, + {19, "R4", "var_off=(0x0; 0xff0)"}, + {20, "R4", "var_off=(0x0; 0x7f8)"}, + {21, "R4", "var_off=(0x0; 0x3fc)"}, + {22, "R4", "var_off=(0x0; 0x1fe)"}, }, }, { @@ -195,16 +195,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R3_w", "var_off=(0x0; 0xff)"}, - {7, "R4_w", "var_off=(0x0; 0xff)"}, - {8, "R4_w", "var_off=(0x0; 0xff)"}, - {9, "R4_w", "var_off=(0x0; 0xff)"}, - {10, "R4_w", "var_off=(0x0; 0x1fe)"}, - {11, "R4_w", "var_off=(0x0; 0xff)"}, - {12, "R4_w", "var_off=(0x0; 0x3fc)"}, - {13, "R4_w", "var_off=(0x0; 0xff)"}, - {14, "R4_w", "var_off=(0x0; 0x7f8)"}, - {15, "R4_w", "var_off=(0x0; 0xff0)"}, + {6, "R3", "var_off=(0x0; 0xff)"}, + {7, "R4", "var_off=(0x0; 0xff)"}, + {8, "R4", "var_off=(0x0; 0xff)"}, + {9, "R4", "var_off=(0x0; 0xff)"}, + {10, "R4", "var_off=(0x0; 0x1fe)"}, + {11, "R4", "var_off=(0x0; 0xff)"}, + {12, "R4", "var_off=(0x0; 0x3fc)"}, + {13, "R4", "var_off=(0x0; 0xff)"}, + {14, "R4", "var_off=(0x0; 0x7f8)"}, + {15, "R4", "var_off=(0x0; 0xff0)"}, }, }, { @@ -235,14 +235,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w", "pkt(r=0)"}, - {4, "R5_w", "pkt(off=14,r=0)"}, - {5, "R4_w", "pkt(off=14,r=0)"}, + {2, "R5", "pkt(r=0)"}, + {4, "R5", "pkt(off=14,r=0)"}, + {5, "R4", "pkt(off=14,r=0)"}, {9, "R2", "pkt(r=18)"}, {10, "R5", "pkt(off=14,r=18)"}, - {10, "R4_w", "var_off=(0x0; 0xff)"}, - {13, "R4_w", "var_off=(0x0; 0xffff)"}, - {14, "R4_w", "var_off=(0x0; 0xffff)"}, + {10, "R4", "var_off=(0x0; 0xff)"}, + {13, "R4", "var_off=(0x0; 0xffff)"}, + {14, "R4", "var_off=(0x0; 0xffff)"}, }, }, { @@ -299,12 +299,12 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(r=8)"}, - {7, "R6_w", "var_off=(0x0; 0x3fc)"}, + {6, "R2", "pkt(r=8)"}, + {7, "R6", "var_off=(0x0; 0x3fc)"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ - {11, "R5_w", "pkt(id=1,off=14,"}, + {11, "R5", "pkt(id=1,off=14,"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable @@ -320,12 +320,12 @@ static struct bpf_align_test tests[] = { * instruction to validate R5 state. We also check * that R4 is what it should be in such case. */ - {18, "R4_w", "var_off=(0x0; 0x3fc)"}, - {18, "R5_w", "var_off=(0x0; 0x3fc)"}, + {18, "R4", "var_off=(0x0; 0x3fc)"}, + {18, "R5", "var_off=(0x0; 0x3fc)"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5_w", "pkt(id=2,off=14,"}, + {19, "R5", "pkt(id=2,off=14,"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte @@ -337,21 +337,21 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w", "pkt(off=14,r=8)"}, + {26, "R5", "pkt(off=14,r=8)"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). See comment for insn #18 * for R4 = R5 trick. */ - {28, "R4_w", "var_off=(0x0; 0x3fc)"}, - {28, "R5_w", "var_off=(0x0; 0x3fc)"}, + {28, "R4", "var_off=(0x0; 0x3fc)"}, + {28, "R5", "var_off=(0x0; 0x3fc)"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {29, "R5_w", "pkt(id=3,off=18,"}, + {29, "R5", "pkt(id=3,off=18,"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {31, "R4_w", "var_off=(0x0; 0x7fc)"}, - {31, "R5_w", "var_off=(0x0; 0x7fc)"}, + {31, "R4", "var_off=(0x0; 0x7fc)"}, + {31, "R5", "var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so @@ -397,12 +397,12 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(r=8)"}, - {7, "R6_w", "var_off=(0x0; 0x3fc)"}, + {6, "R2", "pkt(r=8)"}, + {7, "R6", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ - {8, "R6_w", "var_off=(0x2; 0x7fc)"}, + {8, "R6", "var_off=(0x2; 0x7fc)"}, /* Packet pointer has (4n+2) offset */ - {11, "R5_w", "var_off=(0x2; 0x7fc)"}, + {11, "R5", "var_off=(0x2; 0x7fc)"}, {12, "R4", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -414,11 +414,11 @@ static struct bpf_align_test tests[] = { /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {17, "R6_w", "var_off=(0x0; 0x3fc)"}, + {17, "R6", "var_off=(0x0; 0x3fc)"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5_w", "var_off=(0x2; 0xffc)"}, + {19, "R5", "var_off=(0x2; 0xffc)"}, {20, "R4", "var_off=(0x2; 0xffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -459,18 +459,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w", "pkt_end()"}, + {3, "R5", "pkt_end()"}, /* (ptr - ptr) << 2 == unknown, (4n) */ - {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, + {5, "R5", "var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because * the add could overflow. */ - {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"}, + {6, "R5", "var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ {9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"}, /* packet pointer + nonnegative (4n+2) */ - {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, - {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, + {11, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"}, + {12, "R4", "var_off=(0x2; 0x7ffffffffffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -478,7 +478,7 @@ static struct bpf_align_test tests[] = { * So we did not get a 'range' on R6, and the access * attempt will fail. */ - {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"}, + {15, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"}, } }, { @@ -513,12 +513,12 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(r=8)"}, - {8, "R6_w", "var_off=(0x0; 0x3fc)"}, + {6, "R2", "pkt(r=8)"}, + {8, "R6", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6_w", "var_off=(0x2; 0x7fc)"}, + {9, "R6", "var_off=(0x2; 0x7fc)"}, /* New unknown value in R7 is (4n) */ - {10, "R7_w", "var_off=(0x0; 0x3fc)"}, + {10, "R7", "var_off=(0x0; 0x3fc)"}, /* Subtracting it from R6 blows our unsigned bounds */ {11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>= 0 */ @@ -566,16 +566,16 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(r=8)"}, - {9, "R6_w", "var_off=(0x0; 0x3c)"}, + {6, "R2", "pkt(r=8)"}, + {9, "R6", "var_off=(0x0; 0x3c)"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6_w", "var_off=(0x2; 0x7c)"}, + {10, "R6", "var_off=(0x2; 0x7c)"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"}, + {13, "R5", "var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ - {14, "R7_w", "var_off=(0x0; 0x7fc)"}, + {14, "R7", "var_off=(0x0; 0x7fc)"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w", "var_off=(0x2; 0x7fc)"}, + {16, "R5", "var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c index 0223fce4db2b..693fd86fbde6 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c @@ -40,8 +40,13 @@ static void *spin_lock_thread(void *arg) err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run err"); + + if (topts.retval == -EOPNOTSUPP) + goto end; + ASSERT_EQ((int)topts.retval, 0, "test_run retval"); +end: pthread_exit(arg); } @@ -63,6 +68,7 @@ static void test_arena_spin_lock_size(int size) skel = arena_spin_lock__open_and_load(); if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load")) return; + if (skel->data->test_skip == 2) { test__skip(); goto end; @@ -86,6 +92,13 @@ static void test_arena_spin_lock_size(int size) goto end_barrier; } + if (skel->data->test_skip == 3) { + printf("%s:SKIP: CONFIG_NR_CPUS exceed the maximum supported by arena spinlock\n", + __func__); + test__skip(); + goto end_barrier; + } + ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value"); end_barrier: diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index 13e101f370a1..92b5f378bfb8 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -165,11 +165,17 @@ static void test_xchg(struct atomics_lskel *skel) void test_atomics(void) { struct atomics_lskel *skel; + int err; - skel = atomics_lskel__open_and_load(); - if (!ASSERT_OK_PTR(skel, "atomics skeleton load")) + skel = atomics_lskel__open(); + if (!ASSERT_OK_PTR(skel, "atomics skeleton open")) return; + skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = atomics_lskel__load(skel); + if (!ASSERT_OK(err, "atomics skeleton load")) + goto cleanup; + if (skel->data->skip_tests) { printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)", __func__); diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index cabc51c2ca6b..9e77e5da7097 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -3,6 +3,7 @@ #include "test_attach_kprobe_sleepable.skel.h" #include "test_attach_probe_manual.skel.h" #include "test_attach_probe.skel.h" +#include "kprobe_write_ctx.skel.h" /* this is how USDT semaphore is actually defined, except volatile modifier */ volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes"))); @@ -201,6 +202,31 @@ cleanup: test_attach_probe_manual__destroy(skel); } +#ifdef __x86_64__ +/* attach kprobe/kretprobe long event name testings */ +static void test_attach_kprobe_write_ctx(void) +{ + struct kprobe_write_ctx *skel = NULL; + struct bpf_link *link = NULL; + + skel = kprobe_write_ctx__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load")) + return; + + link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_write_ctx, + "bpf_fentry_test1", NULL); + if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts")) + bpf_link__destroy(link); + + kprobe_write_ctx__destroy(skel); +} +#else +static void test_attach_kprobe_write_ctx(void) +{ + test__skip(); +} +#endif + static void test_attach_probe_auto(struct test_attach_probe *skel) { struct bpf_link *uprobe_err_link; @@ -406,6 +432,8 @@ void test_attach_probe(void) test_attach_uprobe_long_event_name(); if (test__start_subtest("kprobe-long_name")) test_attach_kprobe_long_event_name(); + if (test__start_subtest("kprobe-write-ctx")) + test_attach_kprobe_write_ctx(); cleanup: test_attach_probe__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 4a0670c056ba..75f4dff7d042 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -450,8 +450,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) attr.size = sizeof(attr); attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_CPU_CLOCK; - attr.freq = 1; - attr.sample_freq = 10000; + attr.sample_period = 100000; pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); if (!ASSERT_GE(pfd, 0, "perf_fd")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 82903585c870..10cba526d3e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -63,7 +63,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t) /* tests with t->known_ptr_sz have no "long" or "unsigned long" type, * so it's impossible to determine correct pointer size; but if they - * do, it should be 8 regardless of host architecture, becaues BPF + * do, it should be 8 regardless of host architecture, because BPF * target is always 64-bit */ if (!t->known_ptr_sz) { diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c index e0dd966e4a3e..5ad904e9d15d 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c @@ -44,7 +44,7 @@ static void test_read_cgroup_xattr(void) if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load")) goto out; - skel->bss->target_pid = gettid(); + skel->bss->target_pid = sys_gettid(); if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach")) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c index adda85f97058..4b42fbc96efc 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -4,6 +4,8 @@ #define _GNU_SOURCE #include <cgroup_helpers.h> #include <test_progs.h> +#include <sched.h> +#include <sys/wait.h> #include "cgrp_kfunc_failure.skel.h" #include "cgrp_kfunc_success.skel.h" @@ -87,6 +89,72 @@ static const char * const success_tests[] = { "test_cgrp_from_id", }; +static void test_cgrp_from_id_ns(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct cgrp_kfunc_success *skel; + struct bpf_program *prog; + int pid, pipe_fd[2]; + + skel = open_load_cgrp_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err")) + goto cleanup; + + prog = skel->progs.test_cgrp_from_id_ns; + + if (!ASSERT_OK(pipe(pipe_fd), "pipe")) + goto cleanup; + + pid = fork(); + if (!ASSERT_GE(pid, 0, "fork result")) { + close(pipe_fd[0]); + close(pipe_fd[1]); + goto cleanup; + } + + if (pid == 0) { + int ret = 0; + + close(pipe_fd[0]); + + if (!ASSERT_GE(cgroup_setup_and_join("cgrp_from_id_ns"), 0, "join cgroup")) + exit(1); + + if (!ASSERT_OK(unshare(CLONE_NEWCGROUP), "unshare cgns")) + exit(1); + + ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (!ASSERT_OK(ret, "test run ret")) + exit(1); + + if (!ASSERT_OK(opts.retval, "test run retval")) + exit(1); + + if (!ASSERT_EQ(write(pipe_fd[1], &ret, sizeof(ret)), sizeof(ret), "write pipe")) + exit(1); + + exit(0); + } else { + int res; + + close(pipe_fd[1]); + + ASSERT_EQ(read(pipe_fd[0], &res, sizeof(res)), sizeof(res), "read res"); + ASSERT_EQ(waitpid(pid, NULL, 0), pid, "wait on child"); + + remove_cgroup_pid("cgrp_from_id_ns", pid); + + ASSERT_OK(res, "result from run"); + } + + close(pipe_fd[0]); +cleanup: + cgrp_kfunc_success__destroy(skel); +} + void test_cgrp_kfunc(void) { int i, err; @@ -102,6 +170,9 @@ void test_cgrp_kfunc(void) run_success_test(success_tests[i]); } + if (test__start_subtest("test_cgrp_from_id_ns")) + test_cgrp_from_id_ns(); + RUN_TESTS(cgrp_kfunc_failure); cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 9b2d9ceda210..b9f86cb91e81 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -32,6 +32,8 @@ static struct { {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, + {"test_dynptr_skb_meta_data", SETUP_SKB_PROG}, + {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG}, {"test_adjust", SETUP_SYSCALL_SLEEP}, {"test_adjust_err", SETUP_SYSCALL_SLEEP}, {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP}, diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c index 241b2c8c6e0f..c534b4d5f9da 100644 --- a/tools/testing/selftests/bpf/prog_tests/fd_array.c +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -293,7 +293,7 @@ static int get_btf_id_by_fd(int btf_fd, __u32 *id) * 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1 * 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2 * 3) Close the btf_fd, now refcnt=1 - * Wait and check that BTF stil exists. + * Wait and check that BTF still exists. */ static void check_fd_array_cnt__referenced_btfs(void) { diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 130f5b82d2e6..5ef1804e44df 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -12,13 +12,24 @@ void test_fentry_fexit(void) int err, prog_fd, i; LIBBPF_OPTS(bpf_test_run_opts, topts); - fentry_skel = fentry_test_lskel__open_and_load(); + fentry_skel = fentry_test_lskel__open(); if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto close_prog; - fexit_skel = fexit_test_lskel__open_and_load(); + + fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fentry_test_lskel__load(fentry_skel); + if (!ASSERT_OK(err, "fentry_skel_load")) + goto close_prog; + + fexit_skel = fexit_test_lskel__open(); if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto close_prog; + fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fexit_test_lskel__load(fexit_skel); + if (!ASSERT_OK(err, "fexit_skel_load")) + goto close_prog; + err = fentry_test_lskel__attach(fentry_skel); if (!ASSERT_OK(err, "fentry_attach")) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index aee1bc77a17f..ec882328eb59 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -43,8 +43,13 @@ static void fentry_test(void) struct fentry_test_lskel *fentry_skel = NULL; int err; - fentry_skel = fentry_test_lskel__open_and_load(); - if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) + fentry_skel = fentry_test_lskel__open(); + if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_open")) + goto cleanup; + + fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fentry_test_lskel__load(fentry_skel); + if (!ASSERT_OK(err, "fentry_skel_load")) goto cleanup; err = fentry_test_common(fentry_skel); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index 1c13007e37dd..94eed753560c 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -43,8 +43,13 @@ static void fexit_test(void) struct fexit_test_lskel *fexit_skel = NULL; int err; - fexit_skel = fexit_test_lskel__open_and_load(); - if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) + fexit_skel = fexit_test_lskel__open(); + if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_open")) + goto cleanup; + + fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING; + err = fexit_test_lskel__load(fexit_skel); + if (!ASSERT_OK(err, "fexit_skel_load")) goto cleanup; err = fexit_test_common(fexit_skel); diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c index a133354ac9bc..97b00c7efe94 100644 --- a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c +++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c @@ -16,7 +16,7 @@ void test_kernel_flag(void) if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel")) return; - lsm_skel->bss->monitored_tid = gettid(); + lsm_skel->bss->monitored_tid = sys_gettid(); ret = test_kernel_flag__attach(lsm_skel); if (!ASSERT_OK(ret, "test_kernel_flag__attach")) diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index e19ef509ebf8..6cfaa978bc9a 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -7,6 +7,7 @@ #include "kprobe_multi_session.skel.h" #include "kprobe_multi_session_cookie.skel.h" #include "kprobe_multi_verifier.skel.h" +#include "kprobe_write_ctx.skel.h" #include "bpf/libbpf_internal.h" #include "bpf/hashmap.h" @@ -422,220 +423,6 @@ static void test_unique_match(void) kprobe_multi__destroy(skel); } -static size_t symbol_hash(long key, void *ctx __maybe_unused) -{ - return str_hash((const char *) key); -} - -static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) -{ - return strcmp((const char *) key1, (const char *) key2) == 0; -} - -static bool is_invalid_entry(char *buf, bool kernel) -{ - if (kernel && strchr(buf, '[')) - return true; - if (!kernel && !strchr(buf, '[')) - return true; - return false; -} - -static bool skip_entry(char *name) -{ - /* - * We attach to almost all kernel functions and some of them - * will cause 'suspicious RCU usage' when fprobe is attached - * to them. Filter out the current culprits - arch_cpu_idle - * default_idle and rcu_* functions. - */ - if (!strcmp(name, "arch_cpu_idle")) - return true; - if (!strcmp(name, "default_idle")) - return true; - if (!strncmp(name, "rcu_", 4)) - return true; - if (!strcmp(name, "bpf_dispatcher_xdp_func")) - return true; - if (!strncmp(name, "__ftrace_invalid_address__", - sizeof("__ftrace_invalid_address__") - 1)) - return true; - return false; -} - -/* Do comparision by ignoring '.llvm.<hash>' suffixes. */ -static int compare_name(const char *name1, const char *name2) -{ - const char *res1, *res2; - int len1, len2; - - res1 = strstr(name1, ".llvm."); - res2 = strstr(name2, ".llvm."); - len1 = res1 ? res1 - name1 : strlen(name1); - len2 = res2 ? res2 - name2 : strlen(name2); - - if (len1 == len2) - return strncmp(name1, name2, len1); - if (len1 < len2) - return strncmp(name1, name2, len1) <= 0 ? -1 : 1; - return strncmp(name1, name2, len2) >= 0 ? 1 : -1; -} - -static int load_kallsyms_compare(const void *p1, const void *p2) -{ - return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name); -} - -static int search_kallsyms_compare(const void *p1, const struct ksym *p2) -{ - return compare_name(p1, p2->name); -} - -static int get_syms(char ***symsp, size_t *cntp, bool kernel) -{ - size_t cap = 0, cnt = 0; - char *name = NULL, *ksym_name, **syms = NULL; - struct hashmap *map; - struct ksyms *ksyms; - struct ksym *ks; - char buf[256]; - FILE *f; - int err = 0; - - ksyms = load_kallsyms_custom_local(load_kallsyms_compare); - if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local")) - return -EINVAL; - - /* - * The available_filter_functions contains many duplicates, - * but other than that all symbols are usable in kprobe multi - * interface. - * Filtering out duplicates by using hashmap__add, which won't - * add existing entry. - */ - - if (access("/sys/kernel/tracing/trace", F_OK) == 0) - f = fopen("/sys/kernel/tracing/available_filter_functions", "r"); - else - f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); - - if (!f) - return -EINVAL; - - map = hashmap__new(symbol_hash, symbol_equal, NULL); - if (IS_ERR(map)) { - err = libbpf_get_error(map); - goto error; - } - - while (fgets(buf, sizeof(buf), f)) { - if (is_invalid_entry(buf, kernel)) - continue; - - free(name); - if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) - continue; - if (skip_entry(name)) - continue; - - ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare); - if (!ks) { - err = -EINVAL; - goto error; - } - - ksym_name = ks->name; - err = hashmap__add(map, ksym_name, 0); - if (err == -EEXIST) { - err = 0; - continue; - } - if (err) - goto error; - - err = libbpf_ensure_mem((void **) &syms, &cap, - sizeof(*syms), cnt + 1); - if (err) - goto error; - - syms[cnt++] = ksym_name; - } - - *symsp = syms; - *cntp = cnt; - -error: - free(name); - fclose(f); - hashmap__free(map); - if (err) - free(syms); - return err; -} - -static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel) -{ - unsigned long *addr, *addrs, *tmp_addrs; - int err = 0, max_cnt, inc_cnt; - char *name = NULL; - size_t cnt = 0; - char buf[256]; - FILE *f; - - if (access("/sys/kernel/tracing/trace", F_OK) == 0) - f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r"); - else - f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r"); - - if (!f) - return -ENOENT; - - /* In my local setup, the number of entries is 50k+ so Let us initially - * allocate space to hold 64k entries. If 64k is not enough, incrementally - * increase 1k each time. - */ - max_cnt = 65536; - inc_cnt = 1024; - addrs = malloc(max_cnt * sizeof(long)); - if (addrs == NULL) { - err = -ENOMEM; - goto error; - } - - while (fgets(buf, sizeof(buf), f)) { - if (is_invalid_entry(buf, kernel)) - continue; - - free(name); - if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2) - continue; - if (skip_entry(name)) - continue; - - if (cnt == max_cnt) { - max_cnt += inc_cnt; - tmp_addrs = realloc(addrs, max_cnt); - if (!tmp_addrs) { - err = -ENOMEM; - goto error; - } - addrs = tmp_addrs; - } - - addrs[cnt++] = (unsigned long)addr; - } - - *addrsp = addrs; - *cntp = cnt; - -error: - free(name); - fclose(f); - if (err) - free(addrs); - return err; -} - static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts) { long attach_start_ns, attach_end_ns; @@ -670,7 +457,7 @@ static void test_kprobe_multi_bench_attach(bool kernel) char **syms = NULL; size_t cnt = 0; - if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms")) + if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms")) return; skel = kprobe_multi_empty__open_and_load(); @@ -696,13 +483,13 @@ static void test_kprobe_multi_bench_attach_addr(bool kernel) size_t cnt = 0; int err; - err = get_addrs(&addrs, &cnt, kernel); + err = bpf_get_addrs(&addrs, &cnt, kernel); if (err == -ENOENT) { test__skip(); return; } - if (!ASSERT_OK(err, "get_addrs")) + if (!ASSERT_OK(err, "bpf_get_addrs")) return; skel = kprobe_multi_empty__open_and_load(); @@ -753,6 +540,30 @@ cleanup: kprobe_multi_override__destroy(skel); } +#ifdef __x86_64__ +static void test_attach_write_ctx(void) +{ + struct kprobe_write_ctx *skel = NULL; + struct bpf_link *link = NULL; + + skel = kprobe_write_ctx__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load")) + return; + + link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_multi_write_ctx, + "bpf_fentry_test1", NULL); + if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts")) + bpf_link__destroy(link); + + kprobe_write_ctx__destroy(skel); +} +#else +static void test_attach_write_ctx(void) +{ + test__skip(); +} +#endif + void serial_test_kprobe_multi_bench_attach(void) { if (test__start_subtest("kernel")) @@ -792,5 +603,7 @@ void test_kprobe_multi_test(void) test_session_cookie_skel_api(); if (test__start_subtest("unique_match")) test_unique_match(); + if (test__start_subtest("attach_write_ctx")) + test_attach_write_ctx(); RUN_TESTS(kprobe_multi_verifier); } diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c new file mode 100644 index 000000000000..6bdc6d6de0da --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Google LLC. */ +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <test_progs.h> +#include <bpf/btf.h> + +#include "map_excl.skel.h" + +static void test_map_excl_allowed(void) +{ + struct map_excl *skel = map_excl__open(); + int err; + + err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access); + if (!ASSERT_OK(err, "bpf_map__set_exclusive_program")) + goto out; + + bpf_program__set_autoload(skel->progs.should_have_access, true); + bpf_program__set_autoload(skel->progs.should_not_have_access, false); + + err = map_excl__load(skel); + ASSERT_OK(err, "map_excl__load"); +out: + map_excl__destroy(skel); +} + +static void test_map_excl_denied(void) +{ + struct map_excl *skel = map_excl__open(); + int err; + + err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access); + if (!ASSERT_OK(err, "bpf_map__make_exclusive")) + goto out; + + bpf_program__set_autoload(skel->progs.should_have_access, false); + bpf_program__set_autoload(skel->progs.should_not_have_access, true); + + err = map_excl__load(skel); + ASSERT_EQ(err, -EACCES, "exclusive map access not denied\n"); +out: + map_excl__destroy(skel); + +} + +void test_map_excl(void) +{ + if (test__start_subtest("map_excl_allowed")) + test_map_excl_allowed(); + if (test__start_subtest("map_excl_denied")) + test_map_excl_denied(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 6d391d95f96e..70fa7ae93173 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -90,7 +90,7 @@ void test_module_attach(void) test_module_attach__detach(skel); - /* attach fentry/fexit and make sure it get's module reference */ + /* attach fentry/fexit and make sure it gets module reference */ link = bpf_program__attach(skel->progs.handle_fentry); if (!ASSERT_OK_PTR(link, "attach_fentry")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c new file mode 100644 index 000000000000..9ae49b587f3e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <test_progs.h> + + +#include "test_pinning_devmap.skel.h" + +void test_pinning_devmap_reuse(void) +{ + const char *pinpath1 = "/sys/fs/bpf/pinmap1"; + const char *pinpath2 = "/sys/fs/bpf/pinmap2"; + struct test_pinning_devmap *skel1 = NULL, *skel2 = NULL; + int err; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + + /* load the object a first time */ + skel1 = test_pinning_devmap__open_and_load(); + if (!ASSERT_OK_PTR(skel1, "skel_load1")) + goto out; + + /* load the object a second time, re-using the pinned map */ + skel2 = test_pinning_devmap__open_and_load(); + if (!ASSERT_OK_PTR(skel2, "skel_load2")) + goto out; + + /* we can close the reference safely without + * the map's refcount falling to 0 + */ + test_pinning_devmap__destroy(skel1); + skel1 = NULL; + + /* now, swap the pins */ + err = renameat2(0, pinpath1, 0, pinpath2, RENAME_EXCHANGE); + if (!ASSERT_OK(err, "swap pins")) + goto out; + + /* load the object again, this time the re-use should fail */ + skel1 = test_pinning_devmap__open_and_load(); + if (!ASSERT_ERR_PTR(skel1, "skel_load3")) + goto out; + +out: + unlink(pinpath1); + unlink(pinpath2); + test_pinning_devmap__destroy(skel1); + test_pinning_devmap__destroy(skel2); +} diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c index 14f2796076e0..7607cfc2408c 100644 --- a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c +++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c @@ -54,3 +54,128 @@ void test_prog_tests_framework(void) return; clear_test_state(state); } + +static void dummy_emit(const char *buf, bool force) {} + +void test_prog_tests_framework_expected_msgs(void) +{ + struct expected_msgs msgs; + int i, j, error_cnt; + const struct { + const char *name; + const char *log; + const char *expected; + struct expect_msg *pats; + } cases[] = { + { + .name = "simple-ok", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "simple-fail", + .log = "aaabbbddd", + .expected = "MATCHED SUBSTR: 'aaa'\n" + "EXPECTED SUBSTR: 'ccc'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-ok-mid", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "foo", .negative = true }, + { .substr = "bar", .negative = true }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-ok-tail", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "foo", .negative = true }, + {} + } + }, + { + .name = "negative-ok-head", + .log = "aaabbbccc", + .pats = (struct expect_msg[]) { + { .substr = "foo", .negative = true }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-fail-head", + .log = "aaabbbccc", + .expected = "UNEXPECTED SUBSTR: 'aaa'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa", .negative = true }, + { .substr = "bbb" }, + {} + } + }, + { + .name = "negative-fail-tail", + .log = "aaabbbccc", + .expected = "UNEXPECTED SUBSTR: 'ccc'\n", + .pats = (struct expect_msg[]) { + { .substr = "bbb" }, + { .substr = "ccc", .negative = true }, + {} + } + }, + { + .name = "negative-fail-mid-1", + .log = "aaabbbccc", + .expected = "UNEXPECTED SUBSTR: 'bbb'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "bbb", .negative = true }, + { .substr = "ccc" }, + {} + } + }, + { + .name = "negative-fail-mid-2", + .log = "aaabbb222ccc", + .expected = "UNEXPECTED SUBSTR: '222'\n", + .pats = (struct expect_msg[]) { + { .substr = "aaa" }, + { .substr = "222", .negative = true }, + { .substr = "bbb", .negative = true }, + { .substr = "ccc" }, + {} + } + } + }; + + for (i = 0; i < ARRAY_SIZE(cases); i++) { + if (test__start_subtest(cases[i].name)) { + error_cnt = env.subtest_state->error_cnt; + msgs.patterns = cases[i].pats; + msgs.cnt = 0; + for (j = 0; cases[i].pats[j].substr; j++) + msgs.cnt++; + validate_msgs(cases[i].log, &msgs, dummy_emit); + fflush(stderr); + env.subtest_state->error_cnt = error_cnt; + if (cases[i].expected) + ASSERT_HAS_SUBSTR(env.subtest_state->log_buf, cases[i].expected, "expected output"); + else + ASSERT_STREQ(env.subtest_state->log_buf, "", "expected no output"); + test__end_subtest(); + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index e261b0e872db..d93a0c7b1786 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -623,7 +623,7 @@ static void range_cond(enum num_t t, struct range x, struct range y, *newx = range(t, x.a, x.b); *newy = range(t, y.a + 1, y.b); } else if (x.a == x.b && x.b == y.b) { - /* X is a constant matching rigth side of Y */ + /* X is a constant matching right side of Y */ *newx = range(t, x.a, x.b); *newy = range(t, y.a, y.b - 1); } else if (y.a == y.b && x.a == y.a) { @@ -631,7 +631,7 @@ static void range_cond(enum num_t t, struct range x, struct range y, *newx = range(t, x.a + 1, x.b); *newy = range(t, y.a, y.b); } else if (y.a == y.b && x.b == y.b) { - /* Y is a constant matching rigth side of X */ + /* Y is a constant matching right side of X */ *newx = range(t, x.a, x.b - 1); *newy = range(t, y.a, y.b); } else { diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c index 0703e987df89..8c6c2043a432 100644 --- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c @@ -99,3 +99,19 @@ end: res_spin_lock__destroy(skel); return; } + +void serial_test_res_spin_lock_stress(void) +{ + if (libbpf_num_possible_cpus() < 3) { + test__skip(); + return; + } + + ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA"); + sleep(5); + unload_module("bpf_test_rqspinlock", false); + + ASSERT_OK(load_module_params("bpf_test_rqspinlock.ko", "test_ab=1", false), "load module ABBA"); + sleep(5); + unload_module("bpf_test_rqspinlock", false); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sha256.c b/tools/testing/selftests/bpf/prog_tests/sha256.c new file mode 100644 index 000000000000..604a0b1423d5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sha256.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright 2025 Google LLC */ + +#include <test_progs.h> +#include "bpf/libbpf_internal.h" + +#define MAX_LEN 4096 + +/* Test libbpf_sha256() for all lengths from 0 to MAX_LEN inclusively. */ +void test_sha256(void) +{ + /* + * The correctness of this value was verified by running this test with + * libbpf_sha256() replaced by OpenSSL's SHA256(). + */ + static const __u8 expected_digest_of_digests[SHA256_DIGEST_LENGTH] = { + 0x62, 0x30, 0x0e, 0x1d, 0xea, 0x7f, 0xc4, 0x74, + 0xfd, 0x8e, 0x64, 0x0b, 0xd8, 0x5f, 0xea, 0x04, + 0xf3, 0xef, 0x77, 0x42, 0xc2, 0x01, 0xb8, 0x90, + 0x6e, 0x19, 0x91, 0x1b, 0xca, 0xb3, 0x28, 0x42, + }; + __u64 seed = 0; + __u8 *data = NULL, *digests = NULL; + __u8 digest_of_digests[SHA256_DIGEST_LENGTH]; + size_t i; + + data = malloc(MAX_LEN); + if (!ASSERT_OK_PTR(data, "malloc")) + goto out; + digests = malloc((MAX_LEN + 1) * SHA256_DIGEST_LENGTH); + if (!ASSERT_OK_PTR(digests, "malloc")) + goto out; + + /* Generate MAX_LEN bytes of "random" data deterministically. */ + for (i = 0; i < MAX_LEN; i++) { + seed = (seed * 25214903917 + 11) & ((1ULL << 48) - 1); + data[i] = (__u8)(seed >> 16); + } + + /* Calculate a digest for each length 0 through MAX_LEN inclusively. */ + for (i = 0; i <= MAX_LEN; i++) + libbpf_sha256(data, i, &digests[i * SHA256_DIGEST_LENGTH]); + + /* Calculate and verify the digest of all the digests. */ + libbpf_sha256(digests, (MAX_LEN + 1) * SHA256_DIGEST_LENGTH, + digest_of_digests); + ASSERT_MEMEQ(digest_of_digests, expected_digest_of_digests, + SHA256_DIGEST_LENGTH, "digest_of_digests"); +out: + free(data); + free(digests); +} diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index e3ea5dc2f697..254fbfeab06a 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -13,22 +13,22 @@ static struct { const char *err_msg; } spin_lock_fail_tests[] = { { "lock_id_kptr_preserve", - "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) " - "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "5: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2) " + "R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { "lock_id_global_zero", - "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" + "; R1=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)" - " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n" + " R0=map_value(id=1,map=array_map,ks=4,vs=8)" + " R1=map_value(id=1,map=array_map,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" " R0=map_value(id=2,ks=4,vs=8)" - " R1_w=map_value(id=2,ks=4,vs=8)\n" + " R1=map_value(id=2,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c index b7ba5cd47d96..271b5cc9fc01 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c @@ -39,7 +39,7 @@ retry: bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 0832fd787457..b277dddd5af7 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -66,7 +66,7 @@ retry: bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c index df59e4ae2951..c23b97414813 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c @@ -1,46 +1,27 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "stacktrace_map.skel.h" void test_stacktrace_map(void) { + struct stacktrace_map *skel; int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; - const char *prog_name = "oncpu"; - int err, prog_fd, stack_trace_len; - const char *file = "./test_stacktrace_map.bpf.o"; - __u32 key, val, duration = 0; - struct bpf_program *prog; - struct bpf_object *obj; - struct bpf_link *link; + int err, stack_trace_len; + __u32 key, val, stack_id, duration = 0; + __u64 stack[PERF_MAX_STACK_DEPTH]; - err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); - if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + skel = stacktrace_map__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; - prog = bpf_object__find_program_by_name(obj, prog_name); - if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name)) - goto close_prog; - - link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch"); - if (!ASSERT_OK_PTR(link, "attach_tp")) - goto close_prog; - - /* find map fds */ - control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (CHECK_FAIL(control_map_fd < 0)) - goto disable_pmu; - - stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (CHECK_FAIL(stackid_hmap_fd < 0)) - goto disable_pmu; - - stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (CHECK_FAIL(stackmap_fd < 0)) - goto disable_pmu; - - stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); - if (CHECK_FAIL(stack_amap_fd < 0)) - goto disable_pmu; + control_map_fd = bpf_map__fd(skel->maps.control_map); + stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap); + stackmap_fd = bpf_map__fd(skel->maps.stackmap); + stack_amap_fd = bpf_map__fd(skel->maps.stack_amap); + err = stacktrace_map__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; /* give some time for bpf program run */ sleep(1); @@ -50,26 +31,32 @@ void test_stacktrace_map(void) bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto out; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto out; stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap", "err %d errno %d\n", err, errno)) - goto disable_pmu; - -disable_pmu: - bpf_link__destroy(link); -close_prog: - bpf_object__close(obj); + goto out; + + stack_id = skel->bss->stack_id; + err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, stack); + if (!ASSERT_OK(err, "lookup and delete target stack_id")) + goto out; + + err = bpf_map_lookup_elem(stackmap_fd, &stack_id, stack); + if (!ASSERT_EQ(err, -ENOENT, "lookup deleted stack_id")) + goto out; +out: + stacktrace_map__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c index c6ef06f55cdb..e985d51d3d47 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c @@ -5,7 +5,7 @@ void test_stacktrace_map_raw_tp(void) { const char *prog_name = "oncpu"; int control_map_fd, stackid_hmap_fd, stackmap_fd; - const char *file = "./test_stacktrace_map.bpf.o"; + const char *file = "./stacktrace_map.bpf.o"; __u32 key, val, duration = 0; int err, prog_fd; struct bpf_program *prog; @@ -46,7 +46,7 @@ void test_stacktrace_map_raw_tp(void) bpf_map_update_elem(control_map_fd, &key, &val, 0); /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c index 1932b1e0685c..dc2ccf6a14d1 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c @@ -40,7 +40,7 @@ void test_stacktrace_map_skip(void) skel->bss->control = 1; /* for every element in stackid_hmap, we can find a corresponding one - * in stackmap, and vise versa. + * in stackmap, and vice versa. */ err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap")) diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c index d9f0185dca61..c3cce5c292bd 100644 --- a/tools/testing/selftests/bpf/prog_tests/stream.c +++ b/tools/testing/selftests/bpf/prog_tests/stream.c @@ -2,7 +2,6 @@ /* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ #include <test_progs.h> #include <sys/mman.h> -#include <regex.h> #include "stream.skel.h" #include "stream_fail.skel.h" @@ -18,87 +17,6 @@ void test_stream_success(void) return; } -struct { - int prog_off; - const char *errstr; -} stream_error_arr[] = { - { - offsetof(struct stream, progs.stream_cond_break), - "ERROR: Timeout detected for may_goto instruction\n" - "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" - "Call trace:\n" - "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" - "|[ \t]+[^\n]+\n)*", - }, - { - offsetof(struct stream, progs.stream_deadlock), - "ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock\n" - "Attempted lock = (0x[0-9a-fA-F]+)\n" - "Total held locks = 1\n" - "Held lock\\[ 0\\] = \\1\n" // Lock address must match - "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" - "Call trace:\n" - "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" - "|[ \t]+[^\n]+\n)*", - }, -}; - -static int match_regex(const char *pattern, const char *string) -{ - int err, rc; - regex_t re; - - err = regcomp(&re, pattern, REG_EXTENDED | REG_NEWLINE); - if (err) - return -1; - rc = regexec(&re, string, 0, NULL, 0); - regfree(&re); - return rc == 0 ? 1 : 0; -} - -void test_stream_errors(void) -{ - LIBBPF_OPTS(bpf_test_run_opts, opts); - LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); - struct stream *skel; - int ret, prog_fd; - char buf[1024]; - - skel = stream__open_and_load(); - if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) - return; - - for (int i = 0; i < ARRAY_SIZE(stream_error_arr); i++) { - struct bpf_program **prog; - - prog = (struct bpf_program **)(((char *)skel) + stream_error_arr[i].prog_off); - prog_fd = bpf_program__fd(*prog); - ret = bpf_prog_test_run_opts(prog_fd, &opts); - ASSERT_OK(ret, "ret"); - ASSERT_OK(opts.retval, "retval"); - -#if !defined(__x86_64__) - ASSERT_TRUE(1, "Timed may_goto unsupported, skip."); - if (i == 0) { - ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts); - ASSERT_EQ(ret, 0, "stream read"); - continue; - } -#endif - - ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts); - ASSERT_GT(ret, 0, "stream read"); - ASSERT_LE(ret, 1023, "len for buf"); - buf[ret] = '\0'; - - ret = match_regex(stream_error_arr[i].errstr, buf); - if (!ASSERT_TRUE(ret == 1, "regex match")) - fprintf(stderr, "Output from stream:\n%s\n", buf); - } - - stream__destroy(skel); -} - void test_stream_syscall(void) { LIBBPF_OPTS(bpf_test_run_opts, opts); @@ -139,3 +57,52 @@ void test_stream_syscall(void) stream__destroy(skel); } + +static void test_address(struct bpf_program *prog, unsigned long *fault_addr_p) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + int ret, prog_fd; + char fault_addr[64]; + char buf[1024]; + + prog_fd = bpf_program__fd(prog); + + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + + sprintf(fault_addr, "0x%lx", *fault_addr_p); + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts); + ASSERT_GT(ret, 0, "stream read"); + ASSERT_LE(ret, 1023, "len for buf"); + buf[ret] = '\0'; + + if (!ASSERT_HAS_SUBSTR(buf, fault_addr, "fault_addr")) { + fprintf(stderr, "Output from stream:\n%s\n", buf); + fprintf(stderr, "Fault Addr: %s\n", fault_addr); + } +} + +void test_stream_arena_fault_address(void) +{ + struct stream *skel; + +#if !defined(__x86_64__) && !defined(__aarch64__) + printf("%s:SKIP: arena fault reporting not supported\n", __func__); + test__skip(); + return; +#endif + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + if (test__start_subtest("read_fault")) + test_address(skel->progs.stream_arena_read_fault, &skel->bss->fault_addr); + if (test__start_subtest("write_fault")) + test_address(skel->progs.stream_arena_write_fault, &skel->bss->fault_addr); + + stream__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c index 35af8044d059..4d66fad3c8bd 100644 --- a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c +++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c @@ -8,6 +8,7 @@ static const char * const test_cases[] = { "strcmp", + "strcasecmp", "strchr", "strchrnul", "strnchr", diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h new file mode 100644 index 000000000000..2de38776a2d4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h @@ -0,0 +1,386 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TASK_LOCAL_DATA_H +#define __TASK_LOCAL_DATA_H + +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdatomic.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT +#include <pthread.h> +#endif + +#include <bpf/bpf.h> + +/* + * OPTIONS + * + * Define the option before including the header + * + * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically + * + * Thread-specific memory for storing TLD is allocated lazily on the first call to + * tld_get_data(). The thread that calls it must also call tld_free() on thread exit + * to prevent memory leak. Pthread will be included if the option is defined. A pthread + * key will be registered with a destructor that calls tld_free(). + * + * + * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically + * (default: 64 bytes) + * + * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using + * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot be + * possibly known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated + * for these TLDs. This additional memory is allocated for every thread that calls + * tld_get_data() even if no tld_create_key are actually called, so be mindful of + * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory + * will be allocated for TLDs created with it. + * + * + * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62) + * + * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store, + * TLD_MAX_DATA_CNT. + * + * + * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc() + * + * When allocating the memory for storing TLDs, we need to make sure there is a memory + * region of the X bytes within a page. This is due to the limit posed by UPTR: memory + * pinned to the kernel cannot exceed a page nor can it cross the page boundary. The + * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses + * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage + * as not all memory allocator can use the exact amount of memory requested to fulfill + * aligned_alloc(). For example, some may round the size up to the alignment. Enable the + * option to always use aligned_alloc() if the implementation has low memory overhead. + */ + +#define TLD_PAGE_SIZE getpagesize() +#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1)) + +#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1)) +#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1) + +#define TLD_READ_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#ifndef TLD_DYN_DATA_SIZE +#define TLD_DYN_DATA_SIZE 64 +#endif + +#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1) + +#ifndef TLD_NAME_LEN +#define TLD_NAME_LEN 62 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + __s16 off; +} tld_key_t; + +struct tld_metadata { + char name[TLD_NAME_LEN]; + _Atomic __u16 size; +}; + +struct tld_meta_u { + _Atomic __u8 cnt; + __u16 size; + struct tld_metadata metadata[]; +}; + +struct tld_data_u { + __u64 start; /* offset of tld_data_u->data in a page */ + char data[]; +}; + +struct tld_map_value { + void *data; + struct tld_meta_u *meta; +}; + +struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak)); +__thread struct tld_data_u *tld_data_p __attribute__((weak)); +__thread void *tld_data_alloc_p __attribute__((weak)); + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT +pthread_key_t tld_pthread_key __attribute__((weak)); + +static void tld_free(void); + +static void __tld_thread_exit_handler(void *unused) +{ + tld_free(); +} +#endif + +static int __tld_init_meta_p(void) +{ + struct tld_meta_u *meta, *uninit = NULL; + int err = 0; + + meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE); + if (!meta) { + err = -ENOMEM; + goto out; + } + + memset(meta, 0, TLD_PAGE_SIZE); + meta->size = TLD_DYN_DATA_SIZE; + + if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) { + free(meta); + goto out; + } + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT + pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler); +#endif +out: + return err; +} + +static int __tld_init_data_p(int map_fd) +{ + bool use_aligned_alloc = false; + struct tld_map_value map_val; + struct tld_data_u *data; + void *data_alloc = NULL; + int err, tid_fd = -1; + + tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL); + if (tid_fd < 0) { + err = -errno; + goto out; + } + +#ifdef TLD_DATA_USE_ALIGNED_ALLOC + use_aligned_alloc = true; +#endif + + /* + * tld_meta_p->size = TLD_DYN_DATA_SIZE + + * total size of TLDs defined via TLD_DEFINE_KEY() + */ + data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ? + aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) : + malloc(tld_meta_p->size * 2); + if (!data_alloc) { + err = -ENOMEM; + goto out; + } + + /* + * Always pass a page-aligned address to UPTR since the size of tld_map_value::data + * is a page in BTF. If data_alloc spans across two pages, use the page that contains large + * enough memory. + */ + if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) { + map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc); + data = data_alloc; + data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) + + offsetof(struct tld_data_u, data); + } else { + map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); + data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); + data->start = offsetof(struct tld_data_u, data); + } + map_val.meta = TLD_READ_ONCE(tld_meta_p); + + err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0); + if (err) { + free(data_alloc); + goto out; + } + + tld_data_p = data; + tld_data_alloc_p = data_alloc; +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT + pthread_setspecific(tld_pthread_key, (void *)1); +#endif +out: + if (tid_fd >= 0) + close(tid_fd); + return err; +} + +static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data) +{ + int err, i, sz, off = 0; + __u8 cnt; + + if (!TLD_READ_ONCE(tld_meta_p)) { + err = __tld_init_meta_p(); + if (err) + return (tld_key_t){err}; + } + + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { +retry: + cnt = atomic_load(&tld_meta_p->cnt); + if (i < cnt) { + /* A metadata is not ready until size is updated with a non-zero value */ + while (!(sz = atomic_load(&tld_meta_p->metadata[i].size))) + sched_yield(); + + if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN)) + return (tld_key_t){-EEXIST}; + + off += TLD_ROUND_UP(sz, 8); + continue; + } + + /* + * TLD_DEFINE_KEY() is given memory upto a page while at most + * TLD_DYN_DATA_SIZE is allocated for tld_create_key() + */ + if (dyn_data) { + if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size) + return (tld_key_t){-E2BIG}; + } else { + if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u)) + return (tld_key_t){-E2BIG}; + tld_meta_p->size += TLD_ROUND_UP(size, 8); + } + + /* + * Only one tld_create_key() can increase the current cnt by one and + * takes the latest available slot. Other threads will check again if a new + * TLD can still be added, and then compete for the new slot after the + * succeeding thread update the size. + */ + if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1)) + goto retry; + + strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN); + atomic_store(&tld_meta_p->metadata[i].size, size); + return (tld_key_t){(__s16)off}; + } + + return (tld_key_t){-ENOSPC}; +} + +/** + * TLD_DEFINE_KEY() - Define a TLD and a global variable key associated with the TLD. + * + * @name: The name of the TLD + * @size: The size of the TLD + * @key: The variable name of the key. Cannot exceed TLD_NAME_LEN + * + * The macro can only be used in file scope. + * + * A global variable key of opaque type, tld_key_t, will be declared and initialized before + * main() starts. Use tld_key_is_err() or tld_key_err_or_zero() later to check if the key + * creation succeeded. Pass the key to tld_get_data() to get a pointer to the TLD. + * bpf programs can also fetch the same key by name. + * + * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just + * enough memory will be allocated for each thread on the first call to tld_get_data(). + */ +#define TLD_DEFINE_KEY(key, name, size) \ +tld_key_t key; \ + \ +__attribute__((constructor)) \ +void __tld_define_key_##key(void) \ +{ \ + key = __tld_create_key(name, size, false); \ +} + +/** + * tld_create_key() - Create a TLD and return a key associated with the TLD. + * + * @name: The name the TLD + * @size: The size of the TLD + * + * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check + * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to + * locate the TLD. bpf programs can also fetch the same key by name. + * + * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is + * not known statically or a TLD needs to be created conditionally) + * + * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs + * created dynamically with tld_create_key(). Since only a user page is pinned to the + * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE - + * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page. + */ +__attribute__((unused)) +static tld_key_t tld_create_key(const char *name, size_t size) +{ + return __tld_create_key(name, size, true); +} + +__attribute__((unused)) +static inline bool tld_key_is_err(tld_key_t key) +{ + return key.off < 0; +} + +__attribute__((unused)) +static inline int tld_key_err_or_zero(tld_key_t key) +{ + return tld_key_is_err(key) ? key.off : 0; +} + +/** + * tld_get_data() - Get a pointer to the TLD associated with the given key of the + * calling thread. + * + * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map + * of task local data. + * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key(). + * + * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD + * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte + * aligned. + * + * Threads that call tld_get_data() must call tld_free() on exit to prevent + * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined. + */ +__attribute__((unused)) +static void *tld_get_data(int map_fd, tld_key_t key) +{ + if (!TLD_READ_ONCE(tld_meta_p)) + return NULL; + + /* tld_data_p is allocated on the first invocation of tld_get_data() */ + if (!tld_data_p && __tld_init_data_p(map_fd)) + return NULL; + + return tld_data_p->data + key.off; +} + +/** + * tld_free() - Free task local data memory of the calling thread + * + * For the calling thread, all pointers to TLDs acquired before will become invalid. + * + * Users must call tld_free() on thread exit to prevent memory leak. Alternatively, + * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered + * to free the memory automatically. + */ +__attribute__((unused)) +static void tld_free(void) +{ + if (tld_data_alloc_p) { + free(tld_data_alloc_p); + tld_data_alloc_p = NULL; + tld_data_p = NULL; + } +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __TASK_LOCAL_DATA_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/task_work_stress.c b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c new file mode 100644 index 000000000000..450d17d91a56 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <string.h> +#include <stdio.h> +#include "task_work_stress.skel.h" +#include <linux/bpf.h> +#include <linux/perf_event.h> +#include <sys/syscall.h> +#include <time.h> +#include <stdlib.h> +#include <stdatomic.h> + +struct test_data { + int prog_fd; + atomic_int exit; +}; + +void *runner(void *test_data) +{ + struct test_data *td = test_data; + int err = 0; + LIBBPF_OPTS(bpf_test_run_opts, opts); + + while (!err && !atomic_load(&td->exit)) + err = bpf_prog_test_run_opts(td->prog_fd, &opts); + + return NULL; +} + +static int get_env_int(const char *str, int def) +{ + const char *s = getenv(str); + char *end; + int retval; + + if (!s || !*s) + return def; + errno = 0; + retval = strtol(s, &end, 10); + if (errno || *end || retval < 0) + return def; + return retval; +} + +static void task_work_run(bool enable_delete) +{ + struct task_work_stress *skel; + struct bpf_program *scheduler, *deleter; + int nthreads = 16; + int test_time_s = get_env_int("BPF_TASK_WORK_TEST_TIME", 1); + pthread_t tid[nthreads], tid_del; + bool started[nthreads], started_del = false; + struct test_data td_sched = { .exit = 0 }, td_del = { .exit = 1 }; + int i, err; + + skel = task_work_stress__open(); + if (!ASSERT_OK_PTR(skel, "task_work__open")) + return; + + scheduler = bpf_object__find_program_by_name(skel->obj, "schedule_task_work"); + bpf_program__set_autoload(scheduler, true); + + deleter = bpf_object__find_program_by_name(skel->obj, "delete_task_work"); + bpf_program__set_autoload(deleter, true); + + err = task_work_stress__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + for (i = 0; i < nthreads; ++i) + started[i] = false; + + td_sched.prog_fd = bpf_program__fd(scheduler); + for (i = 0; i < nthreads; ++i) { + if (pthread_create(&tid[i], NULL, runner, &td_sched) != 0) { + fprintf(stderr, "could not start thread"); + goto cancel; + } + started[i] = true; + } + + if (enable_delete) + atomic_store(&td_del.exit, 0); + + td_del.prog_fd = bpf_program__fd(deleter); + if (pthread_create(&tid_del, NULL, runner, &td_del) != 0) { + fprintf(stderr, "could not start thread"); + goto cancel; + } + started_del = true; + + /* Run stress test for some time */ + sleep(test_time_s); + +cancel: + atomic_store(&td_sched.exit, 1); + atomic_store(&td_del.exit, 1); + for (i = 0; i < nthreads; ++i) { + if (started[i]) + pthread_join(tid[i], NULL); + } + + if (started_del) + pthread_join(tid_del, NULL); + + ASSERT_GT(skel->bss->callback_scheduled, 0, "work scheduled"); + /* Some scheduling attempts should have failed due to contention */ + ASSERT_GT(skel->bss->schedule_error, 0, "schedule error"); + + if (enable_delete) { + /* If delete thread is enabled, it has cancelled some callbacks */ + ASSERT_GT(skel->bss->delete_success, 0, "delete success"); + ASSERT_LT(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks"); + } else { + /* Without delete thread number of scheduled callbacks is the same as fired */ + ASSERT_EQ(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks"); + } + +cleanup: + task_work_stress__destroy(skel); +} + +void test_task_work_stress(void) +{ + if (test__start_subtest("no_delete")) + task_work_run(false); + if (test__start_subtest("with_delete")) + task_work_run(true); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c new file mode 100644 index 000000000000..fd8762ba4b67 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "struct_ops_id_ops_mapping1.skel.h" +#include "struct_ops_id_ops_mapping2.skel.h" + +static void test_st_ops_id_ops_mapping(void) +{ + struct struct_ops_id_ops_mapping1 *skel1 = NULL; + struct struct_ops_id_ops_mapping2 *skel2 = NULL; + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int err, pid, prog1_fd, prog2_fd; + + skel1 = struct_ops_id_ops_mapping1__open_and_load(); + if (!ASSERT_OK_PTR(skel1, "struct_ops_id_ops_mapping1__open")) + goto out; + + skel2 = struct_ops_id_ops_mapping2__open_and_load(); + if (!ASSERT_OK_PTR(skel2, "struct_ops_id_ops_mapping2__open")) + goto out; + + err = bpf_map_get_info_by_fd(bpf_map__fd(skel1->maps.st_ops_map), + &info, &len); + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd")) + goto out; + + skel1->bss->st_ops_id = info.id; + + err = bpf_map_get_info_by_fd(bpf_map__fd(skel2->maps.st_ops_map), + &info, &len); + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd")) + goto out; + + skel2->bss->st_ops_id = info.id; + + err = struct_ops_id_ops_mapping1__attach(skel1); + if (!ASSERT_OK(err, "struct_ops_id_ops_mapping1__attach")) + goto out; + + err = struct_ops_id_ops_mapping2__attach(skel2); + if (!ASSERT_OK(err, "struct_ops_id_ops_mapping2__attach")) + goto out; + + /* run tracing prog that calls .test_1 and checks return */ + pid = getpid(); + skel1->bss->test_pid = pid; + skel2->bss->test_pid = pid; + sys_gettid(); + skel1->bss->test_pid = 0; + skel2->bss->test_pid = 0; + + /* run syscall_prog that calls .test_1 and checks return */ + prog1_fd = bpf_program__fd(skel1->progs.syscall_prog); + err = bpf_prog_test_run_opts(prog1_fd, NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + prog2_fd = bpf_program__fd(skel2->progs.syscall_prog); + err = bpf_prog_test_run_opts(prog2_fd, NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + ASSERT_EQ(skel1->bss->test_err, 0, "skel1->bss->test_err"); + ASSERT_EQ(skel2->bss->test_err, 0, "skel2->bss->test_err"); + +out: + struct_ops_id_ops_mapping1__destroy(skel1); + struct_ops_id_ops_mapping2__destroy(skel2); +} + +void test_struct_ops_id_ops_mapping(void) +{ + if (test__start_subtest("st_ops_id_ops_mapping")) + test_st_ops_id_ops_mapping(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c new file mode 100644 index 000000000000..9fd6306b455c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <pthread.h> +#include <bpf/btf.h> +#include <test_progs.h> + +#define TLD_FREE_DATA_ON_THREAD_EXIT +#define TLD_DYN_DATA_SIZE 4096 +#include "task_local_data.h" + +struct test_tld_struct { + __u64 a; + __u64 b; + __u64 c; + __u64 d; +}; + +#include "test_task_local_data.skel.h" + +TLD_DEFINE_KEY(value0_key, "value0", sizeof(int)); + +/* + * Reset task local data between subtests by clearing metadata other + * than the statically defined value0. This is safe as subtests run + * sequentially. Users of task local data library should not touch + * library internal. + */ +static void reset_tld(void) +{ + if (TLD_READ_ONCE(tld_meta_p)) { + /* Remove TLDs created by tld_create_key() */ + tld_meta_p->cnt = 1; + tld_meta_p->size = TLD_DYN_DATA_SIZE; + memset(&tld_meta_p->metadata[1], 0, + (TLD_MAX_DATA_CNT - 1) * sizeof(struct tld_metadata)); + } +} + +/* Serialize access to bpf program's global variables */ +static pthread_mutex_t global_mutex; + +static tld_key_t *tld_keys; + +#define TEST_BASIC_THREAD_NUM 32 + +void *test_task_local_data_basic_thread(void *arg) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct test_task_local_data *skel = (struct test_task_local_data *)arg; + int fd, err, tid, *value0, *value1; + struct test_tld_struct *value2; + + fd = bpf_map__fd(skel->maps.tld_data_map); + + value0 = tld_get_data(fd, value0_key); + if (!ASSERT_OK_PTR(value0, "tld_get_data")) + goto out; + + value1 = tld_get_data(fd, tld_keys[1]); + if (!ASSERT_OK_PTR(value1, "tld_get_data")) + goto out; + + value2 = tld_get_data(fd, tld_keys[2]); + if (!ASSERT_OK_PTR(value2, "tld_get_data")) + goto out; + + tid = sys_gettid(); + + *value0 = tid + 0; + *value1 = tid + 1; + value2->a = tid + 2; + value2->b = tid + 3; + value2->c = tid + 4; + value2->d = tid + 5; + + pthread_mutex_lock(&global_mutex); + /* Run task_main that read task local data and save to global variables */ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + + ASSERT_EQ(skel->bss->test_value0, tid + 0, "tld_get_data value0"); + ASSERT_EQ(skel->bss->test_value1, tid + 1, "tld_get_data value1"); + ASSERT_EQ(skel->bss->test_value2.a, tid + 2, "tld_get_data value2.a"); + ASSERT_EQ(skel->bss->test_value2.b, tid + 3, "tld_get_data value2.b"); + ASSERT_EQ(skel->bss->test_value2.c, tid + 4, "tld_get_data value2.c"); + ASSERT_EQ(skel->bss->test_value2.d, tid + 5, "tld_get_data value2.d"); + pthread_mutex_unlock(&global_mutex); + + /* Make sure valueX are indeed local to threads */ + ASSERT_EQ(*value0, tid + 0, "value0"); + ASSERT_EQ(*value1, tid + 1, "value1"); + ASSERT_EQ(value2->a, tid + 2, "value2.a"); + ASSERT_EQ(value2->b, tid + 3, "value2.b"); + ASSERT_EQ(value2->c, tid + 4, "value2.c"); + ASSERT_EQ(value2->d, tid + 5, "value2.d"); + + *value0 = tid + 5; + *value1 = tid + 4; + value2->a = tid + 3; + value2->b = tid + 2; + value2->c = tid + 1; + value2->d = tid + 0; + + /* Run task_main again */ + pthread_mutex_lock(&global_mutex); + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + + ASSERT_EQ(skel->bss->test_value0, tid + 5, "tld_get_data value0"); + ASSERT_EQ(skel->bss->test_value1, tid + 4, "tld_get_data value1"); + ASSERT_EQ(skel->bss->test_value2.a, tid + 3, "tld_get_data value2.a"); + ASSERT_EQ(skel->bss->test_value2.b, tid + 2, "tld_get_data value2.b"); + ASSERT_EQ(skel->bss->test_value2.c, tid + 1, "tld_get_data value2.c"); + ASSERT_EQ(skel->bss->test_value2.d, tid + 0, "tld_get_data value2.d"); + pthread_mutex_unlock(&global_mutex); + +out: + pthread_exit(NULL); +} + +static void test_task_local_data_basic(void) +{ + struct test_task_local_data *skel; + pthread_t thread[TEST_BASIC_THREAD_NUM]; + char dummy_key_name[TLD_NAME_LEN]; + tld_key_t key; + int i, err; + + reset_tld(); + + ASSERT_OK(pthread_mutex_init(&global_mutex, NULL), "pthread_mutex_init"); + + skel = test_task_local_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t)); + if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys")) + goto out; + + ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY"); + tld_keys[1] = tld_create_key("value1", sizeof(int)); + ASSERT_FALSE(tld_key_is_err(tld_keys[1]), "tld_create_key"); + tld_keys[2] = tld_create_key("value2", sizeof(struct test_tld_struct)); + ASSERT_FALSE(tld_key_is_err(tld_keys[2]), "tld_create_key"); + + /* + * Shouldn't be able to store data exceed a page. Create a TLD just big + * enough to exceed a page. TLDs already created are int value0, int + * value1, and struct test_tld_struct value2. + */ + key = tld_create_key("value_not_exist", + TLD_PAGE_SIZE - 2 * sizeof(int) - sizeof(struct test_tld_struct) + 1); + ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key"); + + key = tld_create_key("value2", sizeof(struct test_tld_struct)); + ASSERT_EQ(tld_key_err_or_zero(key), -EEXIST, "tld_create_key"); + + /* Shouldn't be able to create the (TLD_MAX_DATA_CNT+1)-th TLD */ + for (i = 3; i < TLD_MAX_DATA_CNT; i++) { + snprintf(dummy_key_name, TLD_NAME_LEN, "dummy_value%d", i); + tld_keys[i] = tld_create_key(dummy_key_name, sizeof(int)); + ASSERT_FALSE(tld_key_is_err(tld_keys[i]), "tld_create_key"); + } + key = tld_create_key("value_not_exist", sizeof(struct test_tld_struct)); + ASSERT_EQ(tld_key_err_or_zero(key), -ENOSPC, "tld_create_key"); + + /* Access TLDs from multiple threads and check if they are thread-specific */ + for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) { + err = pthread_create(&thread[i], NULL, test_task_local_data_basic_thread, skel); + if (!ASSERT_OK(err, "pthread_create")) + goto out; + } + +out: + for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) + pthread_join(thread[i], NULL); + + if (tld_keys) { + free(tld_keys); + tld_keys = NULL; + } + tld_free(); + test_task_local_data__destroy(skel); +} + +#define TEST_RACE_THREAD_NUM (TLD_MAX_DATA_CNT - 3) + +void *test_task_local_data_race_thread(void *arg) +{ + int err = 0, id = (intptr_t)arg; + char key_name[32]; + tld_key_t key; + + key = tld_create_key("value_not_exist", TLD_PAGE_SIZE + 1); + if (tld_key_err_or_zero(key) != -E2BIG) { + err = 1; + goto out; + } + + /* Only one thread will succeed in creating value1 */ + key = tld_create_key("value1", sizeof(int)); + if (!tld_key_is_err(key)) + tld_keys[1] = key; + + /* Only one thread will succeed in creating value2 */ + key = tld_create_key("value2", sizeof(struct test_tld_struct)); + if (!tld_key_is_err(key)) + tld_keys[2] = key; + + snprintf(key_name, 32, "thread_%d", id); + tld_keys[id] = tld_create_key(key_name, sizeof(int)); + if (tld_key_is_err(tld_keys[id])) + err = 2; +out: + return (void *)(intptr_t)err; +} + +static void test_task_local_data_race(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + pthread_t thread[TEST_RACE_THREAD_NUM]; + struct test_task_local_data *skel; + int fd, i, j, err, *data; + void *ret = NULL; + + skel = test_task_local_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t)); + if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys")) + goto out; + + fd = bpf_map__fd(skel->maps.tld_data_map); + + ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY"); + tld_keys[0] = value0_key; + + for (j = 0; j < 100; j++) { + reset_tld(); + + for (i = 0; i < TEST_RACE_THREAD_NUM; i++) { + /* + * Try to make tld_create_key() race with each other. Call + * tld_create_key(), both valid and invalid, from different threads. + */ + err = pthread_create(&thread[i], NULL, test_task_local_data_race_thread, + (void *)(intptr_t)(i + 3)); + if (CHECK_FAIL(err)) + break; + } + + /* Wait for all tld_create_key() to return */ + for (i = 0; i < TEST_RACE_THREAD_NUM; i++) { + pthread_join(thread[i], &ret); + if (CHECK_FAIL(ret)) + break; + } + + /* Write a unique number to each TLD */ + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { + data = tld_get_data(fd, tld_keys[i]); + if (CHECK_FAIL(!data)) + break; + *data = i; + } + + /* Read TLDs and check the value to see if any address collides with another */ + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { + data = tld_get_data(fd, tld_keys[i]); + if (CHECK_FAIL(*data != i)) + break; + } + + /* Run task_main to make sure no invalid TLDs are added */ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + } +out: + if (tld_keys) { + free(tld_keys); + tld_keys = NULL; + } + tld_free(); + test_task_local_data__destroy(skel); +} + +void test_task_local_data(void) +{ + if (test__start_subtest("task_local_data_basic")) + test_task_local_data_basic(); + if (test__start_subtest("task_local_data_race")) + test_task_local_data_race(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_work.c b/tools/testing/selftests/bpf/prog_tests/test_task_work.c new file mode 100644 index 000000000000..774b31a5f6ca --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_task_work.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <string.h> +#include <stdio.h> +#include "task_work.skel.h" +#include "task_work_fail.skel.h" +#include <linux/bpf.h> +#include <linux/perf_event.h> +#include <sys/syscall.h> +#include <time.h> + +static int perf_event_open(__u32 type, __u64 config, int pid) +{ + struct perf_event_attr attr = { + .type = type, + .config = config, + .size = sizeof(struct perf_event_attr), + .sample_period = 100000, + }; + + return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0); +} + +struct elem { + char data[128]; + struct bpf_task_work tw; +}; + +static int verify_map(struct bpf_map *map, const char *expected_data) +{ + int err; + struct elem value; + int processed_values = 0; + int k, sz; + + sz = bpf_map__max_entries(map); + for (k = 0; k < sz; ++k) { + err = bpf_map__lookup_elem(map, &k, sizeof(int), &value, sizeof(struct elem), 0); + if (err) + continue; + if (!ASSERT_EQ(strcmp(expected_data, value.data), 0, "map data")) { + fprintf(stderr, "expected '%s', found '%s' in %s map", expected_data, + value.data, bpf_map__name(map)); + return 2; + } + processed_values++; + } + + return processed_values == 0; +} + +static void task_work_run(const char *prog_name, const char *map_name) +{ + struct task_work *skel; + struct bpf_program *prog; + struct bpf_map *map; + struct bpf_link *link = NULL; + int err, pe_fd = -1, pid, status, pipefd[2]; + char user_string[] = "hello world"; + + if (!ASSERT_NEQ(pipe(pipefd), -1, "pipe")) + return; + + pid = fork(); + if (pid == 0) { + __u64 num = 1; + int i; + char buf; + + close(pipefd[1]); + read(pipefd[0], &buf, sizeof(buf)); + close(pipefd[0]); + + for (i = 0; i < 10000; ++i) + num *= time(0) % 7; + (void)num; + exit(0); + } + if (!ASSERT_GT(pid, 0, "fork() failed")) { + close(pipefd[0]); + close(pipefd[1]); + return; + } + + skel = task_work__open(); + if (!ASSERT_OK_PTR(skel, "task_work__open")) + return; + + bpf_object__for_each_program(prog, skel->obj) { + bpf_program__set_autoload(prog, false); + } + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "prog_name")) + goto cleanup; + bpf_program__set_autoload(prog, true); + skel->bss->user_ptr = (char *)user_string; + + err = task_work__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pe_fd = perf_event_open(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, pid); + if (pe_fd == -1 && (errno == ENOENT || errno == EOPNOTSUPP)) { + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); + test__skip(); + goto cleanup; + } + if (!ASSERT_NEQ(pe_fd, -1, "pe_fd")) { + fprintf(stderr, "perf_event_open errno: %d, pid: %d\n", errno, pid); + goto cleanup; + } + + link = bpf_program__attach_perf_event(prog, pe_fd); + if (!ASSERT_OK_PTR(link, "attach_perf_event")) + goto cleanup; + + /* perf event fd ownership is passed to bpf_link */ + pe_fd = -1; + close(pipefd[0]); + write(pipefd[1], user_string, 1); + close(pipefd[1]); + /* Wait to collect some samples */ + waitpid(pid, &status, 0); + pid = 0; + map = bpf_object__find_map_by_name(skel->obj, map_name); + if (!ASSERT_OK_PTR(map, "find map_name")) + goto cleanup; + if (!ASSERT_OK(verify_map(map, user_string), "verify map")) + goto cleanup; +cleanup: + if (pe_fd >= 0) + close(pe_fd); + bpf_link__destroy(link); + task_work__destroy(skel); + if (pid > 0) { + close(pipefd[0]); + write(pipefd[1], user_string, 1); + close(pipefd[1]); + waitpid(pid, &status, 0); + } +} + +void test_task_work(void) +{ + if (test__start_subtest("test_task_work_hash_map")) + task_work_run("oncpu_hash_map", "hmap"); + + if (test__start_subtest("test_task_work_array_map")) + task_work_run("oncpu_array_map", "arrmap"); + + if (test__start_subtest("test_task_work_lru_map")) + task_work_run("oncpu_lru_map", "lrumap"); + + RUN_TESTS(task_work_fail); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c index 367f47e4a936..b38c16b4247f 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c @@ -75,26 +75,26 @@ static void test_set_global_vars_succeeds(void) " -vl2 > %s", fix->veristat, fix->tmpfile); read(fix->fd, fix->output, fix->sz); - __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001"); - __CHECK_STR("_w=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210"); - __CHECK_STR("_w=0x80000000 ", "var_s32 = -0x80000000"); - __CHECK_STR("_w=0x76543210 ", "var_u32 = 0x76543210"); - __CHECK_STR("_w=0x8000 ", "var_s16 = -32768"); - __CHECK_STR("_w=0xecec ", "var_u16 = 60652"); - __CHECK_STR("_w=128 ", "var_s8 = -128"); - __CHECK_STR("_w=255 ", "var_u8 = 255"); - __CHECK_STR("_w=11 ", "var_ea = EA2"); - __CHECK_STR("_w=12 ", "var_eb = EB2"); - __CHECK_STR("_w=13 ", "var_ec = EC2"); - __CHECK_STR("_w=1 ", "var_b = 1"); - __CHECK_STR("_w=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170"); - __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa"); - __CHECK_STR("_w=171 ", "arr[3]= 171"); - __CHECK_STR("_w=172 ", "arr[EA2] =172"); - __CHECK_STR("_w=10 ", "enum_arr[EC2]=EA3"); - __CHECK_STR("_w=173 ", "matrix[31][7][11]=173"); - __CHECK_STR("_w=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174"); - __CHECK_STR("_w=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175"); + __CHECK_STR("=0xf000000000000001 ", "var_s64 = 0xf000000000000001"); + __CHECK_STR("=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210"); + __CHECK_STR("=0x80000000 ", "var_s32 = -0x80000000"); + __CHECK_STR("=0x76543210 ", "var_u32 = 0x76543210"); + __CHECK_STR("=0x8000 ", "var_s16 = -32768"); + __CHECK_STR("=0xecec ", "var_u16 = 60652"); + __CHECK_STR("=128 ", "var_s8 = -128"); + __CHECK_STR("=255 ", "var_u8 = 255"); + __CHECK_STR("=11 ", "var_ea = EA2"); + __CHECK_STR("=12 ", "var_eb = EB2"); + __CHECK_STR("=13 ", "var_ec = EC2"); + __CHECK_STR("=1 ", "var_b = 1"); + __CHECK_STR("=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170"); + __CHECK_STR("=0xaaaa ", "union1.var_u16 = 0xaaaa"); + __CHECK_STR("=171 ", "arr[3]= 171"); + __CHECK_STR("=172 ", "arr[EA2] =172"); + __CHECK_STR("=10 ", "enum_arr[EC2]=EA3"); + __CHECK_STR("=173 ", "matrix[31][7][11]=173"); + __CHECK_STR("=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174"); + __CHECK_STR("=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175"); out: teardown_fixture(fix); @@ -117,8 +117,8 @@ static void test_set_global_vars_from_file_succeeds(void) SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s", fix->veristat, input_file, fix->tmpfile); read(fix->fd, fix->output, fix->sz); - __CHECK_STR("_w=0x8000 ", "var_s16 = -32768"); - __CHECK_STR("_w=0xecec ", "var_u16 = 60652"); + __CHECK_STR("=0x8000 ", "var_s16 = -32768"); + __CHECK_STR("=0xecec ", "var_u16 = 60652"); out: close(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 56f660ca567b..34f9ccce2602 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -3,6 +3,7 @@ #include <test_progs.h> #include "timer.skel.h" #include "timer_failure.skel.h" +#include "timer_interrupt.skel.h" #define NUM_THR 8 @@ -99,3 +100,36 @@ void serial_test_timer(void) RUN_TESTS(timer_failure); } + +void test_timer_interrupt(void) +{ + struct timer_interrupt *skel = NULL; + int err, prog_fd; + LIBBPF_OPTS(bpf_test_run_opts, opts); + + skel = timer_interrupt__open_and_load(); + if (!skel && errno == EOPNOTSUPP) { + test__skip(); + return; + } + if (!ASSERT_OK_PTR(skel, "timer_interrupt__open_and_load")) + return; + + err = timer_interrupt__attach(skel); + if (!ASSERT_OK(err, "timer_interrupt__attach")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.test_timer_interrupt); + err = bpf_prog_test_run_opts(prog_fd, &opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto out; + + usleep(50); + + ASSERT_EQ(skel->bss->in_interrupt, 0, "in_interrupt"); + if (skel->bss->preempt_count) + ASSERT_NEQ(skel->bss->in_interrupt_cb, 0, "in_interrupt_cb"); + +out: + timer_interrupt__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c index 19e68d4b3532..6f8c0bfb0415 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c @@ -112,10 +112,39 @@ destroy_skel: tracing_struct_many_args__destroy(skel); } +static void test_union_args(void) +{ + struct tracing_struct *skel; + int err; + + skel = tracing_struct__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load")) + return; + + err = tracing_struct__attach(skel); + if (!ASSERT_OK(err, "tracing_struct__attach")) + goto out; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + + ASSERT_EQ(skel->bss->ut1_a_a, 1, "ut1:a.arg.a"); + ASSERT_EQ(skel->bss->ut1_b, 4, "ut1:b"); + ASSERT_EQ(skel->bss->ut1_c, 5, "ut1:c"); + + ASSERT_EQ(skel->bss->ut2_a, 6, "ut2:a"); + ASSERT_EQ(skel->bss->ut2_b_a, 2, "ut2:b.arg.a"); + ASSERT_EQ(skel->bss->ut2_b_b, 3, "ut2:b.arg.b"); + +out: + tracing_struct__destroy(skel); +} + void test_tracing_struct(void) { if (test__start_subtest("struct_args")) test_struct_args(); if (test__start_subtest("struct_many_args")) test_struct_many_args(); + if (test__start_subtest("union_args")) + test_union_args(); } diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c index cf3e0e7a64fa..86404476c1da 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c @@ -2,6 +2,7 @@ /* Copyright (c) 2023 Hengqi Chen */ #include <test_progs.h> +#include <asm/ptrace.h> #include "test_uprobe.skel.h" static FILE *urand_spawn(int *pid) @@ -33,7 +34,7 @@ static int urand_trigger(FILE **urand_pipe) return exit_code; } -void test_uprobe(void) +static void test_uprobe_attach(void) { LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); struct test_uprobe *skel; @@ -93,3 +94,156 @@ cleanup: pclose(urand_pipe); test_uprobe__destroy(skel); } + +#ifdef __x86_64__ +__naked __maybe_unused unsigned long uprobe_regs_change_trigger(void) +{ + asm volatile ( + "ret\n" + ); +} + +static __naked void uprobe_regs_change(struct pt_regs *before, struct pt_regs *after) +{ + asm volatile ( + "movq %r11, 48(%rdi)\n" + "movq %r10, 56(%rdi)\n" + "movq %r9, 64(%rdi)\n" + "movq %r8, 72(%rdi)\n" + "movq %rax, 80(%rdi)\n" + "movq %rcx, 88(%rdi)\n" + "movq %rdx, 96(%rdi)\n" + "movq %rsi, 104(%rdi)\n" + "movq %rdi, 112(%rdi)\n" + + /* save 2nd argument */ + "pushq %rsi\n" + "call uprobe_regs_change_trigger\n" + + /* save return value and load 2nd argument pointer to rax */ + "pushq %rax\n" + "movq 8(%rsp), %rax\n" + + "movq %r11, 48(%rax)\n" + "movq %r10, 56(%rax)\n" + "movq %r9, 64(%rax)\n" + "movq %r8, 72(%rax)\n" + "movq %rcx, 88(%rax)\n" + "movq %rdx, 96(%rax)\n" + "movq %rsi, 104(%rax)\n" + "movq %rdi, 112(%rax)\n" + + /* restore return value and 2nd argument */ + "pop %rax\n" + "pop %rsi\n" + + "movq %rax, 80(%rsi)\n" + "ret\n" + ); +} + +static void regs_common(void) +{ + struct pt_regs before = {}, after = {}, expected = { + .rax = 0xc0ffe, + .rcx = 0xbad, + .rdx = 0xdead, + .r8 = 0x8, + .r9 = 0x9, + .r10 = 0x10, + .r11 = 0x11, + .rdi = 0x12, + .rsi = 0x13, + }; + LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct test_uprobe *skel; + + skel = test_uprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->my_pid = getpid(); + skel->bss->regs = expected; + + uprobe_opts.func_name = "uprobe_regs_change_trigger"; + skel->links.test_regs_change = bpf_program__attach_uprobe_opts(skel->progs.test_regs_change, + -1, + "/proc/self/exe", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.test_regs_change, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + uprobe_regs_change(&before, &after); + + ASSERT_EQ(after.rax, expected.rax, "ax"); + ASSERT_EQ(after.rcx, expected.rcx, "cx"); + ASSERT_EQ(after.rdx, expected.rdx, "dx"); + ASSERT_EQ(after.r8, expected.r8, "r8"); + ASSERT_EQ(after.r9, expected.r9, "r9"); + ASSERT_EQ(after.r10, expected.r10, "r10"); + ASSERT_EQ(after.r11, expected.r11, "r11"); + ASSERT_EQ(after.rdi, expected.rdi, "rdi"); + ASSERT_EQ(after.rsi, expected.rsi, "rsi"); + +cleanup: + test_uprobe__destroy(skel); +} + +static noinline unsigned long uprobe_regs_change_ip_1(void) +{ + return 0xc0ffee; +} + +static noinline unsigned long uprobe_regs_change_ip_2(void) +{ + return 0xdeadbeef; +} + +static void regs_ip(void) +{ + LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct test_uprobe *skel; + unsigned long ret; + + skel = test_uprobe__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->my_pid = getpid(); + skel->bss->ip = (unsigned long) uprobe_regs_change_ip_2; + + uprobe_opts.func_name = "uprobe_regs_change_ip_1"; + skel->links.test_regs_change_ip = bpf_program__attach_uprobe_opts( + skel->progs.test_regs_change_ip, + -1, + "/proc/self/exe", + 0 /* offset */, + &uprobe_opts); + if (!ASSERT_OK_PTR(skel->links.test_regs_change_ip, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + ret = uprobe_regs_change_ip_1(); + ASSERT_EQ(ret, 0xdeadbeef, "ret"); + +cleanup: + test_uprobe__destroy(skel); +} + +static void test_uprobe_regs_change(void) +{ + if (test__start_subtest("regs_change_common")) + regs_common(); + if (test__start_subtest("regs_change_ip")) + regs_ip(); +} +#else +static void test_uprobe_regs_change(void) { } +#endif + +void test_uprobe(void) +{ + if (test__start_subtest("attach")) + test_uprobe_attach(); + test_uprobe_regs_change(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c index b17dc39a23db..955a37751b52 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -8,22 +8,31 @@ #include <asm/ptrace.h> #include <linux/compiler.h> #include <linux/stringify.h> +#include <linux/kernel.h> #include <sys/wait.h> #include <sys/syscall.h> #include <sys/prctl.h> #include <asm/prctl.h> #include "uprobe_syscall.skel.h" #include "uprobe_syscall_executed.skel.h" +#include "bpf/libbpf_internal.h" -__naked unsigned long uretprobe_regs_trigger(void) +#define USDT_NOP .byte 0x0f, 0x1f, 0x44, 0x00, 0x00 +#include "usdt.h" + +#pragma GCC diagnostic ignored "-Wattributes" + +__attribute__((aligned(16))) +__nocf_check __weak __naked unsigned long uprobe_regs_trigger(void) { asm volatile ( + ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n" /* nop5 */ "movq $0xdeadbeef, %rax\n" "ret\n" ); } -__naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after) +__naked void uprobe_regs(struct pt_regs *before, struct pt_regs *after) { asm volatile ( "movq %r15, 0(%rdi)\n" @@ -44,15 +53,17 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after) "movq $0, 120(%rdi)\n" /* orig_rax */ "movq $0, 128(%rdi)\n" /* rip */ "movq $0, 136(%rdi)\n" /* cs */ + "pushq %rax\n" "pushf\n" "pop %rax\n" "movq %rax, 144(%rdi)\n" /* eflags */ + "pop %rax\n" "movq %rsp, 152(%rdi)\n" /* rsp */ "movq $0, 160(%rdi)\n" /* ss */ /* save 2nd argument */ "pushq %rsi\n" - "call uretprobe_regs_trigger\n" + "call uprobe_regs_trigger\n" /* save return value and load 2nd argument pointer to rax */ "pushq %rax\n" @@ -92,25 +103,37 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after) ); } -static void test_uretprobe_regs_equal(void) +static void test_uprobe_regs_equal(bool retprobe) { + LIBBPF_OPTS(bpf_uprobe_opts, opts, + .retprobe = retprobe, + ); struct uprobe_syscall *skel = NULL; struct pt_regs before = {}, after = {}; unsigned long *pb = (unsigned long *) &before; unsigned long *pa = (unsigned long *) &after; unsigned long *pp; + unsigned long offset; unsigned int i, cnt; - int err; + + offset = get_uprobe_offset(&uprobe_regs_trigger); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + return; skel = uprobe_syscall__open_and_load(); if (!ASSERT_OK_PTR(skel, "uprobe_syscall__open_and_load")) goto cleanup; - err = uprobe_syscall__attach(skel); - if (!ASSERT_OK(err, "uprobe_syscall__attach")) + skel->links.probe = bpf_program__attach_uprobe_opts(skel->progs.probe, + 0, "/proc/self/exe", offset, &opts); + if (!ASSERT_OK_PTR(skel->links.probe, "bpf_program__attach_uprobe_opts")) goto cleanup; - uretprobe_regs(&before, &after); + /* make sure uprobe gets optimized */ + if (!retprobe) + uprobe_regs_trigger(); + + uprobe_regs(&before, &after); pp = (unsigned long *) &skel->bss->regs; cnt = sizeof(before)/sizeof(*pb); @@ -119,7 +142,7 @@ static void test_uretprobe_regs_equal(void) unsigned int offset = i * sizeof(unsigned long); /* - * Check register before and after uretprobe_regs_trigger call + * Check register before and after uprobe_regs_trigger call * that triggers the uretprobe. */ switch (offset) { @@ -133,7 +156,7 @@ static void test_uretprobe_regs_equal(void) /* * Check register seen from bpf program and register after - * uretprobe_regs_trigger call + * uprobe_regs_trigger call (with rax exception, check below). */ switch (offset) { /* @@ -146,6 +169,15 @@ static void test_uretprobe_regs_equal(void) case offsetof(struct pt_regs, rsp): case offsetof(struct pt_regs, ss): break; + /* + * uprobe does not see return value in rax, it needs to see the + * original (before) rax value + */ + case offsetof(struct pt_regs, rax): + if (!retprobe) { + ASSERT_EQ(pp[i], pb[i], "uprobe rax prog-before value check"); + break; + } default: if (!ASSERT_EQ(pp[i], pa[i], "register prog-after value check")) fprintf(stdout, "failed register offset %u\n", offset); @@ -175,7 +207,7 @@ static int write_bpf_testmod_uprobe(unsigned long offset) return ret != n ? (int) ret : 0; } -static void test_uretprobe_regs_change(void) +static void test_regs_change(void) { struct pt_regs before = {}, after = {}; unsigned long *pb = (unsigned long *) &before; @@ -183,13 +215,16 @@ static void test_uretprobe_regs_change(void) unsigned long cnt = sizeof(before)/sizeof(*pb); unsigned int i, err, offset; - offset = get_uprobe_offset(uretprobe_regs_trigger); + offset = get_uprobe_offset(uprobe_regs_trigger); err = write_bpf_testmod_uprobe(offset); if (!ASSERT_OK(err, "register_uprobe")) return; - uretprobe_regs(&before, &after); + /* make sure uprobe gets optimized */ + uprobe_regs_trigger(); + + uprobe_regs(&before, &after); err = write_bpf_testmod_uprobe(0); if (!ASSERT_OK(err, "unregister_uprobe")) @@ -252,6 +287,7 @@ static void test_uretprobe_syscall_call(void) ); struct uprobe_syscall_executed *skel; int pid, status, err, go[2], c = 0; + struct bpf_link *link; if (!ASSERT_OK(pipe(go), "pipe")) return; @@ -277,11 +313,14 @@ static void test_uretprobe_syscall_call(void) _exit(0); } - skel->links.test = bpf_program__attach_uprobe_multi(skel->progs.test, pid, - "/proc/self/exe", - "uretprobe_syscall_call", &opts); - if (!ASSERT_OK_PTR(skel->links.test, "bpf_program__attach_uprobe_multi")) + skel->bss->pid = pid; + + link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi, + pid, "/proc/self/exe", + "uretprobe_syscall_call", &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) goto cleanup; + skel->links.test_uretprobe_multi = link; /* kick the child */ write(go[1], &c, 1); @@ -301,6 +340,256 @@ cleanup: close(go[0]); } +#define TRAMP "[uprobes-trampoline]" + +__attribute__((aligned(16))) +__nocf_check __weak __naked void uprobe_test(void) +{ + asm volatile (" \n" + ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n" + "ret \n" + ); +} + +__attribute__((aligned(16))) +__nocf_check __weak void usdt_test(void) +{ + USDT(optimized_uprobe, usdt); +} + +static int find_uprobes_trampoline(void *tramp_addr) +{ + void *start, *end; + char line[128]; + int ret = -1; + FILE *maps; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) { + fprintf(stderr, "cannot open maps\n"); + return -1; + } + + while (fgets(line, sizeof(line), maps)) { + int m = -1; + + /* We care only about private r-x mappings. */ + if (sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n", &start, &end, &m) != 2) + continue; + if (m < 0) + continue; + if (!strncmp(&line[m], TRAMP, sizeof(TRAMP)-1) && (start == tramp_addr)) { + ret = 0; + break; + } + } + + fclose(maps); + return ret; +} + +static unsigned char nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 }; + +static void *find_nop5(void *fn) +{ + int i; + + for (i = 0; i < 10; i++) { + if (!memcmp(nop5, fn + i, 5)) + return fn + i; + } + return NULL; +} + +typedef void (__attribute__((nocf_check)) *trigger_t)(void); + +static void *check_attach(struct uprobe_syscall_executed *skel, trigger_t trigger, + void *addr, int executed) +{ + struct __arch_relative_insn { + __u8 op; + __s32 raddr; + } __packed *call; + void *tramp = NULL; + + /* Uprobe gets optimized after first trigger, so let's press twice. */ + trigger(); + trigger(); + + /* Make sure bpf program got executed.. */ + ASSERT_EQ(skel->bss->executed, executed, "executed"); + + /* .. and check the trampoline is as expected. */ + call = (struct __arch_relative_insn *) addr; + tramp = (void *) (call + 1) + call->raddr; + ASSERT_EQ(call->op, 0xe8, "call"); + ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline"); + + return tramp; +} + +static void check_detach(void *addr, void *tramp) +{ + /* [uprobes_trampoline] stays after detach */ + ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline"); + ASSERT_OK(memcmp(addr, nop5, 5), "nop5"); +} + +static void check(struct uprobe_syscall_executed *skel, struct bpf_link *link, + trigger_t trigger, void *addr, int executed) +{ + void *tramp; + + tramp = check_attach(skel, trigger, addr, executed); + bpf_link__destroy(link); + check_detach(addr, tramp); +} + +static void test_uprobe_legacy(void) +{ + struct uprobe_syscall_executed *skel = NULL; + LIBBPF_OPTS(bpf_uprobe_opts, opts, + .retprobe = true, + ); + struct bpf_link *link; + unsigned long offset; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + goto cleanup; + + /* uprobe */ + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe, + 0, "/proc/self/exe", offset, NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + + /* uretprobe */ + skel->bss->executed = 0; + + link = bpf_program__attach_uprobe_opts(skel->progs.test_uretprobe, + 0, "/proc/self/exe", offset, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + +static void test_uprobe_multi(void) +{ + struct uprobe_syscall_executed *skel = NULL; + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + struct bpf_link *link; + unsigned long offset; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + goto cleanup; + + opts.offsets = &offset; + opts.cnt = 1; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + /* uprobe.multi */ + link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_multi, + 0, "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + + /* uretprobe.multi */ + skel->bss->executed = 0; + opts.retprobe = true; + link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi, + 0, "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 2); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + +static void test_uprobe_session(void) +{ + struct uprobe_syscall_executed *skel = NULL; + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, + .session = true, + ); + struct bpf_link *link; + unsigned long offset; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + goto cleanup; + + opts.offsets = &offset; + opts.cnt = 1; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_session, + 0, "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + check(skel, link, uprobe_test, uprobe_test, 4); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + +static void test_uprobe_usdt(void) +{ + struct uprobe_syscall_executed *skel; + struct bpf_link *link; + void *addr; + + errno = 0; + addr = find_nop5(usdt_test); + if (!ASSERT_OK_PTR(addr, "find_nop5")) + return; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return; + + skel->bss->pid = getpid(); + + link = bpf_program__attach_usdt(skel->progs.test_usdt, + -1 /* all PIDs */, "/proc/self/exe", + "optimized_uprobe", "usdt", NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_usdt")) + goto cleanup; + + check(skel, link, usdt_test, addr, 2); + +cleanup: + uprobe_syscall_executed__destroy(skel); +} + /* * Borrowed from tools/testing/selftests/x86/test_shadow_stack.c. * @@ -343,43 +632,172 @@ static void test_uretprobe_shadow_stack(void) return; } - /* Run all of the uretprobe tests. */ - test_uretprobe_regs_equal(); - test_uretprobe_regs_change(); + /* Run all the tests with shadow stack in place. */ + + test_uprobe_regs_equal(false); + test_uprobe_regs_equal(true); test_uretprobe_syscall_call(); + test_uprobe_legacy(); + test_uprobe_multi(); + test_uprobe_session(); + test_uprobe_usdt(); + + test_regs_change(); + ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK); } -#else -static void test_uretprobe_regs_equal(void) + +static volatile bool race_stop; + +static USDT_DEFINE_SEMA(race); + +static void *worker_trigger(void *arg) { - test__skip(); + unsigned long rounds = 0; + + while (!race_stop) { + uprobe_test(); + rounds++; + } + + printf("tid %ld trigger rounds: %lu\n", sys_gettid(), rounds); + return NULL; } -static void test_uretprobe_regs_change(void) +static void *worker_attach(void *arg) { - test__skip(); + LIBBPF_OPTS(bpf_uprobe_opts, opts); + struct uprobe_syscall_executed *skel; + unsigned long rounds = 0, offset; + const char *sema[2] = { + __stringify(USDT_SEMA(race)), + NULL, + }; + unsigned long *ref; + int err; + + offset = get_uprobe_offset(&uprobe_test); + if (!ASSERT_GE(offset, 0, "get_uprobe_offset")) + return NULL; + + err = elf_resolve_syms_offsets("/proc/self/exe", 1, (const char **) &sema, &ref, STT_OBJECT); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_sema")) + return NULL; + + opts.ref_ctr_offset = *ref; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + return NULL; + + skel->bss->pid = getpid(); + + while (!race_stop) { + skel->links.test_uprobe = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe, + 0, "/proc/self/exe", offset, &opts); + if (!ASSERT_OK_PTR(skel->links.test_uprobe, "bpf_program__attach_uprobe_opts")) + break; + + bpf_link__destroy(skel->links.test_uprobe); + skel->links.test_uprobe = NULL; + rounds++; + } + + printf("tid %ld attach rounds: %lu hits: %d\n", sys_gettid(), rounds, skel->bss->executed); + uprobe_syscall_executed__destroy(skel); + free(ref); + return NULL; } -static void test_uretprobe_syscall_call(void) +static useconds_t race_msec(void) { - test__skip(); + char *env; + + env = getenv("BPF_SELFTESTS_UPROBE_SYSCALL_RACE_MSEC"); + if (env) + return atoi(env); + + /* default duration is 500ms */ + return 500; } -static void test_uretprobe_shadow_stack(void) +static void test_uprobe_race(void) { - test__skip(); + int err, i, nr_threads; + pthread_t *threads; + + nr_threads = libbpf_num_possible_cpus(); + if (!ASSERT_GT(nr_threads, 0, "libbpf_num_possible_cpus")) + return; + nr_threads = max(2, nr_threads); + + threads = alloca(sizeof(*threads) * nr_threads); + if (!ASSERT_OK_PTR(threads, "malloc")) + return; + + for (i = 0; i < nr_threads; i++) { + err = pthread_create(&threads[i], NULL, i % 2 ? worker_trigger : worker_attach, + NULL); + if (!ASSERT_OK(err, "pthread_create")) + goto cleanup; + } + + usleep(race_msec() * 1000); + +cleanup: + race_stop = true; + for (nr_threads = i, i = 0; i < nr_threads; i++) + pthread_join(threads[i], NULL); + + ASSERT_FALSE(USDT_SEMA_IS_ACTIVE(race), "race_semaphore"); } + +#ifndef __NR_uprobe +#define __NR_uprobe 336 #endif -void test_uprobe_syscall(void) +static void test_uprobe_error(void) +{ + long err = syscall(__NR_uprobe); + + ASSERT_EQ(err, -1, "error"); + ASSERT_EQ(errno, ENXIO, "errno"); +} + +static void __test_uprobe_syscall(void) { if (test__start_subtest("uretprobe_regs_equal")) - test_uretprobe_regs_equal(); - if (test__start_subtest("uretprobe_regs_change")) - test_uretprobe_regs_change(); + test_uprobe_regs_equal(true); if (test__start_subtest("uretprobe_syscall_call")) test_uretprobe_syscall_call(); if (test__start_subtest("uretprobe_shadow_stack")) test_uretprobe_shadow_stack(); + if (test__start_subtest("uprobe_legacy")) + test_uprobe_legacy(); + if (test__start_subtest("uprobe_multi")) + test_uprobe_multi(); + if (test__start_subtest("uprobe_session")) + test_uprobe_session(); + if (test__start_subtest("uprobe_usdt")) + test_uprobe_usdt(); + if (test__start_subtest("uprobe_race")) + test_uprobe_race(); + if (test__start_subtest("uprobe_error")) + test_uprobe_error(); + if (test__start_subtest("uprobe_regs_equal")) + test_uprobe_regs_equal(false); + if (test__start_subtest("regs_change")) + test_regs_change(); +} +#else +static void __test_uprobe_syscall(void) +{ + test__skip(); +} +#endif + +void test_uprobe_syscall(void) +{ + __test_uprobe_syscall(); } diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 9057e983cc54..f4be5269fa90 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -40,12 +40,79 @@ static void __always_inline trigger_func(int x) { } } -static void subtest_basic_usdt(void) +#if defined(__x86_64__) || defined(__i386__) +/* + * SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)" + * - 'size' is the size in bytes of the array element, and its sign indicates + * whether the type is signed (negative) or unsigned (positive). + * - 'base_reg' is the register holding the base address, normally rdx or edx + * - 'index_reg' is the register holding the index, normally rax or eax + * - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the + * size of the element type. + * + * For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be: + * - size: -2 (negative because 'short' is signed) + * - scale: 2 (since sizeof(short) == 2) + * + * The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386 + */ +static volatile short array[] = {-1, -2, -3, -4}; + +#if defined(__x86_64__) +#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2) +#else +#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2) +#endif + +unsigned short test_usdt_sib_semaphore SEC(".probes"); + +static void trigger_sib_spec(void) +{ + /* + * Force SIB addressing with inline assembly. + * + * You must compile with -std=gnu99 or -std=c99 to use the + * STAP_PROBE_ASM macro. + * + * The STAP_PROBE_ASM macro generates a quoted string that gets + * inserted between the surrounding assembly instructions. In this + * case, USDT_SIB_ARG_SPEC is embedded directly into the instruction + * stream, creating a probe point between the asm statement boundaries. + * It works fine with gcc/clang. + * + * Register constraints: + * - "d"(array): Binds the 'array' variable to %rdx or %edx register + * - "a"(0): Binds the constant 0 to %rax or %eax register + * These ensure that when USDT_SIB_ARG_SPEC references %%rdx(%edx) and + * %%rax(%eax), they contain the expected values for SIB addressing. + * + * The "memory" clobber prevents the compiler from reordering memory + * accesses around the probe point, ensuring that the probe behavior + * is predictable and consistent. + */ + asm volatile( + STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC) + : + : "d"(array), "a"(0) + : "memory" + ); +} +#endif + +static void subtest_basic_usdt(bool optimized) { LIBBPF_OPTS(bpf_usdt_opts, opts); struct test_usdt *skel; struct test_usdt__bss *bss; - int err, i; + int err, i, called; + const __u64 expected_cookie = 0xcafedeadbeeffeed; + +#define TRIGGER(x) ({ \ + trigger_func(x); \ + if (optimized) \ + trigger_func(x); \ + optimized ? 2 : 1; \ + }) skel = test_usdt__open_and_load(); if (!ASSERT_OK_PTR(skel, "skel_open")) @@ -59,20 +126,29 @@ static void subtest_basic_usdt(void) goto cleanup; /* usdt0 won't be auto-attached */ - opts.usdt_cookie = 0xcafedeadbeeffeed; + opts.usdt_cookie = expected_cookie; skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, 0 /*self*/, "/proc/self/exe", "test", "usdt0", &opts); if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link")) goto cleanup; - trigger_func(1); +#if defined(__x86_64__) || defined(__i386__) + opts.usdt_cookie = expected_cookie; + skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib, + 0 /*self*/, "/proc/self/exe", + "test", "usdt_sib", &opts); + if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link")) + goto cleanup; +#endif + + called = TRIGGER(1); - ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called"); - ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called"); - ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called"); + ASSERT_EQ(bss->usdt0_called, called, "usdt0_called"); + ASSERT_EQ(bss->usdt3_called, called, "usdt3_called"); + ASSERT_EQ(bss->usdt12_called, called, "usdt12_called"); - ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie"); + ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie"); ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt"); ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret"); ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size"); @@ -119,11 +195,11 @@ static void subtest_basic_usdt(void) * bpf_program__attach_usdt() handles this properly and attaches to * all possible places of USDT invocation. */ - trigger_func(2); + called += TRIGGER(2); - ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called"); - ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called"); - ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called"); + ASSERT_EQ(bss->usdt0_called, called, "usdt0_called"); + ASSERT_EQ(bss->usdt3_called, called, "usdt3_called"); + ASSERT_EQ(bss->usdt12_called, called, "usdt12_called"); /* only check values that depend on trigger_func()'s input value */ ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1"); @@ -142,9 +218,9 @@ static void subtest_basic_usdt(void) if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach")) goto cleanup; - trigger_func(3); + called += TRIGGER(3); - ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called"); + ASSERT_EQ(bss->usdt3_called, called, "usdt3_called"); /* this time usdt3 has custom cookie */ ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie"); ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt"); @@ -156,8 +232,19 @@ static void subtest_basic_usdt(void) ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2"); ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3"); +#if defined(__x86_64__) || defined(__i386__) + trigger_sib_spec(); + ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called"); + ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie"); + ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt"); + ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg"); + ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret"); + ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size"); +#endif + cleanup: test_usdt__destroy(skel); +#undef TRIGGER } unsigned short test_usdt_100_semaphore SEC(".probes"); @@ -425,7 +512,11 @@ cleanup: void test_usdt(void) { if (test__start_subtest("basic")) - subtest_basic_usdt(); + subtest_basic_usdt(false); +#ifdef __x86_64__ + if (test__start_subtest("basic_optimized")) + subtest_basic_usdt(true); +#endif if (test__start_subtest("multispec")) subtest_multispec_usdt(); if (test__start_subtest("urand_auto_attach")) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 77ec95d4ffaa..28e81161e6fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -46,6 +46,7 @@ #include "verifier_ldsx.skel.h" #include "verifier_leak_ptr.skel.h" #include "verifier_linked_scalars.skel.h" +#include "verifier_live_stack.skel.h" #include "verifier_load_acquire.skel.h" #include "verifier_loops1.skel.h" #include "verifier_lwt.skel.h" @@ -59,6 +60,7 @@ #include "verifier_meta_access.skel.h" #include "verifier_movsx.skel.h" #include "verifier_mtu.skel.h" +#include "verifier_mul.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" #include "verifier_bpf_fastcall.skel.h" @@ -183,6 +185,7 @@ void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); } void test_verifier_linked_scalars(void) { RUN(verifier_linked_scalars); } +void test_verifier_live_stack(void) { RUN(verifier_live_stack); } void test_verifier_loops1(void) { RUN(verifier_loops1); } void test_verifier_lwt(void) { RUN(verifier_lwt); } void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); } @@ -194,6 +197,7 @@ void test_verifier_may_goto_1(void) { RUN(verifier_may_goto_1); } void test_verifier_may_goto_2(void) { RUN(verifier_may_goto_2); } void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_movsx(void) { RUN(verifier_movsx); } +void test_verifier_mul(void) { RUN(verifier_mul); } void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); } void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index b9d9f0a502ce..178292d1251a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -9,6 +9,7 @@ #define TX_NETNS "xdp_context_tx" #define RX_NETNS "xdp_context_rx" #define TAP_NAME "tap0" +#define DUMMY_NAME "dum0" #define TAP_NETNS "xdp_context_tuntap" #define TEST_PAYLOAD_LEN 32 @@ -96,9 +97,7 @@ void test_xdp_context_test_run(void) /* Meta data must be 255 bytes or smaller */ test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0); - /* Total size of data must match data_end - data_meta */ - test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), - sizeof(data) - 1, 0, 0, 0); + /* Total size of data must be data_end - data_meta or larger */ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data) + 1, 0, 0, 0); @@ -156,15 +155,30 @@ err: return -1; } -static void assert_test_result(struct test_xdp_meta *skel) +static int write_test_packet(int tap_fd) +{ + __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; + int n; + + /* The ethernet header doesn't need to be valid for this test */ + memset(packet, 0, sizeof(struct ethhdr)); + memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN); + + n = write(tap_fd, packet, sizeof(packet)); + if (!ASSERT_EQ(n, sizeof(packet), "write packet")) + return -1; + + return 0; +} + +static void assert_test_result(const struct bpf_map *result_map) { int err; __u32 map_key = 0; __u8 map_value[TEST_PAYLOAD_LEN]; - err = bpf_map__lookup_elem(skel->maps.test_result, &map_key, - sizeof(map_key), &map_value, - TEST_PAYLOAD_LEN, BPF_ANY); + err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key), + &map_value, TEST_PAYLOAD_LEN, BPF_ANY); if (!ASSERT_OK(err, "lookup test_result")) return; @@ -172,6 +186,18 @@ static void assert_test_result(struct test_xdp_meta *skel) "test_result map contains test payload"); } +static bool clear_test_result(struct bpf_map *result_map) +{ + const __u8 v[sizeof(test_payload)] = {}; + const __u32 k = 0; + int err; + + err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY); + ASSERT_OK(err, "update test_result"); + + return err == 0; +} + void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); @@ -248,7 +274,7 @@ void test_xdp_context_veth(void) if (!ASSERT_OK(ret, "send_test_packet")) goto close; - assert_test_result(skel); + assert_test_result(skel->maps.test_result); close: close_netns(nstoken); @@ -257,17 +283,21 @@ close: netns_free(tx_ns); } -void test_xdp_context_tuntap(void) +static void test_tuntap(struct bpf_program *xdp_prog, + struct bpf_program *tc_prio_1_prog, + struct bpf_program *tc_prio_2_prog, + struct bpf_map *result_map) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); struct netns_obj *ns = NULL; - struct test_xdp_meta *skel = NULL; - __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; int tap_fd = -1; int tap_ifindex; int ret; + if (!clear_test_result(result_map)) + return; + ns = netns_new(TAP_NETNS, true); if (!ASSERT_OK_PTR(ns, "create and open ns")) return; @@ -278,10 +308,6 @@ void test_xdp_context_tuntap(void) SYS(close, "ip link set dev " TAP_NAME " up"); - skel = test_xdp_meta__open_and_load(); - if (!ASSERT_OK_PTR(skel, "open and load skeleton")) - goto close; - tap_ifindex = if_nametoindex(TAP_NAME); if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) goto close; @@ -291,33 +317,175 @@ void test_xdp_context_tuntap(void) if (!ASSERT_OK(ret, "bpf_tc_hook_create")) goto close; - tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls); + tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog); ret = bpf_tc_attach(&tc_hook, &tc_opts); if (!ASSERT_OK(ret, "bpf_tc_attach")) goto close; - ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp), + if (tc_prio_2_prog) { + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2, + .prog_fd = bpf_program__fd(tc_prio_2_prog)); + + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + } + + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) goto close; - /* The ethernet header is not relevant for this test and doesn't need to - * be meaningful. - */ - struct ethhdr eth = { 0 }; + ret = write_test_packet(tap_fd); + if (!ASSERT_OK(ret, "write_test_packet")) + goto close; - memcpy(packet, ð, sizeof(eth)); - memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); + assert_test_result(result_map); + +close: + if (tap_fd >= 0) + close(tap_fd); + netns_free(ns); +} + +/* Write a packet to a tap dev and copy it to ingress of a dummy dev */ +static void test_tuntap_mirred(struct bpf_program *xdp_prog, + struct bpf_program *tc_prog, + bool *test_pass) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct netns_obj *ns = NULL; + int dummy_ifindex; + int tap_fd = -1; + int tap_ifindex; + int ret; + + *test_pass = false; + + ns = netns_new(TAP_NETNS, true); + if (!ASSERT_OK_PTR(ns, "netns_new")) + return; - ret = write(tap_fd, packet, sizeof(packet)); - if (!ASSERT_EQ(ret, sizeof(packet), "write packet")) + /* Setup dummy interface */ + SYS(close, "ip link add name " DUMMY_NAME " type dummy"); + SYS(close, "ip link set dev " DUMMY_NAME " up"); + + dummy_ifindex = if_nametoindex(DUMMY_NAME); + if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex")) goto close; - assert_test_result(skel); + tc_hook.ifindex = dummy_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(tc_prog); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + /* Setup TAP interface */ + tap_fd = open_tuntap(TAP_NAME, true); + if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) + goto close; + + SYS(close, "ip link set dev " TAP_NAME " up"); + + tap_ifindex = if_nametoindex(TAP_NAME); + if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) + goto close; + + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + /* Copy all packets received from TAP to dummy ingress */ + SYS(close, "tc qdisc add dev " TAP_NAME " clsact"); + SYS(close, "tc filter add dev " TAP_NAME " ingress " + "protocol all matchall " + "action mirred ingress mirror dev " DUMMY_NAME); + + /* Receive a packet on TAP */ + ret = write_test_packet(tap_fd); + if (!ASSERT_OK(ret, "write_test_packet")) + goto close; + + ASSERT_TRUE(*test_pass, "test_pass"); close: if (tap_fd >= 0) close(tap_fd); - test_xdp_meta__destroy(skel); netns_free(ns); } + +void test_xdp_context_tuntap(void) +{ + struct test_xdp_meta *skel = NULL; + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + return; + + if (test__start_subtest("data_meta")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls, + NULL, /* tc prio 2 */ + skel->maps.test_result); + if (test__start_subtest("dynptr_read")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_read, + NULL, /* tc prio 2 */ + skel->maps.test_result); + if (test__start_subtest("dynptr_slice")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_slice, + NULL, /* tc prio 2 */ + skel->maps.test_result); + if (test__start_subtest("dynptr_write")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_write, + skel->progs.ing_cls_dynptr_read, + skel->maps.test_result); + if (test__start_subtest("dynptr_slice_rdwr")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_slice_rdwr, + skel->progs.ing_cls_dynptr_slice, + skel->maps.test_result); + if (test__start_subtest("dynptr_offset")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_offset_wr, + skel->progs.ing_cls_dynptr_offset_rd, + skel->maps.test_result); + if (test__start_subtest("dynptr_offset_oob")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_offset_oob, + skel->progs.ing_cls, + skel->maps.test_result); + if (test__start_subtest("clone_data_meta_empty_on_data_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_data_meta_empty_on_data_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_data_meta_empty_on_meta_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_data_meta_empty_on_meta_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_empty_on_data_slice_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_empty_on_data_slice_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_empty_on_meta_slice_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_rdonly_before_data_dynptr_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write, + &skel->bss->test_pass); + + test_xdp_meta__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 461ab18705d5..a8ab05216c38 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -7,6 +7,7 @@ #include <test_progs.h> #include "test_xdp_devmap_helpers.skel.h" +#include "test_xdp_devmap_tailcall.skel.h" #include "test_xdp_with_devmap_frags_helpers.skel.h" #include "test_xdp_with_devmap_helpers.skel.h" @@ -107,6 +108,29 @@ static void test_neg_xdp_devmap_helpers(void) } } +static void test_xdp_devmap_tailcall(enum bpf_attach_type prog_dev, + enum bpf_attach_type prog_tail, + bool expect_reject) +{ + struct test_xdp_devmap_tailcall *skel; + int err; + + skel = test_xdp_devmap_tailcall__open(); + if (!ASSERT_OK_PTR(skel, "test_xdp_devmap_tailcall__open")) + return; + + bpf_program__set_expected_attach_type(skel->progs.xdp_devmap, prog_dev); + bpf_program__set_expected_attach_type(skel->progs.xdp_entry, prog_tail); + + err = test_xdp_devmap_tailcall__load(skel); + if (expect_reject) + ASSERT_ERR(err, "test_xdp_devmap_tailcall__load"); + else + ASSERT_OK(err, "test_xdp_devmap_tailcall__load"); + + test_xdp_devmap_tailcall__destroy(skel); +} + static void test_xdp_with_devmap_frags_helpers(void) { struct test_xdp_with_devmap_frags_helpers *skel; @@ -238,8 +262,13 @@ void serial_test_xdp_devmap_attach(void) if (test__start_subtest("DEVMAP with frags programs in entries")) test_xdp_with_devmap_frags_helpers(); - if (test__start_subtest("Verifier check of DEVMAP programs")) + if (test__start_subtest("Verifier check of DEVMAP programs")) { test_neg_xdp_devmap_helpers(); + test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, BPF_XDP_DEVMAP, false); + test_xdp_devmap_tailcall(0, 0, true); + test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, 0, true); + test_xdp_devmap_tailcall(0, BPF_XDP_DEVMAP, true); + } if (test__start_subtest("DEVMAP with programs in entries on veth")) test_xdp_with_devmap_helpers_veth(); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c new file mode 100644 index 000000000000..efa350d04ec5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include <network_helpers.h> +#include "test_xdp_pull_data.skel.h" + +#define PULL_MAX (1 << 31) +#define PULL_PLUS_ONE (1 << 30) + +#define XDP_PACKET_HEADROOM 256 + +/* Find headroom and tailroom occupied by struct xdp_frame and struct + * skb_shared_info so that we can calculate the maximum pull lengths for + * test cases. They might not be the real size of the structures due to + * cache alignment. + */ +static int find_xdp_sizes(struct test_xdp_pull_data *skel, int frame_sz) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct xdp_md ctx = {}; + int prog_fd, err; + __u8 *buf; + + buf = calloc(frame_sz, sizeof(__u8)); + if (!ASSERT_OK_PTR(buf, "calloc buf")) + return -ENOMEM; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = frame_sz; + topts.data_size_out = frame_sz; + /* Pass a data_end larger than the linear space available to make sure + * bpf_prog_test_run_xdp() will fill the linear data area so that + * xdp_find_sizes can infer the size of struct skb_shared_info + */ + ctx.data_end = frame_sz; + topts.ctx_in = &ctx; + topts.ctx_out = &ctx; + topts.ctx_size_in = sizeof(ctx); + topts.ctx_size_out = sizeof(ctx); + + prog_fd = bpf_program__fd(skel->progs.xdp_find_sizes); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + free(buf); + + return err; +} + +/* xdp_pull_data_prog will directly read a marker 0xbb stored at buf[1024] + * so caller expecting XDP_PASS should always pass pull_len no less than 1024 + */ +static void run_test(struct test_xdp_pull_data *skel, int retval, + int frame_sz, int buff_len, int meta_len, int data_len, + int pull_len) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct xdp_md ctx = {}; + int prog_fd, err; + __u8 *buf; + + buf = calloc(buff_len, sizeof(__u8)); + if (!ASSERT_OK_PTR(buf, "calloc buf")) + return; + + buf[meta_len + 1023] = 0xaa; + buf[meta_len + 1024] = 0xbb; + buf[meta_len + 1025] = 0xcc; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = buff_len; + topts.data_size_out = buff_len; + ctx.data = meta_len; + ctx.data_end = meta_len + data_len; + topts.ctx_in = &ctx; + topts.ctx_out = &ctx; + topts.ctx_size_in = sizeof(ctx); + topts.ctx_size_out = sizeof(ctx); + + skel->bss->data_len = data_len; + if (pull_len & PULL_MAX) { + int headroom = XDP_PACKET_HEADROOM - meta_len - skel->bss->xdpf_sz; + int tailroom = frame_sz - XDP_PACKET_HEADROOM - + data_len - skel->bss->sinfo_sz; + + pull_len = pull_len & PULL_PLUS_ONE ? 1 : 0; + pull_len += headroom + tailroom + data_len; + } + skel->bss->pull_len = pull_len; + + prog_fd = bpf_program__fd(skel->progs.xdp_pull_data_prog); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_EQ(topts.retval, retval, "xdp_pull_data_prog retval"); + + if (retval == XDP_DROP) + goto out; + + ASSERT_EQ(ctx.data_end, meta_len + pull_len, "linear data size"); + ASSERT_EQ(topts.data_size_out, buff_len, "linear + non-linear data size"); + /* Make sure data around xdp->data_end was not messed up by + * bpf_xdp_pull_data() + */ + ASSERT_EQ(buf[meta_len + 1023], 0xaa, "data[1023]"); + ASSERT_EQ(buf[meta_len + 1024], 0xbb, "data[1024]"); + ASSERT_EQ(buf[meta_len + 1025], 0xcc, "data[1025]"); +out: + free(buf); +} + +static void test_xdp_pull_data_basic(void) +{ + u32 pg_sz, max_meta_len, max_data_len; + struct test_xdp_pull_data *skel; + + skel = test_xdp_pull_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_xdp_pull_data__open_and_load")) + return; + + pg_sz = sysconf(_SC_PAGE_SIZE); + + if (find_xdp_sizes(skel, pg_sz)) + goto out; + + max_meta_len = XDP_PACKET_HEADROOM - skel->bss->xdpf_sz; + max_data_len = pg_sz - XDP_PACKET_HEADROOM - skel->bss->sinfo_sz; + + /* linear xdp pkt, pull 0 byte */ + run_test(skel, XDP_PASS, pg_sz, 2048, 0, 2048, 2048); + + /* multi-buf pkt, pull results in linear xdp pkt */ + run_test(skel, XDP_PASS, pg_sz, 2048, 0, 1024, 2048); + + /* multi-buf pkt, pull 1 byte to linear data area */ + run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1024, 1025); + + /* multi-buf pkt, pull 0 byte to linear data area */ + run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1025, 1025); + + /* multi-buf pkt, empty linear data area, pull requires memmove */ + run_test(skel, XDP_PASS, pg_sz, 9000, 0, 0, PULL_MAX); + + /* multi-buf pkt, no headroom */ + run_test(skel, XDP_PASS, pg_sz, 9000, max_meta_len, 1024, PULL_MAX); + + /* multi-buf pkt, no tailroom, pull requires memmove */ + run_test(skel, XDP_PASS, pg_sz, 9000, 0, max_data_len, PULL_MAX); + + /* Test cases with invalid pull length */ + + /* linear xdp pkt, pull more than total data len */ + run_test(skel, XDP_DROP, pg_sz, 2048, 0, 2048, 2049); + + /* multi-buf pkt with no space left in linear data area */ + run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, max_data_len, + PULL_MAX | PULL_PLUS_ONE); + + /* multi-buf pkt, empty linear data area */ + run_test(skel, XDP_DROP, pg_sz, 9000, 0, 0, PULL_MAX | PULL_PLUS_ONE); + + /* multi-buf pkt, no headroom */ + run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, 1024, + PULL_MAX | PULL_PLUS_ONE); + + /* multi-buf pkt, no tailroom */ + run_test(skel, XDP_DROP, pg_sz, 9000, 0, max_data_len, + PULL_MAX | PULL_PLUS_ONE); + +out: + test_xdp_pull_data__destroy(skel); +} + +void test_xdp_pull_data(void) +{ + if (test__start_subtest("xdp_pull_data")) + test_xdp_pull_data_basic(); +} diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c index a52feff98112..d1841aac94a2 100644 --- a/tools/testing/selftests/bpf/progs/arena_atomics.c +++ b/tools/testing/selftests/bpf/progs/arena_atomics.c @@ -28,7 +28,8 @@ bool skip_all_tests = true; #if defined(ENABLE_ATOMICS_TESTS) && \ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false; #else bool skip_lacq_srel_tests = true; @@ -314,7 +315,8 @@ int load_acquire(const void *ctx) { #if defined(ENABLE_ATOMICS_TESTS) && \ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) #define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \ { asm volatile ( \ @@ -365,7 +367,8 @@ int store_release(const void *ctx) { #if defined(ENABLE_ATOMICS_TESTS) && \ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) #define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \ { asm volatile ( \ diff --git a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c index c4500c37f85e..086b57a426cf 100644 --- a/tools/testing/selftests/bpf/progs/arena_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c @@ -37,8 +37,11 @@ int prog(void *ctx) #if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) unsigned long flags; - if ((ret = arena_spin_lock_irqsave(&lock, flags))) + if ((ret = arena_spin_lock_irqsave(&lock, flags))) { + if (ret == -EOPNOTSUPP) + test_skip = 3; return ret; + } if (counter != limit) counter++; bpf_repeat(cs_count); diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c index 1654a530aa3d..4e51785e7606 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c @@ -101,7 +101,7 @@ static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, tp->snd_cwnd = pkts_in_flight + sndcnt; } -/* Decide wheather to run the increase function of congestion control. */ +/* Decide whether to run the increase function of congestion control. */ static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) { if (tcp_sk(sk)->reordering > TCP_REORDERING) diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 7cd73e75f52a..32c511bcd60b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ -/* WARNING: This implemenation is not necessarily the same +/* WARNING: This implementation is not necessarily the same * as the tcp_dctcp.c. The purpose is mainly for testing * the kernel BPF logic. */ diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c index ffbd4b116d17..23b2aa2604de 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -64,7 +64,8 @@ int dump_udp4(struct bpf_iter__udp *ctx) 0, 0L, 0, ctx->uid, 0, sock_i_ino(&inet->sk), inet->sk.sk_refcnt.refs.counter, udp_sk, - inet->sk.sk_drops.counter); + udp_sk->drop_counters.drops0.counter + + udp_sk->drop_counters.drops1.counter); return 0; } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c index 47ff7754f4fd..c48b05aa2a4b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -72,7 +72,7 @@ int dump_udp6(struct bpf_iter__udp *ctx) 0, 0L, 0, ctx->uid, 0, sock_i_ino(&inet->sk), inet->sk.sk_refcnt.refs.counter, udp_sk, - inet->sk.sk_drops.counter); - + udp_sk->drop_counters.drops0.counter + + udp_sk->drop_counters.drops1.counter); return 0; } diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index c1cfd297aabf..a7a1a684eed1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -33,7 +33,20 @@ * e.g. "foo{{[0-9]+}}" matches strings like "foo007". * Extended POSIX regular expression syntax is allowed * inside the brackets. + * __not_msg Message not expected to be found in verifier log. + * If __msg_not is situated between __msg tags + * framework matches __msg tags first, and then + * checks that __msg_not is not present in a portion of + * a log between bracketing __msg tags. + * Same regex syntax as for __msg is supported. * __msg_unpriv Same as __msg but for unprivileged mode. + * __not_msg_unpriv Same as __not_msg but for unprivileged mode. + * + * __stderr Message expected to be found in bpf stderr stream. The + * same regex rules apply like __msg. + * __stderr_unpriv Same as __stderr but for unpriveleged mode. + * __stdout Same as __stderr but for stdout stream. + * __stdout_unpriv Same as __stdout but for unpriveleged mode. * * __xlated Expect a line in a disassembly log after verifier applies rewrites. * Multiple __xlated attributes could be specified. @@ -115,12 +128,14 @@ * __caps_unpriv Specify the capabilities that should be set when running the test. */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg))) +#define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg))) #define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg))) #define __jited(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg))) #define __failure __attribute__((btf_decl_tag("comment:test_expect_failure"))) #define __success __attribute__((btf_decl_tag("comment:test_expect_success"))) #define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc))) #define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" XSTR(__COUNTER__) "=" msg))) +#define __not_msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg_unpriv=" XSTR(__COUNTER__) "=" msg))) #define __xlated_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated_unpriv=" XSTR(__COUNTER__) "=" msg))) #define __jited_unpriv(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg))) #define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv"))) @@ -136,9 +151,14 @@ #define __arch_x86_64 __arch("X86_64") #define __arch_arm64 __arch("ARM64") #define __arch_riscv64 __arch("RISCV64") +#define __arch_s390x __arch("s390x") #define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps)))) #define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited"))) #define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited"))) +#define __stderr(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr=" XSTR(__COUNTER__) "=" msg))) +#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg))) +#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg))) +#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg))) /* Define common capabilities tested using __caps_unpriv */ #define CAP_NET_ADMIN 12 @@ -156,6 +176,10 @@ #define __imm_ptr(name) [name]"r"(&name) #define __imm_insn(name, expr) [name]"i"(*(long *)&(expr)) +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) + /* Magic constants used with __retval() */ #define POINTER_VALUE 0xbadcafe #define TEST_DATA_LEN 64 diff --git a/tools/testing/selftests/bpf/progs/bpf_test_utils.h b/tools/testing/selftests/bpf/progs/bpf_test_utils.h new file mode 100644 index 000000000000..f4e67b492dd2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_test_utils.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_TEST_UTILS_H__ +#define __BPF_TEST_UTILS_H__ + +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +/* Clobber as many native registers and stack slots as possible. */ +static __always_inline void clobber_regs_stack(void) +{ + char tmp_str[] = "123456789"; + unsigned long tmp; + + bpf_strtoul(tmp_str, sizeof(tmp_str), 0, &tmp); + __sink(tmp); +} + +#endif diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c index 092db1d0435e..88e13e17ec9e 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c +++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c @@ -73,7 +73,7 @@ int BPF_PROG(use_css_iter_non_sleepable) } SEC("lsm.s/socket_connect") -__failure __msg("expected an RCU CS") +__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection") int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock) { u64 cgrp_id = bpf_get_current_cgroup_id(); diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c index 5354455a01be..02d8f160ca0e 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -221,3 +221,15 @@ int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path) return 0; } + +SEC("syscall") +int test_cgrp_from_id_ns(void *ctx) +{ + struct cgroup *cg; + + cg = bpf_cgroup_from_id(1); + if (!cg) + return 42; + bpf_cgroup_release(cg); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index bd8f15229f5c..dda6a8dada82 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -269,6 +269,26 @@ int data_slice_out_of_bounds_skb(struct __sk_buff *skb) return SK_PASS; } +/* A metadata slice can't be accessed out of bounds */ +SEC("?tc") +__failure __msg("value is outside of the allowed memory range") +int data_slice_out_of_bounds_skb_meta(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + /* this should fail */ + *(md + 1) = 42; + + return SK_PASS; +} + SEC("?raw_tp") __failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) @@ -1089,6 +1109,26 @@ int skb_invalid_slice_write(struct __sk_buff *skb) return SK_PASS; } +/* bpf_dynptr_slice()s are read-only and cannot be written to */ +SEC("?tc") +__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem") +int skb_meta_invalid_slice_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + /* The read-only data slice is invalidated whenever a helper changes packet data */ SEC("?tc") __failure __msg("invalid mem access 'scalar'") @@ -1192,6 +1232,188 @@ int skb_invalid_data_slice4(struct __sk_buff *skb) return SK_PASS; } +/* Read-only skb data slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *d; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + d = bpf_dynptr_slice(&data, 0, NULL, sizeof(*d)); + if (!d) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + val = *d; + + return SK_PASS; +} + +/* Read-write skb data slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *d; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + d = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*d)); + if (!d) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + *d = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated on write to skb data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *md; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&data, 0, "x", 1, 0); + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated on write to skb data slice */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *md; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&data, 0, "x", 1, 0); + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + /* The read-only data slice is invalidated whenever a helper changes packet data */ SEC("?xdp") __failure __msg("invalid mem access 'scalar'") @@ -1255,6 +1477,19 @@ int skb_invalid_ctx(void *ctx) return 0; } +/* Only supported prog type can create skb_meta-type dynptrs */ +SEC("?raw_tp") +__failure __msg("calling kernel function bpf_dynptr_from_skb_meta is not allowed") +int skb_meta_invalid_ctx(void *ctx) +{ + struct bpf_dynptr meta; + + /* this should fail */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + + return 0; +} + SEC("fentry/skb_tx_error") __failure __msg("must be referenced or trusted") int BPF_PROG(skb_invalid_ctx_fentry, void *skb) @@ -1665,6 +1900,29 @@ int clone_skb_packet_data(struct __sk_buff *skb) return 0; } +/* A skb clone's metadata slice becomes invalid anytime packet data changes */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int clone_skb_packet_meta(struct __sk_buff *skb) +{ + struct bpf_dynptr clone, meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + bpf_dynptr_clone(&meta, &clone); + md = bpf_dynptr_slice_rdwr(&clone, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return 0; +} + /* A xdp clone's data slices should be invalid anytime packet data changes */ SEC("?xdp") __failure __msg("invalid mem access 'scalar'") diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 8315273cb900..127dea342e5a 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -211,6 +211,61 @@ int test_dynptr_skb_data(struct __sk_buff *skb) return 1; } +SEC("?tc") +int test_dynptr_skb_meta_data(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + int ret; + + err = 1; + ret = bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (ret) + return 1; + + /* This should return NULL. Must use bpf_dynptr_slice API */ + err = 2; + md = bpf_dynptr_data(&meta, 0, sizeof(*md)); + if (md) + return 1; + + err = 0; + return 1; +} + +/* Check that skb metadata dynptr ops don't accept any flags. */ +SEC("?tc") +int test_dynptr_skb_meta_flags(struct __sk_buff *skb) +{ + const __u64 INVALID_FLAGS = ~0ULL; + struct bpf_dynptr meta; + __u8 buf; + int ret; + + err = 1; + ret = bpf_dynptr_from_skb_meta(skb, INVALID_FLAGS, &meta); + if (ret != -EINVAL) + return 1; + + err = 2; + ret = bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (ret) + return 1; + + err = 3; + ret = bpf_dynptr_read(&buf, 0, &meta, 0, INVALID_FLAGS); + if (ret != -EINVAL) + return 1; + + err = 4; + ret = bpf_dynptr_write(&meta, 0, &buf, 0, INVALID_FLAGS); + if (ret != -EINVAL) + return 1; + + err = 0; + return 1; +} + SEC("tp/syscalls/sys_enter_nanosleep") int test_adjust(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index 5e0a1ca96d4e..a01c2736890f 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -18,43 +18,43 @@ return *(u64 *)num; \ } -__msg(": R0_w=0xffffffff80000000") +__msg(": R0=0xffffffff80000000") check_assert(s64, ==, eq_int_min, INT_MIN); -__msg(": R0_w=0x7fffffff") +__msg(": R0=0x7fffffff") check_assert(s64, ==, eq_int_max, INT_MAX); -__msg(": R0_w=0") +__msg(": R0=0") check_assert(s64, ==, eq_zero, 0); -__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000") +__msg(": R0=0x8000000000000000 R1=0x8000000000000000") check_assert(s64, ==, eq_llong_min, LLONG_MIN); -__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff") +__msg(": R0=0x7fffffffffffffff R1=0x7fffffffffffffff") check_assert(s64, ==, eq_llong_max, LLONG_MAX); -__msg(": R0_w=scalar(id=1,smax=0x7ffffffe)") +__msg(": R0=scalar(id=1,smax=0x7ffffffe)") check_assert(s64, <, lt_pos, INT_MAX); -__msg(": R0_w=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, <, lt_zero, 0); -__msg(": R0_w=scalar(id=1,smax=0xffffffff7fffffff") +__msg(": R0=scalar(id=1,smax=0xffffffff7fffffff") check_assert(s64, <, lt_neg, INT_MIN); -__msg(": R0_w=scalar(id=1,smax=0x7fffffff)") +__msg(": R0=scalar(id=1,smax=0x7fffffff)") check_assert(s64, <=, le_pos, INT_MAX); -__msg(": R0_w=scalar(id=1,smax=0)") +__msg(": R0=scalar(id=1,smax=0)") check_assert(s64, <=, le_zero, 0); -__msg(": R0_w=scalar(id=1,smax=0xffffffff80000000") +__msg(": R0=scalar(id=1,smax=0xffffffff80000000") check_assert(s64, <=, le_neg, INT_MIN); -__msg(": R0_w=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >, gt_pos, INT_MAX); -__msg(": R0_w=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >, gt_zero, 0); -__msg(": R0_w=scalar(id=1,smin=0xffffffff80000001") +__msg(": R0=scalar(id=1,smin=0xffffffff80000001") check_assert(s64, >, gt_neg, INT_MIN); -__msg(": R0_w=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >=, ge_pos, INT_MAX); -__msg(": R0_w=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, >=, ge_zero, 0); -__msg(": R0_w=scalar(id=1,smin=0xffffffff80000000") +__msg(": R0=scalar(id=1,smin=0xffffffff80000000") check_assert(s64, >=, ge_neg, INT_MIN); SEC("?tc") diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c index 544e5ac90461..d09bbd8ae8a8 100644 --- a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c +++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c @@ -12,7 +12,7 @@ SEC("freplace/connect_v4_prog") int new_connect_v4_prog(struct bpf_sock_addr *ctx) { - // return value thats in invalid range + // return value that's in invalid range return 255; } diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c index f41257eadbb2..d273b46dfc7c 100644 --- a/tools/testing/selftests/bpf/progs/iters_state_safety.c +++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c @@ -30,7 +30,7 @@ int force_clang_to_emit_btf_for_externs(void *ctx) SEC("?raw_tp") __success __log_level(2) -__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)") +__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)") int create_and_destroy(void *ctx) { struct bpf_iter_num iter; @@ -196,7 +196,7 @@ int leak_iter_from_subprog_fail(void *ctx) SEC("?raw_tp") __success __log_level(2) -__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)") +__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)") int valid_stack_reuse(void *ctx) { struct bpf_iter_num iter; @@ -345,7 +345,7 @@ int __naked read_from_iter_slot_fail(void) "r3 = 1000;" "call %[bpf_iter_num_new];" - /* attemp to leak bpf_iter_num state */ + /* attempt to leak bpf_iter_num state */ "r7 = *(u64 *)(r6 + 0);" "r8 = *(u64 *)(r6 + 8);" diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c index 6b1588d70652..fe3663dedbe1 100644 --- a/tools/testing/selftests/bpf/progs/iters_task_failure.c +++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c @@ -15,7 +15,7 @@ void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") -__failure __msg("expected an RCU CS when using bpf_iter_task_next") +__failure __msg("kernel func bpf_iter_task_new requires RCU critical section protection") int BPF_PROG(iter_tasks_without_lock) { struct task_struct *pos; @@ -27,7 +27,7 @@ int BPF_PROG(iter_tasks_without_lock) } SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") -__failure __msg("expected an RCU CS when using bpf_iter_css_next") +__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection") int BPF_PROG(iter_css_without_lock) { u64 cg_id = bpf_get_current_cgroup_id(); diff --git a/tools/testing/selftests/bpf/progs/iters_testmod.c b/tools/testing/selftests/bpf/progs/iters_testmod.c index 9e4b45201e69..5379e9960ffd 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod.c @@ -123,3 +123,49 @@ out: bpf_iter_num_destroy(&num_it); return 0; } + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("kernel func bpf_kfunc_ret_rcu_test requires RCU critical section protection") +int iter_ret_rcu_test_protected(const void *ctx) +{ + struct task_struct *p; + + p = bpf_kfunc_ret_rcu_test(); + return p->pid; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("R1 type=rcu_ptr_or_null_ expected=") +int iter_ret_rcu_test_type(const void *ctx) +{ + struct task_struct *p; + + bpf_rcu_read_lock(); + p = bpf_kfunc_ret_rcu_test(); + bpf_this_cpu_ptr(p); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("kernel func bpf_kfunc_ret_rcu_test_nostruct requires RCU critical section protection") +int iter_ret_rcu_test_protected_nostruct(const void *ctx) +{ + void *p; + + p = bpf_kfunc_ret_rcu_test_nostruct(4); + return *(int *)p; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("R1 type=rdonly_rcu_mem_or_null expected=") +int iter_ret_rcu_test_type_nostruct(const void *ctx) +{ + void *p; + + bpf_rcu_read_lock(); + p = bpf_kfunc_ret_rcu_test_nostruct(4); + bpf_this_cpu_ptr(p); + bpf_rcu_read_unlock(); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c index 6543d5b6e0a9..83791348bed5 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c @@ -20,7 +20,7 @@ __s64 res_empty; SEC("raw_tp/sys_enter") __success __log_level(2) -__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)") __msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") __msg("call bpf_iter_testmod_seq_destroy") int testmod_seq_empty(const void *ctx) @@ -38,7 +38,7 @@ __s64 res_full; SEC("raw_tp/sys_enter") __success __log_level(2) -__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)") __msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") __msg("call bpf_iter_testmod_seq_destroy") int testmod_seq_full(const void *ctx) @@ -58,7 +58,7 @@ static volatile int zero = 0; SEC("raw_tp/sys_enter") __success __log_level(2) -__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)") +__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)") __msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)") __msg("call bpf_iter_testmod_seq_destroy") int testmod_seq_truncated(const void *ctx) diff --git a/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c new file mode 100644 index 000000000000..f77aef0474d3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#if defined(__TARGET_ARCH_x86) +SEC("kprobe") +int kprobe_write_ctx(struct pt_regs *ctx) +{ + ctx->ax = 0; + return 0; +} + +SEC("kprobe.multi") +int kprobe_multi_write_ctx(struct pt_regs *ctx) +{ + ctx->ax = 0; + return 0; +} +#endif diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c index 50e66772c046..b0fa26fb4760 100644 --- a/tools/testing/selftests/bpf/progs/loop1.c +++ b/tools/testing/selftests/bpf/progs/loop1.c @@ -1,11 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <linux/sched.h> -#include <linux/ptrace.h> -#include <stdint.h> -#include <stddef.h> -#include <stdbool.h> -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c index 947bb7e988c2..0227409d4b0e 100644 --- a/tools/testing/selftests/bpf/progs/loop2.c +++ b/tools/testing/selftests/bpf/progs/loop2.c @@ -1,11 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <linux/sched.h> -#include <linux/ptrace.h> -#include <stdint.h> -#include <stddef.h> -#include <stdbool.h> -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c index 717dab14322b..5d1c9a775e6b 100644 --- a/tools/testing/selftests/bpf/progs/loop3.c +++ b/tools/testing/selftests/bpf/progs/loop3.c @@ -1,11 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <linux/sched.h> -#include <linux/ptrace.h> -#include <stdint.h> -#include <stddef.h> -#include <stdbool.h> -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c index e4ff97fbcce1..dd36aff4fba3 100644 --- a/tools/testing/selftests/bpf/progs/loop6.c +++ b/tools/testing/selftests/bpf/progs/loop6.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/ptrace.h> -#include <stddef.h> -#include <linux/bpf.h> +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" @@ -26,12 +25,6 @@ char _license[] SEC("license") = "GPL"; #define SG_CHAIN 0x01UL #define SG_END 0x02UL -struct scatterlist { - unsigned long page_link; - unsigned int offset; - unsigned int length; -}; - #define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN) #define sg_is_last(sg) ((sg)->page_link & SG_END) #define sg_chain_ptr(sg) \ @@ -62,7 +55,7 @@ static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i) return sgp; } -int config = 0; +int run_once = 0; int result = 0; SEC("kprobe/virtqueue_add_sgs") @@ -73,14 +66,14 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs, __u64 length1 = 0, length2 = 0; unsigned int i, n, len; - if (config != 0) + if (run_once != 0) return 0; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) { __sink(out_sgs); for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX); sgp = __sg_next(sgp)) { - bpf_probe_read_kernel(&len, sizeof(len), &sgp->length); + len = BPF_CORE_READ(sgp, length); length1 += len; n++; } @@ -90,13 +83,13 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs, __sink(in_sgs); for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX); sgp = __sg_next(sgp)) { - bpf_probe_read_kernel(&len, sizeof(len), &sgp->length); + len = BPF_CORE_READ(sgp, length); length2 += len; n++; } } - config = 1; + run_once = 1; result = length2 - length1; return 0; } diff --git a/tools/testing/selftests/bpf/progs/lpm_trie.h b/tools/testing/selftests/bpf/progs/lpm_trie.h new file mode 100644 index 000000000000..76aa5821807f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lpm_trie.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __PROGS_LPM_TRIE_H +#define __PROGS_LPM_TRIE_H + +struct trie_key { + __u32 prefixlen; + __u32 data; +}; + +/* Benchmark operations */ +enum { + LPM_OP_NOOP = 0, + LPM_OP_BASELINE, + LPM_OP_LOOKUP, + LPM_OP_INSERT, + LPM_OP_UPDATE, + LPM_OP_DELETE, + LPM_OP_FREE +}; + +/* + * Return values from run_bench. + * + * Negative values are also allowed and represent kernel error codes. + */ +#define LPM_BENCH_SUCCESS 0 +#define LPM_BENCH_REINIT_MAP 1 /* Reset trie to initial state for current op */ + +#endif diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_bench.c b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c new file mode 100644 index 000000000000..a0e6ebd5507a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Cloudflare */ + +#include <vmlinux.h> +#include <errno.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" +#include "bpf_atomic.h" +#include "progs/lpm_trie.h" + +#define BPF_OBJ_NAME_LEN 16U +#define MAX_ENTRIES 100000000 +#define NR_LOOPS 10000 + +char _license[] SEC("license") = "GPL"; + +/* Filled by userspace. See fill_map() in bench_lpm_trie_map.c */ +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __type(key, struct trie_key); + __type(value, __u32); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, MAX_ENTRIES); +} trie_map SEC(".maps"); + +long hits; +long duration_ns; + +/* Configured from userspace */ +__u32 nr_entries; +__u32 prefixlen; +bool random; +__u8 op; + +static __u64 latency_free_start; + +SEC("fentry/bpf_map_free_deferred") +int BPF_PROG(trie_free_entry, struct work_struct *work) +{ + struct bpf_map *map = container_of(work, struct bpf_map, work); + char name[BPF_OBJ_NAME_LEN]; + u32 map_type; + + map_type = BPF_CORE_READ(map, map_type); + if (map_type != BPF_MAP_TYPE_LPM_TRIE) + return 0; + + /* + * Ideally we'd have access to the map ID but that's already + * freed before we enter trie_free(). + */ + BPF_CORE_READ_STR_INTO(&name, map, name); + if (bpf_strncmp(name, BPF_OBJ_NAME_LEN, "trie_free_map")) + return 0; + + latency_free_start = bpf_ktime_get_ns(); + + return 0; +} + +SEC("fexit/bpf_map_free_deferred") +int BPF_PROG(trie_free_exit, struct work_struct *work) +{ + __u64 val; + + if (!latency_free_start) + return 0; + + val = bpf_ktime_get_ns() - latency_free_start; + latency_free_start = 0; + + __sync_add_and_fetch(&duration_ns, val); + __sync_add_and_fetch(&hits, 1); + + return 0; +} + +static __u32 cur_key; + +static __always_inline void generate_key(struct trie_key *key) +{ + key->prefixlen = prefixlen; + + if (random) + key->data = bpf_get_prandom_u32() % nr_entries; + else + key->data = cur_key++ % nr_entries; +} + +static int noop(__u32 index, __u32 *unused) +{ + return 0; +} + +static int baseline(__u32 index, __u32 *unused) +{ + struct trie_key key; + __u32 blackbox = 0; + + generate_key(&key); + /* Avoid compiler optimizing out the modulo */ + barrier_var(blackbox); + blackbox = READ_ONCE(key.data); + + return 0; +} + +static int lookup(__u32 index, int *retval) +{ + struct trie_key key; + + generate_key(&key); + if (!bpf_map_lookup_elem(&trie_map, &key)) { + *retval = -ENOENT; + return 1; + } + + return 0; +} + +static int insert(__u32 index, int *retval) +{ + struct trie_key key; + u32 val = 1; + int err; + + generate_key(&key); + err = bpf_map_update_elem(&trie_map, &key, &val, BPF_NOEXIST); + if (err) { + *retval = err; + return 1; + } + + /* Is this the last entry? */ + if (key.data == nr_entries - 1) { + /* For atomicity concerns, see the comment in delete() */ + *retval = LPM_BENCH_REINIT_MAP; + return 1; + } + + return 0; +} + +static int update(__u32 index, int *retval) +{ + struct trie_key key; + u32 val = 1; + int err; + + generate_key(&key); + err = bpf_map_update_elem(&trie_map, &key, &val, BPF_EXIST); + if (err) { + *retval = err; + return 1; + } + + return 0; +} + +static int delete(__u32 index, int *retval) +{ + struct trie_key key; + int err; + + generate_key(&key); + err = bpf_map_delete_elem(&trie_map, &key); + if (err) { + *retval = err; + return 1; + } + + /* Do we need to refill the map? */ + if (key.data == nr_entries - 1) { + /* + * Atomicity isn't required because DELETE only supports + * one producer running concurrently. What we need is a + * way to track how many entries have been deleted from + * the trie between consecutive invocations of the BPF + * prog because a single bpf_loop() call might not + * delete all entries, e.g. when NR_LOOPS < nr_entries. + */ + *retval = LPM_BENCH_REINIT_MAP; + return 1; + } + + return 0; +} + +SEC("xdp") +int BPF_PROG(run_bench) +{ + int err = LPM_BENCH_SUCCESS; + u64 start, delta; + int loops; + + start = bpf_ktime_get_ns(); + + switch (op) { + case LPM_OP_NOOP: + loops = bpf_loop(NR_LOOPS, noop, NULL, 0); + break; + case LPM_OP_BASELINE: + loops = bpf_loop(NR_LOOPS, baseline, NULL, 0); + break; + case LPM_OP_LOOKUP: + loops = bpf_loop(NR_LOOPS, lookup, &err, 0); + break; + case LPM_OP_INSERT: + loops = bpf_loop(NR_LOOPS, insert, &err, 0); + break; + case LPM_OP_UPDATE: + loops = bpf_loop(NR_LOOPS, update, &err, 0); + break; + case LPM_OP_DELETE: + loops = bpf_loop(NR_LOOPS, delete, &err, 0); + break; + default: + bpf_printk("invalid benchmark operation\n"); + return -1; + } + + delta = bpf_ktime_get_ns() - start; + + __sync_add_and_fetch(&duration_ns, delta); + __sync_add_and_fetch(&hits, loops); + + return err; +} diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_map.c b/tools/testing/selftests/bpf/progs/lpm_trie_map.c new file mode 100644 index 000000000000..6e60d686b664 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lpm_trie_map.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define MAX_ENTRIES 100000000 + +struct trie_key { + __u32 prefixlen; + __u32 data; +}; + +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __type(key, struct trie_key); + __type(value, __u32); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, MAX_ENTRIES); +} trie_free_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/map_excl.c b/tools/testing/selftests/bpf/progs/map_excl.c new file mode 100644 index 000000000000..d461684728e4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_excl.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Google LLC. */ +#include <linux/bpf.h> +#include <time.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} excl_map SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int should_have_access(void *ctx) +{ + int key = 0, value = 0xdeadbeef; + + bpf_map_update_elem(&excl_map, &key, &value, 0); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int should_not_have_access(void *ctx) +{ + int key = 0, value = 0xdeadbeef; + + bpf_map_update_elem(&excl_map, &key, &value, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c index 4f94c971ae86..3b984b6ae7c0 100644 --- a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c +++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c @@ -8,8 +8,8 @@ SEC("tp_btf/sys_enter") __success __log_level(2) -__msg("r8 = *(u64 *)(r7 +0) ; R7_w=ptr_nameidata(off={{[0-9]+}}) R8_w=rdonly_untrusted_mem(sz=0)") -__msg("r9 = *(u8 *)(r8 +0) ; R8_w=rdonly_untrusted_mem(sz=0) R9_w=scalar") +__msg("r8 = *(u64 *)(r7 +0) ; R7=ptr_nameidata(off={{[0-9]+}}) R8=rdonly_untrusted_mem(sz=0)") +__msg("r9 = *(u8 *)(r8 +0) ; R8=rdonly_untrusted_mem(sz=0) R9=scalar") int btf_id_to_ptr_mem(void *ctx) { struct task_struct *task; diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c index 70302477e326..41389e579578 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c @@ -117,7 +117,7 @@ int _getsockopt_subflow(struct bpf_sockopt *ctx) return 1; msk = bpf_core_cast(sk, struct mptcp_sock); - if (msk->pm.subflows != 1) { + if (msk->pm.extra_subflows != 1) { ctx->retval = -1; return 1; } diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c index 098ef970fac1..b05565d1db0d 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_search.c +++ b/tools/testing/selftests/bpf/progs/rbtree_search.c @@ -183,7 +183,7 @@ long test_##op##_spinlock_##dolock(void *ctx) \ } /* - * Use a spearate MSG macro instead of passing to TEST_XXX(..., MSG) + * Use a separate MSG macro instead of passing to TEST_XXX(..., MSG) * to ensure the message itself is not in the bpf prog lineinfo * which the verifier includes in its log. * Otherwise, the test_loader will incorrectly match the prog lineinfo diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/stacktrace_map.c index 47568007b668..0c77df05be7f 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/stacktrace_map.c @@ -50,6 +50,7 @@ struct sched_switch_args { int next_prio; }; +__u32 stack_id; SEC("tracepoint/sched/sched_switch") int oncpu(struct sched_switch_args *ctx) { @@ -64,6 +65,7 @@ int oncpu(struct sched_switch_args *ctx) /* The size of stackmap and stackid_hmap should be the same */ key = bpf_get_stackid(ctx, &stackmap, 0); if ((int)key >= 0) { + stack_id = key; bpf_map_update_elem(&stackid_hmap, &key, &val, 0); stack_p = bpf_map_lookup_elem(&stack_amap, &key); if (stack_p) diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c index 35790897dc87..4a5bd852f10c 100644 --- a/tools/testing/selftests/bpf/progs/stream.c +++ b/tools/testing/selftests/bpf/progs/stream.c @@ -5,6 +5,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" #include "bpf_experimental.h" +#include "bpf_arena_common.h" struct arr_elem { struct bpf_res_spin_lock lock; @@ -17,10 +18,29 @@ struct { __type(value, struct arr_elem); } arrmap SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 1); /* number of pages */ +} arena SEC(".maps"); + +struct elem { + struct bpf_timer timer; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + #define ENOSPC 28 #define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" int size; +u64 fault_addr; +void *arena_ptr; SEC("syscall") __success __retval(0) @@ -37,7 +57,15 @@ int stream_exhaust(void *ctx) } SEC("syscall") +__arch_x86_64 +__arch_arm64 +__arch_s390x __success __retval(0) +__stderr("ERROR: Timeout detected for may_goto instruction") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") int stream_cond_break(void *ctx) { while (can_loop) @@ -47,6 +75,15 @@ int stream_cond_break(void *ctx) SEC("syscall") __success __retval(0) +__stderr("ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock") +__stderr("{{Attempted lock = (0x[0-9a-fA-F]+)\n" +"Total held locks = 1\n" +"Held lock\\[ 0\\] = \\1}}") +__stderr("...") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") int stream_deadlock(void *ctx) { struct bpf_res_spin_lock *lock, *nlock; @@ -76,4 +113,125 @@ int stream_syscall(void *ctx) return 0; } +SEC("syscall") +__arch_x86_64 +__arch_arm64 +__success __retval(0) +__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") +int stream_arena_write_fault(void *ctx) +{ + struct bpf_arena *ptr = (void *)&arena; + u64 user_vm_start; + + /* Prevent GCC bounds warning: casting &arena to struct bpf_arena * + * triggers bounds checking since the map definition is smaller than struct + * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the + * bounds analysis + */ + barrier_var(ptr); + user_vm_start = ptr->user_vm_start; + fault_addr = user_vm_start + 0x7fff; + bpf_addr_space_cast(user_vm_start, 0, 1); + asm volatile ( + "r1 = %0;" + "r2 = 1;" + "*(u32 *)(r1 + 0x7fff) = r2;" + : + : "r" (user_vm_start) + : "r1", "r2" + ); + return 0; +} + +SEC("syscall") +__arch_x86_64 +__arch_arm64 +__success __retval(0) +__stderr("ERROR: Arena READ access at unmapped address 0x{{.*}}") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") +int stream_arena_read_fault(void *ctx) +{ + struct bpf_arena *ptr = (void *)&arena; + u64 user_vm_start; + + /* Prevent GCC bounds warning: casting &arena to struct bpf_arena * + * triggers bounds checking since the map definition is smaller than struct + * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the + * bounds analysis + */ + barrier_var(ptr); + user_vm_start = ptr->user_vm_start; + fault_addr = user_vm_start + 0x7fff; + bpf_addr_space_cast(user_vm_start, 0, 1); + asm volatile ( + "r1 = %0;" + "r1 = *(u32 *)(r1 + 0x7fff);" + : + : "r" (user_vm_start) + : "r1" + ); + return 0; +} + +static __noinline void subprog(void) +{ + int __arena *addr = (int __arena *)0xdeadbeef; + + arena_ptr = &arena; + *addr = 1; +} + +SEC("syscall") +__arch_x86_64 +__arch_arm64 +__success __retval(0) +__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") +int stream_arena_subprog_fault(void *ctx) +{ + subprog(); + return 0; +} + +static __noinline int timer_cb(void *map, int *key, struct bpf_timer *timer) +{ + int __arena *addr = (int __arena *)0xdeadbeef; + + arena_ptr = &arena; + *addr = 1; + return 0; +} + +SEC("syscall") +__arch_x86_64 +__arch_arm64 +__success __retval(0) +__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}") +__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}") +__stderr("Call trace:\n" +"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" +"|[ \t]+[^\n]+\n)*}}") +int stream_arena_callback_fault(void *ctx) +{ + struct bpf_timer *arr_timer; + + arr_timer = bpf_map_lookup_elem(&array, &(int){0}); + if (!arr_timer) + return 0; + bpf_timer_init(arr_timer, &array, 1); + bpf_timer_set_callback(arr_timer, timer_cb); + bpf_timer_start(arr_timer, 0, 0); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c index 53af438bd998..99d72c68f76a 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c @@ -31,6 +31,8 @@ char *invalid_kern_ptr = (char *)-1; /* Passing NULL to string kfuncs (treated as a userspace ptr) */ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); } SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); } SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); } SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); } SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); } @@ -49,6 +51,8 @@ SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return /* Passing userspace ptr to string kfuncs */ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); } SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); } SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); } SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); } SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); } @@ -69,6 +73,8 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { re /* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */ SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); } SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); } +SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); } SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); } SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); } SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); } diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c index 89fb4669b0e9..e41cc5601994 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c @@ -7,6 +7,7 @@ char long_str[XATTR_SIZE_MAX + 1]; SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); } +SEC("syscall") int test_strcasecmp_too_long(void *ctx) { return bpf_strcasecmp(long_str, long_str); } SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); } SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); } SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); } diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c index a47690174e0e..2e3498e37b9c 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -12,6 +12,11 @@ char str[] = "hello world"; /* Functional tests */ __test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); } __test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); } +__test(0) int test_strcasecmp_eq1(void *ctx) { return bpf_strcasecmp(str, "hello world"); } +__test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO WORLD"); } +__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); } +__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); } +__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); } __test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); } __test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); } __test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); } diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c new file mode 100644 index 000000000000..ad8bb546c9bf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id) +int st_ops_id; + +int test_pid; +int test_err; + +#define MAP1_MAGIC 1234 + +SEC("struct_ops") +int BPF_PROG(test_1, struct st_ops_args *args) +{ + return MAP1_MAGIC; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(sys_enter, struct pt_regs *regs, long id) +{ + struct st_ops_args args = {}; + struct task_struct *task; + int ret; + + task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP1_MAGIC) + test_err++; + + return 0; +} + +SEC("syscall") +int syscall_prog(void *ctx) +{ + struct st_ops_args args = {}; + int ret; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP1_MAGIC) + test_err++; + + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_multi_st_ops st_ops_map = { + .test_1 = (void *)test_1, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c new file mode 100644 index 000000000000..cea1a2f4b62f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id) +int st_ops_id; + +int test_pid; +int test_err; + +#define MAP2_MAGIC 4567 + +SEC("struct_ops") +int BPF_PROG(test_1, struct st_ops_args *args) +{ + return MAP2_MAGIC; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(sys_enter, struct pt_regs *regs, long id) +{ + struct st_ops_args args = {}; + struct task_struct *task; + int ret; + + task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP2_MAGIC) + test_err++; + + return 0; +} + +SEC("syscall") +int syscall_prog(void *ctx) +{ + struct st_ops_args args = {}; + int ret; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP2_MAGIC) + test_err++; + + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_multi_st_ops st_ops_map = { + .test_1 = (void *)test_1, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c index 36386b3c23a1..2b98b7710816 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c @@ -9,7 +9,7 @@ void bpf_task_release(struct task_struct *p) __ksym; /* This test struct_ops BPF programs returning referenced kptr. The verifier should * allow a referenced kptr or a NULL pointer to be returned. A referenced kptr to task - * here is acquried automatically as the task argument is tagged with "__ref". + * here is acquired automatically as the task argument is tagged with "__ref". */ SEC("struct_ops/test_return_ref_kptr") struct task_struct *BPF_PROG(kptr_return, int dummy, diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c index 76dcb6089d7f..9c0a65466356 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c @@ -9,7 +9,7 @@ __attribute__((nomerge)) extern void bpf_task_release(struct task_struct *p) __k /* This is a test BPF program that uses struct_ops to access a referenced * kptr argument. This is a test for the verifier to ensure that it - * 1) recongnizes the task as a referenced object (i.e., ref_obj_id > 0), and + * 1) recognizes the task as a referenced object (i.e., ref_obj_id > 0), and * 2) the same reference can be acquired from multiple paths as long as it * has not been released. */ diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c index 327ca395e860..d556b19413d7 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c @@ -2,6 +2,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_legacy.h" +#include "bpf_test_utils.h" struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); @@ -24,6 +25,8 @@ int entry(struct __sk_buff *skb) { int ret = 1; + clobber_regs_stack(); + count++; subprog_tail(skb); subprog_tail(skb); diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c index 72fd0d577506..ae94c9c70ab7 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c @@ -2,6 +2,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_test_utils.h" int classifier_0(struct __sk_buff *skb); int classifier_1(struct __sk_buff *skb); @@ -60,6 +61,8 @@ int tailcall_bpf2bpf_hierarchy_2(struct __sk_buff *skb) { int ret = 0; + clobber_regs_stack(); + subprog_tail0(skb); subprog_tail1(skb); diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c index a7fb91cb05b7..56b6b0099840 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c @@ -2,6 +2,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_test_utils.h" int classifier_0(struct __sk_buff *skb); @@ -53,6 +54,8 @@ int tailcall_bpf2bpf_hierarchy_3(struct __sk_buff *skb) { int ret = 0; + clobber_regs_stack(); + bpf_tail_call_static(skb, &jmp_table0, 0); __sink(ret); diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c index c87f9ca982d3..5261395713cd 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c @@ -4,6 +4,7 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_test_utils.h" struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); @@ -24,6 +25,8 @@ int subprog_tail(void *ctx) SEC("fentry/dummy") int BPF_PROG(fentry, struct sk_buff *skb) { + clobber_regs_stack(); + count++; subprog_tail(ctx); subprog_tail(ctx); diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h new file mode 100644 index 000000000000..432fff2af844 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TASK_LOCAL_DATA_BPF_H +#define __TASK_LOCAL_DATA_BPF_H + +/* + * Task local data is a library that facilitates sharing per-task data + * between user space and bpf programs. + * + * + * USAGE + * + * A TLD, an entry of data in task local data, first needs to be created by the + * user space. This is done by calling user space API, TLD_DEFINE_KEY() or + * tld_create_key(), with the name of the TLD and the size. + * + * TLD_DEFINE_KEY(prio, "priority", sizeof(int)); + * + * or + * + * void func_call(...) { + * tld_key_t prio, in_cs; + * + * prio = tld_create_key("priority", sizeof(int)); + * in_cs = tld_create_key("in_critical_section", sizeof(bool)); + * ... + * + * A key associated with the TLD, which has an opaque type tld_key_t, will be + * initialized or returned. It can be used to get a pointer to the TLD in the + * user space by calling tld_get_data(). + * + * In a bpf program, tld_object_init() first needs to be called to initialized a + * tld_object on the stack. Then, TLDs can be accessed by calling tld_get_data(). + * The API will try to fetch the key by the name and use it to locate the data. + * A pointer to the TLD will be returned. It also caches the key in a task local + * storage map, tld_key_map, whose value type, struct tld_keys, must be defined + * by the developer. + * + * struct tld_keys { + * tld_key_t prio; + * tld_key_t in_cs; + * }; + * + * SEC("struct_ops") + * void prog(struct task_struct task, ...) + * { + * struct tld_object tld_obj; + * int err, *p; + * + * err = tld_object_init(task, &tld_obj); + * if (err) + * return; + * + * p = tld_get_data(&tld_obj, prio, "priority", sizeof(int)); + * if (p) + * // do something depending on *p + */ +#include <errno.h> +#include <bpf/bpf_helpers.h> + +#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1)) +#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1) + +#define TLD_MAX_DATA_CNT (__PAGE_SIZE / sizeof(struct tld_metadata) - 1) + +#ifndef TLD_NAME_LEN +#define TLD_NAME_LEN 62 +#endif + +#ifndef TLD_KEY_MAP_CREATE_RETRY +#define TLD_KEY_MAP_CREATE_RETRY 10 +#endif + +typedef struct { + __s16 off; +} tld_key_t; + +struct tld_metadata { + char name[TLD_NAME_LEN]; + __u16 size; +}; + +struct tld_meta_u { + __u8 cnt; + __u16 size; + struct tld_metadata metadata[TLD_MAX_DATA_CNT]; +}; + +struct tld_data_u { + __u64 start; /* offset of tld_data_u->data in a page */ + char data[__PAGE_SIZE - sizeof(__u64)]; +}; + +struct tld_map_value { + struct tld_data_u __uptr *data; + struct tld_meta_u __uptr *meta; +}; + +typedef struct tld_uptr_dummy { + struct tld_data_u data[0]; + struct tld_meta_u meta[0]; +} *tld_uptr_dummy_t; + +struct tld_object { + struct tld_map_value *data_map; + struct tld_keys *key_map; + /* + * Force the compiler to generate the actual definition of tld_meta_u + * and tld_data_u in BTF. Without it, tld_meta_u and u_tld_data will + * be BTF_KIND_FWD. + */ + tld_uptr_dummy_t dummy[0]; +}; + +/* + * Map value of tld_key_map for caching keys. Must be defined by the developer. + * Members should be tld_key_t and passed to the 3rd argument of tld_fetch_key(). + */ +struct tld_keys; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct tld_map_value); +} tld_data_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct tld_keys); +} tld_key_map SEC(".maps"); + +/** + * tld_object_init() - Initialize a tld_object. + * + * @task: The task_struct of the target task + * @tld_obj: A pointer to a tld_object to be initialized + * + * Return 0 on success; -ENODATA if the user space did not initialize task local data + * for the current task through tld_get_data(); -ENOMEM if the creation of tld_key_map + * fails + */ +__attribute__((unused)) +static int tld_object_init(struct task_struct *task, struct tld_object *tld_obj) +{ + int i; + + tld_obj->data_map = bpf_task_storage_get(&tld_data_map, task, 0, 0); + if (!tld_obj->data_map) + return -ENODATA; + + bpf_for(i, 0, TLD_KEY_MAP_CREATE_RETRY) { + tld_obj->key_map = bpf_task_storage_get(&tld_key_map, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (likely(tld_obj->key_map)) + break; + } + if (!tld_obj->key_map) + return -ENOMEM; + + return 0; +} + +/* + * Return the offset of TLD if @name is found. Otherwise, return the current TLD count + * using the nonpositive range so that the next tld_get_data() can skip fetching key if + * no new TLD is added or start comparing name from the first newly added TLD. + */ +__attribute__((unused)) +static int __tld_fetch_key(struct tld_object *tld_obj, const char *name, int i_start) +{ + struct tld_metadata *metadata; + int i, cnt, start, off = 0; + + if (!tld_obj->data_map || !tld_obj->data_map->data || !tld_obj->data_map->meta) + return 0; + + start = tld_obj->data_map->data->start; + cnt = tld_obj->data_map->meta->cnt; + metadata = tld_obj->data_map->meta->metadata; + + bpf_for(i, 0, cnt) { + if (i >= TLD_MAX_DATA_CNT) + break; + + if (i >= i_start && !bpf_strncmp(metadata[i].name, TLD_NAME_LEN, name)) + return start + off; + + off += TLD_ROUND_UP(metadata[i].size, 8); + } + + return -cnt; +} + +/** + * tld_get_data() - Retrieve a pointer to the TLD associated with the name. + * + * @tld_obj: A pointer to a valid tld_object initialized by tld_object_init() + * @key: The cached key of the TLD in tld_key_map + * @name: The name of the key associated with a TLD + * @size: The size of the TLD. Must be a known constant value + * + * Return a pointer to the TLD associated with @name; NULL if not found or @size is too + * big. @key is used to cache the key if the TLD is found to speed up subsequent calls. + * It should be defined as an member of tld_keys of tld_key_t type by the developer. + */ +#define tld_get_data(tld_obj, key, name, size) \ + ({ \ + void *data = NULL, *_data = (tld_obj)->data_map->data; \ + long off = (tld_obj)->key_map->key.off; \ + int cnt; \ + \ + if (likely(_data)) { \ + if (likely(off > 0)) { \ + barrier_var(off); \ + if (likely(off < __PAGE_SIZE - size)) \ + data = _data + off; \ + } else { \ + cnt = -(off); \ + if (likely((tld_obj)->data_map->meta) && \ + cnt < (tld_obj)->data_map->meta->cnt) { \ + off = __tld_fetch_key(tld_obj, name, cnt); \ + (tld_obj)->key_map->key.off = off; \ + \ + if (likely(off < __PAGE_SIZE - size)) { \ + barrier_var(off); \ + if (off > 0) \ + data = _data + off; \ + } \ + } \ + } \ + } \ + data; \ + }) + +#endif diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c new file mode 100644 index 000000000000..23217f06a3ec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_work.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <string.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "errno.h" + +char _license[] SEC("license") = "GPL"; + +const void *user_ptr = NULL; + +struct elem { + char data[128]; + struct bpf_task_work tw; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} hmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} arrmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} lrumap SEC(".maps"); + +static int process_work(struct bpf_map *map, void *key, void *value) +{ + struct elem *work = value; + + bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0); + return 0; +} + +int key = 0; + +SEC("perf_event") +int oncpu_hash_map(struct pt_regs *args) +{ + struct elem empty_work = { .data = { 0 } }; + struct elem *work; + struct task_struct *task; + int err; + + task = bpf_get_current_task_btf(); + err = bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST); + if (err) + return 0; + work = bpf_map_lookup_elem(&hmap, &key); + if (!work) + return 0; + + bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + return 0; +} + +SEC("perf_event") +int oncpu_array_map(struct pt_regs *args) +{ + struct elem *work; + struct task_struct *task; + + task = bpf_get_current_task_btf(); + work = bpf_map_lookup_elem(&arrmap, &key); + if (!work) + return 0; + bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work, NULL); + return 0; +} + +SEC("perf_event") +int oncpu_lru_map(struct pt_regs *args) +{ + struct elem empty_work = { .data = { 0 } }; + struct elem *work; + struct task_struct *task; + int err; + + task = bpf_get_current_task_btf(); + work = bpf_map_lookup_elem(&lrumap, &key); + if (work) + return 0; + err = bpf_map_update_elem(&lrumap, &key, &empty_work, BPF_NOEXIST); + if (err) + return 0; + work = bpf_map_lookup_elem(&lrumap, &key); + if (!work || work->data[0]) + return 0; + bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work, NULL); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c new file mode 100644 index 000000000000..77fe8f28facd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_work_fail.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <string.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +const void *user_ptr = NULL; + +struct elem { + char data[128]; + struct bpf_task_work tw; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} hmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} arrmap SEC(".maps"); + +static int process_work(struct bpf_map *map, void *key, void *value) +{ + struct elem *work = value; + + bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0); + return 0; +} + +int key = 0; + +SEC("perf_event") +__failure __msg("doesn't match map pointer in R3") +int mismatch_map(struct pt_regs *args) +{ + struct elem *work; + struct task_struct *task; + + task = bpf_get_current_task_btf(); + work = bpf_map_lookup_elem(&arrmap, &key); + if (!work) + return 0; + bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + return 0; +} + +SEC("perf_event") +__failure __msg("arg#1 doesn't point to a map value") +int no_map_task_work(struct pt_regs *args) +{ + struct task_struct *task; + struct bpf_task_work tw; + + task = bpf_get_current_task_btf(); + bpf_task_work_schedule_resume(task, &tw, &hmap, process_work, NULL); + return 0; +} + +SEC("perf_event") +__failure __msg("Possibly NULL pointer passed to trusted arg1") +int task_work_null(struct pt_regs *args) +{ + struct task_struct *task; + + task = bpf_get_current_task_btf(); + bpf_task_work_schedule_resume(task, NULL, &hmap, process_work, NULL); + return 0; +} + +SEC("perf_event") +__failure __msg("Possibly NULL pointer passed to trusted arg2") +int map_null(struct pt_regs *args) +{ + struct elem *work; + struct task_struct *task; + + task = bpf_get_current_task_btf(); + work = bpf_map_lookup_elem(&arrmap, &key); + if (!work) + return 0; + bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work, NULL); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c new file mode 100644 index 000000000000..90fca06fff56 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_work_stress.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <string.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +#define ENTRIES 128 + +char _license[] SEC("license") = "GPL"; + +__u64 callback_scheduled = 0; +__u64 callback_success = 0; +__u64 schedule_error = 0; +__u64 delete_success = 0; + +struct elem { + __u32 count; + struct bpf_task_work tw; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, ENTRIES); + __type(key, int); + __type(value, struct elem); +} hmap SEC(".maps"); + +static int process_work(struct bpf_map *map, void *key, void *value) +{ + __sync_fetch_and_add(&callback_success, 1); + return 0; +} + +SEC("syscall") +int schedule_task_work(void *ctx) +{ + struct elem empty_work = {.count = 0}; + struct elem *work; + int key = 0, err; + + key = bpf_ktime_get_ns() % ENTRIES; + work = bpf_map_lookup_elem(&hmap, &key); + if (!work) { + bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST); + work = bpf_map_lookup_elem(&hmap, &key); + if (!work) + return 0; + } + err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap, + process_work, NULL); + if (err) + __sync_fetch_and_add(&schedule_error, 1); + else + __sync_fetch_and_add(&callback_scheduled, 1); + return 0; +} + +SEC("syscall") +int delete_task_work(void *ctx) +{ + int key = 0, err; + + key = bpf_get_prandom_u32() % ENTRIES; + err = bpf_map_delete_elem(&hmap, &key); + if (!err) + __sync_fetch_and_add(&delete_success, 1); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index f344c6835e84..26a53e54b8fa 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -22,6 +22,7 @@ #include "bpf_compiler.h" #include "test_cls_redirect.h" +#include "bpf_misc.h" #pragma GCC diagnostic ignored "-Waddress-of-packed-member" @@ -31,9 +32,6 @@ #define INLINING __always_inline #endif -#define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) - #define IP_OFFSET_MASK (0x1FFF) #define IP_MF (0x2000) @@ -129,7 +127,7 @@ typedef uint8_t *net_ptr __attribute__((align_value(8))); typedef struct buf { struct __sk_buff *skb; net_ptr head; - /* NB: tail musn't have alignment other than 1, otherwise + /* NB: tail mustn't have alignment other than 1, otherwise * LLVM will go and eliminate code, e.g. when checking packet lengths. */ uint8_t *const tail; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c index d0f7670351e5..dfd4a2710391 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c @@ -494,7 +494,7 @@ static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_header *offset += sizeof(*next_hop); - /* Skip the remainig next hops (may be zero). */ + /* Skip the remaining next hops (may be zero). */ return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1); } diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index abb7344b531f..5edf3cdc213d 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -1,9 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ -#include <stdbool.h> -#include <stddef.h> -#include <linux/bpf.h> -#include <linux/ptrace.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/test_pinning_devmap.c b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c new file mode 100644 index 000000000000..c855f8f87eff --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} pinmap1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u32); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} pinmap2 SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/test_task_local_data.c b/tools/testing/selftests/bpf/progs/test_task_local_data.c new file mode 100644 index 000000000000..fffafc013044 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_task_local_data.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> + +#include "task_local_data.bpf.h" + +struct tld_keys { + tld_key_t value0; + tld_key_t value1; + tld_key_t value2; + tld_key_t value_not_exist; +}; + +struct test_tld_struct { + __u64 a; + __u64 b; + __u64 c; + __u64 d; +}; + +int test_value0; +int test_value1; +struct test_tld_struct test_value2; + +SEC("syscall") +int task_main(void *ctx) +{ + struct tld_object tld_obj; + struct test_tld_struct *struct_p; + struct task_struct *task; + int err, *int_p; + + task = bpf_get_current_task_btf(); + err = tld_object_init(task, &tld_obj); + if (err) + return 1; + + int_p = tld_get_data(&tld_obj, value0, "value0", sizeof(int)); + if (int_p) + test_value0 = *int_p; + else + return 2; + + int_p = tld_get_data(&tld_obj, value1, "value1", sizeof(int)); + if (int_p) + test_value1 = *int_p; + else + return 3; + + struct_p = tld_get_data(&tld_obj, value2, "value2", sizeof(struct test_tld_struct)); + if (struct_p) + test_value2 = *struct_p; + else + return 4; + + int_p = tld_get_data(&tld_obj, value_not_exist, "value_not_exist", sizeof(int)); + if (int_p) + return 5; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c index 5f4e87ee949a..1ecdf4c54de4 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c @@ -14,10 +14,7 @@ #include <bpf/bpf_endian.h> #define BPF_PROG_TEST_TCP_HDR_OPTIONS #include "test_tcp_hdr_options.h" - -#ifndef sizeof_field -#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) -#endif +#include "bpf_misc.h" __u8 test_kind = TCPOPT_EXP; __u16 test_magic = 0xeB9F; diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index 540181c115a8..ef00d38b0a8d 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -23,7 +23,6 @@ struct { struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(max_entries, 2); __type(key, int); __type(value, __u32); } perf_event_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c index 896c88a4960d..12f4065fca20 100644 --- a/tools/testing/selftests/bpf/progs/test_uprobe.c +++ b/tools/testing/selftests/bpf/progs/test_uprobe.c @@ -59,3 +59,41 @@ int BPF_UPROBE(test4) test4_result = 1; return 0; } + +#if defined(__TARGET_ARCH_x86) +struct pt_regs regs; + +SEC("uprobe") +int BPF_UPROBE(test_regs_change) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + ctx->ax = regs.ax; + ctx->cx = regs.cx; + ctx->dx = regs.dx; + ctx->r8 = regs.r8; + ctx->r9 = regs.r9; + ctx->r10 = regs.r10; + ctx->r11 = regs.r11; + ctx->di = regs.di; + ctx->si = regs.si; + return 0; +} + +unsigned long ip; + +SEC("uprobe") +int BPF_UPROBE(test_regs_change_ip) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + ctx->ip = ip; + return 0; +} +#endif diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c index 096488f47fbc..a78c87537b07 100644 --- a/tools/testing/selftests/bpf/progs/test_usdt.c +++ b/tools/testing/selftests/bpf/progs/test_usdt.c @@ -107,4 +107,35 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5, return 0; } +int usdt_sib_called; +u64 usdt_sib_cookie; +int usdt_sib_arg_cnt; +int usdt_sib_arg_ret; +short usdt_sib_arg; +int usdt_sib_arg_size; + +/* + * usdt_sib is only tested on x86-related architectures, so it requires + * manual attach since auto-attach will panic tests under other architectures + */ +SEC("usdt") +int usdt_sib(struct pt_regs *ctx) +{ + long tmp; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + __sync_fetch_and_add(&usdt_sib_called, 1); + + usdt_sib_cookie = bpf_usdt_cookie(ctx); + usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx); + + usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp); + usdt_sib_arg = (short)tmp; + usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0); + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c new file mode 100644 index 000000000000..814e2a980e97 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +SEC("xdp") +int xdp_devmap(struct xdp_md *ctx) +{ + return ctx->egress_ifindex; +} + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __array(values, int (void *)); +} xdp_map SEC(".maps") = { + .values = { + [0] = (void *)&xdp_devmap, + }, +}; + +SEC("xdp") +int xdp_entry(struct xdp_md *ctx) +{ + bpf_tail_call(ctx, &xdp_map, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index fcf6ca14f2ea..d79cb74b571e 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -1,8 +1,11 @@ +#include <stdbool.h> #include <linux/bpf.h> +#include <linux/errno.h> #include <linux/if_ether.h> #include <linux/pkt_cls.h> #include <bpf/bpf_helpers.h> +#include "bpf_kfuncs.h" #define META_SIZE 32 @@ -23,6 +26,8 @@ struct { __uint(value_size, META_SIZE); } test_result SEC(".maps"); +bool test_pass; + SEC("tc") int ing_cls(struct __sk_buff *ctx) { @@ -40,6 +45,231 @@ int ing_cls(struct __sk_buff *ctx) return TC_ACT_SHOT; } +/* Read from metadata using bpf_dynptr_read helper */ +SEC("tc") +int ing_cls_dynptr_read(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 zero = 0; + __u8 *dst; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0); + + return TC_ACT_SHOT; +} + +/* Write to metadata using bpf_dynptr_write helper */ +SEC("tc") +int ing_cls_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + __u8 *src; + + bpf_dynptr_from_skb(ctx, 0, &data); + src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_write(&meta, 0, src, META_SIZE, 0); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Read from metadata using read-only dynptr slice */ +SEC("tc") +int ing_cls_dynptr_slice(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 zero = 0; + __u8 *dst, *src; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + __builtin_memcpy(dst, src, META_SIZE); + + return TC_ACT_SHOT; +} + +/* Write to metadata using writeable dynptr slice */ +SEC("tc") +int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + __u8 *src, *dst; + + bpf_dynptr_from_skb(ctx, 0, &data); + src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE); + if (!dst) + return TC_ACT_SHOT; + + __builtin_memcpy(dst, src, META_SIZE); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Read skb metadata in chunks from various offsets in different ways. */ +SEC("tc") +int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 chunk_len = META_SIZE / 4; + const __u32 zero = 0; + __u8 *dst, *src; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + /* 1. Regular read */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_read(dst, chunk_len, &meta, 0, 0); + dst += chunk_len; + + /* 2. Read from an offset-adjusted dynptr */ + bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta)); + bpf_dynptr_read(dst, chunk_len, &meta, 0, 0); + dst += chunk_len; + + /* 3. Read at an offset */ + bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0); + dst += chunk_len; + + /* 4. Read from a slice starting at an offset */ + src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len); + if (!src) + return TC_ACT_SHOT; + __builtin_memcpy(dst, src, chunk_len); + + return TC_ACT_SHOT; +} + +/* Write skb metadata in chunks at various offsets in different ways. */ +SEC("tc") +int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx) +{ + const __u32 chunk_len = META_SIZE / 4; + __u8 payload[META_SIZE]; + struct bpf_dynptr meta; + __u8 *dst, *src; + + bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload)); + src = payload; + + /* 1. Regular write */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_write(&meta, 0, src, chunk_len, 0); + src += chunk_len; + + /* 2. Write to an offset-adjusted dynptr */ + bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta)); + bpf_dynptr_write(&meta, 0, src, chunk_len, 0); + src += chunk_len; + + /* 3. Write at an offset */ + bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0); + src += chunk_len; + + /* 4. Write to a slice starting at an offset */ + dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len); + if (!dst) + return TC_ACT_SHOT; + __builtin_memcpy(dst, src, chunk_len); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Pass an OOB offset to dynptr read, write, adjust, slice. */ +SEC("tc") +int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + __u8 md, *p; + int err; + + err = bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (err) + goto fail; + + /* read offset OOB */ + err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0); + if (err != -E2BIG) + goto fail; + + /* write offset OOB */ + err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0); + if (err != -E2BIG) + goto fail; + + /* adjust end offset OOB */ + err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1); + if (err != -ERANGE) + goto fail; + + /* adjust start offset OOB */ + err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1); + if (err != -ERANGE) + goto fail; + + /* slice offset OOB */ + p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p)); + if (p) + goto fail; + + /* slice rdwr offset OOB */ + p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p)); + if (p) + goto fail; + + return TC_ACT_UNSPEC; +fail: + return TC_ACT_SHOT; +} + +/* Reserve and clear space for metadata but don't populate it */ +SEC("xdp") +int ing_xdp_zalloc_meta(struct xdp_md *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + __u8 *meta; + int ret; + + /* Drop any non-test packets */ + if (eth + 1 > ctx_ptr(ctx, data_end)) + return XDP_DROP; + if (eth->h_proto != 0) + return XDP_DROP; + + ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); + if (ret < 0) + return XDP_DROP; + + meta = ctx_ptr(ctx, data_meta); + if (meta + META_SIZE > ctx_ptr(ctx, data)) + return XDP_DROP; + + __builtin_memset(meta, 0, META_SIZE); + + return XDP_PASS; +} + SEC("xdp") int ing_xdp(struct xdp_md *ctx) { @@ -73,4 +303,193 @@ int ing_xdp(struct xdp_md *ctx) return XDP_PASS; } +/* + * Check that skb->data_meta..skb->data is empty if prog writes to packet + * _payload_ using packet pointers. Applies only to cloned skbs. + */ +SEC("tc") +int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + + if (eth + 1 > ctx_ptr(ctx, data_end)) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + if (ctx->data_meta != ctx->data) + goto out; + + /* Packet write to trigger unclone in prologue */ + eth->h_proto = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb->data_meta..skb->data is empty if prog writes to packet + * _metadata_ using packet pointers. Applies only to cloned skbs. + */ +SEC("tc") +int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + __u8 *md = ctx_ptr(ctx, data_meta); + + if (eth + 1 > ctx_ptr(ctx, data_end)) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + if (md + 1 > ctx_ptr(ctx, data)) { + /* Expect no metadata */ + test_pass = true; + } else { + /* Metadata write to trigger unclone in prologue */ + *md = 42; + } +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is writable but empty if prog writes to packet + * _payload_ using a dynptr slice. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + goto out; + + /* Packet write to trigger unclone in prologue */ + eth->h_proto = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is writable but empty if prog writes to packet + * _metadata_ using a dynptr slice. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + __u8 *md; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + goto out; + + /* Metadata write to trigger unclone in prologue */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (md) + *md = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is read-only before prog writes to packet payload + * using dynptr_write helper. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect read-only metadata before unclone */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + goto out; + + /* Helper write to payload will unclone the packet */ + bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0); + + /* Expect no metadata after unclone */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is read-only if prog writes to packet + * metadata using dynptr_write helper. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect read-only metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + goto out; + + /* Metadata write. Expect failure. */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c new file mode 100644 index 000000000000..c41a21413eaa --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +int xdpf_sz; +int sinfo_sz; +int data_len; +int pull_len; + +#define XDP_PACKET_HEADROOM 256 + +SEC("xdp.frags") +int xdp_find_sizes(struct xdp_md *ctx) +{ + xdpf_sz = sizeof(struct xdp_frame); + sinfo_sz = __PAGE_SIZE - XDP_PACKET_HEADROOM - + (ctx->data_end - ctx->data); + + return XDP_PASS; +} + +SEC("xdp.frags") +int xdp_pull_data_prog(struct xdp_md *ctx) +{ + __u8 *data_end = (void *)(long)ctx->data_end; + __u8 *data = (void *)(long)ctx->data; + __u8 *val_p; + int err; + + if (data_len != data_end - data) + return XDP_DROP; + + err = bpf_xdp_pull_data(ctx, pull_len); + if (err) + return XDP_DROP; + + val_p = (void *)(long)ctx->data + 1024; + if (val_p + 1 > (void *)(long)ctx->data_end) + return XDP_DROP; + + if (*val_p != 0xbb) + return XDP_DROP; + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/timer_interrupt.c b/tools/testing/selftests/bpf/progs/timer_interrupt.c new file mode 100644 index 000000000000..19180a455f40 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_interrupt.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +#define CLOCK_MONOTONIC 1 + +int preempt_count; +int in_interrupt; +int in_interrupt_cb; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + +static int timer_in_interrupt(void *map, int *key, struct bpf_timer *timer) +{ + preempt_count = get_preempt_count(); + in_interrupt_cb = bpf_in_interrupt(); + return 0; +} + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test_timer_interrupt) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&array, &key); + if (!timer) + return 0; + + in_interrupt = bpf_in_interrupt(); + bpf_timer_init(timer, &array, CLOCK_MONOTONIC); + bpf_timer_set_callback(timer, timer_in_interrupt); + bpf_timer_start(timer, 0, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c index c435a3a8328a..d460732e2023 100644 --- a/tools/testing/selftests/bpf/progs/tracing_struct.c +++ b/tools/testing/selftests/bpf/progs/tracing_struct.c @@ -18,6 +18,18 @@ struct bpf_testmod_struct_arg_3 { int b[]; }; +union bpf_testmod_union_arg_1 { + char a; + short b; + struct bpf_testmod_struct_arg_1 arg; +}; + +union bpf_testmod_union_arg_2 { + int a; + long b; + struct bpf_testmod_struct_arg_2 arg; +}; + long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs; __u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3; long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret; @@ -26,6 +38,9 @@ long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret; long t5_ret; int t6; +long ut1_a_a, ut1_b, ut1_c; +long ut2_a, ut2_b_a, ut2_b_b; + SEC("fentry/bpf_testmod_test_struct_arg_1") int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c) { @@ -130,4 +145,22 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a) return 0; } +SEC("fexit/bpf_testmod_test_union_arg_1") +int BPF_PROG2(test_union_arg_1, union bpf_testmod_union_arg_1, a, int, b, int, c) +{ + ut1_a_a = a.arg.a; + ut1_b = b; + ut1_c = c; + return 0; +} + +SEC("fexit/bpf_testmod_test_union_arg_2") +int BPF_PROG2(test_union_arg_2, int, a, union bpf_testmod_union_arg_2, b) +{ + ut2_a = a; + ut2_b_a = b.arg.a; + ut2_b_b = b.arg.b; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 044a6d78923e..3d5f30c29ae3 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -97,6 +97,12 @@ int bench_trigger_kprobe_multi(void *ctx) return 0; } +SEC("?kprobe.multi/bpf_get_numa_node_id") +int bench_kprobe_multi_empty(void *ctx) +{ + return 0; +} + SEC("?kretprobe.multi/bpf_get_numa_node_id") int bench_trigger_kretprobe_multi(void *ctx) { @@ -104,6 +110,12 @@ int bench_trigger_kretprobe_multi(void *ctx) return 0; } +SEC("?kretprobe.multi/bpf_get_numa_node_id") +int bench_kretprobe_multi_empty(void *ctx) +{ + return 0; +} + SEC("?fentry/bpf_get_numa_node_id") int bench_trigger_fentry(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall.c b/tools/testing/selftests/bpf/progs/uprobe_syscall.c index 8a4fa6c7ef59..e08c31669e5a 100644 --- a/tools/testing/selftests/bpf/progs/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/progs/uprobe_syscall.c @@ -7,8 +7,8 @@ struct pt_regs regs; char _license[] SEC("license") = "GPL"; -SEC("uretprobe//proc/self/exe:uretprobe_regs_trigger") -int uretprobe(struct pt_regs *ctx) +SEC("uprobe") +int probe(struct pt_regs *ctx) { __builtin_memcpy(®s, ctx, sizeof(regs)); return 0; diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c index 0d7f1a7db2e2..915d38591bf6 100644 --- a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c +++ b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include "vmlinux.h" #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/usdt.bpf.h> #include <string.h> struct pt_regs regs; @@ -8,10 +10,64 @@ struct pt_regs regs; char _license[] SEC("license") = "GPL"; int executed = 0; +int pid; + +SEC("uprobe") +int BPF_UPROBE(test_uprobe) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; + return 0; +} + +SEC("uretprobe") +int BPF_URETPROBE(test_uretprobe) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; + return 0; +} + +SEC("uprobe.multi") +int test_uprobe_multi(struct pt_regs *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; + return 0; +} SEC("uretprobe.multi") -int test(struct pt_regs *regs) +int test_uretprobe_multi(struct pt_regs *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; + return 0; +} + +SEC("uprobe.session") +int test_uprobe_session(struct pt_regs *ctx) { - executed = 1; + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; + return 0; +} + +SEC("usdt") +int test_usdt(struct pt_regs *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + executed++; return 0; } diff --git a/tools/testing/selftests/bpf/progs/uretprobe_stack.c b/tools/testing/selftests/bpf/progs/uretprobe_stack.c index 9fdcf396b8f4..a2951e2f1711 100644 --- a/tools/testing/selftests/bpf/progs/uretprobe_stack.c +++ b/tools/testing/selftests/bpf/progs/uretprobe_stack.c @@ -26,8 +26,8 @@ int usdt_len; SEC("uprobe//proc/self/exe:target_1") int BPF_UPROBE(uprobe_1) { - /* target_1 is recursive wit depth of 2, so we capture two separate - * stack traces, depending on which occurence it is + /* target_1 is recursive with depth of 2, so we capture two separate + * stack traces, depending on which occurrence it is */ static bool recur = false; diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index 9dbdf123542d..f19e15400b3e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -240,6 +240,7 @@ int big_alloc2(void *ctx) return 5; bpf_arena_free_pages(&arena, (void __arena *)pg, 2); page[i] = NULL; + barrier(); page[i + 1] = NULL; cond_break; } diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 87a2c60d86e6..0a72e0228ea9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -926,7 +926,7 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for non const xor src dst") __success __log_level(2) -__msg("5: (af) r0 ^= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))") +__msg("5: (af) r0 ^= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))") __naked void non_const_xor_src_dst(void) { asm volatile (" \ @@ -947,7 +947,7 @@ __naked void non_const_xor_src_dst(void) SEC("socket") __description("bounds check for non const or src dst") __success __log_level(2) -__msg("5: (4f) r0 |= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))") +__msg("5: (4f) r0 |= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))") __naked void non_const_or_src_dst(void) { asm volatile (" \ @@ -968,7 +968,7 @@ __naked void non_const_or_src_dst(void) SEC("socket") __description("bounds check for non const mul regs") __success __log_level(2) -__msg("5: (2f) r0 *= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))") +__msg("5: (2f) r0 *= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))") __naked void non_const_mul_regs(void) { asm volatile (" \ @@ -1241,7 +1241,7 @@ l0_%=: r0 = 0; \ SEC("tc") __description("multiply mixed sign bounds. test 1") __success __log_level(2) -__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))") +__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))") __naked void mult_mixed0_sign(void) { asm volatile ( @@ -1264,7 +1264,7 @@ __naked void mult_mixed0_sign(void) SEC("tc") __description("multiply mixed sign bounds. test 2") __success __log_level(2) -__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=smin32=-100,smax=smax32=200)") +__msg("r6 *= r7 {{.*}}; R6=scalar(smin=smin32=-100,smax=smax32=200)") __naked void mult_mixed1_sign(void) { asm volatile ( @@ -1287,7 +1287,7 @@ __naked void mult_mixed1_sign(void) SEC("tc") __description("multiply negative bounds") __success __log_level(2) -__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))") +__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))") __naked void mult_sign_bounds(void) { asm volatile ( @@ -1311,7 +1311,7 @@ __naked void mult_sign_bounds(void) SEC("tc") __description("multiply bounds that don't cross signed boundary") __success __log_level(2) -__msg("r8 *= r6 {{.*}}; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8_w=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))") +__msg("r8 *= r6 {{.*}}; R6=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))") __naked void mult_no_sign_crossing(void) { asm volatile ( @@ -1331,7 +1331,7 @@ __naked void mult_no_sign_crossing(void) SEC("tc") __description("multiplication overflow, result in unbounded reg. test 1") __success __log_level(2) -__msg("r6 *= r7 {{.*}}; R6_w=scalar()") +__msg("r6 *= r7 {{.*}}; R6=scalar()") __naked void mult_unsign_ovf(void) { asm volatile ( @@ -1353,7 +1353,7 @@ __naked void mult_unsign_ovf(void) SEC("tc") __description("multiplication overflow, result in unbounded reg. test 2") __success __log_level(2) -__msg("r6 *= r7 {{.*}}; R6_w=scalar()") +__msg("r6 *= r7 {{.*}}; R6=scalar()") __naked void mult_sign_ovf(void) { asm volatile ( @@ -1376,7 +1376,7 @@ __naked void mult_sign_ovf(void) SEC("socket") __description("64-bit addition, all outcomes overflow") __success __log_level(2) -__msg("5: (0f) r3 += r3 {{.*}} R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)") +__msg("5: (0f) r3 += r3 {{.*}} R3=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)") __retval(0) __naked void add64_full_overflow(void) { @@ -1396,7 +1396,7 @@ __naked void add64_full_overflow(void) SEC("socket") __description("64-bit addition, partial overflow, result in unbounded reg") __success __log_level(2) -__msg("4: (0f) r3 += r3 {{.*}} R3_w=scalar()") +__msg("4: (0f) r3 += r3 {{.*}} R3=scalar()") __retval(0) __naked void add64_partial_overflow(void) { @@ -1416,7 +1416,7 @@ __naked void add64_partial_overflow(void) SEC("socket") __description("32-bit addition overflow, all outcomes overflow") __success __log_level(2) -__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))") +__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))") __retval(0) __naked void add32_full_overflow(void) { @@ -1436,7 +1436,7 @@ __naked void add32_full_overflow(void) SEC("socket") __description("32-bit addition, partial overflow, result in unbounded u32 bounds") __success __log_level(2) -__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") __retval(0) __naked void add32_partial_overflow(void) { @@ -1456,7 +1456,7 @@ __naked void add32_partial_overflow(void) SEC("socket") __description("64-bit subtraction, all outcomes underflow") __success __log_level(2) -__msg("6: (1f) r3 -= r1 {{.*}} R3_w=scalar(umin=1,umax=0x8000000000000000)") +__msg("6: (1f) r3 -= r1 {{.*}} R3=scalar(umin=1,umax=0x8000000000000000)") __retval(0) __naked void sub64_full_overflow(void) { @@ -1477,7 +1477,7 @@ __naked void sub64_full_overflow(void) SEC("socket") __description("64-bit subtraction, partial overflow, result in unbounded reg") __success __log_level(2) -__msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()") +__msg("3: (1f) r3 -= r2 {{.*}} R3=scalar()") __retval(0) __naked void sub64_partial_overflow(void) { @@ -1496,7 +1496,7 @@ __naked void sub64_partial_overflow(void) SEC("socket") __description("32-bit subtraction overflow, all outcomes underflow") __success __log_level(2) -__msg("5: (1c) w3 -= w1 {{.*}} R3_w=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))") +__msg("5: (1c) w3 -= w1 {{.*}} R3=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))") __retval(0) __naked void sub32_full_overflow(void) { @@ -1517,7 +1517,7 @@ __naked void sub32_full_overflow(void) SEC("socket") __description("32-bit subtraction, partial overflow, result in unbounded u32 bounds") __success __log_level(2) -__msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__msg("3: (1c) w3 -= w2 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") __retval(0) __naked void sub32_partial_overflow(void) { @@ -1617,7 +1617,7 @@ l0_%=: r0 = 0; \ SEC("socket") __description("bounds deduction cross sign boundary, positive overlap") __success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS) -__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))") +__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))") __retval(0) __naked void bounds_deduct_positive_overlap(void) { @@ -1650,7 +1650,7 @@ l0_%=: r0 = 0; \ SEC("socket") __description("bounds deduction cross sign boundary, two overlaps") __failure __flag(BPF_F_TEST_REG_INVARIANTS) -__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)") +__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)") __msg("frame pointer is read only") __naked void bounds_deduct_two_overlaps(void) { @@ -1668,4 +1668,45 @@ l0_%=: r0 = 0; \ : __clobber_all); } +SEC("socket") +__description("dead jne branch due to disagreeing tnums") +__success __log_level(2) +__naked void jne_disagreeing_tnums(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w0 = w0; \ + r0 >>= 30; \ + r0 <<= 30; \ + r1 = r0; \ + r1 += 1024; \ + if r1 != r0 goto +1; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("dead jeq branch due to disagreeing tnums") +__success __log_level(2) +__naked void jeq_disagreeing_tnums(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w0 = w0; \ + r0 >>= 30; \ + r0 <<= 30; \ + r1 = r0; \ + r1 += 1024; \ + if r1 == r0 goto +1; \ + exit; \ + r10 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c index c258b0722e04..fb4fa465d67c 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c @@ -660,19 +660,24 @@ __naked void may_goto_interaction_x86_64(void) SEC("raw_tp") __arch_arm64 -__log_level(4) __msg("stack depth 16") -/* may_goto counter at -16 */ -__xlated("0: *(u64 *)(r10 -16) =") -__xlated("1: r1 = 1") -__xlated("2: call bpf_get_smp_processor_id") +__log_level(4) __msg("stack depth 24") +/* may_goto counter at -24 */ +__xlated("0: *(u64 *)(r10 -24) =") +/* may_goto timestamp at -16 */ +__xlated("1: *(u64 *)(r10 -16) =") +__xlated("2: r1 = 1") +__xlated("3: call bpf_get_smp_processor_id") /* may_goto expansion starts */ -__xlated("3: r11 = *(u64 *)(r10 -16)") -__xlated("4: if r11 == 0x0 goto pc+3") -__xlated("5: r11 -= 1") -__xlated("6: *(u64 *)(r10 -16) = r11") +__xlated("4: r11 = *(u64 *)(r10 -24)") +__xlated("5: if r11 == 0x0 goto pc+6") +__xlated("6: r11 -= 1") +__xlated("7: if r11 != 0x0 goto pc+2") +__xlated("8: r11 = -24") +__xlated("9: call unknown") +__xlated("10: *(u64 *)(r10 -24) = r11") /* may_goto expansion ends */ -__xlated("7: *(u64 *)(r10 -8) = r1") -__xlated("8: exit") +__xlated("11: *(u64 *)(r10 -8) = r1") +__xlated("12: exit") __success __naked void may_goto_interaction_arm64(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c index 424463094760..5ebf7d9bcc55 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c @@ -5,8 +5,6 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) - SEC("tc") __description("context stores via BPF_ATOMIC") __failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed") @@ -264,4 +262,34 @@ narrow_load("sockops", bpf_sock_ops, skb_hwtstamp); unaligned_access("flow_dissector", __sk_buff, data); unaligned_access("netfilter", bpf_nf_ctx, skb); +#define padding_access(type, ctx, prev_field, sz) \ + SEC(type) \ + __description("access on " #ctx " padding after " #prev_field) \ + __naked void padding_ctx_access_##ctx(void) \ + { \ + asm volatile (" \ + r1 = *(u%[size] *)(r1 + %[off]); \ + r0 = 0; \ + exit;" \ + : \ + : __imm_const(size, sz * 8), \ + __imm_const(off, offsetofend(struct ctx, prev_field)) \ + : __clobber_all); \ + } + +__failure __msg("invalid bpf_context access") +padding_access("cgroup/bind4", bpf_sock_addr, msg_src_ip6[3], 4); + +__success +padding_access("sk_lookup", bpf_sk_lookup, remote_port, 2); + +__failure __msg("invalid bpf_context access") +padding_access("tc", __sk_buff, tstamp_type, 2); + +__failure __msg("invalid bpf_context access") +padding_access("cgroup/post_bind4", bpf_sock, dst_port, 2); + +__failure __msg("invalid bpf_context access") +padding_access("sk_reuseport", sk_reuseport_md, hash, 4); + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index 181da86ba5f0..6630a92b1b47 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -215,7 +215,7 @@ __weak int subprog_untrusted(const volatile struct task_struct *restrict task __ SEC("tp_btf/sys_enter") __success __log_level(2) -__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()") +__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()") __msg("Func#1 ('subprog_untrusted') is global and assumed valid.") __msg("Validating subprog_untrusted() func#1...") __msg(": R1=untrusted_ptr_task_struct") @@ -278,7 +278,7 @@ __weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted) SEC("tp_btf/sys_enter") __success __log_level(2) -__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()") +__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()") __msg("Func#1 ('subprog_void_untrusted') is global and assumed valid.") __msg("Validating subprog_void_untrusted() func#1...") __msg(": R1=rdonly_untrusted_mem(sz=0)") diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c index 52edee41caf6..c8494b682c31 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_arena_common.h" #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ @@ -10,6 +11,12 @@ defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 1); +} arena SEC(".maps"); + SEC("socket") __description("LDSX, S8") __success __success_unpriv __retval(-2) @@ -65,7 +72,7 @@ __naked void ldsx_s32(void) SEC("socket") __description("LDSX, S8 range checking, privileged") __log_level(2) __success __retval(1) -__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)") +__msg("R1=scalar(smin=smin32=-128,smax=smax32=127)") __naked void ldsx_s8_range_priv(void) { asm volatile ( @@ -256,6 +263,175 @@ __naked void ldsx_ctx_8(void) : __clobber_all); } +SEC("syscall") +__description("Arena LDSX Disasm") +__success +__arch_x86_64 +__jited("movslq 0x10(%rax,%r12), %r14") +__jited("movswq 0x18(%rax,%r12), %r14") +__jited("movsbq 0x20(%rax,%r12), %r14") +__jited("movslq 0x10(%rdi,%r12), %r15") +__jited("movswq 0x18(%rdi,%r12), %r15") +__jited("movsbq 0x20(%rdi,%r12), %r15") +__arch_arm64 +__jited("add x11, x7, x28") +__jited("ldrsw x21, [x11, #0x10]") +__jited("add x11, x7, x28") +__jited("ldrsh x21, [x11, #0x18]") +__jited("add x11, x7, x28") +__jited("ldrsb x21, [x11, #0x20]") +__jited("add x11, x0, x28") +__jited("ldrsw x22, [x11, #0x10]") +__jited("add x11, x0, x28") +__jited("ldrsh x22, [x11, #0x18]") +__jited("add x11, x0, x28") +__jited("ldrsb x22, [x11, #0x20]") +__naked void arena_ldsx_disasm(void *ctx) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r2 = 0;" + "r3 = 1;" + "r4 = %[numa_no_node];" + "r5 = 0;" + "call %[bpf_arena_alloc_pages];" + "r0 = addr_space_cast(r0, 0x0, 0x1);" + "r1 = r0;" + "r8 = *(s32 *)(r0 + 16);" + "r8 = *(s16 *)(r0 + 24);" + "r8 = *(s8 *)(r0 + 32);" + "r9 = *(s32 *)(r1 + 16);" + "r9 = *(s16 *)(r1 + 24);" + "r9 = *(s8 *)(r1 + 32);" + "r0 = 0;" + "exit;" + :: __imm(bpf_arena_alloc_pages), + __imm_addr(arena), + __imm_const(numa_no_node, NUMA_NO_NODE) + : __clobber_all + ); +} + +SEC("syscall") +__description("Arena LDSX Exception") +__success __retval(0) +__arch_x86_64 +__arch_arm64 +__naked void arena_ldsx_exception(void *ctx) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r0 = 0xdeadbeef;" + "r0 = addr_space_cast(r0, 0x0, 0x1);" + "r1 = 0x3fe;" + "*(u64 *)(r0 + 0) = r1;" + "r0 = *(s8 *)(r0 + 0);" + "exit;" + : + : __imm_addr(arena) + : __clobber_all + ); +} + +SEC("syscall") +__description("Arena LDSX, S8") +__success __retval(-1) +__arch_x86_64 +__arch_arm64 +__naked void arena_ldsx_s8(void *ctx) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r2 = 0;" + "r3 = 1;" + "r4 = %[numa_no_node];" + "r5 = 0;" + "call %[bpf_arena_alloc_pages];" + "r0 = addr_space_cast(r0, 0x0, 0x1);" + "r1 = 0x3fe;" + "*(u64 *)(r0 + 0) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s8 *)(r0 + 0);" +#else + "r0 = *(s8 *)(r0 + 7);" +#endif + "r0 >>= 1;" + "exit;" + :: __imm(bpf_arena_alloc_pages), + __imm_addr(arena), + __imm_const(numa_no_node, NUMA_NO_NODE) + : __clobber_all + ); +} + +SEC("syscall") +__description("Arena LDSX, S16") +__success __retval(-1) +__arch_x86_64 +__arch_arm64 +__naked void arena_ldsx_s16(void *ctx) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r2 = 0;" + "r3 = 1;" + "r4 = %[numa_no_node];" + "r5 = 0;" + "call %[bpf_arena_alloc_pages];" + "r0 = addr_space_cast(r0, 0x0, 0x1);" + "r1 = 0x3fffe;" + "*(u64 *)(r0 + 0) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s16 *)(r0 + 0);" +#else + "r0 = *(s16 *)(r0 + 6);" +#endif + "r0 >>= 1;" + "exit;" + :: __imm(bpf_arena_alloc_pages), + __imm_addr(arena), + __imm_const(numa_no_node, NUMA_NO_NODE) + : __clobber_all + ); +} + +SEC("syscall") +__description("Arena LDSX, S32") +__success __retval(-1) +__arch_x86_64 +__arch_arm64 +__naked void arena_ldsx_s32(void *ctx) +{ + asm volatile ( + "r1 = %[arena] ll;" + "r2 = 0;" + "r3 = 1;" + "r4 = %[numa_no_node];" + "r5 = 0;" + "call %[bpf_arena_alloc_pages];" + "r0 = addr_space_cast(r0, 0x0, 0x1);" + "r1 = 0xfffffffe;" + "*(u64 *)(r0 + 0) = r1;" +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + "r0 = *(s32 *)(r0 + 0);" +#else + "r0 = *(s32 *)(r0 + 4);" +#endif + "r0 >>= 1;" + "exit;" + :: __imm(bpf_arena_alloc_pages), + __imm_addr(arena), + __imm_const(numa_no_node, NUMA_NO_NODE) + : __clobber_all + ); +} + +/* to retain debug info for BTF generation */ +void kfunc_root(void) +{ + bpf_arena_alloc_pages(0, 0, 0, 0, 0); +} + #else SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c new file mode 100644 index 000000000000..c0e808509268 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, long long); +} map SEC(".maps"); + +SEC("socket") +__log_level(2) +__msg("(0) frame 0 insn 2 +written -8") +__msg("(0) frame 0 insn 1 +live -24") +__msg("(0) frame 0 insn 1 +written -8") +__msg("(0) frame 0 insn 0 +live -8,-24") +__msg("(0) frame 0 insn 0 +written -8") +__msg("(0) live stack update done in 2 iterations") +__naked void simple_read_simple_write(void) +{ + asm volatile ( + "r1 = *(u64 *)(r10 - 8);" + "r2 = *(u64 *)(r10 - 24);" + "*(u64 *)(r10 - 8) = r1;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("(0) frame 0 insn 1 +live -8") +__not_msg("(0) frame 0 insn 1 +written") +__msg("(0) live stack update done in 2 iterations") +__msg("(0) frame 0 insn 1 +live -16") +__msg("(0) frame 0 insn 1 +written -32") +__msg("(0) live stack update done in 2 iterations") +__naked void read_write_join(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 > 42 goto 1f;" + "r0 = *(u64 *)(r10 - 8);" + "*(u64 *)(r10 - 32) = r0;" + "*(u64 *)(r10 - 40) = r0;" + "exit;" +"1:" + "r0 = *(u64 *)(r10 - 16);" + "*(u64 *)(r10 - 32) = r0;" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("2: (25) if r0 > 0x2a goto pc+1") +__msg("7: (95) exit") +__msg("(0) frame 0 insn 2 +written -16") +__msg("(0) live stack update done in 2 iterations") +__msg("7: (95) exit") +__not_msg("(0) frame 0 insn 2") +__msg("(0) live stack update done in 1 iterations") +__naked void must_write_not_same_slot(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r1 = -8;" + "if r0 > 42 goto 1f;" + "r1 = -16;" +"1:" + "r2 = r10;" + "r2 += r1;" + "*(u64 *)(r2 + 0) = r0;" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("(0) frame 0 insn 0 +written -8,-16") +__msg("(0) live stack update done in 2 iterations") +__msg("(0) frame 0 insn 0 +written -8") +__msg("(0) live stack update done in 2 iterations") +__naked void must_write_not_same_type(void) +{ + asm volatile ( + "*(u64*)(r10 - 8) = 0;" + "r2 = r10;" + "r2 += -8;" + "r1 = %[map] ll;" + "call %[bpf_map_lookup_elem];" + "if r0 != 0 goto 1f;" + "r0 = r10;" + "r0 += -16;" +"1:" + "*(u64 *)(r0 + 0) = 42;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_map_lookup_elem), + __imm_addr(map) + : __clobber_all); +} + +SEC("socket") +__log_level(2) +__msg("(2,4) frame 0 insn 4 +written -8") +__msg("(2,4) live stack update done in 2 iterations") +__msg("(0) frame 0 insn 2 +written -8") +__msg("(0) live stack update done in 2 iterations") +__naked void caller_stack_write(void) +{ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "call write_first_param;" + "exit;" + ::: __clobber_all); +} + +static __used __naked void write_first_param(void) +{ + asm volatile ( + "*(u64 *)(r1 + 0) = 7;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__log_level(2) +/* caller_stack_read() function */ +__msg("2: .12345.... (85) call pc+4") +__msg("5: .12345.... (85) call pc+1") +__msg("6: 0......... (95) exit") +/* read_first_param() function */ +__msg("7: .1........ (79) r0 = *(u64 *)(r1 +0)") +__msg("8: 0......... (95) exit") +/* update for callsite at (2) */ +__msg("(2,7) frame 0 insn 7 +live -8") +__msg("(2,7) live stack update done in 2 iterations") +__msg("(0) frame 0 insn 2 +live -8") +__msg("(0) live stack update done in 2 iterations") +/* update for callsite at (5) */ +__msg("(5,7) frame 0 insn 7 +live -16") +__msg("(5,7) live stack update done in 2 iterations") +__msg("(0) frame 0 insn 5 +live -16") +__msg("(0) live stack update done in 2 iterations") +__naked void caller_stack_read(void) +{ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "call read_first_param;" + "r1 = r10;" + "r1 += -16;" + "call read_first_param;" + "exit;" + ::: __clobber_all); +} + +static __used __naked void read_first_param(void) +{ + asm volatile ( + "r0 = *(u64 *)(r1 + 0);" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__flag(BPF_F_TEST_STATE_FREQ) +__log_level(2) +/* read_first_param2() function */ +__msg(" 9: .1........ (79) r0 = *(u64 *)(r1 +0)") +__msg("10: .......... (b7) r0 = 0") +__msg("11: 0......... (05) goto pc+0") +__msg("12: 0......... (95) exit") +/* + * The purpose of the test is to check that checkpoint in + * read_first_param2() stops path traversal. This will only happen if + * verifier understands that fp[0]-8 at insn (12) is not alive. + */ +__msg("12: safe") +__msg("processed 20 insns") +__naked void caller_stack_pruning(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 == 42 goto 1f;" + "r0 = %[map] ll;" +"1:" + "*(u64 *)(r10 - 8) = r0;" + "r1 = r10;" + "r1 += -8;" + /* + * fp[0]-8 is either pointer to map or a scalar, + * preventing state pruning at checkpoint created for call. + */ + "call read_first_param2;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm_addr(map) + : __clobber_all); +} + +static __used __naked void read_first_param2(void) +{ + asm volatile ( + "r0 = *(u64 *)(r1 + 0);" + "r0 = 0;" + /* + * Checkpoint at goto +0 should fire, + * as caller stack fp[0]-8 is not alive at this point. + */ + "goto +0;" + "exit;" + ::: __clobber_all); +} + +SEC("socket") +__flag(BPF_F_TEST_STATE_FREQ) +__failure +__msg("R1 type=scalar expected=map_ptr") +__naked void caller_stack_pruning_callback(void) +{ + asm volatile ( + "r0 = %[map] ll;" + "*(u64 *)(r10 - 8) = r0;" + "r1 = 2;" + "r2 = loop_cb ll;" + "r3 = r10;" + "r3 += -8;" + "r4 = 0;" + /* + * fp[0]-8 is either pointer to map or a scalar, + * preventing state pruning at checkpoint created for call. + */ + "call %[bpf_loop];" + "r0 = 42;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_loop), + __imm_addr(map) + : __clobber_all); +} + +static __used __naked void loop_cb(void) +{ + asm volatile ( + /* + * Checkpoint at function entry should not fire, as caller + * stack fp[0]-8 is alive at this point. + */ + "r6 = r2;" + "r1 = *(u64 *)(r6 + 0);" + "*(u64*)(r10 - 8) = 7;" + "r2 = r10;" + "r2 += -8;" + "call %[bpf_map_lookup_elem];" + /* + * This should stop verifier on a second loop iteration, + * but only if verifier correctly maintains that fp[0]-8 + * is still alive. + */ + "*(u64 *)(r6 + 0) = 0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_map_lookup_elem), + __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* + * Because of a bug in verifier.c:compute_postorder() + * the program below overflowed traversal queue in that function. + */ +SEC("socket") +__naked void syzbot_postorder_bug1(void) +{ + asm volatile ( + "r0 = 0;" + "if r0 != 0 goto -1;" + "exit;" + ::: __clobber_all); +} diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c index e07b43b78fd2..fbdde80e7b90 100644 --- a/tools/testing/selftests/bpf/progs/verifier_loops1.c +++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c @@ -283,4 +283,25 @@ exit_%=: \ : __clobber_all); } +/* + * This test case triggered a bug in verifier.c:maybe_exit_scc(). + * Speculative execution path reaches stack access instruction, + * stops and triggers maybe_exit_scc() w/o accompanying maybe_enter_scc() call. + */ +SEC("socket") +__arch_x86_64 +__caps_unpriv(CAP_BPF) +__naked void maybe_exit_scc_bug1(void) +{ + asm volatile ( + "r0 = 100;" +"1:" + /* Speculative execution path reaches and stops here. */ + "*(u64 *)(r10 - 512) = r0;" + /* Condition is always false, but verifier speculatively executes the true branch. */ + "if r0 <= 0x0 goto 1b;" + "exit;" + ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c index 11a079145966..e2767d27d8aa 100644 --- a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c @@ -70,10 +70,13 @@ __naked void bpf_map_ptr_write_rejected(void) : __clobber_all); } +/* The first element of struct bpf_map is a SHA256 hash of 32 bytes, accessing + * into this array is valid. The opts field is now at offset 33. + */ SEC("socket") __description("bpf_map_ptr: read non-existent field rejected") __failure -__msg("cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4") +__msg("cannot access ptr member ops with moff 32 in struct bpf_map with off 33 size 4") __failure_unpriv __msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN") __flag(BPF_F_ANY_ALIGNMENT) @@ -82,7 +85,7 @@ __naked void read_non_existent_field_rejected(void) asm volatile (" \ r6 = 0; \ r1 = %[map_array_48b] ll; \ - r6 = *(u32*)(r1 + 1); \ + r6 = *(u32*)(r1 + 33); \ r0 = 1; \ exit; \ " : diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c index 3966d827f288..6d1edaef9213 100644 --- a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c +++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c @@ -9,6 +9,8 @@ SEC("raw_tp") __description("may_goto 0") __arch_x86_64 +__arch_s390x +__arch_arm64 __xlated("0: r0 = 1") __xlated("1: exit") __success @@ -27,6 +29,8 @@ __naked void may_goto_simple(void) SEC("raw_tp") __description("batch 2 of may_goto 0") __arch_x86_64 +__arch_s390x +__arch_arm64 __xlated("0: r0 = 1") __xlated("1: exit") __success @@ -47,6 +51,8 @@ __naked void may_goto_batch_0(void) SEC("raw_tp") __description("may_goto batch with offsets 2/1/0") __arch_x86_64 +__arch_s390x +__arch_arm64 __xlated("0: r0 = 1") __xlated("1: exit") __success @@ -69,8 +75,10 @@ __naked void may_goto_batch_1(void) } SEC("raw_tp") -__description("may_goto batch with offsets 2/0 - x86_64") +__description("may_goto batch with offsets 2/0") __arch_x86_64 +__arch_s390x +__arch_arm64 __xlated("0: *(u64 *)(r10 -16) = 65535") __xlated("1: *(u64 *)(r10 -8) = 0") __xlated("2: r11 = *(u64 *)(r10 -16)") @@ -84,33 +92,7 @@ __xlated("9: r0 = 1") __xlated("10: r0 = 2") __xlated("11: exit") __success -__naked void may_goto_batch_2_x86_64(void) -{ - asm volatile ( - ".8byte %[may_goto1];" - ".8byte %[may_goto3];" - "r0 = 1;" - "r0 = 2;" - "exit;" - : - : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)), - __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0)) - : __clobber_all); -} - -SEC("raw_tp") -__description("may_goto batch with offsets 2/0 - arm64") -__arch_arm64 -__xlated("0: *(u64 *)(r10 -8) = 8388608") -__xlated("1: r11 = *(u64 *)(r10 -8)") -__xlated("2: if r11 == 0x0 goto pc+3") -__xlated("3: r11 -= 1") -__xlated("4: *(u64 *)(r10 -8) = r11") -__xlated("5: r0 = 1") -__xlated("6: r0 = 2") -__xlated("7: exit") -__success -__naked void may_goto_batch_2_arm64(void) +__naked void may_goto_batch_2(void) { asm volatile ( ".8byte %[may_goto1];" diff --git a/tools/testing/selftests/bpf/progs/verifier_mul.c b/tools/testing/selftests/bpf/progs/verifier_mul.c new file mode 100644 index 000000000000..7145fe3351d5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_mul.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Nandakumar Edamana */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +/* Intended to test the abstract multiplication technique(s) used by + * the verifier. Using assembly to avoid compiler optimizations. + */ +SEC("fentry/bpf_fentry_test1") +void BPF_PROG(mul_precise, int x) +{ + /* First, force the verifier to be uncertain about the value: + * unsigned int a = (bpf_get_prandom_u32() & 0x2) | 0x1; + * + * Assuming the verifier is using tnum, a must be tnum{.v=0x1, .m=0x2}. + * Then a * 0x3 would be m0m1 (m for uncertain). Added imprecision + * would cause the following to fail, because the required return value + * is 0: + * return (a * 0x3) & 0x4); + */ + asm volatile ("\ + call %[bpf_get_prandom_u32];\ + r0 &= 0x2;\ + r0 |= 0x1;\ + r0 *= 0x3;\ + r0 &= 0x4;\ + if r0 != 0 goto l0_%=;\ + r0 = 0;\ + goto l1_%=;\ +l0_%=:\ + r0 = 1;\ +l1_%=:\ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 73fee2aec698..1fe090cd6744 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -144,21 +144,21 @@ SEC("?raw_tp") __success __log_level(2) /* * Without the bug fix there will be no history between "last_idx 3 first_idx 3" - * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor + * and "parent state regs=" lines. "R0=6" parts are here to help anchor * expected log messages to the one specific mark_chain_precision operation. * * This is quite fragile: if verifier checkpointing heuristic changes, this * might need adjusting. */ -__msg("2: (07) r0 += 1 ; R0_w=6") +__msg("2: (07) r0 += 1 ; R0=6") __msg("3: (35) if r0 >= 0xa goto pc+1") __msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1") __msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1") __msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1") __msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4") __msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1") -__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4") -__msg("3: R0_w=6") +__msg("mark_precise: frame0: parent state regs= stack=: R0=P4") +__msg("3: R0=6") __naked int state_loop_first_last_equal(void) { asm volatile ( @@ -233,8 +233,8 @@ __naked void bpf_cond_op_not_r10(void) SEC("lsm.s/socket_connect") __success __log_level(2) -__msg("0: (b7) r0 = 1 ; R0_w=1") -__msg("1: (84) w0 = -w0 ; R0_w=0xffffffff") +__msg("0: (b7) r0 = 1 ; R0=1") +__msg("1: (84) w0 = -w0 ; R0=0xffffffff") __msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") __msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0") __msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") @@ -268,8 +268,8 @@ __naked int bpf_neg_3(void) SEC("lsm.s/socket_connect") __success __log_level(2) -__msg("0: (b7) r0 = 1 ; R0_w=1") -__msg("1: (87) r0 = -r0 ; R0_w=-1") +__msg("0: (b7) r0 = 1 ; R0=1") +__msg("1: (87) r0 = -r0 ; R0=-1") __msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") __msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0") __msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c index 7c5e5e6d10eb..c0ce690ddb68 100644 --- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c +++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c @@ -349,11 +349,11 @@ __naked void precision_two_ids(void) SEC("socket") __success __log_level(2) __flag(BPF_F_TEST_STATE_FREQ) -/* check thar r0 and r6 have different IDs after 'if', +/* check that r0 and r6 have different IDs after 'if', * collect_linked_regs() can't tie more than 6 registers for a single insn. */ __msg("8: (25) if r0 > 0x7 goto pc+0 ; R0=scalar(id=1") -__msg("9: (bf) r6 = r6 ; R6_w=scalar(id=2") +__msg("9: (bf) r6 = r6 ; R6=scalar(id=2") /* check that r{0-5} are marked precise after 'if' */ __msg("frame0: regs=r0 stack= before 8: (25) if r0 > 0x7 goto pc+0") __msg("frame0: parent state regs=r0,r1,r2,r3,r4,r5 stack=:") @@ -779,12 +779,12 @@ __success __retval(0) /* Check that verifier believes r1/r0 are zero at exit */ __log_level(2) -__msg("4: (77) r1 >>= 32 ; R1_w=0") -__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0") +__msg("4: (77) r1 >>= 32 ; R1=0") +__msg("5: (bf) r0 = r1 ; R0=0 R1=0") __msg("6: (95) exit") __msg("from 3 to 4") -__msg("4: (77) r1 >>= 32 ; R1_w=0") -__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0") +__msg("4: (77) r1 >>= 32 ; R1=0") +__msg("5: (bf) r0 = r1 ; R0=0 R1=0") __msg("6: (95) exit") /* Verify that statements to randomize upper half of r1 had not been * generated. diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index 0d5e56dffabb..2b4610b53382 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -1,14 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Converted from tools/testing/selftests/bpf/verifier/sock.c */ -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) -#define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) - struct { __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); __uint(max_entries, 1); @@ -1073,6 +1069,48 @@ int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk) } __noinline +long xdp_pull_data2(struct xdp_md *x, __u32 len) +{ + return bpf_xdp_pull_data(x, len); +} + +__noinline +long xdp_pull_data1(struct xdp_md *x, __u32 len) +{ + return xdp_pull_data2(x, len); +} + +/* global function calls bpf_xdp_pull_data(), which invalidates packet + * pointers established before global function call. + */ +SEC("xdp") +__failure __msg("invalid mem access") +int invalidate_xdp_pkt_pointers_from_global_func(struct xdp_md *x) +{ + int *p = (void *)(long)x->data; + + if ((void *)(p + 1) > (void *)(long)x->data_end) + return XDP_DROP; + xdp_pull_data1(x, 0); + *p = 42; /* this is unsafe */ + return XDP_PASS; +} + +/* XDP packet changing kfunc calls invalidate packet pointers */ +SEC("xdp") +__failure __msg("invalid mem access") +int invalidate_xdp_pkt_pointers(struct xdp_md *x) +{ + int *p = (void *)(long)x->data; + + if ((void *)(p + 1) > (void *)(long)x->data_end) + return XDP_DROP; + bpf_xdp_pull_data(x, 0); + *p = 42; /* this is unsafe */ + return XDP_PASS; +} + +__noinline int tail_call(struct __sk_buff *sk) { bpf_tail_call_static(sk, &jmp_table, 0); diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 1e5a511e8494..7a13dbd794b2 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -506,17 +506,17 @@ SEC("raw_tp") __log_level(2) __success /* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */ -__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0") +__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8=0") /* but fp-16 is spilled IMPRECISE zero const reg */ -__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0") +__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0=0 R10=fp0 fp-16=0") /* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register * precise immediately; if necessary, it will be marked precise later */ -__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0") +__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0") /* similarly, when R2 is assigned from spilled register, it is initially * imprecise, but will be marked precise later once it is used in precise context */ -__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0") +__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2=0 R10=fp0 fp-16=0") __msg("11: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)") @@ -598,7 +598,7 @@ __log_level(2) __success /* fp-4 is STACK_ZERO */ __msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????") -__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8=0000????") +__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0000????") __msg("5: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)") @@ -640,25 +640,25 @@ SEC("raw_tp") __log_level(2) __flag(BPF_F_TEST_STATE_FREQ) __success /* make sure fp-8 is IMPRECISE fake register spill */ -__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1") +__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8=1") /* and fp-16 is spilled IMPRECISE const reg */ -__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1") +__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=1") /* validate load from fp-8, which was initialized using BPF_ST_MEM */ -__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1") +__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2=1 R10=fp0 fp-8=1") __msg("9: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)") __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") /* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */ -__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=1") __msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") __msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0") __msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1") __msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1") -__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1") /* validate load from fp-16, which was initialized using BPF_STX_MEM */ -__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1") +__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2=1 R10=fp0 fp-16=1") __msg("13: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)") @@ -668,12 +668,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") __msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)") __msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") /* now both fp-8 and fp-16 are precise, very good */ -__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1") +__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=P1") __msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") __msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0") __msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0") __msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") -__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1") __naked void stack_load_preserves_const_precision(void) { asm volatile ( @@ -719,22 +719,22 @@ __success /* make sure fp-8 is 32-bit FAKE subregister spill */ __msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1") /* but fp-16 is spilled IMPRECISE zero const reg */ -__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1") +__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=????1") /* validate load from fp-8, which was initialized using BPF_ST_MEM */ -__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1") +__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2=1 R10=fp0 fp-8=????1") __msg("9: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)") __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") -__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????1") __msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") __msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") __msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0") __msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1") __msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1") -__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1") /* validate load from fp-16, which was initialized using BPF_STX_MEM */ -__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1") +__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2=1 R10=fp0 fp-16=????1") __msg("13: (0f) r1 += r2") __msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1") __msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)") @@ -743,12 +743,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2 __msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") __msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)") __msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") -__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1") +__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????P1") __msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") __msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0") __msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0") __msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") -__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1") __naked void stack_load_preserves_const_precision_subreg(void) { asm volatile ( diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index 9d415f7ce599..ac3e418c2a96 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -105,7 +105,7 @@ __msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4") __msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3") __msg("mark_precise: frame0: regs=r0 stack= before 10: (95) exit") __msg("mark_precise: frame1: regs=r0 stack= before 9: (bf) r0 = (s8)r10") -__msg("7: R0_w=scalar") +__msg("7: R0=scalar") __naked int fp_precise_subprog_result(void) { asm volatile ( @@ -141,7 +141,7 @@ __msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = (s8)r1") * anyways, at which point we'll break precision chain */ __msg("mark_precise: frame1: regs=r1 stack= before 9: (bf) r1 = r10") -__msg("7: R0_w=scalar") +__msg("7: R0=scalar") __naked int sneaky_fp_precise_subprog_result(void) { asm volatile ( @@ -681,7 +681,7 @@ __msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1") __msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8") __msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4") __msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)") -__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0=2 R6=1 R8=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8=P1") __msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7") __msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") __msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2") diff --git a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c index a9ab37d3b9e2..2129e4353fd9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c +++ b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include "../../../include/linux/filter.h" #include "bpf_misc.h" #define MAX_ENTRIES 11 @@ -146,6 +147,24 @@ l0_%=: exit; \ : __clobber_all); } +SEC("socket") +__description("map_ptr illegal alu op, map_ptr = -map_ptr") +__failure __msg("R0 invalid mem access 'scalar'") +__failure_unpriv __msg_unpriv("R0 pointer arithmetic prohibited") +__flag(BPF_F_ANY_ALIGNMENT) +__naked void map_ptr_illegal_alu_op(void) +{ + asm volatile (" \ + r0 = %[map_hash_48b] ll; \ + r0 = -r0; \ + r1 = 22; \ + *(u64*)(r0 + 0) = r1; \ + exit; \ +" : + : __imm_addr(map_hash_48b) + : __clobber_all); +} + SEC("flow_dissector") __description("flow_keys illegal alu op with variable offset") __failure __msg("R7 pointer arithmetic on flow_keys prohibited") @@ -165,4 +184,32 @@ __naked void flow_keys_illegal_variable_offset_alu(void) : __clobber_all); } +#define DEFINE_BAD_OFFSET_TEST(name, op, off, imm) \ + SEC("socket") \ + __failure __msg("BPF_ALU uses reserved fields") \ + __naked void name(void) \ + { \ + asm volatile( \ + "r0 = 1;" \ + ".8byte %[insn];" \ + "r0 = 0;" \ + "exit;" \ + : \ + : __imm_insn(insn, BPF_RAW_INSN((op), 0, 0, (off), (imm))) \ + : __clobber_all); \ + } + +/* + * Offset fields of 0 and 1 are legal for BPF_{DIV,MOD} instructions. + * Offset fields of 0 are legal for the rest of ALU instructions. + * Test that error is reported for illegal offsets, assuming that tests + * for legal offsets exist. + */ +DEFINE_BAD_OFFSET_TEST(bad_offset_divx, BPF_ALU64 | BPF_DIV | BPF_X, -1, 0) +DEFINE_BAD_OFFSET_TEST(bad_offset_modk, BPF_ALU64 | BPF_MOD | BPF_K, -1, 1) +DEFINE_BAD_OFFSET_TEST(bad_offset_addx, BPF_ALU64 | BPF_ADD | BPF_X, -1, 0) +DEFINE_BAD_OFFSET_TEST(bad_offset_divx2, BPF_ALU64 | BPF_DIV | BPF_X, 2, 0) +DEFINE_BAD_OFFSET_TEST(bad_offset_modk2, BPF_ALU64 | BPF_MOD | BPF_K, 2, 1) +DEFINE_BAD_OFFSET_TEST(bad_offset_addx2, BPF_ALU64 | BPF_ADD | BPF_X, 1, 0) + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c index 1d36d01b746e..f345466bca68 100644 --- a/tools/testing/selftests/bpf/progs/verifier_var_off.c +++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c @@ -114,8 +114,8 @@ __naked void stack_write_priv_vs_unpriv(void) } /* Similar to the previous test, but this time also perform a read from the - * address written to with a variable offset. The read is allowed, showing that, - * after a variable-offset write, a priviledged program can read the slots that + * address written to with a variable offet. The read is allowed, showing that, + * after a variable-offset write, a privileged program can read the slots that * were in the range of that write (even if the verifier doesn't actually know if * the slot being read was really written to or not. * @@ -157,7 +157,7 @@ __naked void stack_write_followed_by_read(void) SEC("socket") __description("variable-offset stack write clobbers spilled regs") __failure -/* In the priviledged case, dereferencing a spilled-and-then-filled +/* In the privileged case, dereferencing a spilled-and-then-filled * register is rejected because the previous variable offset stack * write might have overwritten the spilled pointer (i.e. we lose track * of the spilled register when we analyze the write). diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile b/tools/testing/selftests/bpf/test_kmods/Makefile index d4e50c4509c9..63c4d3f6a12f 100644 --- a/tools/testing/selftests/bpf/test_kmods/Makefile +++ b/tools/testing/selftests/bpf/test_kmods/Makefile @@ -8,7 +8,7 @@ Q = @ endif MODULES = bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ - bpf_test_modorder_y.ko + bpf_test_modorder_y.ko bpf_test_rqspinlock.ko $(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o))) diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c new file mode 100644 index 000000000000..769206fc70e4 --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/prandom.h> +#include <asm/rqspinlock.h> +#include <linux/perf_event.h> +#include <linux/kthread.h> +#include <linux/atomic.h> +#include <linux/slab.h> + +static struct perf_event_attr hw_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, + .sample_period = 100000, +}; + +static rqspinlock_t lock_a; +static rqspinlock_t lock_b; + +static struct perf_event **rqsl_evts; +static int rqsl_nevts; + +static bool test_ab = false; +module_param(test_ab, bool, 0644); +MODULE_PARM_DESC(test_ab, "Test ABBA situations instead of AA situations"); + +static struct task_struct **rqsl_threads; +static int rqsl_nthreads; +static atomic_t rqsl_ready_cpus = ATOMIC_INIT(0); + +static int pause = 0; + +static bool nmi_locks_a(int cpu) +{ + return (cpu & 1) && test_ab; +} + +static int rqspinlock_worker_fn(void *arg) +{ + int cpu = smp_processor_id(); + unsigned long flags; + int ret; + + if (cpu) { + atomic_inc(&rqsl_ready_cpus); + + while (!kthread_should_stop()) { + if (READ_ONCE(pause)) { + msleep(1000); + continue; + } + if (nmi_locks_a(cpu)) + ret = raw_res_spin_lock_irqsave(&lock_b, flags); + else + ret = raw_res_spin_lock_irqsave(&lock_a, flags); + mdelay(20); + if (nmi_locks_a(cpu) && !ret) + raw_res_spin_unlock_irqrestore(&lock_b, flags); + else if (!ret) + raw_res_spin_unlock_irqrestore(&lock_a, flags); + cpu_relax(); + } + return 0; + } + + while (!kthread_should_stop()) { + int expected = rqsl_nthreads > 0 ? rqsl_nthreads - 1 : 0; + int ready = atomic_read(&rqsl_ready_cpus); + + if (ready == expected && !READ_ONCE(pause)) { + for (int i = 0; i < rqsl_nevts; i++) + perf_event_enable(rqsl_evts[i]); + pr_err("Waiting 5 secs to pause the test\n"); + msleep(1000 * 5); + WRITE_ONCE(pause, 1); + pr_err("Paused the test\n"); + } else { + msleep(1000); + cpu_relax(); + } + } + return 0; +} + +static void nmi_cb(struct perf_event *event, struct perf_sample_data *data, + struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + unsigned long flags; + int ret; + + if (!cpu || READ_ONCE(pause)) + return; + + if (nmi_locks_a(cpu)) + ret = raw_res_spin_lock_irqsave(&lock_a, flags); + else + ret = raw_res_spin_lock_irqsave(test_ab ? &lock_b : &lock_a, flags); + + mdelay(10); + + if (nmi_locks_a(cpu) && !ret) + raw_res_spin_unlock_irqrestore(&lock_a, flags); + else if (!ret) + raw_res_spin_unlock_irqrestore(test_ab ? &lock_b : &lock_a, flags); +} + +static void free_rqsl_threads(void) +{ + int i; + + if (rqsl_threads) { + for_each_online_cpu(i) { + if (rqsl_threads[i]) + kthread_stop(rqsl_threads[i]); + } + kfree(rqsl_threads); + } +} + +static void free_rqsl_evts(void) +{ + int i; + + if (rqsl_evts) { + for (i = 0; i < rqsl_nevts; i++) { + if (rqsl_evts[i]) + perf_event_release_kernel(rqsl_evts[i]); + } + kfree(rqsl_evts); + } +} + +static int bpf_test_rqspinlock_init(void) +{ + int i, ret; + int ncpus = num_online_cpus(); + + pr_err("Mode = %s\n", test_ab ? "ABBA" : "AA"); + + if (ncpus < 3) + return -ENOTSUPP; + + raw_res_spin_lock_init(&lock_a); + raw_res_spin_lock_init(&lock_b); + + rqsl_evts = kcalloc(ncpus - 1, sizeof(*rqsl_evts), GFP_KERNEL); + if (!rqsl_evts) + return -ENOMEM; + rqsl_nevts = ncpus - 1; + + for (i = 1; i < ncpus; i++) { + struct perf_event *e; + + e = perf_event_create_kernel_counter(&hw_attr, i, NULL, nmi_cb, NULL); + if (IS_ERR(e)) { + ret = PTR_ERR(e); + goto err_perf_events; + } + rqsl_evts[i - 1] = e; + } + + rqsl_threads = kcalloc(ncpus, sizeof(*rqsl_threads), GFP_KERNEL); + if (!rqsl_threads) { + ret = -ENOMEM; + goto err_perf_events; + } + rqsl_nthreads = ncpus; + + for_each_online_cpu(i) { + struct task_struct *t; + + t = kthread_create(rqspinlock_worker_fn, NULL, "rqsl_w/%d", i); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto err_threads_create; + } + kthread_bind(t, i); + rqsl_threads[i] = t; + wake_up_process(t); + } + return 0; + +err_threads_create: + free_rqsl_threads(); +err_perf_events: + free_rqsl_evts(); + return ret; +} + +module_init(bpf_test_rqspinlock_init); + +static void bpf_test_rqspinlock_exit(void) +{ + free_rqsl_threads(); + free_rqsl_evts(); +} + +module_exit(bpf_test_rqspinlock_exit); + +MODULE_AUTHOR("Kumar Kartikeya Dwivedi"); +MODULE_DESCRIPTION("BPF rqspinlock stress test module"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index e9e918cdf31f..8074bc5f6f20 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -62,6 +62,18 @@ struct bpf_testmod_struct_arg_5 { long d; }; +union bpf_testmod_union_arg_1 { + char a; + short b; + struct bpf_testmod_struct_arg_1 arg; +}; + +union bpf_testmod_union_arg_2 { + int a; + long b; + struct bpf_testmod_struct_arg_2 arg; +}; + __bpf_hook_start(); noinline int @@ -129,6 +141,20 @@ bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f, } noinline int +bpf_testmod_test_union_arg_1(union bpf_testmod_union_arg_1 a, int b, int c) +{ + bpf_testmod_test_struct_arg_result = a.arg.a + b + c; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_union_arg_2(int a, union bpf_testmod_union_arg_2 b) +{ + bpf_testmod_test_struct_arg_result = a + b.arg.a + b.arg.b; + return bpf_testmod_test_struct_arg_result; +} + +noinline int bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) { bpf_testmod_test_struct_arg_result = a->a; return bpf_testmod_test_struct_arg_result; @@ -218,6 +244,16 @@ __bpf_kfunc void bpf_kfunc_rcu_task_test(struct task_struct *ptr) { } +__bpf_kfunc struct task_struct *bpf_kfunc_ret_rcu_test(void) +{ + return NULL; +} + +__bpf_kfunc int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size) +{ + return NULL; +} + __bpf_kfunc struct bpf_testmod_ctx * bpf_testmod_ctx_create(int *err) { @@ -398,6 +434,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bpf_testmod_struct_arg_3 *struct_arg3; struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22}; struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26}; + union bpf_testmod_union_arg_1 union_arg1 = { .arg = {1} }; + union bpf_testmod_union_arg_2 union_arg2 = { .arg = {2, 3} }; int i = 1; while (bpf_testmod_return_ptr(i)) @@ -415,6 +453,9 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, (void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20, 21, 22, struct_arg5, 27); + (void)bpf_testmod_test_union_arg_1(union_arg1, 4, 5); + (void)bpf_testmod_test_union_arg_2(6, union_arg2); + (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); (void)trace_bpf_testmod_test_raw_tp_null_tp(NULL); @@ -501,14 +542,20 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { #ifdef __x86_64__ static int +uprobe_handler(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data) +{ + regs->cx = 0x87654321feebdaed; + return 0; +} + +static int uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, struct pt_regs *regs, __u64 *data) { regs->ax = 0x12345678deadbeef; - regs->cx = 0x87654321feebdaed; regs->r11 = (u64) -1; - return true; + return 0; } struct testmod_uprobe { @@ -520,6 +567,7 @@ struct testmod_uprobe { static DEFINE_MUTEX(testmod_uprobe_mutex); static struct testmod_uprobe uprobe = { + .consumer.handler = uprobe_handler, .consumer.ret_handler = uprobe_ret_handler, }; @@ -623,6 +671,8 @@ BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU) +BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test, KF_RET_NULL | KF_RCU_PROTECTED) +BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test_nostruct, KF_RET_NULL | KF_RCU_PROTECTED) BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1) @@ -1057,6 +1107,8 @@ __bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args) return args->a; } +__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id); + BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) BTF_ID_FLAGS(func, bpf_kfunc_call_test1) @@ -1097,6 +1149,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABL BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1, KF_TRUSTED_ARGS) BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids) static int bpf_testmod_ops_init(struct btf *btf) @@ -1528,6 +1581,114 @@ static struct bpf_struct_ops testmod_st_ops = { .owner = THIS_MODULE, }; +struct hlist_head multi_st_ops_list; +static DEFINE_SPINLOCK(multi_st_ops_lock); + +static int multi_st_ops_init(struct btf *btf) +{ + spin_lock_init(&multi_st_ops_lock); + INIT_HLIST_HEAD(&multi_st_ops_list); + + return 0; +} + +static int multi_st_ops_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return 0; +} + +static struct bpf_testmod_multi_st_ops *multi_st_ops_find_nolock(u32 id) +{ + struct bpf_testmod_multi_st_ops *st_ops; + + hlist_for_each_entry(st_ops, &multi_st_ops_list, node) { + if (st_ops->id == id) + return st_ops; + } + + return NULL; +} + +int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) +{ + struct bpf_testmod_multi_st_ops *st_ops; + unsigned long flags; + int ret = -1; + + spin_lock_irqsave(&multi_st_ops_lock, flags); + st_ops = multi_st_ops_find_nolock(id); + if (st_ops) + ret = st_ops->test_1(args); + spin_unlock_irqrestore(&multi_st_ops_lock, flags); + + return ret; +} + +static int multi_st_ops_reg(void *kdata, struct bpf_link *link) +{ + struct bpf_testmod_multi_st_ops *st_ops = + (struct bpf_testmod_multi_st_ops *)kdata; + unsigned long flags; + int err = 0; + u32 id; + + if (!st_ops->test_1) + return -EINVAL; + + id = bpf_struct_ops_id(kdata); + + spin_lock_irqsave(&multi_st_ops_lock, flags); + if (multi_st_ops_find_nolock(id)) { + pr_err("multi_st_ops(id:%d) has already been registered\n", id); + err = -EEXIST; + goto unlock; + } + + st_ops->id = id; + hlist_add_head(&st_ops->node, &multi_st_ops_list); +unlock: + spin_unlock_irqrestore(&multi_st_ops_lock, flags); + + return err; +} + +static void multi_st_ops_unreg(void *kdata, struct bpf_link *link) +{ + struct bpf_testmod_multi_st_ops *st_ops; + unsigned long flags; + u32 id; + + id = bpf_struct_ops_id(kdata); + + spin_lock_irqsave(&multi_st_ops_lock, flags); + st_ops = multi_st_ops_find_nolock(id); + if (st_ops) + hlist_del(&st_ops->node); + spin_unlock_irqrestore(&multi_st_ops_lock, flags); +} + +static int bpf_testmod_multi_st_ops__test_1(struct st_ops_args *args) +{ + return 0; +} + +static struct bpf_testmod_multi_st_ops multi_st_ops_cfi_stubs = { + .test_1 = bpf_testmod_multi_st_ops__test_1, +}; + +struct bpf_struct_ops testmod_multi_st_ops = { + .verifier_ops = &bpf_testmod_verifier_ops, + .init = multi_st_ops_init, + .init_member = multi_st_ops_init_member, + .reg = multi_st_ops_reg, + .unreg = multi_st_ops_unreg, + .cfi_stubs = &multi_st_ops_cfi_stubs, + .name = "bpf_testmod_multi_st_ops", + .owner = THIS_MODULE, +}; + extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) @@ -1550,6 +1711,7 @@ static int bpf_testmod_init(void) ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2); ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3); ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops); + ret = ret ?: register_bpf_struct_ops(&testmod_multi_st_ops, bpf_testmod_multi_st_ops); ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors, ARRAY_SIZE(bpf_testmod_dtors), THIS_MODULE); diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h index c9fab51f16e2..f6e492f9d042 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h @@ -116,4 +116,10 @@ struct bpf_testmod_st_ops { struct module *owner; }; +struct bpf_testmod_multi_st_ops { + int (*test_1)(struct st_ops_args *args); + struct hlist_node node; + int id; +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h index b58817938deb..4df6fa6a92cb 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h @@ -158,5 +158,9 @@ void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) __ksym; void bpf_kfunc_trusted_task_test(struct task_struct *ptr) __ksym; void bpf_kfunc_trusted_num_test(int *ptr) __ksym; void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym; +struct task_struct *bpf_kfunc_ret_rcu_test(void) __ksym; +int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size) __ksym; + +int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __ksym; #endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index 4694422aa76c..88e4aeab21b7 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -74,7 +74,7 @@ int main(int argc, char **argv) /* Let's try detach it before it was ever attached */ ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2); - if (ret != -1 || errno != ENOENT) { + if (ret != -ENOENT) { printf("bpf_prog_detach2 not attached should fail: %m\n"); return 1; } diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 78423cf89e01..74ecc281bb8c 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -2,7 +2,6 @@ /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ #include <linux/capability.h> #include <stdlib.h> -#include <regex.h> #include <test_progs.h> #include <bpf/btf.h> @@ -20,10 +19,12 @@ #define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure" #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" #define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg=" +#define TEST_TAG_EXPECT_NOT_MSG_PFX "comment:test_expect_not_msg=" #define TEST_TAG_EXPECT_XLATED_PFX "comment:test_expect_xlated=" #define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv" #define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv" #define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv=" +#define TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV "comment:test_expect_not_msg_unpriv=" #define TEST_TAG_EXPECT_XLATED_PFX_UNPRIV "comment:test_expect_xlated_unpriv=" #define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level=" #define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags=" @@ -38,6 +39,10 @@ #define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv=" #define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv=" #define TEST_TAG_LOAD_MODE_PFX "comment:load_mode=" +#define TEST_TAG_EXPECT_STDERR_PFX "comment:test_expect_stderr=" +#define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv=" +#define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout=" +#define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xbadcafe @@ -61,24 +66,14 @@ enum load_mode { NO_JITED = 1 << 1, }; -struct expect_msg { - const char *substr; /* substring match */ - regex_t regex; - bool is_regex; - bool on_next_line; -}; - -struct expected_msgs { - struct expect_msg *patterns; - size_t cnt; -}; - struct test_subspec { char *name; bool expect_failure; struct expected_msgs expect_msgs; struct expected_msgs expect_xlated; struct expected_msgs jited; + struct expected_msgs stderr; + struct expected_msgs stdout; int retval; bool execute; __u64 caps; @@ -139,6 +134,10 @@ static void free_test_spec(struct test_spec *spec) free_msgs(&spec->unpriv.expect_xlated); free_msgs(&spec->priv.jited); free_msgs(&spec->unpriv.jited); + free_msgs(&spec->unpriv.stderr); + free_msgs(&spec->priv.stderr); + free_msgs(&spec->unpriv.stdout); + free_msgs(&spec->priv.stdout); free(spec->priv.name); free(spec->unpriv.name); @@ -206,7 +205,8 @@ static int compile_regex(const char *pattern, regex_t *regex) return 0; } -static int __push_msg(const char *pattern, bool on_next_line, struct expected_msgs *msgs) +static int __push_msg(const char *pattern, bool on_next_line, bool negative, + struct expected_msgs *msgs) { struct expect_msg *msg; void *tmp; @@ -222,6 +222,7 @@ static int __push_msg(const char *pattern, bool on_next_line, struct expected_ms msg = &msgs->patterns[msgs->cnt]; msg->on_next_line = on_next_line; msg->substr = pattern; + msg->negative = negative; msg->is_regex = false; if (strstr(pattern, "{{")) { err = compile_regex(pattern, &msg->regex); @@ -240,16 +241,16 @@ static int clone_msgs(struct expected_msgs *from, struct expected_msgs *to) for (i = 0; i < from->cnt; i++) { msg = &from->patterns[i]; - err = __push_msg(msg->substr, msg->on_next_line, to); + err = __push_msg(msg->substr, msg->on_next_line, msg->negative, to); if (err) return err; } return 0; } -static int push_msg(const char *substr, struct expected_msgs *msgs) +static int push_msg(const char *substr, bool negative, struct expected_msgs *msgs) { - return __push_msg(substr, false, msgs); + return __push_msg(substr, false, negative, msgs); } static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct expected_msgs *msgs) @@ -260,7 +261,7 @@ static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct exp *on_next_line = false; return 0; } - err = __push_msg(regex_str, *on_next_line, msgs); + err = __push_msg(regex_str, *on_next_line, false, msgs); if (err) return err; *on_next_line = true; @@ -374,6 +375,7 @@ enum arch { ARCH_X86_64 = 0x2, ARCH_ARM64 = 0x4, ARCH_RISCV64 = 0x8, + ARCH_S390X = 0x10, }; static int get_current_arch(void) @@ -384,6 +386,8 @@ static int get_current_arch(void) return ARCH_ARM64; #elif defined(__riscv) && __riscv_xlen == 64 return ARCH_RISCV64; +#elif defined(__s390x__) + return ARCH_S390X; #endif return ARCH_UNKNOWN; } @@ -404,6 +408,10 @@ static int parse_test_spec(struct test_loader *tester, bool xlated_on_next_line = true; bool unpriv_jit_on_next_line; bool jit_on_next_line; + bool stderr_on_next_line = true; + bool unpriv_stderr_on_next_line = true; + bool stdout_on_next_line = true; + bool unpriv_stdout_on_next_line = true; bool collect_jit = false; int func_id, i, err = 0; u32 arch_mask = 0; @@ -465,12 +473,22 @@ static int parse_test_spec(struct test_loader *tester, spec->auxiliary = true; spec->mode_mask |= UNPRIV; } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX))) { - err = push_msg(msg, &spec->priv.expect_msgs); + err = push_msg(msg, false, &spec->priv.expect_msgs); + if (err) + goto cleanup; + spec->mode_mask |= PRIV; + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX))) { + err = push_msg(msg, true, &spec->priv.expect_msgs); if (err) goto cleanup; spec->mode_mask |= PRIV; } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV))) { - err = push_msg(msg, &spec->unpriv.expect_msgs); + err = push_msg(msg, false, &spec->unpriv.expect_msgs); + if (err) + goto cleanup; + spec->mode_mask |= UNPRIV; + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV))) { + err = push_msg(msg, true, &spec->unpriv.expect_msgs); if (err) goto cleanup; spec->mode_mask |= UNPRIV; @@ -565,8 +583,10 @@ static int parse_test_spec(struct test_loader *tester, arch = ARCH_ARM64; } else if (strcmp(val, "RISCV64") == 0) { arch = ARCH_RISCV64; + } else if (strcmp(val, "s390x") == 0) { + arch = ARCH_S390X; } else { - PRINT_FAIL("bad arch spec: '%s'", val); + PRINT_FAIL("bad arch spec: '%s'\n", val); err = -EINVAL; goto cleanup; } @@ -593,6 +613,26 @@ static int parse_test_spec(struct test_loader *tester, err = -EINVAL; goto cleanup; } + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX))) { + err = push_disasm_msg(msg, &stderr_on_next_line, + &spec->priv.stderr); + if (err) + goto cleanup; + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX_UNPRIV))) { + err = push_disasm_msg(msg, &unpriv_stderr_on_next_line, + &spec->unpriv.stderr); + if (err) + goto cleanup; + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX))) { + err = push_disasm_msg(msg, &stdout_on_next_line, + &spec->priv.stdout); + if (err) + goto cleanup; + } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV))) { + err = push_disasm_msg(msg, &unpriv_stdout_on_next_line, + &spec->unpriv.stdout); + if (err) + goto cleanup; } } @@ -646,6 +686,10 @@ static int parse_test_spec(struct test_loader *tester, clone_msgs(&spec->priv.expect_xlated, &spec->unpriv.expect_xlated); if (spec->unpriv.jited.cnt == 0) clone_msgs(&spec->priv.jited, &spec->unpriv.jited); + if (spec->unpriv.stderr.cnt == 0) + clone_msgs(&spec->priv.stderr, &spec->unpriv.stderr); + if (spec->unpriv.stdout.cnt == 0) + clone_msgs(&spec->priv.stdout, &spec->unpriv.stdout); } spec->valid = true; @@ -707,44 +751,155 @@ static void emit_jited(const char *jited, bool force) fprintf(stdout, "JITED:\n=============\n%s=============\n", jited); } -static void validate_msgs(char *log_buf, struct expected_msgs *msgs, - void (*emit_fn)(const char *buf, bool force)) +static void emit_stderr(const char *stderr, bool force) { - const char *log = log_buf, *prev_match; + if (!force && env.verbosity == VERBOSE_NONE) + return; + fprintf(stdout, "STDERR:\n=============\n%s=============\n", stderr); +} + +static void emit_stdout(const char *bpf_stdout, bool force) +{ + if (!force && env.verbosity == VERBOSE_NONE) + return; + fprintf(stdout, "STDOUT:\n=============\n%s=============\n", bpf_stdout); +} + +static const char *match_msg(struct expect_msg *msg, const char **log) +{ + const char *match = NULL; regmatch_t reg_match[1]; - int prev_match_line; - int match_line; - int i, j, err; + int err; + + if (!msg->is_regex) { + match = strstr(*log, msg->substr); + if (match) + *log = match + strlen(msg->substr); + } else { + err = regexec(&msg->regex, *log, 1, reg_match, 0); + if (err == 0) { + match = *log + reg_match[0].rm_so; + *log += reg_match[0].rm_eo; + } + } + return match; +} + +static int count_lines(const char *start, const char *end) +{ + const char *tmp; + int n = 0; + + for (tmp = start; tmp < end; ++tmp) + if (*tmp == '\n') + n++; + return n; +} + +struct match { + const char *start; + const char *end; + int line; +}; + +/* + * Positive messages are matched sequentially, each next message + * is looked for starting from the end of a previous matched one. + */ +static void match_positive_msgs(const char *log, struct expected_msgs *msgs, struct match *matches) +{ + const char *prev_match; + int i, line; - prev_match_line = -1; - match_line = 0; prev_match = log; + line = 0; for (i = 0; i < msgs->cnt; i++) { struct expect_msg *msg = &msgs->patterns[i]; - const char *match = NULL, *pat_status; - bool wrong_line = false; - - if (!msg->is_regex) { - match = strstr(log, msg->substr); - if (match) - log = match + strlen(msg->substr); - } else { - err = regexec(&msg->regex, log, 1, reg_match, 0); - if (err == 0) { - match = log + reg_match[0].rm_so; - log += reg_match[0].rm_eo; + const char *match = NULL; + + if (msg->negative) + continue; + + match = match_msg(msg, &log); + if (match) { + line += count_lines(prev_match, match); + matches[i].start = match; + matches[i].end = log; + matches[i].line = line; + prev_match = match; + } + } +} + +/* + * Each negative messages N located between positive messages P1 and P2 + * is matched in the span P1.end .. P2.start. Consequently, negative messages + * are unordered within the span. + */ +static void match_negative_msgs(const char *log, struct expected_msgs *msgs, struct match *matches) +{ + const char *start = log, *end, *next, *match; + const char *log_end = log + strlen(log); + int i, j, next_positive; + + for (i = 0; i < msgs->cnt; i++) { + struct expect_msg *msg = &msgs->patterns[i]; + + /* positive message bumps span start */ + if (!msg->negative) { + start = matches[i].end ?: start; + continue; + } + + /* count stride of negative patterns and adjust span end */ + end = log_end; + for (next_positive = i + 1; next_positive < msgs->cnt; next_positive++) { + if (!msgs->patterns[next_positive].negative) { + end = matches[next_positive].start; + break; } } - if (match) { - for (; prev_match < match; ++prev_match) - if (*prev_match == '\n') - ++match_line; - wrong_line = msg->on_next_line && prev_match_line >= 0 && - prev_match_line + 1 != match_line; + /* try matching negative messages within identified span */ + for (j = i; j < next_positive; j++) { + next = start; + match = match_msg(msg, &next); + if (match && next <= end) { + matches[j].start = match; + matches[j].end = next; + } } - if (!match || wrong_line) { + /* -1 to account for i++ */ + i = next_positive - 1; + } +} + +void validate_msgs(const char *log_buf, struct expected_msgs *msgs, + void (*emit_fn)(const char *buf, bool force)) +{ + struct match matches[msgs->cnt]; + struct match *prev_match = NULL; + int i, j; + + memset(matches, 0, sizeof(*matches) * msgs->cnt); + match_positive_msgs(log_buf, msgs, matches); + match_negative_msgs(log_buf, msgs, matches); + + for (i = 0; i < msgs->cnt; i++) { + struct expect_msg *msg = &msgs->patterns[i]; + struct match *match = &matches[i]; + const char *pat_status; + bool unexpected; + bool wrong_line; + bool no_match; + + no_match = !msg->negative && !match->start; + wrong_line = !msg->negative && + msg->on_next_line && + prev_match && prev_match->line + 1 != match->line; + unexpected = msg->negative && match->start; + if (no_match || wrong_line || unexpected) { PRINT_FAIL("expect_msg\n"); if (env.verbosity == VERBOSE_NONE) emit_fn(log_buf, true /*force*/); @@ -754,8 +909,10 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs, pat_status = "MATCHED "; else if (wrong_line) pat_status = "WRONG LINE"; - else + else if (no_match) pat_status = "EXPECTED "; + else + pat_status = "UNEXPECTED"; msg = &msgs->patterns[j]; fprintf(stderr, "%s %s: '%s'\n", pat_status, @@ -765,12 +922,13 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs, if (wrong_line) { fprintf(stderr, "expecting match at line %d, actual match is at line %d\n", - prev_match_line + 1, match_line); + prev_match->line + 1, match->line); } break; } - prev_match_line = match_line; + if (!msg->negative) + prev_match = match; } } @@ -929,6 +1087,19 @@ out: return err; } +/* Read the bpf stream corresponding to the stream_id */ +static int get_stream(int stream_id, int prog_fd, char *text, size_t text_sz) +{ + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + int ret; + + ret = bpf_prog_stream_read(prog_fd, stream_id, text, text_sz, &ropts); + ASSERT_GT(ret, 0, "stream read"); + text[ret] = '\0'; + + return ret; +} + /* this function is forced noinline and has short generic name to look better * in test_progs output (in case of a failure) */ @@ -1083,7 +1254,7 @@ void run_subtest(struct test_loader *tester, link = bpf_map__attach_struct_ops(map); if (!link) { PRINT_FAIL("bpf_map__attach_struct_ops failed for map %s: err=%d\n", - bpf_map__name(map), err); + bpf_map__name(map), -errno); goto tobj_cleanup; } links[links_cnt++] = link; @@ -1103,6 +1274,31 @@ void run_subtest(struct test_loader *tester, PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval); goto tobj_cleanup; } + + if (subspec->stderr.cnt) { + err = get_stream(2, bpf_program__fd(tprog), + tester->log_buf, tester->log_buf_sz); + if (err <= 0) { + PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n", + err, errno); + goto tobj_cleanup; + } + emit_stderr(tester->log_buf, false /*force*/); + validate_msgs(tester->log_buf, &subspec->stderr, emit_stderr); + } + + if (subspec->stdout.cnt) { + err = get_stream(1, bpf_program__fd(tprog), + tester->log_buf, tester->log_buf_sz); + if (err <= 0) { + PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n", + err, errno); + goto tobj_cleanup; + } + emit_stdout(tester->log_buf, false /*force*/); + validate_msgs(tester->log_buf, &subspec->stdout, emit_stdout); + } + /* redo bpf_map__attach_struct_ops for each test */ while (links_cnt > 0) bpf_link__destroy(links[--links_cnt]); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 309d9d4a8ace..02a85dda30e6 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -14,12 +14,14 @@ #include <netinet/in.h> #include <sys/select.h> #include <sys/socket.h> +#include <linux/keyctl.h> #include <sys/un.h> #include <bpf/btf.h> #include <time.h> #include "json_writer.h" #include "network_helpers.h" +#include "verification_cert.h" /* backtrace() and backtrace_symbols_fd() are glibc specific, * use header file when glibc is available and provide stub @@ -1928,6 +1930,13 @@ static void free_test_states(void) } } +static __u32 register_session_key(const char *key_data, size_t key_data_size) +{ + return syscall(__NR_add_key, "asymmetric", "libbpf_session_key", + (const void *)key_data, key_data_size, + KEY_SPEC_SESSION_KEYRING); +} + int main(int argc, char **argv) { static const struct argp argp = { @@ -1961,6 +1970,10 @@ int main(int argc, char **argv) /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); libbpf_set_print(libbpf_print_fn); + err = register_session_key((const char *)test_progs_verification_cert, + test_progs_verification_cert_len); + if (err < 0) + return err; traffic_monitor_set_print(traffic_monitor_print_fn); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index df2222a1806f..eebfc18cdcd2 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -7,6 +7,7 @@ #include <errno.h> #include <string.h> #include <assert.h> +#include <regex.h> #include <stdlib.h> #include <stdarg.h> #include <time.h> @@ -546,4 +547,20 @@ extern void test_loader_fini(struct test_loader *tester); test_loader_fini(&tester); \ }) +struct expect_msg { + const char *substr; /* substring match */ + regex_t regex; + bool is_regex; + bool on_next_line; + bool negative; +}; + +struct expected_msgs { + struct expect_msg *patterns; + size_t cnt; +}; + +void validate_msgs(const char *log_buf, struct expected_msgs *msgs, + void (*emit_fn)(const char *buf, bool force)); + #endif /* __TEST_PROGS_H */ diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index fd2da2234cc9..76568db7a664 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1372,7 +1372,7 @@ run: } else fprintf(stderr, "unknown test\n"); out: - /* Detatch and zero all the maps */ + /* Detach and zero all the maps */ bpf_prog_detach2(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS); for (i = 0; i < ARRAY_SIZE(links); i++) { diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 595194453ff8..35b4893ccdf8 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -15,20 +15,18 @@ #include <bpf/libbpf.h> #include <sys/ioctl.h> #include <linux/rtnetlink.h> -#include <signal.h> #include <linux/perf_event.h> -#include <linux/err.h> -#include "bpf_util.h" #include "cgroup_helpers.h" #include "test_tcpnotify.h" -#include "trace_helpers.h" #include "testing_helpers.h" #define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L) pthread_t tid; +static bool exit_thread; + int rx_callbacks; static void dummyfn(void *ctx, int cpu, void *data, __u32 size) @@ -45,7 +43,7 @@ void tcp_notifier_poller(struct perf_buffer *pb) { int err; - while (1) { + while (!exit_thread) { err = perf_buffer__poll(pb, 100); if (err < 0 && err != -EINTR) { printf("failed perf_buffer__poll: %d\n", err); @@ -78,15 +76,10 @@ int main(int argc, char **argv) int error = EXIT_FAILURE; struct bpf_object *obj; char test_script[80]; - cpu_set_t cpuset; __u32 key = 0; libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); - pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); - cg_fd = cgroup_setup_and_join(cg_path); if (cg_fd < 0) goto err; @@ -151,6 +144,13 @@ int main(int argc, char **argv) sleep(10); + exit_thread = true; + int ret = pthread_join(tid, NULL); + if (ret) { + printf("FAILED: pthread_join\n"); + goto err; + } + if (verify_result(&g)) { printf("FAILED: Wrong stats Expected %d calls, got %d\n", g.ncalls, rx_callbacks); diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 65aafe0003db..62db060298a4 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -241,4 +241,6 @@ done if [ $failures -eq 0 ]; then echo "All tests successful!" +else + exit 1 fi diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 5e9f16683be5..16eb37e5bad6 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -399,7 +399,7 @@ int unload_module(const char *name, bool verbose) return 0; } -int load_module(const char *path, bool verbose) +static int __load_module(const char *path, const char *param_values, bool verbose) { int fd; @@ -411,7 +411,7 @@ int load_module(const char *path, bool verbose) fprintf(stdout, "Can't find %s kernel module: %d\n", path, -errno); return -ENOENT; } - if (finit_module(fd, "", 0)) { + if (finit_module(fd, param_values, 0)) { fprintf(stdout, "Failed to load %s into the kernel: %d\n", path, -errno); close(fd); return -EINVAL; @@ -423,6 +423,16 @@ int load_module(const char *path, bool verbose) return 0; } +int load_module_params(const char *path, const char *param_values, bool verbose) +{ + return __load_module(path, param_values, verbose); +} + +int load_module(const char *path, bool verbose) +{ + return __load_module(path, "", verbose); +} + int unload_bpf_testmod(bool verbose) { return unload_module("bpf_testmod", verbose); diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 46d7f7089f63..eb20d3772218 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -39,6 +39,7 @@ int kern_sync_rcu(void); int finit_module(int fd, const char *param_values, int flags); int delete_module(const char *name, int flags); int load_module(const char *path, bool verbose); +int load_module_params(const char *path, const char *param_values, bool verbose); int unload_module(const char *name, bool verbose); static inline __u64 get_time_ns(void) diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 81943c6254e6..eeaab7013ca2 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -17,7 +17,9 @@ #include <linux/limits.h> #include <libelf.h> #include <gelf.h> +#include "bpf/hashmap.h" #include "bpf/libbpf_internal.h" +#include "bpf_util.h" #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" #define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" @@ -519,3 +521,235 @@ void read_trace_pipe(void) { read_trace_pipe_iter(trace_pipe_cb, NULL, 0); } + +static size_t symbol_hash(long key, void *ctx __maybe_unused) +{ + return str_hash((const char *) key); +} + +static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return strcmp((const char *) key1, (const char *) key2) == 0; +} + +static bool is_invalid_entry(char *buf, bool kernel) +{ + if (kernel && strchr(buf, '[')) + return true; + if (!kernel && !strchr(buf, '[')) + return true; + return false; +} + +static const char * const trace_blacklist[] = { + "migrate_disable", + "migrate_enable", + "rcu_read_unlock_strict", + "preempt_count_add", + "preempt_count_sub", + "__rcu_read_lock", + "__rcu_read_unlock", + "bpf_get_numa_node_id", +}; + +static bool skip_entry(char *name) +{ + int i; + + /* + * We attach to almost all kernel functions and some of them + * will cause 'suspicious RCU usage' when fprobe is attached + * to them. Filter out the current culprits - arch_cpu_idle + * default_idle and rcu_* functions. + */ + if (!strcmp(name, "arch_cpu_idle")) + return true; + if (!strcmp(name, "default_idle")) + return true; + if (!strncmp(name, "rcu_", 4)) + return true; + if (!strcmp(name, "bpf_dispatcher_xdp_func")) + return true; + if (!strncmp(name, "__ftrace_invalid_address__", + sizeof("__ftrace_invalid_address__") - 1)) + return true; + + for (i = 0; i < ARRAY_SIZE(trace_blacklist); i++) { + if (!strcmp(name, trace_blacklist[i])) + return true; + } + + return false; +} + +/* Do comparison by ignoring '.llvm.<hash>' suffixes. */ +static int compare_name(const char *name1, const char *name2) +{ + const char *res1, *res2; + int len1, len2; + + res1 = strstr(name1, ".llvm."); + res2 = strstr(name2, ".llvm."); + len1 = res1 ? res1 - name1 : strlen(name1); + len2 = res2 ? res2 - name2 : strlen(name2); + + if (len1 == len2) + return strncmp(name1, name2, len1); + if (len1 < len2) + return strncmp(name1, name2, len1) <= 0 ? -1 : 1; + return strncmp(name1, name2, len2) >= 0 ? 1 : -1; +} + +static int load_kallsyms_compare(const void *p1, const void *p2) +{ + return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name); +} + +static int search_kallsyms_compare(const void *p1, const struct ksym *p2) +{ + return compare_name(p1, p2->name); +} + +int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel) +{ + size_t cap = 0, cnt = 0; + char *name = NULL, *ksym_name, **syms = NULL; + struct hashmap *map; + struct ksyms *ksyms; + struct ksym *ks; + char buf[256]; + FILE *f; + int err = 0; + + ksyms = load_kallsyms_custom_local(load_kallsyms_compare); + if (!ksyms) + return -EINVAL; + + /* + * The available_filter_functions contains many duplicates, + * but other than that all symbols are usable to trace. + * Filtering out duplicates by using hashmap__add, which won't + * add existing entry. + */ + + if (access("/sys/kernel/tracing/trace", F_OK) == 0) + f = fopen("/sys/kernel/tracing/available_filter_functions", "r"); + else + f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); + + if (!f) + return -EINVAL; + + map = hashmap__new(symbol_hash, symbol_equal, NULL); + if (IS_ERR(map)) { + err = libbpf_get_error(map); + goto error; + } + + while (fgets(buf, sizeof(buf), f)) { + if (is_invalid_entry(buf, kernel)) + continue; + + free(name); + if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) + continue; + if (skip_entry(name)) + continue; + + ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare); + if (!ks) { + err = -EINVAL; + goto error; + } + + ksym_name = ks->name; + err = hashmap__add(map, ksym_name, 0); + if (err == -EEXIST) { + err = 0; + continue; + } + if (err) + goto error; + + err = libbpf_ensure_mem((void **) &syms, &cap, + sizeof(*syms), cnt + 1); + if (err) + goto error; + + syms[cnt++] = ksym_name; + } + + *symsp = syms; + *cntp = cnt; + +error: + free(name); + fclose(f); + hashmap__free(map); + if (err) + free(syms); + return err; +} + +int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel) +{ + unsigned long *addr, *addrs, *tmp_addrs; + int err = 0, max_cnt, inc_cnt; + char *name = NULL; + size_t cnt = 0; + char buf[256]; + FILE *f; + + if (access("/sys/kernel/tracing/trace", F_OK) == 0) + f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r"); + else + f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r"); + + if (!f) + return -ENOENT; + + /* In my local setup, the number of entries is 50k+ so Let us initially + * allocate space to hold 64k entries. If 64k is not enough, incrementally + * increase 1k each time. + */ + max_cnt = 65536; + inc_cnt = 1024; + addrs = malloc(max_cnt * sizeof(long)); + if (addrs == NULL) { + err = -ENOMEM; + goto error; + } + + while (fgets(buf, sizeof(buf), f)) { + if (is_invalid_entry(buf, kernel)) + continue; + + free(name); + if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2) + continue; + if (skip_entry(name)) + continue; + + if (cnt == max_cnt) { + max_cnt += inc_cnt; + tmp_addrs = realloc(addrs, max_cnt * sizeof(long)); + if (!tmp_addrs) { + err = -ENOMEM; + goto error; + } + addrs = tmp_addrs; + } + + addrs[cnt++] = (unsigned long)addr; + } + + *addrsp = addrs; + *cntp = cnt; + +error: + free(name); + fclose(f); + if (err) + free(addrs); + return err; +} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 2ce873c9f9aa..9437bdd4afa5 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -41,4 +41,7 @@ ssize_t get_rel_offset(uintptr_t addr); int read_build_id(const char *path, char *build_id, size_t size); +int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel); +int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel); + #endif diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h new file mode 100644 index 000000000000..549d1f774810 --- /dev/null +++ b/tools/testing/selftests/bpf/usdt.h @@ -0,0 +1,545 @@ +// SPDX-License-Identifier: BSD-2-Clause +/* + * This single-header library defines a collection of variadic macros for + * defining and triggering USDTs (User Statically-Defined Tracepoints): + * + * - For USDTs without associated semaphore: + * USDT(group, name, args...) + * + * - For USDTs with implicit (transparent to the user) semaphore: + * USDT_WITH_SEMA(group, name, args...) + * USDT_IS_ACTIVE(group, name) + * + * - For USDTs with explicit (user-defined and provided) semaphore: + * USDT_WITH_EXPLICIT_SEMA(sema, group, name, args...) + * USDT_SEMA_IS_ACTIVE(sema) + * + * all of which emit a NOP instruction into the instruction stream, and so + * have *zero* overhead for the surrounding code. USDTs are identified by + * a combination of `group` and `name` identifiers, which is used by external + * tracing tooling (tracers) for identifying exact USDTs of interest. + * + * USDTs can have an associated (2-byte) activity counter (USDT semaphore), + * automatically maintained by Linux kernel whenever any correctly written + * BPF-based tracer is attached to the USDT. This USDT semaphore can be used + * to check whether there is a need to do any extra data collection and + * processing for a given USDT (if necessary), and otherwise avoid extra work + * for a common case of USDT not being traced ("active"). + * + * See documentation for USDT_WITH_SEMA()/USDT_IS_ACTIVE() or + * USDT_WITH_EXPLICIT_SEMA()/USDT_SEMA_IS_ACTIVE() APIs below for details on + * working with USDTs with implicitly or explicitly associated + * USDT semaphores, respectively. + * + * There is also some additional data recorded into an auxiliary note + * section. The data in the note section describes the operands, in terms of + * size and location, used by tracing tooling to know where to find USDT + * arguments. Each location is encoded as an assembler operand string. + * Tracing tools (bpftrace and BPF-based tracers, systemtap, etc) insert + * breakpoints on top of the nop, and decode the location operand-strings, + * like an assembler, to find the values being passed. + * + * The operand strings are selected by the compiler for each operand. + * They are constrained by inline-assembler codes.The default is: + * + * #define USDT_ARG_CONSTRAINT nor + * + * This is a good default if the operands tend to be integral and + * moderate in number (smaller than number of registers). In other + * cases, the compiler may report "'asm' requires impossible reload" or + * similar. In this case, consider simplifying the macro call (fewer + * and simpler operands), reduce optimization, or override the default + * constraints string via: + * + * #define USDT_ARG_CONSTRAINT g + * #include <usdt.h> + * + * For some historical description of USDT v3 format (the one used by this + * library and generally recognized and assumed by BPF-based tracing tools) + * see [0]. The more formal specification can be found at [1]. Additional + * argument constraints information can be found at [2]. + * + * Original SystemTap's sys/sdt.h implementation ([3]) was used as a base for + * this USDT library implementation. Current implementation differs *a lot* in + * terms of exposed user API and general usability, which was the main goal + * and focus of the reimplementation work. Nevertheless, underlying recorded + * USDT definitions are fully binary compatible and any USDT-based tooling + * should work equally well with USDTs defined by either SystemTap's or this + * library's USDT implementation. + * + * [0] https://ecos.sourceware.org/ml/systemtap/2010-q3/msg00145.html + * [1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + * [2] https://gcc.gnu.org/onlinedocs/gcc/Constraints.html + * [3] https://sourceware.org/git/?p=systemtap.git;a=blob;f=includes/sys/sdt.h + */ +#ifndef __USDT_H +#define __USDT_H + +/* + * Changelog: + * + * 0.1.0 + * ----- + * - Initial release + */ +#define USDT_MAJOR_VERSION 0 +#define USDT_MINOR_VERSION 1 +#define USDT_PATCH_VERSION 0 + +/* C++20 and C23 added __VA_OPT__ as a standard replacement for non-standard `##__VA_ARGS__` extension */ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) || (defined(__cplusplus) && __cplusplus > 201703L) +#define __usdt_va_opt 1 +#define __usdt_va_args(...) __VA_OPT__(,) __VA_ARGS__ +#else +#define __usdt_va_args(...) , ##__VA_ARGS__ +#endif + +/* + * Trigger USDT with `group`:`name` identifier and pass through `args` as its + * arguments. Zero arguments are acceptable as well. No USDT semaphore is + * associated with this USDT. + * + * Such "semaphoreless" USDTs are commonly used when there is no extra data + * collection or processing needed to collect and prepare USDT arguments and + * they are just available in the surrounding code. USDT() macro will just + * record their locations in CPU registers or in memory for tracing tooling to + * be able to access them, if necessary. + */ +#ifdef __usdt_va_opt +#define USDT(group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_none, 0 __VA_OPT__(,) __VA_ARGS__) +#else +#define USDT(group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_none, 0, ##__VA_ARGS__) +#endif + +/* + * Trigger USDT with `group`:`name` identifier and pass through `args` as its + * arguments. Zero arguments are acceptable as well. USDT also get an + * implicitly-defined associated USDT semaphore, which will be "activated" by + * tracing tooling and can be used to check whether USDT is being actively + * observed. + * + * USDTs with semaphore are commonly used when there is a need to perform + * additional data collection and processing to prepare USDT arguments, which + * otherwise might not be necessary for the rest of application logic. In such + * case, USDT semaphore can be used to avoid unnecessary extra work. If USDT + * is not traced (which is presumed to be a common situation), the associated + * USDT semaphore is "inactive", and so there is no need to waste resources to + * prepare USDT arguments. Use USDT_IS_ACTIVE(group, name) to check whether + * USDT is "active". + * + * N.B. There is an inherent (albeit short) gap between checking whether USDT + * is active and triggering corresponding USDT, in which external tracer can + * be attached to an USDT and activate USDT semaphore after the activity check. + * If such a race occurs, tracers might miss one USDT execution. Tracers are + * expected to accommodate such possibility and this is expected to not be + * a problem for applications and tracers. + * + * N.B. Implicit USDT semaphore defined by USDT_WITH_SEMA() is contained + * within a single executable or shared library and is not shared outside + * them. I.e., if you use USDT_WITH_SEMA() with the same USDT group and name + * identifier across executable and shared library, it will work and won't + * conflict, per se, but will define independent USDT semaphores, one for each + * shared library/executable in which USDT_WITH_SEMA(group, name) is used. + * That is, if you attach to this USDT in one shared library (or executable), + * then only USDT semaphore within that shared library (or executable) will be + * updated by the kernel, while other libraries (or executable) will not see + * activated USDT semaphore. In short, it's best to use unique USDT group:name + * identifiers across different shared libraries (and, equivalently, between + * executable and shared library). This is advanced consideration and is + * rarely (if ever) seen in practice, but just to avoid surprises this is + * called out here. (Static libraries become a part of final executable, once + * linked by linker, so the above considerations don't apply to them.) + */ +#ifdef __usdt_va_opt +#define USDT_WITH_SEMA(group, name, ...) \ + __usdt_probe(group, name, \ + __usdt_sema_implicit, __usdt_sema_name(group, name) \ + __VA_OPT__(,) __VA_ARGS__) +#else +#define USDT_WITH_SEMA(group, name, ...) \ + __usdt_probe(group, name, \ + __usdt_sema_implicit, __usdt_sema_name(group, name), \ + ##__VA_ARGS__) +#endif + +struct usdt_sema { volatile unsigned short active; }; + +/* + * Check if USDT with `group`:`name` identifier is "active" (i.e., whether it + * is attached to by external tracing tooling and is actively observed). + * + * This macro can be used to decide whether any additional and potentially + * expensive data collection or processing should be done to pass extra + * information into the given USDT. It is assumed that USDT is triggered with + * USDT_WITH_SEMA() macro which will implicitly define associated USDT + * semaphore. (If one needs more control over USDT semaphore, see + * USDT_DEFINE_SEMA() and USDT_WITH_EXPLICIT_SEMA() macros below.) + * + * N.B. Such checks are necessarily racy and speculative. Between checking + * whether USDT is active and triggering the USDT itself, tracer can be + * detached with no notification. This race should be extremely rare and worst + * case should result in one-time wasted extra data collection and processing. + */ +#define USDT_IS_ACTIVE(group, name) ({ \ + extern struct usdt_sema __usdt_sema_name(group, name) \ + __usdt_asm_name(__usdt_sema_name(group, name)); \ + __usdt_sema_implicit(__usdt_sema_name(group, name)); \ + __usdt_sema_name(group, name).active > 0; \ +}) + +/* + * APIs for working with user-defined explicit USDT semaphores. + * + * This is a less commonly used advanced API for use cases in which user needs + * an explicit control over (potentially shared across multiple USDTs) USDT + * semaphore instance. This can be used when there is a group of logically + * related USDTs that all need extra data collection and processing whenever + * any of a family of related USDTs are "activated" (i.e., traced). In such + * a case, all such related USDTs will be associated with the same shared USDT + * semaphore defined with USDT_DEFINE_SEMA() and the USDTs themselves will be + * triggered with USDT_WITH_EXPLICIT_SEMA() macros, taking an explicit extra + * USDT semaphore identifier as an extra parameter. + */ + +/** + * Underlying C global variable name for user-defined USDT semaphore with + * `sema` identifier. Could be useful for debugging, but normally shouldn't be + * used explicitly. + */ +#define USDT_SEMA(sema) __usdt_sema_##sema + +/* + * Define storage for user-defined USDT semaphore `sema`. + * + * Should be used only once in non-header source file to let compiler allocate + * space for the semaphore variable. Just like with any other global variable. + * + * This macro can be used anywhere where global variable declaration is + * allowed. Just like with global variable definitions, there should be only + * one definition of user-defined USDT semaphore with given `sema` identifier, + * otherwise compiler or linker will complain about duplicate variable + * definition. + * + * For C++, it is allowed to use USDT_DEFINE_SEMA() both in global namespace + * and inside namespaces (including nested namespaces). Just make sure that + * USDT_DECLARE_SEMA() is placed within the namespace where this semaphore is + * referenced, or any of its parent namespaces, so the C++ language-level + * identifier is visible to the code that needs to reference the semaphore. + * At the lowest layer, USDT semaphores have global naming and visibility + * (they have a corresponding `__usdt_sema_<name>` symbol, which can be linked + * against from C or C++ code, if necessary). To keep it simple, putting + * USDT_DECLARE_SEMA() declarations into global namespaces is the simplest + * no-brainer solution. All these aspects are irrelevant for plain C, because + * C doesn't have namespaces and everything is always in the global namespace. + * + * N.B. Due to USDT metadata being recorded in non-allocatable ELF note + * section, it has limitations when it comes to relocations, which, in + * practice, means that it's not possible to correctly share USDT semaphores + * between main executable and shared libraries, or even between multiple + * shared libraries. USDT semaphore has to be contained to individual shared + * library or executable to avoid unpleasant surprises with half-working USDT + * semaphores. We enforce this by marking semaphore ELF symbols as having + * a hidden visibility. This is quite an advanced use case and consideration + * and for most users this should have no consequences whatsoever. + */ +#define USDT_DEFINE_SEMA(sema) \ + struct usdt_sema __usdt_sema_sec USDT_SEMA(sema) \ + __usdt_asm_name(USDT_SEMA(sema)) \ + __attribute__((visibility("hidden"))) = { 0 } + +/* + * Declare extern reference to user-defined USDT semaphore `sema`. + * + * Refers to a variable defined in another compilation unit by + * USDT_DEFINE_SEMA() and allows to use the same USDT semaphore across + * multiple compilation units (i.e., .c and .cpp files). + * + * See USDT_DEFINE_SEMA() notes above for C++ language usage peculiarities. + */ +#define USDT_DECLARE_SEMA(sema) \ + extern struct usdt_sema USDT_SEMA(sema) __usdt_asm_name(USDT_SEMA(sema)) + +/* + * Check if user-defined USDT semaphore `sema` is "active" (i.e., whether it + * is attached to by external tracing tooling and is actively observed). + * + * This macro can be used to decide whether any additional and potentially + * expensive data collection or processing should be done to pass extra + * information into USDT(s) associated with USDT semaphore `sema`. + * + * N.B. Such checks are necessarily racy. Between checking the state of USDT + * semaphore and triggering associated USDT(s), the active tracer might attach + * or detach. This race should be extremely rare and worst case should result + * in one-time missed USDT event or wasted extra data collection and + * processing. USDT-using tracers should be written with this in mind and is + * not a concern of the application defining USDTs with associated semaphore. + */ +#define USDT_SEMA_IS_ACTIVE(sema) (USDT_SEMA(sema).active > 0) + +/* + * Invoke USDT specified by `group` and `name` identifiers and associate + * explicitly user-defined semaphore `sema` with it. Pass through `args` as + * USDT arguments. `args` are optional and zero arguments are acceptable. + * + * Semaphore is defined with the help of USDT_DEFINE_SEMA() macro and can be + * checked whether active with USDT_SEMA_IS_ACTIVE(). + */ +#ifdef __usdt_va_opt +#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema), ##__VA_ARGS__) +#else +#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \ + __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema) __VA_OPT__(,) __VA_ARGS__) +#endif + +/* + * Adjustable implementation aspects + */ +#ifndef USDT_ARG_CONSTRAINT +#if defined __powerpc__ +#define USDT_ARG_CONSTRAINT nZr +#elif defined __arm__ +#define USDT_ARG_CONSTRAINT g +#elif defined __loongarch__ +#define USDT_ARG_CONSTRAINT nmr +#else +#define USDT_ARG_CONSTRAINT nor +#endif +#endif /* USDT_ARG_CONSTRAINT */ + +#ifndef USDT_NOP +#if defined(__ia64__) || defined(__s390__) || defined(__s390x__) +#define USDT_NOP nop 0 +#else +#define USDT_NOP nop +#endif +#endif /* USDT_NOP */ + +/* + * Implementation details + */ +/* USDT name for implicitly-defined USDT semaphore, derived from group:name */ +#define __usdt_sema_name(group, name) __usdt_sema_##group##__##name +/* ELF section into which USDT semaphores are put */ +#define __usdt_sema_sec __attribute__((section(".probes"))) + +#define __usdt_concat(a, b) a ## b +#define __usdt_apply(fn, n) __usdt_concat(fn, n) + +#ifndef __usdt_nth +#define __usdt_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, N, ...) N +#endif + +#ifndef __usdt_narg +#ifdef __usdt_va_opt +#define __usdt_narg(...) __usdt_nth(_ __VA_OPT__(,) __VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#else +#define __usdt_narg(...) __usdt_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#endif +#endif /* __usdt_narg */ + +#define __usdt_hash # +#define __usdt_str_(x) #x +#define __usdt_str(x) __usdt_str_(x) + +#ifndef __usdt_asm_name +#define __usdt_asm_name(name) __asm__(__usdt_str(name)) +#endif + +#define __usdt_asm0() "\n" +#define __usdt_asm1(x) __usdt_str(x) "\n" +#define __usdt_asm2(x, ...) __usdt_str(x) "," __usdt_asm1(__VA_ARGS__) +#define __usdt_asm3(x, ...) __usdt_str(x) "," __usdt_asm2(__VA_ARGS__) +#define __usdt_asm4(x, ...) __usdt_str(x) "," __usdt_asm3(__VA_ARGS__) +#define __usdt_asm5(x, ...) __usdt_str(x) "," __usdt_asm4(__VA_ARGS__) +#define __usdt_asm6(x, ...) __usdt_str(x) "," __usdt_asm5(__VA_ARGS__) +#define __usdt_asm7(x, ...) __usdt_str(x) "," __usdt_asm6(__VA_ARGS__) +#define __usdt_asm8(x, ...) __usdt_str(x) "," __usdt_asm7(__VA_ARGS__) +#define __usdt_asm9(x, ...) __usdt_str(x) "," __usdt_asm8(__VA_ARGS__) +#define __usdt_asm10(x, ...) __usdt_str(x) "," __usdt_asm9(__VA_ARGS__) +#define __usdt_asm11(x, ...) __usdt_str(x) "," __usdt_asm10(__VA_ARGS__) +#define __usdt_asm12(x, ...) __usdt_str(x) "," __usdt_asm11(__VA_ARGS__) +#define __usdt_asm(...) __usdt_apply(__usdt_asm, __usdt_narg(__VA_ARGS__))(__VA_ARGS__) + +#ifdef __LP64__ +#define __usdt_asm_addr .8byte +#else +#define __usdt_asm_addr .4byte +#endif + +#define __usdt_asm_strz_(x) __usdt_asm1(.asciz #x) +#define __usdt_asm_strz(x) __usdt_asm_strz_(x) +#define __usdt_asm_str_(x) __usdt_asm1(.ascii #x) +#define __usdt_asm_str(x) __usdt_asm_str_(x) + +/* "semaphoreless" USDT case */ +#ifndef __usdt_sema_none +#define __usdt_sema_none(sema) +#endif + +/* implicitly defined __usdt_sema__group__name semaphore (using weak symbols) */ +#ifndef __usdt_sema_implicit +#define __usdt_sema_implicit(sema) \ + __asm__ __volatile__ ( \ + __usdt_asm1(.ifndef sema) \ + __usdt_asm3( .pushsection .probes, "aw", "progbits") \ + __usdt_asm1( .weak sema) \ + __usdt_asm1( .hidden sema) \ + __usdt_asm1( .align 2) \ + __usdt_asm1(sema:) \ + __usdt_asm1( .zero 2) \ + __usdt_asm2( .type sema, @object) \ + __usdt_asm2( .size sema, 2) \ + __usdt_asm1( .popsection) \ + __usdt_asm1(.endif) \ + ); +#endif + +/* externally defined semaphore using USDT_DEFINE_SEMA() and passed explicitly by user */ +#ifndef __usdt_sema_explicit +#define __usdt_sema_explicit(sema) \ + __asm__ __volatile__ ("" :: "m" (sema)); +#endif + +/* main USDT definition (nop and .note.stapsdt metadata) */ +#define __usdt_probe(group, name, sema_def, sema, ...) do { \ + sema_def(sema) \ + __asm__ __volatile__ ( \ + __usdt_asm( 990: USDT_NOP) \ + __usdt_asm3( .pushsection .note.stapsdt, "", "note") \ + __usdt_asm1( .balign 4) \ + __usdt_asm3( .4byte 992f-991f,994f-993f,3) \ + __usdt_asm1(991: .asciz "stapsdt") \ + __usdt_asm1(992: .balign 4) \ + __usdt_asm1(993: __usdt_asm_addr 990b) \ + __usdt_asm1( __usdt_asm_addr _.stapsdt.base) \ + __usdt_asm1( __usdt_asm_addr sema) \ + __usdt_asm_strz(group) \ + __usdt_asm_strz(name) \ + __usdt_asm_args(__VA_ARGS__) \ + __usdt_asm1( .ascii "\0") \ + __usdt_asm1(994: .balign 4) \ + __usdt_asm1( .popsection) \ + __usdt_asm1(.ifndef _.stapsdt.base) \ + __usdt_asm5( .pushsection .stapsdt.base,"aG","progbits",.stapsdt.base,comdat)\ + __usdt_asm1( .weak _.stapsdt.base) \ + __usdt_asm1( .hidden _.stapsdt.base) \ + __usdt_asm1(_.stapsdt.base:) \ + __usdt_asm1( .space 1) \ + __usdt_asm2( .size _.stapsdt.base, 1) \ + __usdt_asm1( .popsection) \ + __usdt_asm1(.endif) \ + :: __usdt_asm_ops(__VA_ARGS__) \ + ); \ +} while (0) + +/* + * NB: gdb PR24541 highlighted an unspecified corner of the sdt.h + * operand note format. + * + * The named register may be a longer or shorter (!) alias for the + * storage where the value in question is found. For example, on + * i386, 64-bit value may be put in register pairs, and a register + * name stored would identify just one of them. Previously, gcc was + * asked to emit the %w[id] (16-bit alias of some registers holding + * operands), even when a wider 32-bit value was used. + * + * Bottom line: the byte-width given before the @ sign governs. If + * there is a mismatch between that width and that of the named + * register, then a sys/sdt.h note consumer may need to employ + * architecture-specific heuristics to figure out where the compiler + * has actually put the complete value. + */ +#if defined(__powerpc__) || defined(__powerpc64__) +#define __usdt_argref(id) %I[id]%[id] +#elif defined(__i386__) +#define __usdt_argref(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */ +#else +#define __usdt_argref(id) %[id] +#endif + +#define __usdt_asm_arg(n) __usdt_asm_str(%c[__usdt_asz##n]) \ + __usdt_asm1(.ascii "@") \ + __usdt_asm_str(__usdt_argref(__usdt_aval##n)) + +#define __usdt_asm_args0 /* no arguments */ +#define __usdt_asm_args1 __usdt_asm_arg(1) +#define __usdt_asm_args2 __usdt_asm_args1 __usdt_asm1(.ascii " ") __usdt_asm_arg(2) +#define __usdt_asm_args3 __usdt_asm_args2 __usdt_asm1(.ascii " ") __usdt_asm_arg(3) +#define __usdt_asm_args4 __usdt_asm_args3 __usdt_asm1(.ascii " ") __usdt_asm_arg(4) +#define __usdt_asm_args5 __usdt_asm_args4 __usdt_asm1(.ascii " ") __usdt_asm_arg(5) +#define __usdt_asm_args6 __usdt_asm_args5 __usdt_asm1(.ascii " ") __usdt_asm_arg(6) +#define __usdt_asm_args7 __usdt_asm_args6 __usdt_asm1(.ascii " ") __usdt_asm_arg(7) +#define __usdt_asm_args8 __usdt_asm_args7 __usdt_asm1(.ascii " ") __usdt_asm_arg(8) +#define __usdt_asm_args9 __usdt_asm_args8 __usdt_asm1(.ascii " ") __usdt_asm_arg(9) +#define __usdt_asm_args10 __usdt_asm_args9 __usdt_asm1(.ascii " ") __usdt_asm_arg(10) +#define __usdt_asm_args11 __usdt_asm_args10 __usdt_asm1(.ascii " ") __usdt_asm_arg(11) +#define __usdt_asm_args12 __usdt_asm_args11 __usdt_asm1(.ascii " ") __usdt_asm_arg(12) +#define __usdt_asm_args(...) __usdt_apply(__usdt_asm_args, __usdt_narg(__VA_ARGS__)) + +#define __usdt_is_arr(x) (__builtin_classify_type(x) == 14 || __builtin_classify_type(x) == 5) +#define __usdt_arg_size(x) (__usdt_is_arr(x) ? sizeof(void *) : sizeof(x)) + +/* + * We can't use __builtin_choose_expr() in C++, so fall back to table-based + * signedness determination for known types, utilizing templates magic. + */ +#ifdef __cplusplus + +#define __usdt_is_signed(x) (!__usdt_is_arr(x) && __usdt_t<__typeof(x)>::is_signed) + +#include <cstddef> + +template<typename T> struct __usdt_t { static const bool is_signed = false; }; +template<typename A> struct __usdt_t<A[]> : public __usdt_t<A *> {}; +template<typename A, size_t N> struct __usdt_t<A[N]> : public __usdt_t<A *> {}; + +#define __usdt_def_signed(T) \ +template<> struct __usdt_t<T> { static const bool is_signed = true; }; \ +template<> struct __usdt_t<const T> { static const bool is_signed = true; }; \ +template<> struct __usdt_t<volatile T> { static const bool is_signed = true; }; \ +template<> struct __usdt_t<const volatile T> { static const bool is_signed = true; } +#define __usdt_maybe_signed(T) \ +template<> struct __usdt_t<T> { static const bool is_signed = (T)-1 < (T)1; }; \ +template<> struct __usdt_t<const T> { static const bool is_signed = (T)-1 < (T)1; }; \ +template<> struct __usdt_t<volatile T> { static const bool is_signed = (T)-1 < (T)1; }; \ +template<> struct __usdt_t<const volatile T> { static const bool is_signed = (T)-1 < (T)1; } + +__usdt_def_signed(signed char); +__usdt_def_signed(short); +__usdt_def_signed(int); +__usdt_def_signed(long); +__usdt_def_signed(long long); +__usdt_maybe_signed(char); +__usdt_maybe_signed(wchar_t); + +#else /* !__cplusplus */ + +#define __usdt_is_inttype(x) (__builtin_classify_type(x) >= 1 && __builtin_classify_type(x) <= 4) +#define __usdt_inttype(x) __typeof(__builtin_choose_expr(__usdt_is_inttype(x), (x), 0U)) +#define __usdt_is_signed(x) ((__usdt_inttype(x))-1 < (__usdt_inttype(x))1) + +#endif /* __cplusplus */ + +#define __usdt_asm_op(n, x) \ + [__usdt_asz##n] "n" ((__usdt_is_signed(x) ? (int)-1 : 1) * (int)__usdt_arg_size(x)), \ + [__usdt_aval##n] __usdt_str(USDT_ARG_CONSTRAINT)(x) + +#define __usdt_asm_ops0() [__usdt_dummy] "g" (0) +#define __usdt_asm_ops1(x) __usdt_asm_op(1, x) +#define __usdt_asm_ops2(a,x) __usdt_asm_ops1(a), __usdt_asm_op(2, x) +#define __usdt_asm_ops3(a,b,x) __usdt_asm_ops2(a,b), __usdt_asm_op(3, x) +#define __usdt_asm_ops4(a,b,c,x) __usdt_asm_ops3(a,b,c), __usdt_asm_op(4, x) +#define __usdt_asm_ops5(a,b,c,d,x) __usdt_asm_ops4(a,b,c,d), __usdt_asm_op(5, x) +#define __usdt_asm_ops6(a,b,c,d,e,x) __usdt_asm_ops5(a,b,c,d,e), __usdt_asm_op(6, x) +#define __usdt_asm_ops7(a,b,c,d,e,f,x) __usdt_asm_ops6(a,b,c,d,e,f), __usdt_asm_op(7, x) +#define __usdt_asm_ops8(a,b,c,d,e,f,g,x) __usdt_asm_ops7(a,b,c,d,e,f,g), __usdt_asm_op(8, x) +#define __usdt_asm_ops9(a,b,c,d,e,f,g,h,x) __usdt_asm_ops8(a,b,c,d,e,f,g,h), __usdt_asm_op(9, x) +#define __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,x) __usdt_asm_ops9(a,b,c,d,e,f,g,h,i), __usdt_asm_op(10, x) +#define __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,x) __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,j), __usdt_asm_op(11, x) +#define __usdt_asm_ops12(a,b,c,d,e,f,g,h,i,j,k,x) __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,k), __usdt_asm_op(12, x) +#define __usdt_asm_ops(...) __usdt_apply(__usdt_asm_ops, __usdt_narg(__VA_ARGS__))(__VA_ARGS__) + +#endif /* __USDT_H */ diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c index b616575c3b00..ce13002c7a19 100644 --- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c +++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c @@ -93,7 +93,7 @@ .expected_attach_type = BPF_SK_LOOKUP, .result = VERBOSE_ACCEPT, .runs = -1, - .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8_w=-44\ + .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8=-44\ 2: (c5) if r0 s< 0x0 goto pc+2\ - R0_w=-44", + R0=-44", }, diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index f3492efc8834..c8d640802cce 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1375,7 +1375,7 @@ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), /* write into map value */ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), - /* fetch secound map_value_ptr from the stack */ + /* fetch second map_value_ptr from the stack */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), /* write into map value */ @@ -1439,7 +1439,7 @@ /* second time with fp-16 */ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), - /* fetch secound map_value_ptr from the stack */ + /* fetch second map_value_ptr from the stack */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), /* write into map value */ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), @@ -1493,7 +1493,7 @@ /* second time with fp-16 */ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - /* fetch secound map_value_ptr from the stack */ + /* fetch second map_value_ptr from the stack */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), /* write into map value */ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), @@ -2380,7 +2380,7 @@ */ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), BPF_MOV64_REG(BPF_REG_9, BPF_REG_8), - /* r9 = *r9 ; verifier get's to this point via two paths: + /* r9 = *r9 ; verifier gets to this point via two paths: * ; (I) one including r9 = r8, verified first; * ; (II) one excluding r9 = r8, verified next. * ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id. diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh index f2cac42298ba..09179fb551f0 100755 --- a/tools/testing/selftests/bpf/verify_sig_setup.sh +++ b/tools/testing/selftests/bpf/verify_sig_setup.sh @@ -32,7 +32,7 @@ usage() exit 1 } -setup() +genkey() { local tmp_dir="$1" @@ -45,9 +45,14 @@ setup() openssl x509 -in ${tmp_dir}/signing_key.pem -out \ ${tmp_dir}/signing_key.der -outform der +} - key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s) +setup() +{ + local tmp_dir="$1" + genkey "${tmp_dir}" + key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s) keyring_id=$(keyctl newring ebpf_testing_keyring @s) keyctl link $key_id $keyring_id } @@ -105,6 +110,8 @@ main() if [[ "${action}" == "setup" ]]; then setup "${tmp_dir}" + elif [[ "${action}" == "genkey" ]]; then + genkey "${tmp_dir}" elif [[ "${action}" == "cleanup" ]]; then cleanup "${tmp_dir}" elif [[ "${action}" == "fsverity-create-sign" ]]; then diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index d532dd82a3a8..e962f133250c 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -181,6 +181,12 @@ struct var_preset { bool applied; }; +enum dump_mode { + DUMP_NONE = 0, + DUMP_XLATED = 1, + DUMP_JITED = 2, +}; + static struct env { char **filenames; int filename_cnt; @@ -227,6 +233,7 @@ static struct env { char orig_cgroup[PATH_MAX]; char stat_cgroup[PATH_MAX]; int memory_peak_fd; + __u32 dump_mode; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -271,6 +278,7 @@ const char argp_program_doc[] = enum { OPT_LOG_FIXED = 1000, OPT_LOG_SIZE = 1001, + OPT_DUMP = 1002, }; static const struct argp_option opts[] = { @@ -295,6 +303,7 @@ static const struct argp_option opts[] = { "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, { "set-global-vars", 'G', "GLOBAL", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" }, + { "dump", OPT_DUMP, "DUMP_MODE", OPTION_ARG_OPTIONAL, "Print BPF program dump (xlated, jited)" }, {}, }; @@ -427,6 +436,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return err; } break; + case OPT_DUMP: + if (!arg || strcasecmp(arg, "xlated") == 0) { + env.dump_mode |= DUMP_XLATED; + } else if (strcasecmp(arg, "jited") == 0) { + env.dump_mode |= DUMP_JITED; + } else { + fprintf(stderr, "Unrecognized dump mode '%s'\n", arg); + return -EINVAL; + } + break; default: return ARGP_ERR_UNKNOWN; } @@ -1554,6 +1573,36 @@ static int parse_rvalue(const char *val, struct rvalue *rvalue) return 0; } +static void dump(__u32 prog_id, enum dump_mode mode, const char *file_name, const char *prog_name) +{ + char command[64], buf[4096]; + FILE *fp; + int status; + + status = system("command -v bpftool > /dev/null 2>&1"); + if (status != 0) { + fprintf(stderr, "bpftool is not available, can't print program dump\n"); + return; + } + snprintf(command, sizeof(command), "bpftool prog dump %s id %u", + mode == DUMP_JITED ? "jited" : "xlated", prog_id); + fp = popen(command, "r"); + if (!fp) { + fprintf(stderr, "bpftool failed with error: %d\n", errno); + return; + } + + printf("DUMP (%s) %s/%s:\n", mode == DUMP_JITED ? "JITED" : "XLATED", file_name, prog_name); + while (fgets(buf, sizeof(buf), fp)) + fputs(buf, stdout); + fprintf(stdout, "\n"); + + if (ferror(fp)) + fprintf(stderr, "Failed to dump BPF prog with error: %d\n", errno); + + pclose(fp); +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *base_filename = basename(strdupa(filename)); @@ -1630,8 +1679,13 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf memset(&info, 0, info_len); fd = bpf_program__fd(prog); - if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) + if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) { stats->stats[JITED_SIZE] = info.jited_prog_len; + if (env.dump_mode & DUMP_JITED) + dump(info.id, DUMP_JITED, base_filename, prog_name); + if (env.dump_mode & DUMP_XLATED) + dump(info.id, DUMP_XLATED, base_filename, prog_name); + } parse_verif_log(buf, buf_sz, stats); diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c index 1503a1d2faa0..9ed8c796645d 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -155,7 +155,7 @@ int main(int argc, char **argv) } if (!server) { - /* Only supports IPv4; see hints initiailization above. */ + /* Only supports IPv4; see hints initialization above. */ if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) { fprintf(stderr, "Could not resolve %s\n", argv[optind]); return 1; diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index 93c2cc413cfc..48729da142c2 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -93,8 +93,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) /* Refresh the local tail pointer. * cached_cons is r->size bigger than the real consumer pointer so * that this addition can be avoided in the more frequently - * executed code that computs free_entries in the beginning of - * this function. Without this optimization it whould have been + * executed code that computes free_entries in the beginning of + * this function. Without this optimization it would have been * free_entries = r->cached_prod - r->cached_cons + r->size. */ r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE); diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index a29de0713f19..352adc8df2d1 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -2276,25 +2276,13 @@ static int testapp_xdp_metadata_copy(struct test_spec *test) { struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; - struct bpf_map *data_map; - int count = 0; - int key = 0; test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata, skel_tx->progs.xsk_xdp_populate_metadata, skel_rx->maps.xsk, skel_tx->maps.xsk); test->ifobj_rx->use_metadata = true; - data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss"); - if (!data_map || !bpf_map__is_internal(data_map)) { - ksft_print_msg("Error: could not find bss section of XDP program\n"); - return TEST_FAILURE; - } - - if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) { - ksft_print_msg("Error: could not update count element\n"); - return TEST_FAILURE; - } + skel_rx->bss->count = 0; return testapp_validate_traffic(test); } diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c index 0e89fcff4d05..44c52f620fda 100644 --- a/tools/testing/selftests/cgroup/lib/cgroup_util.c +++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c @@ -522,6 +522,18 @@ int proc_mount_contains(const char *option) return strstr(buf, option) != NULL; } +int cgroup_feature(const char *feature) +{ + char buf[PAGE_SIZE]; + ssize_t read; + + read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf)); + if (read < 0) + return read; + + return strstr(buf, feature) != NULL; +} + ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) { char path[PATH_MAX]; diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h index c69cab66254b..9dc90a1b386d 100644 --- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h +++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h @@ -60,6 +60,7 @@ extern int cg_run_nowait(const char *cgroup, extern int cg_wait_for_proc_count(const char *cgroup, int count); extern int cg_killall(const char *cgroup); int proc_mount_contains(const char *option); +int cgroup_feature(const char *feature); extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size); extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle); extern pid_t clone_into_cgroup(int cgroup_fd); diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index 8730645d363a..dfb763819581 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -804,6 +804,662 @@ cleanup: return ret; } +/* + * Get the current frozen_usec for the cgroup. + */ +static long cg_check_freezetime(const char *cgroup) +{ + return cg_read_key_long(cgroup, "cgroup.stat.local", + "frozen_usec "); +} + +/* + * Test that the freeze time will behave as expected for an empty cgroup. + */ +static int test_cgfreezer_time_empty(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + long prev, curr; + + cgroup = cg_name(root, "cg_time_test_empty"); + if (!cgroup) + goto cleanup; + + /* + * 1) Create an empty cgroup and check that its freeze time + * is 0. + */ + if (cg_create(cgroup)) + goto cleanup; + + curr = cg_check_freezetime(cgroup); + if (curr < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + if (curr > 0) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + if (cg_freeze_nowait(cgroup, true)) + goto cleanup; + + /* + * 2) Sleep for 1000 us. Check that the freeze time is at + * least 1000 us. + */ + usleep(1000); + curr = cg_check_freezetime(cgroup); + if (curr < 1000) { + debug("Expect time (%ld) to be at least 1000 us\n", + curr); + goto cleanup; + } + + /* + * 3) Unfreeze the cgroup. Check that the freeze time is + * larger than at 2). + */ + if (cg_freeze_nowait(cgroup, false)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 4) Check the freeze time again to ensure that it has not + * changed. + */ + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be unchanged from previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * A simple test for cgroup freezer time accounting. This test follows + * the same flow as test_cgfreezer_time_empty, but with a single process + * in the cgroup. + */ +static int test_cgfreezer_time_simple(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + long prev, curr; + + cgroup = cg_name(root, "cg_time_test_simple"); + if (!cgroup) + goto cleanup; + + /* + * 1) Create a cgroup and check that its freeze time is 0. + */ + if (cg_create(cgroup)) + goto cleanup; + + curr = cg_check_freezetime(cgroup); + if (curr < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + if (curr > 0) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + /* + * 2) Populate the cgroup with one child and check that the + * freeze time is still 0. + */ + cg_run_nowait(cgroup, child_fn, NULL); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr > prev) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + if (cg_freeze_nowait(cgroup, true)) + goto cleanup; + + /* + * 3) Sleep for 1000 us. Check that the freeze time is at + * least 1000 us. + */ + usleep(1000); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr < 1000) { + debug("Expect time (%ld) to be at least 1000 us\n", + curr); + goto cleanup; + } + + /* + * 4) Unfreeze the cgroup. Check that the freeze time is + * larger than at 3). + */ + if (cg_freeze_nowait(cgroup, false)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 5) Sleep for 1000 us. Check that the freeze time is the + * same as at 4). + */ + usleep(1000); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be unchanged from previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * Test that freezer time accounting works as expected, even while we're + * populating a cgroup with processes. + */ +static int test_cgfreezer_time_populate(const char *root) +{ + int ret = KSFT_FAIL; + char *cgroup = NULL; + long prev, curr; + int i; + + cgroup = cg_name(root, "cg_time_test_populate"); + if (!cgroup) + goto cleanup; + + if (cg_create(cgroup)) + goto cleanup; + + curr = cg_check_freezetime(cgroup); + if (curr < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + if (curr > 0) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + /* + * 1) Populate the cgroup with 100 processes. Check that + * the freeze time is 0. + */ + for (i = 0; i < 100; i++) + cg_run_nowait(cgroup, child_fn, NULL); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + /* + * 2) Wait for the group to become fully populated. Check + * that the freeze time is 0. + */ + if (cg_wait_for_proc_count(cgroup, 100)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be 0\n", curr); + goto cleanup; + } + + /* + * 3) Freeze the cgroup and then populate it with 100 more + * processes. Check that the freeze time continues to grow. + */ + if (cg_freeze_nowait(cgroup, true)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + for (i = 0; i < 100; i++) + cg_run_nowait(cgroup, child_fn, NULL); + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 4) Wait for the group to become fully populated. Check + * that the freeze time is larger than at 3). + */ + if (cg_wait_for_proc_count(cgroup, 200)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 5) Unfreeze the cgroup. Check that the freeze time is + * larger than at 4). + */ + if (cg_freeze_nowait(cgroup, false)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 6) Kill the processes. Check that the freeze time is the + * same as it was at 5). + */ + if (cg_killall(cgroup)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr != prev) { + debug("Expect time (%ld) to be unchanged from previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + /* + * 7) Freeze and unfreeze the cgroup. Check that the freeze + * time is larger than it was at 6). + */ + if (cg_freeze_nowait(cgroup, true)) + goto cleanup; + if (cg_freeze_nowait(cgroup, false)) + goto cleanup; + prev = curr; + curr = cg_check_freezetime(cgroup); + if (curr <= prev) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr, prev); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (cgroup) + cg_destroy(cgroup); + free(cgroup); + return ret; +} + +/* + * Test that frozen time for a cgroup continues to work as expected, + * even as processes are migrated. Frozen cgroup A's freeze time should + * continue to increase and running cgroup B's should stay 0. + */ +static int test_cgfreezer_time_migrate(const char *root) +{ + long prev_A, curr_A, curr_B; + char *cgroup[2] = {0}; + int ret = KSFT_FAIL; + int pid; + + cgroup[0] = cg_name(root, "cg_time_test_migrate_A"); + if (!cgroup[0]) + goto cleanup; + + cgroup[1] = cg_name(root, "cg_time_test_migrate_B"); + if (!cgroup[1]) + goto cleanup; + + if (cg_create(cgroup[0])) + goto cleanup; + + if (cg_check_freezetime(cgroup[0]) < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_create(cgroup[1])) + goto cleanup; + + pid = cg_run_nowait(cgroup[0], child_fn, NULL); + if (pid < 0) + goto cleanup; + + if (cg_wait_for_proc_count(cgroup[0], 1)) + goto cleanup; + + curr_A = cg_check_freezetime(cgroup[0]); + if (curr_A) { + debug("Expect time (%ld) to be 0\n", curr_A); + goto cleanup; + } + curr_B = cg_check_freezetime(cgroup[1]); + if (curr_B) { + debug("Expect time (%ld) to be 0\n", curr_B); + goto cleanup; + } + + /* + * Freeze cgroup A. + */ + if (cg_freeze_wait(cgroup[0], true)) + goto cleanup; + prev_A = curr_A; + curr_A = cg_check_freezetime(cgroup[0]); + if (curr_A <= prev_A) { + debug("Expect time (%ld) to be > 0\n", curr_A); + goto cleanup; + } + + /* + * Migrate from A (frozen) to B (running). + */ + if (cg_enter(cgroup[1], pid)) + goto cleanup; + + usleep(1000); + curr_B = cg_check_freezetime(cgroup[1]); + if (curr_B) { + debug("Expect time (%ld) to be 0\n", curr_B); + goto cleanup; + } + + prev_A = curr_A; + curr_A = cg_check_freezetime(cgroup[0]); + if (curr_A <= prev_A) { + debug("Expect time (%ld) to be more than previous check (%ld)\n", + curr_A, prev_A); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (cgroup[0]) + cg_destroy(cgroup[0]); + free(cgroup[0]); + if (cgroup[1]) + cg_destroy(cgroup[1]); + free(cgroup[1]); + return ret; +} + +/* + * The test creates a cgroup and freezes it. Then it creates a child cgroup. + * After that it checks that the child cgroup has a non-zero freeze time + * that is less than the parent's. Next, it freezes the child, unfreezes + * the parent, and sleeps. Finally, it checks that the child's freeze + * time has grown larger than the parent's. + */ +static int test_cgfreezer_time_parent(const char *root) +{ + char *parent, *child = NULL; + int ret = KSFT_FAIL; + long ptime, ctime; + + parent = cg_name(root, "cg_test_parent_A"); + if (!parent) + goto cleanup; + + child = cg_name(parent, "cg_test_parent_B"); + if (!child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_check_freezetime(parent) < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_freeze_wait(parent, true)) + goto cleanup; + + usleep(1000); + if (cg_create(child)) + goto cleanup; + + if (cg_check_frozen(child, true)) + goto cleanup; + + /* + * Since the parent was frozen the entire time the child cgroup + * was being created, we expect the parent's freeze time to be + * larger than the child's. + * + * Ideally, we would be able to check both times simultaneously, + * but here we get the child's after we get the parent's. + */ + ptime = cg_check_freezetime(parent); + ctime = cg_check_freezetime(child); + if (ptime <= ctime) { + debug("Expect ptime (%ld) > ctime (%ld)\n", ptime, ctime); + goto cleanup; + } + + if (cg_freeze_nowait(child, true)) + goto cleanup; + + if (cg_freeze_wait(parent, false)) + goto cleanup; + + if (cg_check_frozen(child, true)) + goto cleanup; + + usleep(100000); + + ctime = cg_check_freezetime(child); + ptime = cg_check_freezetime(parent); + + if (ctime <= ptime) { + debug("Expect ctime (%ld) > ptime (%ld)\n", ctime, ptime); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (child) + cg_destroy(child); + free(child); + if (parent) + cg_destroy(parent); + free(parent); + return ret; +} + +/* + * The test creates a parent cgroup and a child cgroup. Then, it freezes + * the child and checks that the child's freeze time is greater than the + * parent's, which should be zero. + */ +static int test_cgfreezer_time_child(const char *root) +{ + char *parent, *child = NULL; + int ret = KSFT_FAIL; + long ptime, ctime; + + parent = cg_name(root, "cg_test_child_A"); + if (!parent) + goto cleanup; + + child = cg_name(parent, "cg_test_child_B"); + if (!child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_check_freezetime(parent) < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_create(child)) + goto cleanup; + + if (cg_freeze_wait(child, true)) + goto cleanup; + + ctime = cg_check_freezetime(child); + ptime = cg_check_freezetime(parent); + if (ptime != 0) { + debug("Expect ptime (%ld) to be 0\n", ptime); + goto cleanup; + } + + if (ctime <= ptime) { + debug("Expect ctime (%ld) <= ptime (%ld)\n", ctime, ptime); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + if (child) + cg_destroy(child); + free(child); + if (parent) + cg_destroy(parent); + free(parent); + return ret; +} + +/* + * The test creates the following hierarchy: + * A + * | + * B + * | + * C + * + * Then it freezes the cgroups in the order C, B, A. + * Then it unfreezes the cgroups in the order A, B, C. + * Then it checks that C's freeze time is larger than B's and + * that B's is larger than A's. + */ +static int test_cgfreezer_time_nested(const char *root) +{ + char *cgroup[3] = {0}; + int ret = KSFT_FAIL; + long time[3] = {0}; + int i; + + cgroup[0] = cg_name(root, "cg_test_time_A"); + if (!cgroup[0]) + goto cleanup; + + cgroup[1] = cg_name(cgroup[0], "B"); + if (!cgroup[1]) + goto cleanup; + + cgroup[2] = cg_name(cgroup[1], "C"); + if (!cgroup[2]) + goto cleanup; + + if (cg_create(cgroup[0])) + goto cleanup; + + if (cg_check_freezetime(cgroup[0]) < 0) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_create(cgroup[1])) + goto cleanup; + + if (cg_create(cgroup[2])) + goto cleanup; + + if (cg_freeze_nowait(cgroup[2], true)) + goto cleanup; + + if (cg_freeze_nowait(cgroup[1], true)) + goto cleanup; + + if (cg_freeze_nowait(cgroup[0], true)) + goto cleanup; + + usleep(1000); + + if (cg_freeze_nowait(cgroup[0], false)) + goto cleanup; + + if (cg_freeze_nowait(cgroup[1], false)) + goto cleanup; + + if (cg_freeze_nowait(cgroup[2], false)) + goto cleanup; + + time[2] = cg_check_freezetime(cgroup[2]); + time[1] = cg_check_freezetime(cgroup[1]); + time[0] = cg_check_freezetime(cgroup[0]); + + if (time[2] <= time[1]) { + debug("Expect C's time (%ld) > B's time (%ld)", time[2], time[1]); + goto cleanup; + } + + if (time[1] <= time[0]) { + debug("Expect B's time (%ld) > A's time (%ld)", time[1], time[0]); + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + for (i = 2; i >= 0 && cgroup[i]; i--) { + cg_destroy(cgroup[i]); + free(cgroup[i]); + } + + return ret; +} + #define T(x) { x, #x } struct cgfreezer_test { int (*fn)(const char *root); @@ -819,6 +1475,13 @@ struct cgfreezer_test { T(test_cgfreezer_stopped), T(test_cgfreezer_ptraced), T(test_cgfreezer_vfork), + T(test_cgfreezer_time_empty), + T(test_cgfreezer_time_simple), + T(test_cgfreezer_time_populate), + T(test_cgfreezer_time_migrate), + T(test_cgfreezer_time_parent), + T(test_cgfreezer_time_child), + T(test_cgfreezer_time_nested), }; #undef T diff --git a/tools/testing/selftests/cgroup/test_pids.c b/tools/testing/selftests/cgroup/test_pids.c index 9ecb83c6cc5c..d8a1d1cd5007 100644 --- a/tools/testing/selftests/cgroup/test_pids.c +++ b/tools/testing/selftests/cgroup/test_pids.c @@ -77,6 +77,9 @@ static int test_pids_events(const char *root) char *cg_parent = NULL, *cg_child = NULL; int pid; + if (cgroup_feature("pids_localevents") <= 0) + return KSFT_SKIP; + cg_parent = cg_name(root, "pids_parent"); cg_child = cg_name(cg_parent, "pids_child"); if (!cg_parent || !cg_child) diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 9a3499827d4b..2180c328a825 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -5,6 +5,7 @@ TEST_GEN_FILES += access_memory access_memory_even TEST_FILES = _damon_sysfs.py TEST_FILES += drgn_dump_damon_status.py +TEST_FILES += _common.sh # functionality tests TEST_PROGS += sysfs.sh @@ -18,6 +19,7 @@ TEST_PROGS += reclaim.sh lru_sort.sh TEST_PROGS += sysfs_update_removed_scheme_dir.sh TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py TEST_PROGS += sysfs_memcg_path_leak.sh +TEST_PROGS += sysfs_no_op_commit_break.py EXTRA_CLEAN = __pycache__ diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c index a9f4e9aaf3a9..93f3a71bcfd4 100644 --- a/tools/testing/selftests/damon/access_memory_even.c +++ b/tools/testing/selftests/damon/access_memory_even.c @@ -9,7 +9,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <time.h> int main(int argc, char *argv[]) { diff --git a/tools/testing/selftests/damon/sysfs_no_op_commit_break.py b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py new file mode 100755 index 000000000000..2c65cffe6b54 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_no_op_commit_break.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import json +import os +import subprocess +import sys + +import _damon_sysfs + +def dump_damon_status_dict(pid): + try: + subprocess.check_output(['which', 'drgn'], stderr=subprocess.DEVNULL) + except: + return None, 'drgn not found' + file_dir = os.path.dirname(os.path.abspath(__file__)) + dump_script = os.path.join(file_dir, 'drgn_dump_damon_status.py') + rc = subprocess.call(['drgn', dump_script, pid, 'damon_dump_output'], + stderr=subprocess.DEVNULL) + + if rc != 0: + return None, f'drgn fail: return code({rc})' + try: + with open('damon_dump_output', 'r') as f: + return json.load(f), None + except Exception as e: + return None, 'json.load fail (%s)' % e + +def main(): + kdamonds = _damon_sysfs.Kdamonds( + [_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + schemes=[_damon_sysfs.Damos( + ops_filters=[ + _damon_sysfs.DamosFilter( + type_='anon', + matching=True, + allow=True, + ) + ] + )], + )])] + ) + + err = kdamonds.start() + if err is not None: + print('kdamond start failed: %s' % err) + exit(1) + + before_commit_status, err = \ + dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print('before-commit status dump failed: %s' % err) + exit(1) + + kdamonds.kdamonds[0].commit() + + after_commit_status, err = \ + dump_damon_status_dict(kdamonds.kdamonds[0].pid) + if err is not None: + print('after-commit status dump failed: %s' % err) + exit(1) + + if before_commit_status != after_commit_status: + print(f'before: {json.dumps(before_commit_status, indent=2)}') + print(f'after: {json.dumps(after_commit_status, indent=2)}') + exit(1) + + kdamonds.stop() + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore index d634d8395d90..585ecb4d5dc4 100644 --- a/tools/testing/selftests/drivers/net/.gitignore +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only napi_id_helper +psp_responder diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 984ece05f7f9..6e41635bd55a 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -10,6 +10,7 @@ TEST_GEN_FILES := \ # end of TEST_GEN_FILES TEST_PROGS := \ + hds.py \ napi_id.py \ napi_threaded.py \ netcons_basic.sh \ @@ -19,11 +20,20 @@ TEST_PROGS := \ netcons_sysdata.sh \ netpoll_basic.py \ ping.py \ + psp.py \ queues.py \ - stats.py \ shaper.py \ - hds.py \ + stats.py \ xdp.py \ # end of TEST_PROGS +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := psp_responder +TEST_GEN_FILES += $(YNL_GEN_FILES) + include ../../lib.mk + +# YNL build +YNL_GENS := psp + +include ../../net/ynl.mk diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 44b98f17f8ff..402d4ee84f2e 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -4,22 +4,27 @@ TEST_PROGS := \ bond-arp-interval-causes-panic.sh \ bond-break-lacpdu-tx.sh \ + bond-eth-type-change.sh \ bond-lladdr-target.sh \ + bond_ipsec_offload.sh \ + bond_lacp_prio.sh \ + bond_macvlan_ipvlan.sh \ + bond_options.sh \ + bond_passive_lacp.sh \ dev_addr_lists.sh \ mode-1-recovery-updelay.sh \ mode-2-recovery-updelay.sh \ - bond_options.sh \ - bond-eth-type-change.sh \ - bond_macvlan_ipvlan.sh \ - bond_passive_lacp.sh +# end of TEST_PROGS TEST_FILES := \ - lag_lib.sh \ bond_topo_2d1c.sh \ - bond_topo_3d1c.sh + bond_topo_3d1c.sh \ + lag_lib.sh \ +# end of TEST_FILES TEST_INCLUDES := \ + ../../../net/lib.sh \ ../../../net/forwarding/lib.sh \ - ../../../net/lib.sh +# end of TEST_INCLUDES include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh new file mode 100755 index 000000000000..f09e100232c7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh @@ -0,0 +1,156 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# IPsec over bonding offload test: +# +# +----------------+ +# | bond0 | +# | | | +# | eth0 eth1 | +# +---+-------+----+ +# +# We use netdevsim instead of physical interfaces +#------------------------------------------------------------------- +# Example commands +# ip x s add proto esp src 192.0.2.1 dst 192.0.2.2 \ +# spi 0x07 mode transport reqid 0x07 replay-window 32 \ +# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \ +# sel src 192.0.2.1/24 dst 192.0.2.2/24 +# offload dev bond0 dir out +# ip x p add dir out src 192.0.2.1/24 dst 192.0.2.2/24 \ +# tmpl proto esp src 192.0.2.1 dst 192.0.2.2 \ +# spi 0x07 mode transport reqid 0x07 +# +#------------------------------------------------------------------- + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh +srcip=192.0.2.1 +dstip=192.0.2.2 +ipsec0=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ipsec +ipsec1=/sys/kernel/debug/netdevsim/netdevsim0/ports/1/ipsec +active_slave="" + +# shellcheck disable=SC2317 +active_slave_changed() +{ + local old_active_slave=$1 + local new_active_slave + + # shellcheck disable=SC2154 + new_active_slave=$(ip -n "${ns}" -d -j link show bond0 | \ + jq -r ".[].linkinfo.info_data.active_slave") + [ "$new_active_slave" != "$old_active_slave" ] && [ "$new_active_slave" != "null" ] +} + +test_offload() +{ + # use ping to exercise the Tx path + ip netns exec "$ns" ping -I bond0 -c 3 -W 1 -i 0 "$dstip" >/dev/null + + active_slave=$(ip -n "${ns}" -d -j link show bond0 | \ + jq -r ".[].linkinfo.info_data.active_slave") + + if [ "$active_slave" = "$nic0" ]; then + sysfs=$ipsec0 + elif [ "$active_slave" = "$nic1" ]; then + sysfs=$ipsec1 + else + check_err 1 "bond_ipsec_offload invalid active_slave $active_slave" + fi + + # The tx/rx order in sysfs may changed after failover + grep -q "SA count=2 tx=3" "$sysfs" && grep -q "tx ipaddr=$dstip" "$sysfs" + check_err $? "incorrect tx count with link ${active_slave}" + + log_test bond_ipsec_offload "active_slave ${active_slave}" +} + +setup_env() +{ + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + defer umount /sys/kernel/debug/ + + fi + + # setup netdevsim since dummy/veth dev doesn't have offload support + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + if ! modprobe -q netdevsim; then + echo "SKIP: can't load netdevsim for ipsec offload" + # shellcheck disable=SC2154 + exit "$ksft_skip" + fi + defer modprobe -r netdevsim + fi + + setup_ns ns + defer cleanup_ns "$ns" +} + +setup_bond() +{ + ip -n "$ns" link add bond0 type bond mode active-backup miimon 100 + ip -n "$ns" addr add "$srcip/24" dev bond0 + ip -n "$ns" link set bond0 up + + echo "0 2" | ip netns exec "$ns" tee /sys/bus/netdevsim/new_device >/dev/null + nic0=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | head -n 1) + nic1=$(ip netns exec "$ns" ls /sys/bus/netdevsim/devices/netdevsim0/net | tail -n 1) + ip -n "$ns" link set "$nic0" master bond0 + ip -n "$ns" link set "$nic1" master bond0 + + # we didn't create a peer, make sure we can Tx by adding a permanent + # neighbour this need to be added after enslave + ip -n "$ns" neigh add "$dstip" dev bond0 lladdr 00:11:22:33:44:55 + + # create offloaded SAs, both in and out + ip -n "$ns" x p add dir out src "$srcip/24" dst "$dstip/24" \ + tmpl proto esp src "$srcip" dst "$dstip" spi 9 \ + mode transport reqid 42 + + ip -n "$ns" x p add dir in src "$dstip/24" dst "$srcip/24" \ + tmpl proto esp src "$dstip" dst "$srcip" spi 9 \ + mode transport reqid 42 + + ip -n "$ns" x s add proto esp src "$srcip" dst "$dstip" spi 9 \ + mode transport reqid 42 aead "rfc4106(gcm(aes))" \ + 0x3132333435363738393031323334353664636261 128 \ + sel src "$srcip/24" dst "$dstip/24" \ + offload dev bond0 dir out + + ip -n "$ns" x s add proto esp src "$dstip" dst "$srcip" spi 9 \ + mode transport reqid 42 aead "rfc4106(gcm(aes))" \ + 0x3132333435363738393031323334353664636261 128 \ + sel src "$dstip/24" dst "$srcip/24" \ + offload dev bond0 dir in + + # does offload show up in ip output + lines=$(ip -n "$ns" x s list | grep -c "crypto offload parameters: dev bond0 dir") + if [ "$lines" -ne 2 ] ; then + check_err 1 "bond_ipsec_offload SA offload missing from list output" + fi +} + +trap defer_scopes_cleanup EXIT +setup_env +setup_bond + +# start Offload testing +test_offload + +# do failover and re-test +ip -n "$ns" link set "$active_slave" down +slowwait 5 active_slave_changed "$active_slave" +test_offload + +# make sure offload get removed from driver +ip -n "$ns" x s flush +ip -n "$ns" x p flush +line0=$(grep -c "SA count=0" "$ipsec0") +line1=$(grep -c "SA count=0" "$ipsec1") +[ "$line0" -ne 1 ] || [ "$line1" -ne 1 ] +check_fail $? "bond_ipsec_offload SA not removed from driver" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh new file mode 100755 index 000000000000..a483d505c6a8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing if bond lacp per port priority works +# +# Switch (s_ns) Backup Switch (b_ns) +# +-------------------------+ +-------------------------+ +# | bond0 | | bond0 | +# | + | | + | +# | eth0 | eth1 | | eth0 | eth1 | +# | +---+---+ | | +---+---+ | +# | | | | | | | | +# +-------------------------+ +-------------------------+ +# | | | | +# +-----------------------------------------------------+ +# | | | | | | +# | +-------+---------+---------+-------+ | +# | eth0 eth1 | eth2 eth3 | +# | + | +# | bond0 | +# +-----------------------------------------------------+ +# Client (c_ns) + +lib_dir=$(dirname "$0") +# shellcheck disable=SC1091 +source "$lib_dir"/../../../net/lib.sh + +setup_links() +{ + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}" + ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}" + # shellcheck disable=SC2154 + ip -n "${c_ns}" link add eth2 type veth peer name eth0 netns "${b_ns}" + ip -n "${c_ns}" link add eth3 type veth peer name eth1 netns "${b_ns}" + + ip -n "${c_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast ad_select actor_port_prio + ip -n "${s_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + ip -n "${b_ns}" link add bond0 type bond mode 802.3ad miimon 100 \ + lacp_rate fast + + ip -n "${c_ns}" link set eth0 master bond0 + ip -n "${c_ns}" link set eth1 master bond0 + ip -n "${c_ns}" link set eth2 master bond0 + ip -n "${c_ns}" link set eth3 master bond0 + ip -n "${s_ns}" link set eth0 master bond0 + ip -n "${s_ns}" link set eth1 master bond0 + ip -n "${b_ns}" link set eth0 master bond0 + ip -n "${b_ns}" link set eth1 master bond0 + + ip -n "${c_ns}" link set bond0 up + ip -n "${s_ns}" link set bond0 up + ip -n "${b_ns}" link set bond0 up +} + +test_port_prio_setting() +{ + RET=0 + ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 1000 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth0" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -ne 1000 ] && RET=1 + ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 10 + prio=$(cmd_jq "ip -n ${c_ns} -d -j link show eth2" \ + ".[].linkinfo.info_slave_data.actor_port_prio") + [ "$prio" -ne 10 ] && RET=1 +} + +test_agg_reselect() +{ + local bond_agg_id slave_agg_id + local expect_slave="$1" + RET=0 + + # Trigger link state change to reselect the aggregator + ip -n "${c_ns}" link set eth1 down + sleep 0.5 + ip -n "${c_ns}" link set eth1 up + sleep 0.5 + + bond_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show bond0" \ + ".[].linkinfo.info_data.ad_info.aggregator") + slave_agg_id=$(cmd_jq "ip -n ${c_ns} -d -j link show $expect_slave" \ + ".[].linkinfo.info_slave_data.ad_aggregator_id") + # shellcheck disable=SC2034 + [ "${bond_agg_id}" -ne "${slave_agg_id}" ] && \ + RET=1 +} + +trap cleanup_all_ns EXIT +setup_ns c_ns s_ns b_ns +setup_links + +test_port_prio_setting +log_test "bond 802.3ad" "actor_port_prio setting" + +test_agg_reselect eth0 +log_test "bond 802.3ad" "actor_port_prio select" + +# Change the actor port prio and re-test +ip -n "${c_ns}" link set eth0 type bond_slave actor_port_prio 10 +ip -n "${c_ns}" link set eth2 type bond_slave actor_port_prio 1000 +test_agg_reselect eth2 +log_test "bond 802.3ad" "actor_port_prio switch" + +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 832fa1caeb66..6bb290abd48b 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -1,13 +1,17 @@ CONFIG_BONDING=y CONFIG_BRIDGE=y CONFIG_DUMMY=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y CONFIG_IPV6=y -CONFIG_MACVLAN=y CONFIG_IPVLAN=y +CONFIG_MACVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y CONFIG_NET_CLS_MATCHALL=m +CONFIG_NETDEVSIM=m CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y CONFIG_VETH=y CONFIG_VLAN_8021Q=m +CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index f27172ddee0a..77ccf83d87e0 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -1,7 +1,10 @@ -CONFIG_IPV6=y -CONFIG_NETDEVSIM=m CONFIG_CONFIGFS_FS=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_BTF_MODULES=n +CONFIG_INET_PSP=y +CONFIG_IPV6=y CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_NETDEVSIM=m CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile index cd6817fe5be6..7994bd0e5c44 100644 --- a/tools/testing/selftests/drivers/net/dsa/Makefile +++ b/tools/testing/selftests/drivers/net/dsa/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = bridge_locked_port.sh \ +TEST_PROGS := \ + bridge_locked_port.sh \ bridge_mdb.sh \ bridge_mld.sh \ bridge_vlan_aware.sh \ @@ -9,11 +10,13 @@ TEST_PROGS = bridge_locked_port.sh \ local_termination.sh \ no_forwarding.sh \ tc_actions.sh \ - test_bridge_fdb_stress.sh + test_bridge_fdb_stress.sh \ +# end of TEST_PROGS TEST_FILES := \ + forwarding.config \ run_net_forwarding_test.sh \ - forwarding.config +# end of TEST_FILES TEST_INCLUDES := \ ../../../net/forwarding/bridge_locked_port.sh \ @@ -27,6 +30,7 @@ TEST_INCLUDES := \ ../../../net/forwarding/no_forwarding.sh \ ../../../net/forwarding/tc_actions.sh \ ../../../net/forwarding/tc_common.sh \ - ../../../net/lib.sh + ../../../net/lib.sh \ +# end of TEST_INCLUDES include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 7c90a040ce45..c4fe049e9baa 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -3,10 +3,12 @@ import errno import os +import random +from typing import Union from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv -from lib.py import defer, ethtool, ip, random +from lib.py import defer, ethtool, ip def _get_hds_mode(cfg, netnl) -> str: @@ -58,7 +60,39 @@ def get_hds_thresh(cfg, netnl) -> None: if 'hds-thresh' not in rings: raise KsftSkipEx('hds-thresh not supported by device') + +def _hds_reset(cfg, netnl, rings) -> None: + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + + arg = {'header': {'dev-index': cfg.ifindex}} + if cur.get('tcp-data-split') != rings.get('tcp-data-split'): + # Try to reset to "unknown" first, we don't know if the setting + # was the default or user chose it. Default seems more likely. + arg['tcp-data-split'] = "unknown" + netnl.rings_set(arg) + cur = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if cur['tcp-data-split'] == rings['tcp-data-split']: + del arg['tcp-data-split'] + else: + # Try the explicit setting + arg['tcp-data-split'] = rings['tcp-data-split'] + if cur.get('hds-thresh') != rings.get('hds-thresh'): + arg['hds-thresh'] = rings['hds-thresh'] + if len(arg) > 1: + netnl.rings_set(arg) + + +def _defer_reset_hds(cfg, netnl) -> Union[dict, None]: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if 'hds-thresh' in rings or 'tcp-data-split' in rings: + defer(_hds_reset, cfg, netnl, rings) + except NlError as e: + pass + + def set_hds_enable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'}) except NlError as e: @@ -76,6 +110,7 @@ def set_hds_enable(cfg, netnl) -> None: ksft_eq('enabled', rings['tcp-data-split']) def set_hds_disable(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'}) except NlError as e: @@ -93,6 +128,7 @@ def set_hds_disable(cfg, netnl) -> None: ksft_eq('disabled', rings['tcp-data-split']) def set_hds_thresh_zero(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0}) except NlError as e: @@ -110,6 +146,7 @@ def set_hds_thresh_zero(cfg, netnl) -> None: ksft_eq(0, rings['hds-thresh']) def set_hds_thresh_random(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -140,6 +177,7 @@ def set_hds_thresh_random(cfg, netnl) -> None: ksft_eq(hds_thresh, rings['hds-thresh']) def set_hds_thresh_max(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -157,6 +195,7 @@ def set_hds_thresh_max(cfg, netnl) -> None: ksft_eq(rings['hds-thresh'], rings['hds-thresh-max']) def set_hds_thresh_gt(cfg, netnl) -> None: + _defer_reset_hds(cfg, netnl) try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: @@ -178,6 +217,7 @@ def set_xdp(cfg, netnl) -> None: """ mode = _get_hds_mode(cfg, netnl) if mode == 'enabled': + _defer_reset_hds(cfg, netnl) netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'unknown'}) diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index fdc97355588c..8133d1a0051c 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -15,9 +15,11 @@ TEST_PROGS = \ iou-zcrx.py \ irq.py \ loopback.sh \ + nic_timestamp.py \ pp_alloc_fail.py \ rss_api.py \ rss_ctx.py \ + rss_flow_label.py \ rss_input_xfrm.py \ tso.py \ xsk_reconfig.py \ @@ -30,8 +32,8 @@ TEST_FILES := \ TEST_INCLUDES := \ $(wildcard lib/py/*.py ../lib/py/*.py) \ ../../../net/lib.sh \ - ../../../net/forwarding/lib.sh \ ../../../net/forwarding/ipip_lib.sh \ + ../../../net/forwarding/lib.sh \ ../../../net/forwarding/tc_common.sh \ # @@ -43,7 +45,11 @@ TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) include ../../../lib.mk # YNL build -YNL_GENS := ethtool netdev +YNL_GENS := \ + ethtool \ + netdev \ +# end of YNL_GENS + include ../../../net/ynl.mk include ../../../net/bpf.mk diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index 88ae719e6f8f..2307aa001be1 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config @@ -1,5 +1,11 @@ +CONFIG_FAIL_FUNCTION=y +CONFIG_FAULT_INJECTION=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FUNCTION_ERROR_INJECTION=y +CONFIG_IO_URING=y CONFIG_IPV6=y CONFIG_IPV6_GRE=y CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y +CONFIG_UDMABUF=y CONFIG_VXLAN=y diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index baa2f24240ba..45c2d49d55b6 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -24,7 +24,7 @@ def check_rx(cfg) -> None: require_devmem(cfg) port = rand_port() - socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}" + socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}" listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7" with bkg(listen_cmd, exit_wait=True) as ncdevmem: @@ -42,9 +42,9 @@ def check_tx(cfg) -> None: port = rand_port() listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - with bkg(listen_cmd) as socat: - wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port}", host=cfg.remote, shell=True) + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") @@ -56,9 +56,9 @@ def check_tx_chunks(cfg) -> None: port = rand_port() listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - with bkg(listen_cmd, exit_wait=True) as socat: - wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True) + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index 1462a339a74b..0ceb297e7757 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -13,7 +13,7 @@ try: # Import one by one to avoid pylint false positives from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ - NlError, RtnlFamily, DevlinkFamily + NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ rand_port, tool, wait_port_listen @@ -22,7 +22,7 @@ try: from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ - ksft_ne, ksft_not_in, ksft_raises, ksft_true + ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none from net.lib.py import NetNSEnter from drivers.net.lib.py import GenerateTraffic from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 72f828021f83..3288ed04ce08 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -39,6 +39,7 @@ #define __EXPORTED_HEADERS__ #include <linux/uio.h> +#include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -97,6 +98,10 @@ static unsigned int dmabuf_id; static uint32_t tx_dmabuf_id; static int waittime_ms = 500; +/* System state loaded by current_config_load() */ +#define MAX_FLOWS 8 +static int ntuple_ids[MAX_FLOWS] = { -1, -1, -1, -1, -1, -1, -1, -1, }; + struct memory_buffer { int fd; size_t size; @@ -115,6 +120,21 @@ struct memory_provider { size_t off, int n); }; +static void pr_err(const char *fmt, ...) +{ + va_list args; + + fprintf(stderr, "%s: ", TEST_PREFIX); + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + if (errno != 0) + fprintf(stderr, ": %s", strerror(errno)); + fprintf(stderr, "\n"); +} + static struct memory_buffer *udmabuf_alloc(size_t size) { struct udmabuf_create create; @@ -123,27 +143,33 @@ static struct memory_buffer *udmabuf_alloc(size_t size) ctx = malloc(sizeof(*ctx)); if (!ctx) - error(1, ENOMEM, "malloc failed"); + return NULL; ctx->size = size; ctx->devfd = open("/dev/udmabuf", O_RDWR); - if (ctx->devfd < 0) - error(1, errno, - "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", - TEST_PREFIX); + if (ctx->devfd < 0) { + pr_err("[skip,no-udmabuf: Unable to access DMA buffer device file]"); + goto err_free_ctx; + } ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); - if (ctx->memfd < 0) - error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX); + if (ctx->memfd < 0) { + pr_err("[skip,no-memfd]"); + goto err_close_dev; + } ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); - if (ret < 0) - error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + if (ret < 0) { + pr_err("[skip,fcntl-add-seals]"); + goto err_close_memfd; + } ret = ftruncate(ctx->memfd, size); - if (ret == -1) - error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + if (ret == -1) { + pr_err("[FAIL,memfd-truncate]"); + goto err_close_memfd; + } memset(&create, 0, sizeof(create)); @@ -151,15 +177,29 @@ static struct memory_buffer *udmabuf_alloc(size_t size) create.offset = 0; create.size = size; ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); - if (ctx->fd < 0) - error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); + if (ctx->fd < 0) { + pr_err("[FAIL, create udmabuf]"); + goto err_close_fd; + } ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ctx->fd, 0); - if (ctx->buf_mem == MAP_FAILED) - error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX); + if (ctx->buf_mem == MAP_FAILED) { + pr_err("[FAIL, map udmabuf]"); + goto err_close_fd; + } return ctx; + +err_close_fd: + close(ctx->fd); +err_close_memfd: + close(ctx->memfd); +err_close_dev: + close(ctx->devfd); +err_free_ctx: + free(ctx); + return NULL; } static void udmabuf_free(struct memory_buffer *ctx) @@ -217,7 +257,7 @@ static void print_nonzero_bytes(void *ptr, size_t size) putchar(p[i]); } -void validate_buffer(void *line, size_t size) +int validate_buffer(void *line, size_t size) { static unsigned char seed = 1; unsigned char *ptr = line; @@ -232,8 +272,10 @@ void validate_buffer(void *line, size_t size) "Failed validation: expected=%u, actual=%u, index=%lu\n", expected, ptr[i], i); errors++; - if (errors > 20) - error(1, 0, "validation failed."); + if (errors > 20) { + pr_err("validation failed"); + return -1; + } } seed++; if (seed == do_validation) @@ -241,6 +283,86 @@ void validate_buffer(void *line, size_t size) } fprintf(stdout, "Validated buffer\n"); + return 0; +} + +static int +__run_command(char *out, size_t outlen, const char *cmd, va_list args) +{ + char command[256]; + FILE *fp; + + vsnprintf(command, sizeof(command), cmd, args); + + fprintf(stderr, "Running: %s\n", command); + fp = popen(command, "r"); + if (!fp) + return -1; + if (out) { + size_t len; + + if (!fgets(out, outlen, fp)) + return -1; + + /* Remove trailing newline if present */ + len = strlen(out); + if (len && out[len - 1] == '\n') + out[len - 1] = '\0'; + } + return pclose(fp); +} + +static int run_command(const char *cmd, ...) +{ + va_list args; + int ret; + + va_start(args, cmd); + ret = __run_command(NULL, 0, cmd, args); + va_end(args); + + return ret; +} + +static int ethtool_add_flow(const char *format, ...) +{ + char local_output[256], cmd[256]; + const char *id_start; + int flow_idx, ret; + char *endptr; + long flow_id; + va_list args; + + for (flow_idx = 0; flow_idx < MAX_FLOWS; flow_idx++) + if (ntuple_ids[flow_idx] == -1) + break; + if (flow_idx == MAX_FLOWS) { + fprintf(stderr, "Error: too many flows\n"); + return -1; + } + + snprintf(cmd, sizeof(cmd), "ethtool -N %s %s", ifname, format); + + va_start(args, format); + ret = __run_command(local_output, sizeof(local_output), cmd, args); + va_end(args); + + if (ret != 0) + return ret; + + /* Extract the ID from the output */ + id_start = strstr(local_output, "Added rule with ID "); + if (!id_start) + return -1; + id_start += strlen("Added rule with ID "); + + flow_id = strtol(id_start, &endptr, 10); + if (endptr == id_start || flow_id < 0 || flow_id > INT_MAX) + return -1; + + fprintf(stderr, "Added flow rule with ID %ld\n", flow_id); + ntuple_ids[flow_idx] = flow_id; + return flow_id; } static int rxq_num(int ifindex) @@ -270,29 +392,17 @@ static int rxq_num(int ifindex) return num; } -#define run_command(cmd, ...) \ - ({ \ - char command[256]; \ - memset(command, 0, sizeof(command)); \ - snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ - fprintf(stderr, "Running: %s\n", command); \ - system(command); \ - }) - -static int reset_flow_steering(void) +static void reset_flow_steering(void) { - /* Depending on the NIC, toggling ntuple off and on might not - * be allowed. Additionally, attempting to delete existing filters - * will fail if no filters are present. Therefore, do not enforce - * the exit status. - */ - - run_command("sudo ethtool -K %s ntuple off >&2", ifname); - run_command("sudo ethtool -K %s ntuple on >&2", ifname); - run_command( - "sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2", - ifname, ifname); - return 0; + int i; + + for (i = 0; i < MAX_FLOWS; i++) { + if (ntuple_ids[i] == -1) + continue; + run_command("ethtool -N %s delete %d", + ifname, ntuple_ids[i]); + ntuple_ids[i] = -1; + } } static const char *tcp_data_split_str(int val) @@ -309,7 +419,81 @@ static const char *tcp_data_split_str(int val) } } -static int configure_headersplit(bool on) +static struct ethtool_rings_get_rsp *get_ring_config(void) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return NULL; + } + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + ynl_sock_destroy(ys); + + return get_rsp; +} + +static void restore_ring_config(const struct ethtool_rings_get_rsp *config) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ethtool_rings_set_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + if (!config) + return; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return; + } + + req = ethtool_rings_set_req_alloc(); + ethtool_rings_set_req_set_header_dev_index(req, ifindex); + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + if (config->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, config->hds_thresh); + + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring HDS cfg: %s\n", ys->err.msg); + + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + + /* use explicit value if UKNOWN didn't give us the previous */ + if (get_rsp->tcp_data_split != config->tcp_data_split) { + ethtool_rings_set_req_set_tcp_data_split(req, + config->tcp_data_split); + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL restoring expl HDS cfg: %s\n", + ys->err.msg); + } + + ethtool_rings_get_rsp_free(get_rsp); + ethtool_rings_set_req_free(req); + + ynl_sock_destroy(ys); +} + +static int +configure_headersplit(const struct ethtool_rings_get_rsp *old, bool on) { struct ethtool_rings_get_req *get_req; struct ethtool_rings_get_rsp *get_rsp; @@ -326,8 +510,15 @@ static int configure_headersplit(bool on) req = ethtool_rings_set_req_alloc(); ethtool_rings_set_req_set_header_dev_index(req, ifindex); - /* 0 - off, 1 - auto, 2 - on */ - ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0); + if (on) { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_ENABLED); + if (old->_present.hds_thresh) + ethtool_rings_set_req_set_hds_thresh(req, 0); + } else { + ethtool_rings_set_req_set_tcp_data_split(req, + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN); + } ret = ethtool_rings_set(ys, req); if (ret < 0) fprintf(stderr, "YNL failed: %s\n", ys->err.msg); @@ -351,12 +542,103 @@ static int configure_headersplit(bool on) static int configure_rss(void) { - return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue); + return run_command("ethtool -X %s equal %d >&2", ifname, start_queue); +} + +static void reset_rss(void) +{ + run_command("ethtool -X %s default >&2", ifname, start_queue); } -static int configure_channels(unsigned int rx, unsigned int tx) +static int check_changing_channels(unsigned int rx, unsigned int tx) { - return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); + struct ethtool_channels_get_req *gchan; + struct ethtool_channels_set_req *schan; + struct ethtool_channels_get_rsp *chan; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx); + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + gchan = ethtool_channels_get_req_alloc(); + if (!gchan) { + ret = -1; + goto exit_close_sock; + } + + ethtool_channels_get_req_set_header_dev_index(gchan, ifindex); + chan = ethtool_channels_get(ys, gchan); + ethtool_channels_get_req_free(gchan); + if (!chan) { + fprintf(stderr, "YNL get channels: %s\n", ys->err.msg); + ret = -1; + goto exit_close_sock; + } + + schan = ethtool_channels_set_req_alloc(); + if (!schan) { + ret = -1; + goto exit_free_chan; + } + + ethtool_channels_set_req_set_header_dev_index(schan, ifindex); + + if (chan->_present.combined_count) { + if (chan->_present.rx_count || chan->_present.tx_count) { + ethtool_channels_set_req_set_rx_count(schan, 0); + ethtool_channels_set_req_set_tx_count(schan, 0); + } + + if (rx == tx) { + ethtool_channels_set_req_set_combined_count(schan, rx); + } else if (rx > tx) { + ethtool_channels_set_req_set_combined_count(schan, tx); + ethtool_channels_set_req_set_rx_count(schan, rx - tx); + } else { + ethtool_channels_set_req_set_combined_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx - rx); + } + + } else if (chan->_present.rx_count) { + ethtool_channels_set_req_set_rx_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx); + } else { + fprintf(stderr, "Error: device has neither combined nor rx channels\n"); + ret = -1; + goto exit_free_schan; + } + + ret = ethtool_channels_set(ys, schan); + if (ret) { + fprintf(stderr, "YNL set channels: %s\n", ys->err.msg); + } else { + /* We were expecting a failure, go back to previous settings */ + ethtool_channels_set_req_set_combined_count(schan, + chan->combined_count); + ethtool_channels_set_req_set_rx_count(schan, chan->rx_count); + ethtool_channels_set_req_set_tx_count(schan, chan->tx_count); + + ret = ethtool_channels_set(ys, schan); + if (ret) + fprintf(stderr, "YNL un-setting channels: %s\n", + ys->err.msg); + } + +exit_free_schan: + ethtool_channels_set_req_free(schan); +exit_free_chan: + ethtool_channels_get_rsp_free(chan); +exit_close_sock: + ynl_sock_destroy(ys); + + return ret; } static int configure_flow_steering(struct sockaddr_in6 *server_sin) @@ -364,6 +646,7 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) const char *type = "tcp6"; const char *server_addr; char buf[40]; + int flow_id; inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); server_addr = buf; @@ -374,23 +657,22 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) } /* Try configure 5-tuple */ - if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", - ifname, - type, - client_ip ? "src-ip" : "", - client_ip ?: "", - server_addr, - client_ip ? "src-port" : "", - client_ip ? port : "", - port, start_queue)) + flow_id = ethtool_add_flow("flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d", + type, + client_ip ? "src-ip" : "", + client_ip ?: "", + server_addr, + client_ip ? "src-port" : "", + client_ip ? port : "", + port, start_queue); + if (flow_id < 0) { /* If that fails, try configure 3-tuple */ - if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2", - ifname, - type, - server_addr, - port, start_queue)) + flow_id = ethtool_add_flow("flow-type %s dst-ip %s dst-port %s queue %d", + type, server_addr, port, start_queue); + if (flow_id < 0) /* If that fails, return error */ return -1; + } return 0; } @@ -405,6 +687,7 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, *ys = ynl_sock_create(&ynl_netdev_family, &yerr); if (!*ys) { + netdev_queue_id_free(queues); fprintf(stderr, "YNL: %s\n", yerr.msg); return -1; } @@ -483,18 +766,24 @@ err_close: return -1; } -static void enable_reuseaddr(int fd) +static int enable_reuseaddr(int fd) { int opt = 1; int ret; ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX); + if (ret) { + pr_err("SO_REUSEPORT failed"); + return -1; + } ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); - if (ret) - error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX); + if (ret) { + pr_err("SO_REUSEADDR failed"); + return -1; + } + + return 0; } static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) @@ -537,6 +826,7 @@ static struct netdev_queue_id *create_queues(void) static int do_server(struct memory_buffer *mem) { + struct ethtool_rings_get_rsp *ring_config; char ctrl_data[sizeof(int) * 20000]; size_t non_page_aligned_frags = 0; struct sockaddr_in6 client_addr; @@ -548,54 +838,72 @@ static int do_server(struct memory_buffer *mem) char *tmp_mem = NULL; struct ynl_sock *ys; char iobuf[819200]; + int ret, err = -1; char buffer[256]; int socket_fd; int client_fd; - int ret; ret = parse_address(server_ip, atoi(port), &server_sin); - if (ret < 0) - error(1, 0, "parse server address"); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } - if (reset_flow_steering()) - error(1, 0, "Failed to reset flow steering\n"); + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + return -1; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to enable TCP header split\n"); + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to enable TCP header split"); + goto err_free_ring_config; + } /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "Failed to configure rss\n"); + if (configure_rss()) { + pr_err("Failed to configure rss"); + goto err_reset_headersplit; + } /* Flow steer our devmem flows to start_queue */ - if (configure_flow_steering(&server_sin)) - error(1, 0, "Failed to configure flow steering\n"); - - sleep(1); + if (configure_flow_steering(&server_sin)) { + pr_err("Failed to configure flow steering"); + goto err_reset_rss; + } - if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_flow_steering; + } tmp_mem = malloc(mem->size); if (!tmp_mem) - error(1, ENOMEM, "malloc failed"); + goto err_unbind; socket_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (socket_fd < 0) - error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); + if (socket_fd < 0) { + pr_err("Failed to create socket"); + goto err_free_tmp; + } - enable_reuseaddr(socket_fd); + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; fprintf(stderr, "binding to address %s:%d\n", server_ip, ntohs(server_sin.sin6_port)); ret = bind(socket_fd, &server_sin, sizeof(server_sin)); - if (ret) - error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); + if (ret) { + pr_err("Failed to bind"); + goto err_close_socket; + } ret = listen(socket_fd, 1); - if (ret) - error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); + if (ret) { + pr_err("Failed to listen"); + goto err_close_socket; + } client_addr_len = sizeof(client_addr); @@ -604,6 +912,10 @@ static int do_server(struct memory_buffer *mem) fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, ntohs(server_sin.sin6_port)); client_fd = accept(socket_fd, &client_addr, &client_addr_len); + if (client_fd < 0) { + pr_err("Failed to accept"); + goto err_close_socket; + } inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, sizeof(buffer)); @@ -631,10 +943,15 @@ static int do_server(struct memory_buffer *mem) continue; if (ret < 0) { perror("recvmsg"); + if (errno == EFAULT) { + pr_err("received EFAULT, won't recover"); + goto err_close_client; + } continue; } if (ret == 0) { - fprintf(stderr, "client exited\n"); + errno = 0; + pr_err("client exited"); goto cleanup; } @@ -672,9 +989,10 @@ static int do_server(struct memory_buffer *mem) dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, total_received, dmabuf_cmsg->dmabuf_id); - if (dmabuf_cmsg->dmabuf_id != dmabuf_id) - error(1, 0, - "received on wrong dmabuf_id: flow steering error\n"); + if (dmabuf_cmsg->dmabuf_id != dmabuf_id) { + pr_err("received on wrong dmabuf_id: flow steering error"); + goto err_close_client; + } if (dmabuf_cmsg->frag_size % getpagesize()) non_page_aligned_frags++; @@ -685,22 +1003,27 @@ static int do_server(struct memory_buffer *mem) dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size); - if (do_validation) - validate_buffer(tmp_mem, - dmabuf_cmsg->frag_size); - else + if (do_validation) { + if (validate_buffer(tmp_mem, + dmabuf_cmsg->frag_size)) + goto err_close_client; + } else { print_nonzero_bytes(tmp_mem, dmabuf_cmsg->frag_size); + } ret = setsockopt(client_fd, SOL_SOCKET, SO_DEVMEM_DONTNEED, &token, sizeof(token)); - if (ret != 1) - error(1, 0, - "SO_DEVMEM_DONTNEED not enough tokens"); + if (ret != 1) { + pr_err("SO_DEVMEM_DONTNEED not enough tokens"); + goto err_close_client; + } + } + if (!is_devmem) { + pr_err("flow steering error"); + goto err_close_client; } - if (!is_devmem) - error(1, 0, "flow steering error\n"); fprintf(stderr, "total_received=%lu\n", total_received); } @@ -711,54 +1034,121 @@ static int do_server(struct memory_buffer *mem) page_aligned_frags, non_page_aligned_frags); cleanup: + err = 0; - free(tmp_mem); +err_close_client: close(client_fd); +err_close_socket: close(socket_fd); +err_free_tmp: + free(tmp_mem); +err_unbind: ynl_sock_destroy(ys); - - return 0; +err_reset_flow_steering: + reset_flow_steering(); +err_reset_rss: + reset_rss(); +err_reset_headersplit: + restore_ring_config(ring_config); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); + return err; } -void run_devmem_tests(void) +int run_devmem_tests(void) { + struct ethtool_rings_get_rsp *ring_config; + struct netdev_queue_id *queues; struct memory_buffer *mem; struct ynl_sock *ys; + int err = -1; mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return -1; + } + + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + goto err_free_mem; + } /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "rss error\n"); + if (configure_rss()) { + pr_err("rss error"); + goto err_free_ring_config; + } + + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_rss; + } + + queues = netdev_queue_id_alloc(num_queues); + if (!queues) { + pr_err("Failed to allocate empty queues array"); + goto err_reset_headersplit; + } + + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Binding empty queues array should have failed"); + goto err_unbind; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); + if (configure_headersplit(ring_config, 0)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } - if (!bind_rx_queue(ifindex, mem->fd, - calloc(num_queues, sizeof(struct netdev_queue_id)), - num_queues, &ys)) - error(1, 0, "Binding empty queues array should have failed\n"); + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } - if (configure_headersplit(0)) - error(1, 0, "Failed to configure header split\n"); + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Configure dmabuf with header split off should have failed"); + goto err_unbind; + } - if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Configure dmabuf with header split off should have failed\n"); + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to configure header split"); + goto err_reset_headersplit; + } - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); + queues = create_queues(); + if (!queues) { + pr_err("Failed to create queues"); + goto err_reset_headersplit; + } - if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) { + pr_err("Failed to bind"); + goto err_reset_headersplit; + } /* Deactivating a bound queue should not be legal */ - if (!configure_channels(num_queues, num_queues - 1)) - error(1, 0, "Deactivating a bound queue should be illegal.\n"); + if (!check_changing_channels(num_queues, num_queues)) { + pr_err("Deactivating a bound queue should be illegal"); + goto err_unbind; + } - /* Closing the netlink socket does an implicit unbind */ - ynl_sock_destroy(ys); + err = 0; + goto err_unbind; +err_unbind: + ynl_sock_destroy(ys); +err_reset_headersplit: + restore_ring_config(ring_config); +err_reset_rss: + reset_rss(); +err_free_ring_config: + ethtool_rings_get_rsp_free(ring_config); +err_free_mem: provider->free(mem); + return err; } static uint64_t gettimeofday_ms(void) @@ -778,13 +1168,15 @@ static int do_poll(int fd) pfd.fd = fd; ret = poll(&pfd, 1, waittime_ms); - if (ret == -1) - error(1, errno, "poll"); + if (ret == -1) { + pr_err("poll"); + return -1; + } return ret && (pfd.revents & POLLERR); } -static void wait_compl(int fd) +static int wait_compl(int fd) { int64_t tstop = gettimeofday_ms() + waittime_ms; char control[CMSG_SPACE(100)] = {}; @@ -798,18 +1190,23 @@ static void wait_compl(int fd) msg.msg_controllen = sizeof(control); while (gettimeofday_ms() < tstop) { - if (!do_poll(fd)) + ret = do_poll(fd); + if (ret < 0) + return ret; + if (!ret) continue; ret = recvmsg(fd, &msg, MSG_ERRQUEUE); if (ret < 0) { if (errno == EAGAIN) continue; - error(1, errno, "recvmsg(MSG_ERRQUEUE)"); - return; + pr_err("recvmsg(MSG_ERRQUEUE)"); + return -1; + } + if (msg.msg_flags & MSG_CTRUNC) { + pr_err("MSG_CTRUNC"); + return -1; } - if (msg.msg_flags & MSG_CTRUNC) - error(1, 0, "MSG_CTRUNC\n"); for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_IP && @@ -823,20 +1220,25 @@ static void wait_compl(int fd) continue; serr = (void *)CMSG_DATA(cm); - if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) - error(1, 0, "wrong origin %u", serr->ee_origin); - if (serr->ee_errno != 0) - error(1, 0, "wrong errno %d", serr->ee_errno); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { + pr_err("wrong origin %u", serr->ee_origin); + return -1; + } + if (serr->ee_errno != 0) { + pr_err("wrong errno %d", serr->ee_errno); + return -1; + } hi = serr->ee_data; lo = serr->ee_info; fprintf(stderr, "tx complete [%d,%d]\n", lo, hi); - return; + return 0; } } - error(1, 0, "did not receive tx completion"); + pr_err("did not receive tx completion"); + return -1; } static int do_client(struct memory_buffer *mem) @@ -850,50 +1252,69 @@ static int do_client(struct memory_buffer *mem) ssize_t line_size = 0; struct cmsghdr *cmsg; char *line = NULL; + int ret, err = -1; size_t len = 0; int socket_fd; __u32 ddmabuf; int opt = 1; - int ret; ret = parse_address(server_ip, atoi(port), &server_sin); - if (ret < 0) - error(1, 0, "parse server address"); + if (ret < 0) { + pr_err("parse server address"); + return -1; + } + + if (client_ip) { + ret = parse_address(client_ip, atoi(port), &client_sin); + if (ret < 0) { + pr_err("parse client address"); + return ret; + } + } socket_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (socket_fd < 0) - error(1, socket_fd, "create socket"); + if (socket_fd < 0) { + pr_err("create socket"); + return -1; + } - enable_reuseaddr(socket_fd); + if (enable_reuseaddr(socket_fd)) + goto err_close_socket; ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname, strlen(ifname) + 1); - if (ret) - error(1, errno, "bindtodevice"); + if (ret) { + pr_err("bindtodevice"); + goto err_close_socket; + } - if (bind_tx_queue(ifindex, mem->fd, &ys)) - error(1, 0, "Failed to bind\n"); + if (bind_tx_queue(ifindex, mem->fd, &ys)) { + pr_err("Failed to bind"); + goto err_close_socket; + } if (client_ip) { - ret = parse_address(client_ip, atoi(port), &client_sin); - if (ret < 0) - error(1, 0, "parse client address"); - ret = bind(socket_fd, &client_sin, sizeof(client_sin)); - if (ret) - error(1, errno, "bind"); + if (ret) { + pr_err("bind"); + goto err_unbind; + } } ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt)); - if (ret) - error(1, errno, "set sock opt"); + if (ret) { + pr_err("set sock opt"); + goto err_unbind; + } fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip, ntohs(server_sin.sin6_port), ifname); ret = connect(socket_fd, &server_sin, sizeof(server_sin)); - if (ret) - error(1, errno, "connect"); + if (ret) { + pr_err("connect"); + goto err_unbind; + } while (1) { free(line); @@ -906,10 +1327,11 @@ static int do_client(struct memory_buffer *mem) if (max_chunk) { msg.msg_iovlen = (line_size + max_chunk - 1) / max_chunk; - if (msg.msg_iovlen > MAX_IOV) - error(1, 0, - "can't partition %zd bytes into maximum of %d chunks", - line_size, MAX_IOV); + if (msg.msg_iovlen > MAX_IOV) { + pr_err("can't partition %zd bytes into maximum of %d chunks", + line_size, MAX_IOV); + goto err_free_line; + } for (int i = 0; i < msg.msg_iovlen; i++) { iov[i].iov_base = (void *)(i * max_chunk); @@ -940,34 +1362,40 @@ static int do_client(struct memory_buffer *mem) *((__u32 *)CMSG_DATA(cmsg)) = ddmabuf; ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY); - if (ret < 0) - error(1, errno, "Failed sendmsg"); + if (ret < 0) { + pr_err("Failed sendmsg"); + goto err_free_line; + } fprintf(stderr, "sendmsg_ret=%d\n", ret); - if (ret != line_size) - error(1, errno, "Did not send all bytes %d vs %zd", ret, - line_size); + if (ret != line_size) { + pr_err("Did not send all bytes %d vs %zd", ret, line_size); + goto err_free_line; + } - wait_compl(socket_fd); + if (wait_compl(socket_fd)) + goto err_free_line; } fprintf(stderr, "%s: tx ok\n", TEST_PREFIX); + err = 0; + +err_free_line: free(line); +err_unbind: + ynl_sock_destroy(ys); +err_close_socket: close(socket_fd); - - if (ys) - ynl_sock_destroy(ys); - - return 0; + return err; } int main(int argc, char *argv[]) { struct memory_buffer *mem; int is_server = 0, opt; - int ret; + int ret, err = 1; while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) { switch (opt) { @@ -1004,8 +1432,10 @@ int main(int argc, char *argv[]) } } - if (!ifname) - error(1, 0, "Missing -f argument\n"); + if (!ifname) { + pr_err("Missing -f argument"); + return 1; + } ifindex = if_nametoindex(ifname); @@ -1014,33 +1444,41 @@ int main(int argc, char *argv[]) if (!server_ip && !client_ip) { if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); - if (num_queues < 0) - error(1, 0, "couldn't detect number of queues\n"); - if (num_queues < 2) - error(1, 0, - "number of device queues is too low\n"); + if (num_queues < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } /* make sure can bind to multiple queues */ start_queue = num_queues / 2; num_queues /= 2; } - if (start_queue < 0 || num_queues < 0) - error(1, 0, "Both -t and -q are required\n"); + if (start_queue < 0 || num_queues < 0) { + pr_err("Both -t and -q are required"); + return 1; + } - run_devmem_tests(); - return 0; + return run_devmem_tests(); } if (start_queue < 0 && num_queues < 0) { num_queues = rxq_num(ifindex); - if (num_queues < 2) - error(1, 0, "number of device queues is too low\n"); + if (num_queues < 2) { + pr_err("number of device queues is too low"); + return 1; + } num_queues = 1; start_queue = rxq_num(ifindex) - num_queues; - if (start_queue < 0) - error(1, 0, "couldn't detect number of queues\n"); + if (start_queue < 0) { + pr_err("couldn't detect number of queues"); + return 1; + } fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues); } @@ -1048,21 +1486,39 @@ int main(int argc, char *argv[]) for (; optind < argc; optind++) fprintf(stderr, "extra arguments: %s\n", argv[optind]); - if (start_queue < 0) - error(1, 0, "Missing -t argument\n"); + if (start_queue < 0) { + pr_err("Missing -t argument"); + return 1; + } - if (num_queues < 0) - error(1, 0, "Missing -q argument\n"); + if (num_queues < 0) { + pr_err("Missing -q argument"); + return 1; + } - if (!server_ip) - error(1, 0, "Missing -s argument\n"); + if (!server_ip) { + pr_err("Missing -s argument"); + return 1; + } - if (!port) - error(1, 0, "Missing -p argument\n"); + if (!port) { + pr_err("Missing -p argument"); + return 1; + } mem = provider->alloc(getpagesize() * NUM_PAGES); + if (!mem) { + pr_err("Failed to allocate memory buffer"); + return 1; + } + ret = is_server ? do_server(mem) : do_client(mem); - provider->free(mem); + if (ret) + goto err_free_mem; - return ret; + err = 0; + +err_free_mem: + provider->free(mem); + return err; } diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py new file mode 100755 index 000000000000..c1e943d53f19 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests related to configuration of HW timestamping +""" + +import errno +from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx +from lib.py import NetDrvEnv, EthtoolFamily, NlError + + +def __get_hwtimestamp_support(cfg): + """ Retrieve supported configuration information """ + + try: + tsinfo = cfg.ethnl.tsinfo_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported") from e + raise + + ctx = {} + tx = tsinfo.get('tx-types', {}) + rx = tsinfo.get('rx-filters', {}) + + bits = tx.get('bits', {}) + ctx['tx'] = bits.get('bit', []) + bits = rx.get('bits', {}) + ctx['rx'] = bits.get('bit', []) + return ctx + + +def __get_hwtimestamp_config(cfg): + """ Retrieve current TS configuration information """ + + try: + tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return tscfg + + +def __set_hwtimestamp_config(cfg, ts): + """ Setup new TS configuration information """ + + ts['header'] = {'dev-name': cfg.ifname} + try: + res = cfg.ethnl.tsconfig_set(ts) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via netlink") from e + raise + return res + + +def test_hwtstamp_tx(cfg): + """ + Test TX timestamp configuration. + The driver should apply provided config and report back proper state. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + tx = ts['tx'] + for t in tx: + tscfg = orig_tscfg + tscfg['tx-types']['bits']['bit'] = [t] + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + ksft_eq(res['tx-types']['bits']['bit'], [t]) + __set_hwtimestamp_config(cfg, orig_tscfg) + + +def test_hwtstamp_rx(cfg): + """ + Test RX timestamp configuration. + The filter configuration is taken from the list of supported filters. + The driver should apply the config without error and report back proper state. + Some extension of the timestamping scope is allowed for PTP filters. + """ + + orig_tscfg = __get_hwtimestamp_config(cfg) + ts = __get_hwtimestamp_support(cfg) + rx = ts['rx'] + for r in rx: + tscfg = orig_tscfg + tscfg['rx-filters']['bits']['bit'] = [r] + res = __set_hwtimestamp_config(cfg, tscfg) + if res is None: + res = __get_hwtimestamp_config(cfg) + if r['index'] == 0 or r['index'] == 1: + ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + else: + # the driver can fallback to some value which has higher coverage for timestamping + ksft_ge(res['rx-filters']['bits']['bit'][0]['index'], r['index']) + __set_hwtimestamp_config(cfg, orig_tscfg) + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py index ad192fef3117..2a51b60df8a1 100755 --- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -1,8 +1,13 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +""" +Test driver resilience vs page pool allocation failures. +""" + import errno import time +import math import os from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import KsftSkipEx, KsftFailEx @@ -13,7 +18,8 @@ from lib.py import cmd, tool, GenerateTraffic def _write_fail_config(config): for key, value in config.items(): - with open("/sys/kernel/debug/fail_function/" + key, "w") as fp: + path = "/sys/kernel/debug/fail_function/" + with open(path + key, "w", encoding='ascii') as fp: fp.write(str(value) + "\n") @@ -22,8 +28,7 @@ def _enable_pp_allocation_fail(): raise KsftSkipEx("Kernel built without function error injection (or DebugFS)") if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): - with open("/sys/kernel/debug/fail_function/inject", "w") as fp: - fp.write("page_pool_alloc_netmems\n") + _write_fail_config({"inject": "page_pool_alloc_netmems"}) _write_fail_config({ "verbose": 0, @@ -38,8 +43,7 @@ def _disable_pp_allocation_fail(): return if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): - with open("/sys/kernel/debug/fail_function/inject", "w") as fp: - fp.write("\n") + _write_fail_config({"inject": ""}) _write_fail_config({ "probability": 0, @@ -48,6 +52,10 @@ def _disable_pp_allocation_fail(): def test_pp_alloc(cfg, netdevnl): + """ + Configure page pool allocation fail injection while traffic is running. + """ + def get_stats(): return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] @@ -55,7 +63,7 @@ def test_pp_alloc(cfg, netdevnl): stat1 = get_stats() time.sleep(1) stat2 = get_stats() - if stat2['rx-packets'] - stat1['rx-packets'] < 15000: + if stat2['rx-packets'] - stat1['rx-packets'] < 4000: raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets']) @@ -82,11 +90,16 @@ def test_pp_alloc(cfg, netdevnl): time.sleep(3) s2 = get_stats() - if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 1: + seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail'] + if seen_fails < 1: raise KsftSkipEx("Allocation failures not increasing") - if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 100: - raise KsftSkipEx("Allocation increasing too slowly", s2['rx-alloc-fail'] - s1['rx-alloc-fail'], - "packets:", s2['rx-packets'] - s1['rx-packets']) + pkts = s2['rx-packets'] - s1['rx-packets'] + # Expecting one failure per 512 buffers, 3.1x safety margin + want_fails = math.floor(pkts / 512 / 3.1) + if seen_fails < want_fails: + raise KsftSkipEx("Allocation increasing too slowly", seen_fails, + "packets:", pkts) + ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})") # Basic failures are fine, try to wobble some settings to catch extra failures check_traffic_flowing() @@ -105,7 +118,7 @@ def test_pp_alloc(cfg, netdevnl): else: ksft_pr("ethtool -G change retval: did not succeed", new_g) else: - ksft_pr("ethtool -G change retval: did not try") + ksft_pr("ethtool -G change retval: did not try") time.sleep(0.1) check_traffic_flowing() @@ -119,6 +132,7 @@ def test_pp_alloc(cfg, netdevnl): def main() -> None: + """ Ksft boiler plate main """ netdevnl = NetdevFamily() with NetDrvEpEnv(__file__, nsim_test=False) as cfg: diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 7bb552f8b182..ed7e405682f0 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -118,7 +118,7 @@ def test_rss_key_indir(cfg): qcnt = len(_get_rx_cnts(cfg)) if qcnt < 3: - KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") + raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") data = get_rss(cfg) want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] @@ -178,8 +178,13 @@ def test_rss_key_indir(cfg): cnts = _get_rx_cnts(cfg) GenerateTraffic(cfg).wait_pkts_and_stop(20000) cnts = _get_rx_cnts(cfg, prev=cnts) - # First two queues get less traffic than all the rest - ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts)) + if qcnt > 4: + # First two queues get less traffic than all the rest + ksft_lt(sum(cnts[:2]), sum(cnts[2:]), + "traffic distributed: " + str(cnts)) + else: + # When queue count is low make sure third queue got significant pkts + ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts)) def test_rss_queue_reconfigure(cfg, main_ctx=True): @@ -335,19 +340,20 @@ def test_hitless_key_update(cfg): data = get_rss(cfg) key_len = len(data['rss-hash-key']) - key = _rss_key_rand(key_len) + ethnl = EthtoolFamily() + key = random.randbytes(key_len) tgen = GenerateTraffic(cfg) try: errors0, carrier0 = get_drop_err_sum(cfg) t0 = datetime.datetime.now() - ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, "hkey": key}) t1 = datetime.datetime.now() errors1, carrier1 = get_drop_err_sum(cfg) finally: tgen.wait_pkts_and_stop(5000) - ksft_lt((t1 - t0).total_seconds(), 0.2) + ksft_lt((t1 - t0).total_seconds(), 0.15) ksft_eq(errors1 - errors1, 0) ksft_eq(carrier1 - carrier0, 0) diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py new file mode 100755 index 000000000000..6fa95fe27c47 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for RSS hashing on IPv6 Flow Label. +""" + +import glob +import os +import socket +from lib.py import CmdExitFailure +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \ + ksft_not_in, ksft_raises, KsftSkipEx +from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port +from lib.py import NetDrvEpEnv + + +def _check_system(cfg): + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + + for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt", + f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]: + try: + with open(f, 'r') as fp: + setting = fp.read().strip() + # CPU mask will be zeros and commas + if setting.replace("0", "").replace(",", ""): + raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}") + except FileNotFoundError: + pass + + # 1 is the default, if someone changed it we probably shouldn"t mess with it + af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout + if af.strip() != "1": + raise KsftSkipEx("Remote does not have auto_flowlabels enabled") + + +def _ethtool_get_cfg(cfg, fl_type): + descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + "IPv6 Flow Label": "l", + } + + ret = "" + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + ret += converter[line] + return ret + + +def _traffic(cfg, one_sock, one_cpu): + local_port = rand_port(socket.SOCK_DGRAM) + remote_port = rand_port(socket.SOCK_DGRAM) + + sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v["6"], 0)) + if one_sock: + send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \ + "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-" + else: + send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \ + f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done" + + cpus = set() + with bkg(send, shell=True, host=cfg.remote, exit_wait=True): + for _ in range(20): + fd_read_timeout(sock.fileno(), 1) + cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + cpus.add(cpu) + + if one_cpu: + ksft_eq(len(cpus), 1, + f"{one_sock=} - expected one CPU, got traffic on: {cpus=}") + else: + ksft_ge(len(cpus), 2, + f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}") + + +def test_rss_flow_label(cfg): + """ + Test hashing on IPv6 flow label. Send traffic over a single socket + and over multiple sockets. Depend on the remote having auto-label + enabled so that it randomizes the label per socket. + """ + + cfg.require_ipver("6") + cfg.require_cmd("socat", remote=True) + _check_system(cfg) + + # Enable flow label hashing for UDP6 + initial = _ethtool_get_cfg(cfg, "udp6") + no_lbl = initial.replace("l", "") + if "l" not in initial: + try: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}") + except CmdExitFailure as exc: + raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc + + defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _traffic(cfg, one_sock=True, one_cpu=True) + _traffic(cfg, one_sock=False, one_cpu=False) + + # Disable it, we should see no hashing (reset was already defer()ed) + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}") + + _traffic(cfg, one_sock=False, one_cpu=True) + + +def _check_v4_flow_types(cfg): + for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]: + try: + cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + ksft_not_in("Flow Label", cur, + comment=f"{fl_type=} has Flow Label:" + cur) + except CmdExitFailure: + # Probably does not support this flow type + pass + + +def test_rss_flow_label_6only(cfg): + """ + Test interactions with IPv4 flow types. It should not be possible to set + IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also + not appear in the IPv4 "current config". + """ + + with ksft_raises(CmdExitFailure) as cm: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl") + ksft_in("Invalid argument", cm.exception.cmd.stderr) + + _check_v4_flow_types(cfg) + + # Try to enable Flow Labels and check again, in case it leaks thru + initial = _ethtool_get_cfg(cfg, "udp6") + changed = initial.replace("l", "") if "l" in initial else initial + "l" + + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}") + restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _check_v4_flow_types(cfg) + restore.exec() + _check_v4_flow_types(cfg) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + ksft_run([test_rss_flow_label, + test_rss_flow_label_6only], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index c13dd5efa27a..0998e68ebaf0 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -60,16 +60,17 @@ def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): sock_wait_drain(sock) qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - # No math behind the 10 here, but try to catch cases where - # TCP falls back to non-LSO. - ksft_lt(tcp_sock_get_retrans(sock), 10) - sock.close() - # Check that at least 90% of the data was sent as LSO packets. # System noise may cause false negatives. Also header overheads # will add up to 5% of extra packes... The check is best effort. total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"] total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"] + + # Make sure we have order of magnitude more LSO packets than + # retransmits, in case TCP retransmitted all the LSO packets. + ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4) + sock.close() + if should_lso: if cfg.have_stat_super_count: ksft_ge(qstat_new['tx-hw-gso-packets'] - diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 8711c67ad658..e6c070f32f51 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -1,5 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 +""" +Driver test environment. +NetDrvEnv and NetDrvEpEnv are the main environment classes. +Former is for local host only tests, latter creates / connects +to a remote endpoint. See NIPA wiki for more information about +running and writing driver tests. +""" + import sys from pathlib import Path @@ -8,26 +16,39 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve() try: sys.path.append(KSFT_DIR.as_posix()) - from net.lib.py import * - # Import one by one to avoid pylint false positives + from net.lib.py import NetNS, NetNSEnter, NetdevSimDev from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ - NlError, RtnlFamily, DevlinkFamily + NlError, RtnlFamily, DevlinkFamily, PSPFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ - fd_read_timeout, ip, rand_port, tool, wait_port_listen - from net.lib.py import fd_read_timeout + fd_read_timeout, ip, rand_port, tool, wait_port_listen, wait_file from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ ksft_setup from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ - ksft_ne, ksft_not_in, ksft_raises, ksft_true + ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none + + __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", + "EthtoolFamily", "NetdevFamily", "NetshaperFamily", + "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", + "CmdExitFailure", + "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", + "fd_read_timeout", "ip", "rand_port", "tool", + "wait_port_listen", "wait_file", + "KsftSkipEx", "KsftFailEx", "KsftXfailEx", + "ksft_disruptive", "ksft_exit", "ksft_pr", "ksft_run", + "ksft_setup", + "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", + "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", + "ksft_not_none", "ksft_not_none"] + + from .env import NetDrvEnv, NetDrvEpEnv + from .load import GenerateTraffic + from .remote import Remote + + __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote"] except ModuleNotFoundError as e: - ksft_pr("Failed importing `net` library from kernel sources") - ksft_pr(str(e)) - ktap_result(True, comment="SKIP") + print("Failed importing `net` library from kernel sources") + print(str(e)) sys.exit(4) - -from .env import * -from .load import * -from .remote import Remote diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 1b8bd648048f..01be3d9b9720 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -4,7 +4,7 @@ import os import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx -from lib.py import ksft_setup +from lib.py import ksft_setup, wait_file from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -25,6 +25,9 @@ class NetDrvEnvBase: self.env = self._load_env_file() + # Following attrs must be set be inheriting classes + self.dev = None + def _load_env_file(self): env = os.environ.copy() @@ -48,6 +51,22 @@ class NetDrvEnvBase: env[pair[0]] = pair[1] return ksft_setup(env) + def __del__(self): + pass + + def __enter__(self): + ip(f"link set dev {self.dev['ifname']} up") + wait_file(f"/sys/class/net/{self.dev['ifname']}/carrier", + lambda x: x.strip() == "1") + + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + """ + __exit__ gets called at the end of a "with" block. + """ + self.__del__() + class NetDrvEnv(NetDrvEnvBase): """ @@ -72,17 +91,6 @@ class NetDrvEnv(NetDrvEnvBase): self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] - def __enter__(self): - ip(f"link set dev {self.dev['ifname']} up") - - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. - """ - self.__del__() - def __del__(self): if self._ns: self._ns.remove() @@ -219,15 +227,6 @@ class NetDrvEpEnv(NetDrvEnvBase): raise Exception("Can't resolve remote interface name, multiple interfaces match") return v6[0]["ifname"] if v6 else v4[0]["ifname"] - def __enter__(self): - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. - """ - self.__del__() - def __del__(self): if self._ns: self._ns.remove() @@ -246,6 +245,10 @@ class NetDrvEpEnv(NetDrvEnvBase): if not self.addr_v[ipver] or not self.remote_addr_v[ipver]: raise KsftSkipEx(f"Test requires IPv{ipver} connectivity") + def require_nsim(self): + if self._ns is None: + raise KsftXfailEx("Test only works on netdevsim") + def _require_cmd(self, comm, key, host=None): cached = self._required_cmd.get(comm, {}) if cached.get(key) is None: diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index b6071e80ebbb..8e1085e89647 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -148,12 +148,20 @@ function create_dynamic_target() { # Generate the command line argument for netconsole following: # netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr] function create_cmdline_str() { + local BINDMODE=${1:-"ifname"} + if [ "${BINDMODE}" == "ifname" ] + then + SRCDEV=${SRCIF} + else + SRCDEV=$(mac_get "${SRCIF}") + fi + DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') SRCPORT="1514" TGTPORT="6666" - echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\"" + echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCDEV},${TGTPORT}@${DSTIP}/${DSTMAC}\"" } # Do not append the release to the header of the message diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index 29a672c2270f..e212ad8ccef6 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -44,8 +44,7 @@ source $lib_dir/devlink_lib.sh h1_create() { - simple_if_init $h1 192.0.2.1/24 - defer simple_if_fini $h1 192.0.2.1/24 + adf_simple_if_init $h1 192.0.2.1/24 mtu_set $h1 10000 defer mtu_restore $h1 @@ -56,8 +55,7 @@ h1_create() h2_create() { - simple_if_init $h2 198.51.100.1/24 - defer simple_if_fini $h2 198.51.100.1/24 + adf_simple_if_init $h2 198.51.100.1/24 mtu_set $h2 10000 defer mtu_restore $h2 @@ -106,8 +104,7 @@ setup_prepare() # Reload to ensure devlink-trap settings are back to default. defer devlink_reload - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index d5b6f2cc9a29..9ca340c5f3a6 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -57,8 +57,7 @@ source qos_lib.sh h1_create() { - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 mtu_set $h1 10000 defer mtu_restore $h1 @@ -70,8 +69,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 10000 defer mtu_restore $h2 @@ -83,8 +81,7 @@ h2_create() h3_create() { - simple_if_init $h3 - defer simple_if_fini $h3 + adf_simple_if_init $h3 mtu_set $h3 10000 defer mtu_restore $h3 @@ -225,8 +222,7 @@ setup_prepare() h3mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh index 2b5d2c2751d5..a4a25637fe2a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh @@ -68,8 +68,7 @@ mlxsw_only_on_spectrum 2+ || exit h1_create() { - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 vlan_create $h1 111 v$h1 192.0.2.33/28 defer vlan_destroy $h1 111 @@ -78,8 +77,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 vlan_create $h2 111 v$h2 192.0.2.34/28 defer vlan_destroy $h2 111 @@ -178,8 +176,7 @@ setup_prepare() h2mac=$(mac_get $h2) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index cd4a5c21360c..d8f8ae8533cd 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -72,8 +72,7 @@ source qos_lib.sh h1_create() { - simple_if_init $h1 192.0.2.65/28 - defer simple_if_fini $h1 192.0.2.65/28 + adf_simple_if_init $h1 192.0.2.65/28 mtu_set $h1 10000 defer mtu_restore $h1 @@ -81,8 +80,7 @@ h1_create() h2_create() { - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 10000 defer mtu_restore $h2 @@ -94,8 +92,7 @@ h2_create() h3_create() { - simple_if_init $h3 192.0.2.66/28 - defer simple_if_fini $h3 192.0.2.66/28 + adf_simple_if_init $h3 192.0.2.66/28 mtu_set $h3 10000 defer mtu_restore $h3 @@ -196,8 +193,7 @@ setup_prepare() h3mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 537d6baa77b7..47d2ffcf366e 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -100,8 +100,7 @@ host_create() local dev=$1; shift local host=$1; shift - simple_if_init $dev - defer simple_if_fini $dev + adf_simple_if_init $dev mtu_set $dev 10000 defer mtu_restore $dev @@ -250,8 +249,7 @@ setup_prepare() h3_mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py index 9699a100a87d..f4be72b2145a 100755 --- a/tools/testing/selftests/drivers/net/napi_threaded.py +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -24,7 +24,8 @@ def _assert_napi_threaded_disabled(nl, napi_id) -> None: def _set_threaded_state(cfg, threaded) -> None: - cmd(f"echo {threaded} > /sys/class/net/{cfg.ifname}/threaded") + with open(f"/sys/class/net/{cfg.ifname}/threaded", "wb") as fp: + fp.write(str(threaded).encode('utf-8')) def _setup_deferred_cleanup(cfg) -> None: @@ -38,6 +39,34 @@ def _setup_deferred_cleanup(cfg) -> None: return combined +def napi_init(cfg, nl) -> None: + """ + Test that threaded state (in the persistent NAPI config) gets updated + even when NAPI with given ID is not allocated at the time. + """ + + qcnt = _setup_deferred_cleanup(cfg) + + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 0) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'disabled') + ksft_eq(napi.get('pid'), None) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'enabled') + ksft_ne(napi.get('pid'), None) + + def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: """ Test that when napi threaded is enabled at device level and @@ -103,7 +132,8 @@ def main() -> None: """ Ksft boiler plate main """ with NetDrvEnv(__file__, queue_count=2) as cfg: - ksft_run([change_num_queues, + ksft_run([napi_init, + change_num_queues, enable_dev_threaded_disable_napi_threaded], args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh index ad2fb8b1c463..d1d23dc67f99 100755 --- a/tools/testing/selftests/drivers/net/netcons_cmdline.sh +++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh @@ -19,9 +19,6 @@ check_netconsole_module modprobe netdevsim 2> /dev/null || true rmmod netconsole 2> /dev/null || true -# The content of kmsg will be save to the following file -OUTPUT_FILE="/tmp/${TARGET}" - # Check for basic system dependency and exit if not found # check_for_dependencies # Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) @@ -30,23 +27,39 @@ echo "6 5" > /proc/sys/kernel/printk trap do_cleanup EXIT # Create one namespace and two interfaces set_network -# Create the command line for netconsole, with the configuration from the -# function above -CMDLINE="$(create_cmdline_str)" - -# Load the module, with the cmdline set -modprobe netconsole "${CMDLINE}" - -# Listed for netconsole port inside the namespace and destination interface -listen_port_and_save_to "${OUTPUT_FILE}" & -# Wait for socat to start and listen to the port. -wait_local_port_listen "${NAMESPACE}" "${PORT}" udp -# Send the message -echo "${MSG}: ${TARGET}" > /dev/kmsg -# Wait until socat saves the file to disk -busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" -# Make sure the message was received in the dst part -# and exit -validate_msg "${OUTPUT_FILE}" + +# Run the test twice, with different cmdline parameters +for BINDMODE in "ifname" "mac" +do + echo "Running with bind mode: ${BINDMODE}" >&2 + # Create the command line for netconsole, with the configuration from + # the function above + CMDLINE=$(create_cmdline_str "${BINDMODE}") + + # The content of kmsg will be save to the following file + OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}" + + # Load the module, with the cmdline set + modprobe netconsole "${CMDLINE}" + + # Listed for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_msg "${OUTPUT_FILE}" + + # kill socat in case it is still running + pkill_socat + # Unload the module + rmmod netconsole + echo "${BINDMODE} : Test passed" >&2 +done exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index 07b7c46d3311..daf51113c827 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = devlink.sh \ +TEST_PROGS := \ + devlink.sh \ devlink_in_netns.sh \ devlink_trap.sh \ ethtool-coalesce.sh \ @@ -17,5 +18,6 @@ TEST_PROGS = devlink.sh \ psample.sh \ tc-mq-visibility.sh \ udp_tunnel_nic.sh \ +# end of TEST_PROGS include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py new file mode 100755 index 000000000000..4ae7a785ff10 --- /dev/null +++ b/tools/testing/selftests/drivers/net/psp.py @@ -0,0 +1,627 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Test suite for PSP capable drivers.""" + +import errno +import fcntl +import socket +import struct +import termios +import time + +from lib.py import defer +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_true, ksft_eq, ksft_ne, ksft_gt, ksft_raises +from lib.py import ksft_not_none +from lib.py import KsftSkipEx +from lib.py import NetDrvEpEnv, PSPFamily, NlError +from lib.py import bkg, rand_port, wait_port_listen + + +def _get_outq(s): + one = b'\0' * 4 + outq = fcntl.ioctl(s.fileno(), termios.TIOCOUTQ, one) + return struct.unpack("I", outq)[0] + + +def _send_with_ack(cfg, msg): + cfg.comm_sock.send(msg) + response = cfg.comm_sock.recv(4) + if response != b'ack\0': + raise RuntimeError("Unexpected server response", response) + + +def _remote_read_len(cfg): + cfg.comm_sock.send(b'read len\0') + return int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8')) + + +def _make_clr_conn(cfg, ipver=None): + _send_with_ack(cfg, b'conn clr\0') + remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr + s = socket.create_connection((remote_addr, cfg.comm_port), ) + return s + + +def _make_psp_conn(cfg, version=0, ipver=None): + _send_with_ack(cfg, b'conn psp\0' + struct.pack('BB', version, version)) + remote_addr = cfg.remote_addr_v[ipver] if ipver else cfg.remote_addr + s = socket.create_connection((remote_addr, cfg.comm_port), ) + return s + + +def _close_conn(cfg, s): + _send_with_ack(cfg, b'data close\0') + s.close() + + +def _close_psp_conn(cfg, s): + _close_conn(cfg, s) + + +def _spi_xchg(s, rx): + s.send(struct.pack('I', rx['spi']) + rx['key']) + tx = s.recv(4 + len(rx['key'])) + return { + 'spi': struct.unpack('I', tx[:4])[0], + 'key': tx[4:] + } + + +def _send_careful(cfg, s, rounds): + data = b'0123456789' * 200 + for i in range(rounds): + n = 0 + for _ in range(10): # allow 10 retries + try: + n += s.send(data[n:], socket.MSG_DONTWAIT) + if n == len(data): + break + except BlockingIOError: + time.sleep(0.05) + else: + rlen = _remote_read_len(cfg) + outq = _get_outq(s) + report = f'sent: {i * len(data) + n} remote len: {rlen} outq: {outq}' + raise RuntimeError(report) + + return len(data) * rounds + + +def _check_data_rx(cfg, exp_len): + read_len = -1 + for _ in range(30): + cfg.comm_sock.send(b'read len\0') + read_len = int(cfg.comm_sock.recv(1024)[:-1].decode('utf-8')) + if read_len == exp_len: + break + time.sleep(0.01) + ksft_eq(read_len, exp_len) + + +def _check_data_outq(s, exp_len, force_wait=False): + outq = 0 + for _ in range(10): + outq = _get_outq(s) + if not force_wait and outq == exp_len: + break + time.sleep(0.01) + ksft_eq(outq, exp_len) + +# +# Test case boiler plate +# + +def _init_psp_dev(cfg): + if not hasattr(cfg, 'psp_dev_id'): + # Figure out which local device we are testing against + for dev in cfg.pspnl.dev_get({}, dump=True): + if dev['ifindex'] == cfg.ifindex: + cfg.psp_info = dev + cfg.psp_dev_id = cfg.psp_info['id'] + break + else: + raise KsftSkipEx("No PSP devices found") + + # Enable PSP if necessary + cap = cfg.psp_info['psp-versions-cap'] + ena = cfg.psp_info['psp-versions-ena'] + if cap != ena: + cfg.pspnl.dev_set({'id': cfg.psp_dev_id, 'psp-versions-ena': cap}) + defer(cfg.pspnl.dev_set, {'id': cfg.psp_dev_id, + 'psp-versions-ena': ena }) + +# +# Test cases +# + +def dev_list_devices(cfg): + """ Dump all devices """ + _init_psp_dev(cfg) + + devices = cfg.pspnl.dev_get({}, dump=True) + + found = False + for dev in devices: + found |= dev['id'] == cfg.psp_dev_id + ksft_true(found) + + +def dev_get_device(cfg): + """ Get the device we intend to use """ + _init_psp_dev(cfg) + + dev = cfg.pspnl.dev_get({'id': cfg.psp_dev_id}) + ksft_eq(dev['id'], cfg.psp_dev_id) + + +def dev_get_device_bad(cfg): + """ Test getting device which doesn't exist """ + raised = False + try: + cfg.pspnl.dev_get({'id': 1234567}) + except NlError as e: + ksft_eq(e.nl_msg.error, -errno.ENODEV) + raised = True + ksft_true(raised) + + +def dev_rotate(cfg): + """ Test key rotation """ + _init_psp_dev(cfg) + + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + ksft_eq(rot['id'], cfg.psp_dev_id) + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + ksft_eq(rot['id'], cfg.psp_dev_id) + + +def dev_rotate_spi(cfg): + """ Test key rotation and SPI check """ + _init_psp_dev(cfg) + + top_a = top_b = 0 + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc_a = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + top_a = assoc_a['rx-key']['spi'] >> 31 + s.close() + rot = cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + ksft_eq(rot['id'], cfg.psp_dev_id) + assoc_b = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + top_b = assoc_b['rx-key']['spi'] >> 31 + s.close() + ksft_ne(top_a, top_b) + + +def assoc_basic(cfg): + """ Test creating associations """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + ksft_gt(assoc['rx-key']['spi'], 0) + ksft_eq(len(assoc['rx-key']['key']), 16) + + assoc = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + ksft_eq(len(assoc), 0) + s.close() + + +def assoc_bad_dev(cfg): + """ Test creating associations with bad device ID """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV) + + +def assoc_sk_only_conn(cfg): + """ Test creating associations based on socket """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + _close_conn(cfg, s) + + +def assoc_sk_only_mismatch(cfg): + """ Test creating associations based on socket (dev mismatch) """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_sk_only_mismatch_tx(cfg): + """ Test creating associations based on socket (dev mismatch) """ + _init_psp_dev(cfg) + + with _make_clr_conn(cfg) as s: + with ksft_raises(NlError) as cm: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": assoc['rx-key'], + "dev-id": cfg.psp_dev_id + 1234567, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_sk_only_unconn(cfg): + """ Test creating associations based on socket (unconnected, should fail) """ + _init_psp_dev(cfg) + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.extack['miss-type'], "dev-id") + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_version_mismatch(cfg): + """ Test creating associations where Rx and Tx PSP versions do not match """ + _init_psp_dev(cfg) + + versions = list(cfg.psp_info['psp-versions-cap']) + if len(versions) < 2: + raise KsftSkipEx("Not enough PSP versions supported by the device for the test") + + # Translate versions to integers + versions = [cfg.pspnl.consts["version"].entries[v].value for v in versions] + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + rx = cfg.pspnl.rx_assoc({"version": versions[0], + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + + for version in versions[1:]: + with ksft_raises(NlError) as cm: + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": rx['rx-key'], + "sock-fd": s.fileno()}) + the_exception = cm.exception + ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + + +def assoc_twice(cfg): + """ Test reusing Tx assoc for two sockets """ + _init_psp_dev(cfg) + + def rx_assoc_check(s): + assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(assoc['dev-id'], cfg.psp_dev_id) + ksft_gt(assoc['rx-key']['spi'], 0) + ksft_eq(len(assoc['rx-key']['key']), 16) + + return assoc + + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + assoc = rx_assoc_check(s) + tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s.fileno()}) + ksft_eq(len(tx), 0) + + # Use the same Tx assoc second time + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s2: + rx_assoc_check(s2) + tx = cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": assoc['rx-key'], + "sock-fd": s2.fileno()}) + ksft_eq(len(tx), 0) + + s.close() + + +def _data_basic_send(cfg, version, ipver): + """ Test basic data send """ + _init_psp_dev(cfg) + + # Version 0 is required by spec, don't let it skip + if version: + name = cfg.pspnl.consts["version"].entries_by_val[version].name + if name not in cfg.psp_info['psp-versions-cap']: + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + with ksft_raises(NlError) as cm: + cfg.pspnl.rx_assoc({"version": version, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP) + raise KsftSkipEx("PSP version not supported", name) + + s = _make_psp_conn(cfg, version, ipver) + + rx_assoc = cfg.pspnl.rx_assoc({"version": version, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _close_psp_conn(cfg, s) + + +def __bad_xfer_do(cfg, s, tx, version='hdr0-aes-gcm-128'): + # Make sure we accept the ACK for the SPI before we seal with the bad assoc + _check_data_outq(s, 0) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": version, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 20) + _check_data_outq(s, data_len, force_wait=True) + _check_data_rx(cfg, 0) + _close_psp_conn(cfg, s) + + +def data_send_bad_key(cfg): + """ Test send data with bad key """ + _init_psp_dev(cfg) + + s = _make_psp_conn(cfg) + + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + tx['key'] = (tx['key'][0] ^ 0xff).to_bytes(1, 'little') + tx['key'][1:] + __bad_xfer_do(cfg, s, tx) + + +def data_send_disconnect(cfg): + """ Test socket close after sending data """ + _init_psp_dev(cfg) + + with _make_psp_conn(cfg) as s: + assoc = cfg.pspnl.rx_assoc({"version": 0, + "sock-fd": s.fileno()}) + tx = _spi_xchg(s, assoc['rx-key']) + cfg.pspnl.tx_assoc({"version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + + s.shutdown(socket.SHUT_RDWR) + s.close() + + +def _data_mss_adjust(cfg, ipver): + _init_psp_dev(cfg) + + # First figure out what the MSS would be without any adjustments + s = _make_clr_conn(cfg, ipver) + s.send(b"0123456789abcdef" * 1024) + _check_data_rx(cfg, 16 * 1024) + mss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + _close_conn(cfg, s) + + s = _make_psp_conn(cfg, 0, ipver) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + rxmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, rxmss) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, txmss + 40) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _check_data_outq(s, 0) + + txmss = s.getsockopt(socket.IPPROTO_TCP, socket.TCP_MAXSEG) + ksft_eq(mss, txmss + 40) + finally: + _close_psp_conn(cfg, s) + + +def data_stale_key(cfg): + """ Test send on a double-rotated key """ + _init_psp_dev(cfg) + + s = _make_psp_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + rx = rx_assoc['rx-key'] + tx = _spi_xchg(s, rx) + + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": tx, + "sock-fd": s.fileno()}) + + data_len = _send_careful(cfg, s, 100) + _check_data_rx(cfg, data_len) + _check_data_outq(s, 0) + + cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + cfg.pspnl.key_rotate({"id": cfg.psp_dev_id}) + + s.send(b'0123456789' * 200) + _check_data_outq(s, 2000, force_wait=True) + finally: + _close_psp_conn(cfg, s) + + +def __nsim_psp_rereg(cfg): + # The PSP dev ID will change, remember what was there before + before = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)]) + + cfg._ns.nsims[0].dfs_write('psp_rereg', '1') + + after = set([x['id'] for x in cfg.pspnl.dev_get({}, dump=True)]) + + new_devs = list(after - before) + ksft_eq(len(new_devs), 1) + cfg.psp_dev_id = list(after - before)[0] + + +def removal_device_rx(cfg): + """ Test removing a netdev / PSD with active Rx assoc """ + + # We could technically devlink reload real devices, too + # but that kills the control socket. So test this on + # netdevsim only for now + cfg.require_nsim() + + s = _make_clr_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + ksft_not_none(rx_assoc) + + __nsim_psp_rereg(cfg) + finally: + _close_conn(cfg, s) + + +def removal_device_bi(cfg): + """ Test removing a netdev / PSD with active Rx/Tx assoc """ + + # We could technically devlink reload real devices, too + # but that kills the control socket. So test this on + # netdevsim only for now + cfg.require_nsim() + + s = _make_clr_conn(cfg) + try: + rx_assoc = cfg.pspnl.rx_assoc({"version": 0, + "dev-id": cfg.psp_dev_id, + "sock-fd": s.fileno()}) + cfg.pspnl.tx_assoc({"dev-id": cfg.psp_dev_id, + "version": 0, + "tx-key": rx_assoc['rx-key'], + "sock-fd": s.fileno()}) + __nsim_psp_rereg(cfg) + finally: + _close_conn(cfg, s) + + +def psp_ip_ver_test_builder(name, test_func, psp_ver, ipver): + """Build test cases for each combo of PSP version and IP version""" + def test_case(cfg): + cfg.require_ipver(ipver) + test_case.__name__ = f"{name}_v{psp_ver}_ip{ipver}" + test_func(cfg, psp_ver, ipver) + return test_case + + +def ipver_test_builder(name, test_func, ipver): + """Build test cases for each IP version""" + def test_case(cfg): + cfg.require_ipver(ipver) + test_case.__name__ = f"{name}_ip{ipver}" + test_func(cfg, ipver) + return test_case + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEpEnv(__file__) as cfg: + cfg.pspnl = PSPFamily() + + # Set up responder and communication sock + responder = cfg.remote.deploy("psp_responder") + + cfg.comm_port = rand_port() + srv = None + try: + with bkg(responder + f" -p {cfg.comm_port}", host=cfg.remote, + exit_wait=True) as srv: + wait_port_listen(cfg.comm_port, host=cfg.remote) + + cfg.comm_sock = socket.create_connection((cfg.remote_addr, + cfg.comm_port), + timeout=1) + + cases = [ + psp_ip_ver_test_builder( + "data_basic_send", _data_basic_send, version, ipver + ) + for version in range(0, 4) + for ipver in ("4", "6") + ] + cases += [ + ipver_test_builder("data_mss_adjust", _data_mss_adjust, ipver) + for ipver in ("4", "6") + ] + + ksft_run(cases=cases, globs=globals(), + case_pfx={"dev_", "data_", "assoc_", "removal_"}, + args=(cfg, )) + + cfg.comm_sock.send(b"exit\0") + cfg.comm_sock.close() + finally: + if srv and (srv.stdout or srv.stderr): + ksft_pr("") + ksft_pr(f"Responder logs ({srv.ret}):") + if srv and srv.stdout: + ksft_pr("STDOUT:\n# " + srv.stdout.strip().replace("\n", "\n# ")) + if srv and srv.stderr: + ksft_pr("STDERR:\n# " + srv.stderr.strip().replace("\n", "\n# ")) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/psp_responder.c b/tools/testing/selftests/drivers/net/psp_responder.c new file mode 100644 index 000000000000..f309e0d73cbf --- /dev/null +++ b/tools/testing/selftests/drivers/net/psp_responder.c @@ -0,0 +1,483 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <string.h> +#include <sys/poll.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <netinet/in.h> +#include <unistd.h> + +#include <ynl.h> + +#include "psp-user.h" + +#define dbg(msg...) \ +do { \ + if (opts->verbose) \ + fprintf(stderr, "DEBUG: " msg); \ +} while (0) + +static bool should_quit; + +struct opts { + int port; + int devid; + bool verbose; +}; + +enum accept_cfg { + ACCEPT_CFG_NONE = 0, + ACCEPT_CFG_CLEAR, + ACCEPT_CFG_PSP, +}; + +static struct { + unsigned char tx; + unsigned char rx; +} psp_vers; + +static int conn_setup_psp(struct ynl_sock *ys, struct opts *opts, int data_sock) +{ + struct psp_rx_assoc_rsp *rsp; + struct psp_rx_assoc_req *req; + struct psp_tx_assoc_rsp *tsp; + struct psp_tx_assoc_req *teq; + char info[300]; + int key_len; + ssize_t sz; + __u32 spi; + + dbg("create PSP connection\n"); + + // Rx assoc alloc + req = psp_rx_assoc_req_alloc(); + + psp_rx_assoc_req_set_sock_fd(req, data_sock); + psp_rx_assoc_req_set_version(req, psp_vers.rx); + + rsp = psp_rx_assoc(ys, req); + psp_rx_assoc_req_free(req); + + if (!rsp) { + perror("ERROR: failed to Rx assoc"); + return -1; + } + + // SPI exchange + key_len = rsp->rx_key._len.key; + memcpy(info, &rsp->rx_key.spi, sizeof(spi)); + memcpy(&info[sizeof(spi)], rsp->rx_key.key, key_len); + sz = sizeof(spi) + key_len; + + send(data_sock, info, sz, MSG_WAITALL); + psp_rx_assoc_rsp_free(rsp); + + sz = recv(data_sock, info, sz, MSG_WAITALL); + if (sz < 0) { + perror("ERROR: failed to read PSP key from sock"); + return -1; + } + memcpy(&spi, info, sizeof(spi)); + + // Setup Tx assoc + teq = psp_tx_assoc_req_alloc(); + + psp_tx_assoc_req_set_sock_fd(teq, data_sock); + psp_tx_assoc_req_set_version(teq, psp_vers.tx); + psp_tx_assoc_req_set_tx_key_spi(teq, spi); + psp_tx_assoc_req_set_tx_key_key(teq, &info[sizeof(spi)], key_len); + + tsp = psp_tx_assoc(ys, teq); + psp_tx_assoc_req_free(teq); + if (!tsp) { + perror("ERROR: failed to Tx assoc"); + return -1; + } + psp_tx_assoc_rsp_free(tsp); + + return 0; +} + +static void send_ack(int sock) +{ + send(sock, "ack", 4, MSG_WAITALL); +} + +static void send_err(int sock) +{ + send(sock, "err", 4, MSG_WAITALL); +} + +static void send_str(int sock, int value) +{ + char buf[128]; + int ret; + + ret = snprintf(buf, sizeof(buf), "%d", value); + send(sock, buf, ret + 1, MSG_WAITALL); +} + +static void +run_session(struct ynl_sock *ys, struct opts *opts, + int server_sock, int comm_sock) +{ + enum accept_cfg accept_cfg = ACCEPT_CFG_NONE; + struct pollfd pfds[3]; + size_t data_read = 0; + int data_sock = -1; + + while (true) { + bool race_close = false; + int nfds; + + memset(pfds, 0, sizeof(pfds)); + + pfds[0].fd = server_sock; + pfds[0].events = POLLIN; + + pfds[1].fd = comm_sock; + pfds[1].events = POLLIN; + + nfds = 2; + if (data_sock >= 0) { + pfds[2].fd = data_sock; + pfds[2].events = POLLIN; + nfds++; + } + + dbg(" ...\n"); + if (poll(pfds, nfds, -1) < 0) { + perror("poll"); + break; + } + + /* data sock */ + if (pfds[2].revents & POLLIN) { + char buf[8192]; + ssize_t n; + + n = recv(data_sock, buf, sizeof(buf), 0); + if (n <= 0) { + if (n < 0) + perror("data read"); + close(data_sock); + data_sock = -1; + dbg("data sock closed\n"); + } else { + data_read += n; + dbg("data read %zd\n", data_read); + } + } + + /* comm sock */ + if (pfds[1].revents & POLLIN) { + static char buf[4096]; + static ssize_t off; + bool consumed; + ssize_t n; + + n = recv(comm_sock, &buf[off], sizeof(buf) - off, 0); + if (n <= 0) { + if (n < 0) + perror("comm read"); + return; + } + + off += n; + n = off; + +#define __consume(sz) \ + ({ \ + if (n == (sz)) { \ + off = 0; \ + } else { \ + off -= (sz); \ + memmove(buf, &buf[(sz)], off); \ + } \ + }) + +#define cmd(_name) \ + ({ \ + ssize_t sz = sizeof(_name); \ + bool match = n >= sz && !memcmp(buf, _name, sz); \ + \ + if (match) { \ + dbg("command: " _name "\n"); \ + __consume(sz); \ + } \ + consumed |= match; \ + match; \ + }) + + do { + consumed = false; + + if (cmd("read len")) + send_str(comm_sock, data_read); + + if (cmd("data echo")) { + if (data_sock >= 0) + send(data_sock, "echo", 5, + MSG_WAITALL); + else + fprintf(stderr, "WARN: echo but no data sock\n"); + send_ack(comm_sock); + } + if (cmd("data close")) { + if (data_sock >= 0) { + close(data_sock); + data_sock = -1; + send_ack(comm_sock); + } else { + race_close = true; + } + } + if (cmd("conn psp")) { + if (accept_cfg != ACCEPT_CFG_NONE) + fprintf(stderr, "WARN: old conn config still set!\n"); + accept_cfg = ACCEPT_CFG_PSP; + send_ack(comm_sock); + /* next two bytes are versions */ + if (off >= 2) { + memcpy(&psp_vers, buf, 2); + __consume(2); + } else { + fprintf(stderr, "WARN: short conn psp command!\n"); + } + } + if (cmd("conn clr")) { + if (accept_cfg != ACCEPT_CFG_NONE) + fprintf(stderr, "WARN: old conn config still set!\n"); + accept_cfg = ACCEPT_CFG_CLEAR; + send_ack(comm_sock); + } + if (cmd("exit")) + should_quit = true; +#undef cmd + + if (!consumed) { + fprintf(stderr, "WARN: unknown cmd: [%zd] %s\n", + off, buf); + } + } while (consumed && off); + } + + /* server sock */ + if (pfds[0].revents & POLLIN) { + if (data_sock >= 0) { + fprintf(stderr, "WARN: new data sock but old one still here\n"); + close(data_sock); + data_sock = -1; + } + data_sock = accept(server_sock, NULL, NULL); + if (data_sock < 0) { + perror("accept"); + continue; + } + data_read = 0; + + if (accept_cfg == ACCEPT_CFG_CLEAR) { + dbg("new data sock: clear\n"); + /* nothing to do */ + } else if (accept_cfg == ACCEPT_CFG_PSP) { + dbg("new data sock: psp\n"); + conn_setup_psp(ys, opts, data_sock); + } else { + fprintf(stderr, "WARN: new data sock but no config\n"); + } + accept_cfg = ACCEPT_CFG_NONE; + } + + if (race_close) { + if (data_sock >= 0) { + /* indeed, ordering problem, handle the close */ + close(data_sock); + data_sock = -1; + send_ack(comm_sock); + } else { + fprintf(stderr, "WARN: close but no data sock\n"); + send_err(comm_sock); + } + } + } + dbg("session ending\n"); +} + +static int spawn_server(struct opts *opts) +{ + struct sockaddr_in6 addr; + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd < 0) { + perror("can't open socket"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + + addr.sin6_family = AF_INET6; + addr.sin6_addr = in6addr_any; + addr.sin6_port = htons(opts->port); + + if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) { + perror("can't bind socket"); + return -1; + } + + if (listen(fd, 5)) { + perror("can't listen"); + return -1; + } + + return fd; +} + +static int run_responder(struct ynl_sock *ys, struct opts *opts) +{ + int server_sock, comm; + + server_sock = spawn_server(opts); + if (server_sock < 0) + return 4; + + while (!should_quit) { + comm = accept(server_sock, NULL, NULL); + if (comm < 0) { + perror("accept failed"); + } else { + run_session(ys, opts, server_sock, comm); + close(comm); + } + } + + return 0; +} + +static void usage(const char *name, const char *miss) +{ + if (miss) + fprintf(stderr, "Missing argument: %s\n", miss); + + fprintf(stderr, "Usage: %s -p port [-v] [-d psp-dev-id]\n", name); + exit(EXIT_FAILURE); +} + +static void parse_cmd_opts(int argc, char **argv, struct opts *opts) +{ + int opt; + + while ((opt = getopt(argc, argv, "vp:d:")) != -1) { + switch (opt) { + case 'v': + opts->verbose = 1; + break; + case 'p': + opts->port = atoi(optarg); + break; + case 'd': + opts->devid = atoi(optarg); + break; + default: + usage(argv[0], NULL); + } + } +} + +static int psp_dev_set_ena(struct ynl_sock *ys, __u32 dev_id, __u32 versions) +{ + struct psp_dev_set_req *sreq; + struct psp_dev_set_rsp *srsp; + + fprintf(stderr, "Set PSP enable on device %d to 0x%x\n", + dev_id, versions); + + sreq = psp_dev_set_req_alloc(); + + psp_dev_set_req_set_id(sreq, dev_id); + psp_dev_set_req_set_psp_versions_ena(sreq, versions); + + srsp = psp_dev_set(ys, sreq); + psp_dev_set_req_free(sreq); + if (!srsp) + return 10; + + psp_dev_set_rsp_free(srsp); + return 0; +} + +int main(int argc, char **argv) +{ + struct psp_dev_get_list *dev_list; + bool devid_found = false; + __u32 ver_ena, ver_cap; + struct opts opts = {}; + struct ynl_error yerr; + struct ynl_sock *ys; + int first_id = 0; + int ret; + + parse_cmd_opts(argc, argv, &opts); + if (!opts.port) + usage(argv[0], "port"); // exits + + ys = ynl_sock_create(&ynl_psp_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + dev_list = psp_dev_get_dump(ys); + if (ynl_dump_empty(dev_list)) { + if (ys->err.code) + goto err_close; + fprintf(stderr, "No PSP devices\n"); + goto err_close_silent; + } + + ynl_dump_foreach(dev_list, d) { + if (opts.devid) { + devid_found = true; + ver_ena = d->psp_versions_ena; + ver_cap = d->psp_versions_cap; + } else if (!first_id) { + first_id = d->id; + ver_ena = d->psp_versions_ena; + ver_cap = d->psp_versions_cap; + } else { + fprintf(stderr, "Multiple PSP devices found\n"); + goto err_close_silent; + } + } + psp_dev_get_list_free(dev_list); + + if (opts.devid && !devid_found) { + fprintf(stderr, "PSP device %d requested on cmdline, not found\n", + opts.devid); + goto err_close_silent; + } else if (!opts.devid) { + opts.devid = first_id; + } + + if (ver_ena != ver_cap) { + ret = psp_dev_set_ena(ys, opts.devid, ver_cap); + if (ret) + goto err_close; + } + + ret = run_responder(ys, &opts); + + if (ver_ena != ver_cap && psp_dev_set_ena(ys, opts.devid, ver_ena)) + fprintf(stderr, "WARN: failed to set the PSP versions back\n"); + + ynl_sock_destroy(ys); + + return ret; + +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); +err_close_silent: + ynl_sock_destroy(ys); + return 2; +} diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index c2bb5d3f1ca1..04d0a2a13e73 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -57,6 +57,36 @@ def check_fec(cfg) -> None: ksft_true(data['stats'], "driver does not report stats") +def check_fec_hist(cfg) -> None: + """ + Check that drivers which support FEC histogram statistics report + reasonable values. + """ + + try: + data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, + "flags": {'stats'}}}) + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("FEC not supported by the device") from e + raise + if 'stats' not in data: + raise KsftSkipEx("FEC stats not supported by the device") + if 'hist' not in data['stats']: + raise KsftSkipEx("FEC histogram not supported by the device") + + hist = data['stats']['hist'] + for fec_bin in hist: + for key in ['bin-low', 'bin-high', 'bin-val']: + ksft_in(key, fec_bin, + "Drivers should always report FEC bin range and value") + ksft_ge(fec_bin['bin-high'], fec_bin['bin-low'], + "FEC bin range should be valid") + if 'bin-val-per-lane' in fec_bin: + ksft_eq(sum(fec_bin['bin-val-per-lane']), fec_bin['bin-val'], + "FEC bin value should be equal to sum of per-plane values") + + def pkt_byte_sum(cfg) -> None: """ Check that qstat and interface stats match in value. @@ -279,8 +309,9 @@ def main() -> None: """ Ksft boiler plate main """ with NetDrvEnv(__file__, queue_count=100) as cfg: - ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, - check_down, procfs_hammer, procfs_downup_hammer], + ksft_run([check_pause, check_fec, check_fec_hist, pkt_byte_sum, + qstat_by_ifindex, check_down, procfs_hammer, + procfs_downup_hammer], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index eaf6938f100e..1340b3df9c31 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -1,11 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for net selftests -TEST_PROGS := dev_addr_lists.sh propagation.sh +TEST_PROGS := \ + dev_addr_lists.sh \ + options.sh \ + propagation.sh \ +# end of TEST_PROGS TEST_INCLUDES := \ ../bonding/lag_lib.sh \ ../../../net/forwarding/lib.sh \ - ../../../net/lib.sh + ../../../net/in_netns.sh \ + ../../../net/lib.sh \ + ../../../net/lib/sh/defer.sh \ +# end of TEST_INCLUDES include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 636b3525b679..558e1d0cf565 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -3,4 +3,5 @@ CONFIG_IPV6=y CONFIG_MACVLAN=y CONFIG_NETDEVSIM=m CONFIG_NET_TEAM=y +CONFIG_NET_TEAM_MODE_ACTIVEBACKUP=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y diff --git a/tools/testing/selftests/drivers/net/team/options.sh b/tools/testing/selftests/drivers/net/team/options.sh new file mode 100755 index 000000000000..44888f32b513 --- /dev/null +++ b/tools/testing/selftests/drivers/net/team/options.sh @@ -0,0 +1,188 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# These tests verify basic set and get functionality of the team +# driver options over netlink. + +# Run in private netns. +test_dir="$(dirname "$0")" +if [[ $# -eq 0 ]]; then + "${test_dir}"/../../../net/in_netns.sh "$0" __subprocess + exit $? +fi + +ALL_TESTS=" + team_test_options +" + +source "${test_dir}/../../../net/lib.sh" + +TEAM_PORT="team0" +MEMBER_PORT="dummy0" + +setup() +{ + ip link add name "${MEMBER_PORT}" type dummy + ip link add name "${TEAM_PORT}" type team +} + +get_and_check_value() +{ + local option_name="$1" + local expected_value="$2" + local port_flag="$3" + + local value_from_get + + if ! value_from_get=$(teamnl "${TEAM_PORT}" getoption "${option_name}" \ + "${port_flag}"); then + echo "Could not get option '${option_name}'" >&2 + return 1 + fi + + if [[ "${value_from_get}" != "${expected_value}" ]]; then + echo "Incorrect value for option '${option_name}'" >&2 + echo "get (${value_from_get}) != set (${expected_value})" >&2 + return 1 + fi +} + +set_and_check_get() +{ + local option_name="$1" + local option_value="$2" + local port_flag="$3" + + local value_from_get + + if ! teamnl "${TEAM_PORT}" setoption "${option_name}" \ + "${option_value}" "${port_flag}"; then + echo "'setoption ${option_name} ${option_value}' failed" >&2 + return 1 + fi + + get_and_check_value "${option_name}" "${option_value}" "${port_flag}" + return $? +} + +# Get a "port flag" to pass to the `teamnl` command. +# E.g. $1="dummy0" -> "port=dummy0", +# $1="" -> "" +get_port_flag() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + echo "--port=${port_name}" + fi +} + +attach_port_if_specified() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + ip link set dev "${port_name}" master "${TEAM_PORT}" + return $? + fi +} + +detach_port_if_specified() +{ + local port_name="$1" + + if [[ -n "${port_name}" ]]; then + ip link set dev "${port_name}" nomaster + return $? + fi +} + +# Test that an option's get value matches its set value. +# Globals: +# RET - Used by testing infra like `check_err`. +# EXIT_STATUS - Used by `log_test` for whole script exit value. +# Arguments: +# option_name - The name of the option. +# value_1 - The first value to try setting. +# value_2 - The second value to try setting. +# port_name - The (optional) name of the attached port. +team_test_option() +{ + local option_name="$1" + local value_1="$2" + local value_2="$3" + local possible_values="$2 $3 $2" + local port_name="$4" + local port_flag + + RET=0 + + echo "Setting '${option_name}' to '${value_1}' and '${value_2}'" + + attach_port_if_specified "${port_name}" + check_err $? "Couldn't attach ${port_name} to master" + port_flag=$(get_port_flag "${port_name}") + + # Set and get both possible values. + for value in ${possible_values}; do + set_and_check_get "${option_name}" "${value}" "${port_flag}" + check_err $? "Failed to set '${option_name}' to '${value}'" + done + + detach_port_if_specified "${port_name}" + check_err $? "Couldn't detach ${port_name} from its master" + + log_test "Set + Get '${option_name}' test" +} + +# Test that getting a non-existant option fails. +# Globals: +# RET - Used by testing infra like `check_err`. +# EXIT_STATUS - Used by `log_test` for whole script exit value. +# Arguments: +# option_name - The name of the option. +# port_name - The (optional) name of the attached port. +team_test_get_option_fails() +{ + local option_name="$1" + local port_name="$2" + local port_flag + + RET=0 + + attach_port_if_specified "${port_name}" + check_err $? "Couldn't attach ${port_name} to master" + port_flag=$(get_port_flag "${port_name}") + + # Just confirm that getting the value fails. + teamnl "${TEAM_PORT}" getoption "${option_name}" "${port_flag}" + check_fail $? "Shouldn't be able to get option '${option_name}'" + + detach_port_if_specified "${port_name}" + + log_test "Get '${option_name}' fails" +} + +team_test_options() +{ + # Wrong option name behavior. + team_test_get_option_fails fake_option1 + team_test_get_option_fails fake_option2 "${MEMBER_PORT}" + + # Correct set and get behavior. + team_test_option mode activebackup loadbalance + team_test_option notify_peers_count 0 5 + team_test_option notify_peers_interval 0 5 + team_test_option mcast_rejoin_count 0 5 + team_test_option mcast_rejoin_interval 0 5 + team_test_option enabled true false "${MEMBER_PORT}" + team_test_option user_linkup true false "${MEMBER_PORT}" + team_test_option user_linkup_enabled true false "${MEMBER_PORT}" + team_test_option priority 10 20 "${MEMBER_PORT}" + team_test_option queue_id 0 1 "${MEMBER_PORT}" +} + +require_command teamnl +setup +tests_run +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/virtio_net/Makefile b/tools/testing/selftests/drivers/net/virtio_net/Makefile index 7ec7cd3ab2cc..868ece3fea1f 100644 --- a/tools/testing/selftests/drivers/net/virtio_net/Makefile +++ b/tools/testing/selftests/drivers/net/virtio_net/Makefile @@ -1,15 +1,12 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = basic_features.sh \ - # +TEST_PROGS = basic_features.sh -TEST_FILES = \ - virtio_net_common.sh \ - # +TEST_FILES = virtio_net_common.sh TEST_INCLUDES = \ - ../../../net/forwarding/lib.sh \ - ../../../net/lib.sh \ - # + ../../../net/forwarding/lib.sh \ + ../../../net/lib.sh \ +# end of TEST_INCLUDES include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py index 1dd8bf3bf6c9..a148004e1c36 100755 --- a/tools/testing/selftests/drivers/net/xdp.py +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -11,8 +11,9 @@ import string from dataclasses import dataclass from enum import Enum -from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr -from lib.py import KsftFailEx, NetDrvEpEnv, EthtoolFamily, NlError +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_ne, ksft_pr +from lib.py import KsftFailEx, NetDrvEpEnv +from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import bkg, cmd, rand_port, wait_port_listen from lib.py import ip, bpftool, defer @@ -112,10 +113,10 @@ def _load_xdp_prog(cfg, bpf_info): defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) cmd( - f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}", + f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdpdrv obj {abs_path} sec {bpf_info.xdp_sec}", shell=True ) - defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpdrv off") xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] prog_info["id"] = xdp_info["xdp"]["prog"]["id"] @@ -290,34 +291,78 @@ def test_xdp_native_drop_mb(cfg): _test_drop(cfg, bpf_info, 8000) -def test_xdp_native_tx_mb(cfg): +def _test_xdp_native_tx(cfg, bpf_info, payload_lens): """ - Tests the XDP_TX action for a multi-buff case. + Tests the XDP_TX action. Args: cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing the BPF program metadata. + payload_lens: Array of packet lengths to send. """ cfg.require_cmd("socat", remote=True) - - bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) prog_info = _load_xdp_prog(cfg, bpf_info) port = rand_port() _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value) _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) - test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000)) - rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" - tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + expected_pkts = 0 + for payload_len in payload_lens: + test_string = "".join( + random.choice(string.ascii_lowercase) for _ in range(payload_len) + ) + + rx_udp = f"socat -{cfg.addr_ipver} -T 2 " + \ + f"-u UDP-RECV:{port},reuseport STDOUT" + + # Writing zero bytes to stdin gets ignored by socat, + # but with the shut-null flag socat generates a zero sized packet + # when the socket is closed. + tx_cmd_suffix = ",shut-null" if payload_len == 0 else "" + tx_udp = f"echo -n {test_string} | socat -t 2 " + \ + f"-u STDIN UDP:{cfg.baddr}:{port}{tx_cmd_suffix}" + + with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: + wait_port_listen(port, proto="udp", host=cfg.remote) + cmd(tx_udp, host=cfg.remote, shell=True) + + ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") + + expected_pkts += 1 + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + ksft_eq(stats[XDPStats.RX.value], expected_pkts, "RX stats mismatch") + ksft_eq(stats[XDPStats.TX.value], expected_pkts, "TX stats mismatch") + + +def test_xdp_native_tx_sb(cfg): + """ + Tests the XDP_TX action for a single-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + # Ensure there's enough room for an ETH / IP / UDP header + pkt_hdr_len = 42 if cfg.addr_ipver == "4" else 62 + + _test_xdp_native_tx(cfg, bpf_info, [0, 1500 // 2, 1500 - pkt_hdr_len]) - with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: - wait_port_listen(port, proto="udp", host=cfg.remote) - cmd(tx_udp, host=cfg.remote, shell=True) - stats = _get_stats(prog_info['maps']['map_xdp_stats']) +def test_xdp_native_tx_mb(cfg): + """ + Tests the XDP_TX action for a multi-buff case. - ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") - ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch") + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", + "xdp.frags", 9000) + # The first packet ensures we exercise the fragmented code path. + # And the subsequent 0-sized packet ensures the driver + # reinitializes xdp_buff correctly. + _test_xdp_native_tx(cfg, bpf_info, [8000, 0]) def _validate_res(res, offset_lst, pkt_sz_lst): @@ -497,11 +542,11 @@ def get_hds_thresh(cfg): The HDS threshold value. If the threshold is not supported or an error occurs, a default value of 1500 is returned. """ - netnl = cfg.netnl + ethnl = cfg.ethnl hds_thresh = 1500 try: - rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + rings = ethnl.rings_get({'header': {'dev-index': cfg.ifindex}}) if 'hds-thresh' not in rings: ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}') return hds_thresh @@ -518,7 +563,7 @@ def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst): Args: cfg: Configuration object containing network settings. - netnl: Network namespace or link object (not used in this function). + ethnl: Network namespace or link object (not used in this function). This function sets up the packet size and offset lists, then performs the head adjustment test by sending and receiving UDP packets. @@ -627,6 +672,88 @@ def test_xdp_native_adjst_head_shrnk_data(cfg): _validate_res(res, offset_lst, pkt_sz_lst) +def _test_xdp_native_ifc_stats(cfg, act): + cfg.require_cmd("socat") + + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, act.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + # Discard the input, but we need a listener to avoid ICMP errors + rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport " + \ + "/dev/null" + # Listener runs on "remote" in case of XDP_TX + rx_host = cfg.remote if act == XDPAction.TX else None + # We want to spew 2000 packets quickly, bash seems to do a good enough job + tx_udp = f"exec 5<>/dev/udp/{cfg.addr}/{port}; " \ + "for i in `seq 2000`; do echo a >&5; done; exec 5>&-" + + cfg.wait_hw_stats_settle() + # Qstats have more clearly defined semantics than rtnetlink. + # XDP is the "first layer of the stack" so XDP packets should be counted + # as received and sent as if the decision was made in the routing layer. + before = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + with bkg(rx_udp, host=rx_host, exit_wait=True): + wait_port_listen(port, proto="udp", host=rx_host) + cmd(tx_udp, host=cfg.remote, shell=True) + + cfg.wait_hw_stats_settle() + after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + ksft_ge(after['rx-packets'] - before['rx-packets'], 2000) + if act == XDPAction.TX: + ksft_ge(after['tx-packets'] - before['tx-packets'], 2000) + + expected_pkts = 2000 + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + ksft_eq(stats[XDPStats.RX.value], expected_pkts, "XDP RX stats mismatch") + if act == XDPAction.TX: + ksft_eq(stats[XDPStats.TX.value], expected_pkts, "XDP TX stats mismatch") + + # Flip the ring count back and forth to make sure the stats from XDP rings + # don't get lost. + chans = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if chans.get('combined-count', 0) > 1: + cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex}, + 'combined-count': 1}) + cfg.ethnl.channels_set({'header': {'dev-index': cfg.ifindex}, + 'combined-count': chans['combined-count']}) + before = after + after = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + ksft_ge(after['rx-packets'], before['rx-packets']) + if act == XDPAction.TX: + ksft_ge(after['tx-packets'], before['tx-packets']) + + +def test_xdp_native_qstats_pass(cfg): + """ + Send 2000 messages, expect XDP_PASS, make sure the packets were counted + to interface level qstats (Rx). + """ + _test_xdp_native_ifc_stats(cfg, XDPAction.PASS) + + +def test_xdp_native_qstats_drop(cfg): + """ + Send 2000 messages, expect XDP_DROP, make sure the packets were counted + to interface level qstats (Rx). + """ + _test_xdp_native_ifc_stats(cfg, XDPAction.DROP) + + +def test_xdp_native_qstats_tx(cfg): + """ + Send 2000 messages, expect XDP_TX, make sure the packets were counted + to interface level qstats (Rx and Tx) + """ + _test_xdp_native_ifc_stats(cfg, XDPAction.TX) + + def main(): """ Main function to execute the XDP tests. @@ -637,18 +764,23 @@ def main(): function to execute the tests. """ with NetDrvEpEnv(__file__) as cfg: - cfg.netnl = EthtoolFamily() + cfg.ethnl = EthtoolFamily() + cfg.netnl = NetdevFamily() ksft_run( [ test_xdp_native_pass_sb, test_xdp_native_pass_mb, test_xdp_native_drop_sb, test_xdp_native_drop_mb, + test_xdp_native_tx_sb, test_xdp_native_tx_mb, test_xdp_native_adjst_tail_grow_data, test_xdp_native_adjst_tail_shrnk_data, test_xdp_native_adjst_head_grow_data, test_xdp_native_adjst_head_shrnk_data, + test_xdp_native_qstats_pass, + test_xdp_native_qstats_drop, + test_xdp_native_qstats_tx, ], args=(cfg,)) ksft_exit() diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index fcbdb1297e24..64ac0dfa46b7 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only dnotify_test devpts_pts +fclog file_stressor anon_inode_test kernfs_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 73d4650af1a5..85427d7f19b9 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test +TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test fclog TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 81db85a5cc16..39a68078a79b 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -65,6 +65,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) "oneway_spam_detection", "extended_error", "freeze_notification", + "transaction_report", }; change_mountns(_metadata); diff --git a/tools/testing/selftests/filesystems/fclog.c b/tools/testing/selftests/filesystems/fclog.c new file mode 100644 index 000000000000..912a8b755c3b --- /dev/null +++ b/tools/testing/selftests/filesystems/fclog.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2025 SUSE LLC. + */ + +#include <assert.h> +#include <errno.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mount.h> + +#include "../kselftest_harness.h" + +#define ASSERT_ERRNO(expected, _t, seen) \ + __EXPECT(expected, #expected, \ + ({__typeof__(seen) _tmp_seen = (seen); \ + _tmp_seen >= 0 ? _tmp_seen : -errno; }), #seen, _t, 1) + +#define ASSERT_ERRNO_EQ(expected, seen) \ + ASSERT_ERRNO(expected, ==, seen) + +#define ASSERT_SUCCESS(seen) \ + ASSERT_ERRNO(0, <=, seen) + +FIXTURE(ns) +{ + int host_mntns; +}; + +FIXTURE_SETUP(ns) +{ + /* Stash the old mntns. */ + self->host_mntns = open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->host_mntns); + + /* Create a new mount namespace and make it private. */ + ASSERT_SUCCESS(unshare(CLONE_NEWNS)); + ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)); +} + +FIXTURE_TEARDOWN(ns) +{ + ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS)); + ASSERT_SUCCESS(close(self->host_mntns)); +} + +TEST_F(ns, fscontext_log_enodata) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + /* A brand new fscontext has no log entries. */ + char buf[128] = {}; + for (int i = 0; i < 16; i++) + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_errorfc) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + char buf[128] = {}; + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. */ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_errorfc_after_fsmount) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + int mfd = fsmount(fsfd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NOSUID); + ASSERT_SUCCESS(mfd); + ASSERT_SUCCESS(move_mount(mfd, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH)); + + /* + * The fscontext log should still contain data even after + * FSCONFIG_CMD_CREATE and fsmount(). + */ + char buf[128] = {}; + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. */ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_emsgsize) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + char buf[128] = {}; + /* + * Attempting to read a message with too small a buffer should not + * result in the message getting consumed. + */ + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 0)); + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 1)); + for (int i = 0; i < 16; i++) + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 16)); + + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. */ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/fuse/.gitignore b/tools/testing/selftests/filesystems/fuse/.gitignore new file mode 100644 index 000000000000..3e72e742d08e --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +fuse_mnt +fusectl_test diff --git a/tools/testing/selftests/filesystems/fuse/Makefile b/tools/testing/selftests/filesystems/fuse/Makefile new file mode 100644 index 000000000000..612aad69a93a --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) + +TEST_GEN_PROGS := fusectl_test +TEST_GEN_FILES := fuse_mnt + +include ../../lib.mk + +VAR_CFLAGS := $(shell pkg-config fuse --cflags 2>/dev/null) +ifeq ($(VAR_CFLAGS),) +VAR_CFLAGS := -D_FILE_OFFSET_BITS=64 -I/usr/include/fuse +endif + +VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null) +ifeq ($(VAR_LDLIBS),) +VAR_LDLIBS := -lfuse -pthread +endif + +$(OUTPUT)/fuse_mnt: CFLAGS += $(VAR_CFLAGS) +$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS) diff --git a/tools/testing/selftests/filesystems/fuse/fuse_mnt.c b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c new file mode 100644 index 000000000000..d12b17f30fad --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/fuse_mnt.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fusectl test file-system + * Creates a simple FUSE filesystem with a single read-write file (/test) + */ + +#define FUSE_USE_VERSION 26 + +#include <fuse.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static char *content; +static size_t content_size = 0; +static const char test_path[] = "/test"; + +static int test_getattr(const char *path, struct stat *st) +{ + memset(st, 0, sizeof(*st)); + + if (!strcmp(path, "/")) { + st->st_mode = S_IFDIR | 0755; + st->st_nlink = 2; + return 0; + } + + if (!strcmp(path, test_path)) { + st->st_mode = S_IFREG | 0664; + st->st_nlink = 1; + st->st_size = content_size; + return 0; + } + + return -ENOENT; +} + +static int test_readdir(const char *path, void *buf, fuse_fill_dir_t filler, + off_t offset, struct fuse_file_info *fi) +{ + if (strcmp(path, "/")) + return -ENOENT; + + filler(buf, ".", NULL, 0); + filler(buf, "..", NULL, 0); + filler(buf, test_path + 1, NULL, 0); + + return 0; +} + +static int test_open(const char *path, struct fuse_file_info *fi) +{ + if (strcmp(path, test_path)) + return -ENOENT; + + return 0; +} + +static int test_read(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) +{ + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if (!content || content_size == 0) + return 0; + + if (offset >= content_size) + return 0; + + if (offset + size > content_size) + size = content_size - offset; + + memcpy(buf, content + offset, size); + + return size; +} + +static int test_write(const char *path, const char *buf, size_t size, + off_t offset, struct fuse_file_info *fi) +{ + size_t new_size; + + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if(offset > content_size) + return -EINVAL; + + new_size = MAX(offset + size, content_size); + + if (new_size > content_size) + content = realloc(content, new_size); + + content_size = new_size; + + if (!content) + return -ENOMEM; + + memcpy(content + offset, buf, size); + + return size; +} + +static int test_truncate(const char *path, off_t size) +{ + if (strcmp(path, test_path) != 0) + return -ENOENT; + + if (size == 0) { + free(content); + content = NULL; + content_size = 0; + return 0; + } + + content = realloc(content, size); + + if (!content) + return -ENOMEM; + + if (size > content_size) + memset(content + content_size, 0, size - content_size); + + content_size = size; + return 0; +} + +static struct fuse_operations memfd_ops = { + .getattr = test_getattr, + .readdir = test_readdir, + .open = test_open, + .read = test_read, + .write = test_write, + .truncate = test_truncate, +}; + +int main(int argc, char *argv[]) +{ + return fuse_main(argc, argv, &memfd_ops, NULL); +} diff --git a/tools/testing/selftests/filesystems/fuse/fusectl_test.c b/tools/testing/selftests/filesystems/fuse/fusectl_test.c new file mode 100644 index 000000000000..8d124d1cacb2 --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/fusectl_test.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Chen Linxuan <chenlinxuan@uniontech.com> + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <dirent.h> +#include <sched.h> +#include <linux/limits.h> + +#include "../../kselftest_harness.h" + +#define FUSECTL_MOUNTPOINT "/sys/fs/fuse/connections" +#define FUSE_MOUNTPOINT "/tmp/fuse_mnt_XXXXXX" +#define FUSE_DEVICE "/dev/fuse" +#define FUSECTL_TEST_VALUE "1" + +static void write_file(struct __test_metadata *const _metadata, + const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + + ASSERT_GE(fd, 0); + ASSERT_EQ(write(fd, val, len), len); + ASSERT_EQ(close(fd), 0); +} + +FIXTURE(fusectl){ + char fuse_mountpoint[sizeof(FUSE_MOUNTPOINT)]; + int connection; +}; + +FIXTURE_SETUP(fusectl) +{ + const char *fuse_mnt_prog = "./fuse_mnt"; + int status, pid; + struct stat statbuf; + uid_t uid = getuid(); + gid_t gid = getgid(); + char buf[32]; + + /* Setup userns */ + ASSERT_EQ(unshare(CLONE_NEWNS|CLONE_NEWUSER), 0); + sprintf(buf, "0 %d 1", uid); + write_file(_metadata, "/proc/self/uid_map", buf); + write_file(_metadata, "/proc/self/setgroups", "deny"); + sprintf(buf, "0 %d 1", gid); + write_file(_metadata, "/proc/self/gid_map", buf); + ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0); + + strcpy(self->fuse_mountpoint, FUSE_MOUNTPOINT); + + if (!mkdtemp(self->fuse_mountpoint)) + SKIP(return, + "Failed to create FUSE mountpoint %s", + strerror(errno)); + + if (access(FUSECTL_MOUNTPOINT, F_OK)) + SKIP(return, + "FUSE control filesystem not mounted"); + + pid = fork(); + if (pid < 0) + SKIP(return, + "Failed to fork FUSE daemon process: %s", + strerror(errno)); + + if (pid == 0) { + execlp(fuse_mnt_prog, fuse_mnt_prog, self->fuse_mountpoint, NULL); + exit(errno); + } + + waitpid(pid, &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + SKIP(return, + "Failed to start FUSE daemon %s", + strerror(WEXITSTATUS(status))); + } + + if (stat(self->fuse_mountpoint, &statbuf)) + SKIP(return, + "Failed to stat FUSE mountpoint %s", + strerror(errno)); + + self->connection = statbuf.st_dev; +} + +FIXTURE_TEARDOWN(fusectl) +{ + umount2(self->fuse_mountpoint, MNT_DETACH); + rmdir(self->fuse_mountpoint); +} + +TEST_F(fusectl, abort) +{ + char path_buf[PATH_MAX]; + int abort_fd, test_fd, ret; + + sprintf(path_buf, "/sys/fs/fuse/connections/%d/abort", self->connection); + + ASSERT_EQ(0, access(path_buf, F_OK)); + + abort_fd = open(path_buf, O_WRONLY); + ASSERT_GE(abort_fd, 0); + + sprintf(path_buf, "%s/test", self->fuse_mountpoint); + + test_fd = open(path_buf, O_RDWR); + ASSERT_GE(test_fd, 0); + + ret = read(test_fd, path_buf, sizeof(path_buf)); + ASSERT_EQ(ret, 0); + + ret = write(test_fd, "test", sizeof("test")); + ASSERT_EQ(ret, sizeof("test")); + + ret = lseek(test_fd, 0, SEEK_SET); + ASSERT_GE(ret, 0); + + ret = write(abort_fd, FUSECTL_TEST_VALUE, sizeof(FUSECTL_TEST_VALUE)); + ASSERT_GT(ret, 0); + + close(abort_fd); + + ret = read(test_fd, path_buf, sizeof(path_buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENOTCONN); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 8cfb87f7f7c5..490ace1f017e 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -1,12 +1,14 @@ # SPDX-License-Identifier: GPL-2.0 +PKG_CONFIG ?= pkg-config +LIBNUMA_TEST = $(shell sh -c "$(PKG_CONFIG) numa --atleast-version 2.0.16 > /dev/null 2>&1 && echo SUFFICIENT || echo NO") + INCLUDES := -I../include -I../../ $(KHDR_INCLUDES) -CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES) +CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64 $(INCLUDES) $(KHDR_INCLUDES) -DLIBNUMA_VER_$(LIBNUMA_TEST)=1 LDLIBS := -lpthread -lrt -lnuma LOCAL_HDRS := \ ../include/futextest.h \ - ../include/atomic.h \ - ../include/logging.h + ../include/atomic.h TEST_GEN_PROGS := \ futex_wait_timeout \ futex_wait_wouldblock \ diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c index f29e4d627e79..e0a33510ccb6 100644 --- a/tools/testing/selftests/futex/functional/futex_numa.c +++ b/tools/testing/selftests/futex/functional/futex_numa.c @@ -5,9 +5,10 @@ #include <sys/mman.h> #include <fcntl.h> #include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> #include <time.h> #include <assert.h> -#include "logging.h" #include "futextest.h" #include "futex2test.h" diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c index a9ecfb2d3932..d037a3f10ee8 100644 --- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -16,9 +16,9 @@ #include <linux/futex.h> #include <sys/mman.h> -#include "logging.h" #include "futextest.h" #include "futex2test.h" +#include "../../kselftest_harness.h" #define MAX_THREADS 64 @@ -77,7 +77,7 @@ static void join_max_threads(void) } } -static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flags) +static void __test_futex(void *futex_ptr, int err_value, unsigned int futex_flags) { int to_wake, ret, i, need_exit = 0; @@ -88,11 +88,17 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag do { ret = futex2_wake(futex_ptr, to_wake, futex_flags); - if (must_fail) { - if (ret < 0) - break; - ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n", - to_wake, futex_flags); + + if (err_value) { + if (ret >= 0) + ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n", + to_wake, futex_flags); + + if (errno != err_value) + ksft_exit_fail_msg("futex2_wake(%d, 0x%x) expected error was %d, but returned %d (%s)\n", + to_wake, futex_flags, err_value, errno, strerror(errno)); + + break; } if (ret < 0) { ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n", @@ -106,12 +112,12 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag join_max_threads(); for (i = 0; i < MAX_THREADS; i++) { - if (must_fail && thread_args[i].result != -1) { + if (err_value && thread_args[i].result != -1) { ksft_print_msg("Thread %d should fail but succeeded (%d)\n", i, thread_args[i].result); need_exit = 1; } - if (!must_fail && thread_args[i].result != 0) { + if (!err_value && thread_args[i].result != 0) { ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result); need_exit = 1; } @@ -120,58 +126,30 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag ksft_exit_fail_msg("Aborting due to earlier errors.\n"); } -static void test_futex(void *futex_ptr, int must_fail) +static void test_futex(void *futex_ptr, int err_value) { - __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA); + __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA); } -static void test_futex_mpol(void *futex_ptr, int must_fail) +static void test_futex_mpol(void *futex_ptr, int err_value) { - __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); + __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); } -static void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - -int main(int argc, char *argv[]) +TEST(futex_numa_mpol) { struct futex32_numa *futex_numa; - int mem_size, i; void *futex_ptr; - int c; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); + int mem_size; mem_size = sysconf(_SC_PAGE_SIZE); - futex_ptr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + futex_ptr = mmap(NULL, mem_size * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); if (futex_ptr == MAP_FAILED) ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size); + /* Create an invalid memory region for the "Memory out of range" test */ + mprotect(futex_ptr + mem_size, mem_size, PROT_NONE); + futex_numa = futex_ptr; ksft_print_msg("Regular test\n"); @@ -182,27 +160,31 @@ int main(int argc, char *argv[]) if (futex_numa->numa == FUTEX_NO_NODE) ksft_exit_fail_msg("NUMA node is left uninitialized\n"); - ksft_print_msg("Memory too small\n"); - test_futex(futex_ptr + mem_size - 4, 1); + /* FUTEX2_NUMA futex must be 8-byte aligned */ + ksft_print_msg("Mis-aligned futex\n"); + test_futex(futex_ptr + mem_size - 4, EINVAL); ksft_print_msg("Memory out of range\n"); - test_futex(futex_ptr + mem_size, 1); + test_futex(futex_ptr + mem_size, EFAULT); futex_numa->numa = FUTEX_NO_NODE; mprotect(futex_ptr, mem_size, PROT_READ); ksft_print_msg("Memory, RO\n"); - test_futex(futex_ptr, 1); + test_futex(futex_ptr, EFAULT); mprotect(futex_ptr, mem_size, PROT_NONE); ksft_print_msg("Memory, no access\n"); - test_futex(futex_ptr, 1); + test_futex(futex_ptr, EFAULT); mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE); ksft_print_msg("Memory back to RW\n"); test_futex(futex_ptr, 0); + ksft_test_result_pass("futex2 memory boundary tests passed\n"); + /* MPOL test. Does not work as expected */ - for (i = 0; i < 4; i++) { +#ifdef LIBNUMA_VER_SUFFICIENT + for (int i = 0; i < 4; i++) { unsigned long nodemask; int ret; @@ -221,15 +203,17 @@ int main(int argc, char *argv[]) ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); if (ret < 0) ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n"); - if (0) - test_futex_mpol(futex_numa, 0); if (futex_numa->numa != i) { ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n", futex_numa->numa, i); } } } - ksft_test_result_pass("NUMA MPOL tests passed\n"); - ksft_finished(); - return 0; + ksft_test_result_pass("futex2 MPOL hints test passed\n"); +#else + ksft_test_result_skip("futex2 MPOL hints test requires libnuma 2.0.16+\n"); +#endif + munmap(futex_ptr, mem_size * 2); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index aea001ac4946..3b7b5851f290 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -14,7 +14,7 @@ #include <linux/prctl.h> #include <sys/prctl.h> -#include "logging.h" +#include "../../kselftest_harness.h" #define MAX_THREADS 64 @@ -128,46 +128,14 @@ static void futex_dummy_op(void) ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret); } -static void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -g Test global hash instead intead local immutable \n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n"; static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n"; -int main(int argc, char *argv[]) +TEST(priv_hash) { int futex_slots1, futex_slotsn, online_cpus; pthread_mutexattr_t mutex_attr_pi; int ret, retry = 20; - int c; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(21); ret = pthread_mutexattr_init(&mutex_attr_pi); ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT); @@ -189,14 +157,14 @@ int main(int argc, char *argv[]) if (ret != 0) ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret); - /* First thread, has to initialiaze private hash */ + /* First thread, has to initialize private hash */ futex_slots1 = futex_hash_slots_get(); if (futex_slots1 <= 0) { ksft_print_msg("Current hash buckets: %d\n", futex_slots1); - ksft_exit_fail_msg(test_msg_auto_create); + ksft_exit_fail_msg("%s", test_msg_auto_create); } - ksft_test_result_pass(test_msg_auto_create); + ksft_test_result_pass("%s", test_msg_auto_create); online_cpus = sysconf(_SC_NPROCESSORS_ONLN); ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1); @@ -237,11 +205,11 @@ retry_getslots: } ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n", futex_slots1, futex_slotsn); - ksft_exit_fail_msg(test_msg_auto_inc); + ksft_exit_fail_msg("%s", test_msg_auto_inc); } - ksft_test_result_pass(test_msg_auto_inc); + ksft_test_result_pass("%s", test_msg_auto_inc); } else { - ksft_test_result_skip(test_msg_auto_inc); + ksft_test_result_skip("%s", test_msg_auto_inc); } ret = pthread_mutex_unlock(&global_lock); @@ -257,17 +225,17 @@ retry_getslots: futex_hash_slots_set_verify(2); join_max_threads(); - ksft_test_result(counter == MAX_THREADS, "Created of waited for %d of %d threads\n", + ksft_test_result(counter == MAX_THREADS, "Created and waited for %d of %d threads\n", counter, MAX_THREADS); counter = 0; - /* Once the user set something, auto reisze must be disabled */ + /* Once the user set something, auto resize must be disabled */ ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); create_max_threads(thread_lock_fn); join_max_threads(); ret = futex_hash_slots_get(); - ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n", + ksft_test_result(ret == 2, "No more auto-resize after manual setting, got %d\n", ret); futex_hash_slots_set_must_fail(1 << 29); @@ -280,7 +248,7 @@ retry_getslots: ret = futex_hash_slots_set(0); ksft_test_result(ret == 0, "Global hash request\n"); if (ret != 0) - goto out; + return; futex_hash_slots_set_must_fail(4); futex_hash_slots_set_must_fail(8); @@ -289,17 +257,14 @@ retry_getslots: futex_hash_slots_set_must_fail(6); ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); - if (ret != 0) { + if (ret != 0) ksft_exit_fail_msg("pthread_barrier_init failed: %m\n"); - return 1; - } + create_max_threads(thread_lock_fn); join_max_threads(); ret = futex_hash_slots_get(); ksft_test_result(ret == 0, "Continue to use global hash\n"); - -out: - ksft_finished(); - return 0; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c index 51485be6eb2f..69e2555b6039 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue.c +++ b/tools/testing/selftests/futex/functional/futex_requeue.c @@ -7,24 +7,15 @@ #include <pthread.h> #include <limits.h> -#include "logging.h" + #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-requeue" #define timeout_ns 30000000 #define WAKE_WAIT_US 10000 volatile futex_t *f1; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *waiterfn(void *arg) { struct timespec to; @@ -38,67 +29,49 @@ void *waiterfn(void *arg) return NULL; } -int main(int argc, char *argv[]) +TEST(requeue_single) { - pthread_t waiter[10]; - int res, ret = RET_PASS; - int c, i; volatile futex_t _f1 = 0; volatile futex_t f2 = 0; + pthread_t waiter[10]; + int res; f1 = &_f1; - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(2); - ksft_print_msg("%s: Test futex_requeue\n", - basename(argv[0])); - /* * Requeue a waiter from f1 to f2, and wake f2. */ if (pthread_create(&waiter[0], NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Requeuing 1 futex from f1 to f2\n"); + ksft_print_dbg_msg("Requeuing 1 futex from f1 to f2\n"); res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0); - if (res != 1) { + if (res != 1) ksft_test_result_fail("futex_requeue simple returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; - } - - info("Waking 1 futex at f2\n"); + ksft_print_dbg_msg("Waking 1 futex at f2\n"); res = futex_wake(&f2, 1, 0); if (res != 1) { ksft_test_result_fail("futex_requeue simple returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_requeue simple succeeds\n"); } +} + +TEST(requeue_multiple) +{ + volatile futex_t _f1 = 0; + volatile futex_t f2 = 0; + pthread_t waiter[10]; + int res, i; + f1 = &_f1; /* * Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7. @@ -106,31 +79,28 @@ int main(int argc, char *argv[]) */ for (i = 0; i < 10; i++) { if (pthread_create(&waiter[i], NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); } usleep(WAKE_WAIT_US); - info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n"); + ksft_print_dbg_msg("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n"); res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0); if (res != 10) { ksft_test_result_fail("futex_requeue many returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } - info("Waking INT_MAX futexes at f2\n"); + ksft_print_dbg_msg("Waking INT_MAX futexes at f2\n"); res = futex_wake(&f2, INT_MAX, 0); if (res != 7) { ksft_test_result_fail("futex_requeue many returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_requeue many succeeds\n"); } - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c index 215c6cb539b4..f299d75848cd 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c @@ -26,11 +26,11 @@ #include <stdlib.h> #include <signal.h> #include <string.h> + #include "atomic.h" #include "futextest.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-requeue-pi" #define MAX_WAKE_ITERS 1000 #define THREAD_MAX 10 #define SIGNAL_PERIOD_US 100 @@ -42,12 +42,6 @@ futex_t f1 = FUTEX_INITIALIZER; futex_t f2 = FUTEX_INITIALIZER; futex_t wake_complete = FUTEX_INITIALIZER; -/* Test option defaults */ -static long timeout_ns; -static int broadcast; -static int owner; -static int locked; - struct thread_arg { long id; struct timespec *timeout; @@ -56,18 +50,73 @@ struct thread_arg { }; #define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 } -void usage(char *prog) +FIXTURE(args) { - printf("Usage: %s\n", prog); - printf(" -b Broadcast wakeup (all waiters)\n"); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -l Lock the pi futex across requeue\n"); - printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n"); - printf(" -t N Timeout in nanoseconds (default: 0)\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} +}; + +FIXTURE_SETUP(args) +{ +}; + +FIXTURE_TEARDOWN(args) +{ +}; + +FIXTURE_VARIANT(args) +{ + long timeout_ns; + bool broadcast; + bool owner; + bool locked; +}; + +/* + * For a given timeout value, this macro creates a test input with all the + * possible combinations of valid arguments + */ +#define FIXTURE_VARIANT_ADD_TIMEOUT(timeout) \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout) \ +{ \ + .timeout_ns = timeout, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast) \ +{ \ + .timeout_ns = timeout, \ + .broadcast = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_locked) \ +{ \ + .timeout_ns = timeout, \ + .broadcast = true, \ + .locked = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_owner) \ +{ \ + .timeout_ns = timeout, \ + .broadcast = true, \ + .owner = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_locked) \ +{ \ + .timeout_ns = timeout, \ + .locked = true, \ +}; \ + \ +FIXTURE_VARIANT_ADD(args, t_##timeout##_owner) \ +{ \ + .timeout_ns = timeout, \ + .owner = true, \ +}; \ + +FIXTURE_VARIANT_ADD_TIMEOUT(0); +FIXTURE_VARIANT_ADD_TIMEOUT(5000); +FIXTURE_VARIANT_ADD_TIMEOUT(500000); +FIXTURE_VARIANT_ADD_TIMEOUT(2000000000); int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, int policy, int prio) @@ -81,26 +130,26 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); if (ret) { - error("pthread_attr_setinheritsched\n", ret); + ksft_exit_fail_msg("pthread_attr_setinheritsched\n"); return -1; } ret = pthread_attr_setschedpolicy(&attr, policy); if (ret) { - error("pthread_attr_setschedpolicy\n", ret); + ksft_exit_fail_msg("pthread_attr_setschedpolicy\n"); return -1; } schedp.sched_priority = prio; ret = pthread_attr_setschedparam(&attr, &schedp); if (ret) { - error("pthread_attr_setschedparam\n", ret); + ksft_exit_fail_msg("pthread_attr_setschedparam\n"); return -1; } ret = pthread_create(pth, &attr, func, arg); if (ret) { - error("pthread_create\n", ret); + ksft_exit_fail_msg("pthread_create\n"); return -1; } return 0; @@ -112,7 +161,7 @@ void *waiterfn(void *arg) struct thread_arg *args = (struct thread_arg *)arg; futex_t old_val; - info("Waiter %ld: running\n", args->id); + ksft_print_dbg_msg("Waiter %ld: running\n", args->id); /* Each thread sleeps for a different amount of time * This is to avoid races, because we don't lock the * external mutex here */ @@ -120,26 +169,25 @@ void *waiterfn(void *arg) old_val = f1; atomic_inc(&waiters_blocked); - info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n", + ksft_print_dbg_msg("Calling futex_wait_requeue_pi: %p (%u) -> %p\n", &f1, f1, &f2); args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout, FUTEX_PRIVATE_FLAG); - info("waiter %ld woke with %d %s\n", args->id, args->ret, + ksft_print_dbg_msg("waiter %ld woke with %d %s\n", args->id, args->ret, args->ret < 0 ? strerror(errno) : ""); atomic_inc(&waiters_woken); if (args->ret < 0) { if (args->timeout && errno == ETIMEDOUT) args->ret = 0; else { - args->ret = RET_ERROR; - error("futex_wait_requeue_pi\n", errno); + ksft_exit_fail_msg("futex_wait_requeue_pi\n"); } futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); } futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); - info("Waiter %ld: exiting with %d\n", args->id, args->ret); + ksft_print_dbg_msg("Waiter %ld: exiting with %d\n", args->id, args->ret); pthread_exit((void *)&args->ret); } @@ -152,14 +200,14 @@ void *broadcast_wakerfn(void *arg) int nr_wake = 1; int i = 0; - info("Waker: waiting for waiters to block\n"); + ksft_print_dbg_msg("Waker: waiting for waiters to block\n"); while (waiters_blocked.val < THREAD_MAX) usleep(1000); usleep(1000); - info("Waker: Calling broadcast\n"); + ksft_print_dbg_msg("Waker: Calling broadcast\n"); if (args->lock) { - info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2); futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); } continue_requeue: @@ -167,16 +215,14 @@ void *broadcast_wakerfn(void *arg) args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue, FUTEX_PRIVATE_FLAG); if (args->ret < 0) { - args->ret = RET_ERROR; - error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n"); } else if (++i < MAX_WAKE_ITERS) { task_count += args->ret; if (task_count < THREAD_MAX - waiters_woken.val) goto continue_requeue; } else { - error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n", - 0, MAX_WAKE_ITERS, task_count, THREAD_MAX); - args->ret = RET_ERROR; + ksft_exit_fail_msg("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n", + MAX_WAKE_ITERS, task_count, THREAD_MAX); } futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG); @@ -187,7 +233,7 @@ void *broadcast_wakerfn(void *arg) if (args->ret > 0) args->ret = task_count; - info("Waker: exiting with %d\n", args->ret); + ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret); pthread_exit((void *)&args->ret); } @@ -200,20 +246,20 @@ void *signal_wakerfn(void *arg) int nr_wake = 1; int i = 0; - info("Waker: waiting for waiters to block\n"); + ksft_print_dbg_msg("Waker: waiting for waiters to block\n"); while (waiters_blocked.val < THREAD_MAX) usleep(1000); usleep(1000); while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) { - info("task_count: %d, waiters_woken: %d\n", + ksft_print_dbg_msg("task_count: %d, waiters_woken: %d\n", task_count, waiters_woken.val); if (args->lock) { - info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", - f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", + f2, &f2); futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); } - info("Waker: Calling signal\n"); + ksft_print_dbg_msg("Waker: Calling signal\n"); /* cond_signal */ old_val = f1; args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, @@ -221,28 +267,23 @@ void *signal_wakerfn(void *arg) FUTEX_PRIVATE_FLAG); if (args->ret < 0) args->ret = -errno; - info("futex: %x\n", f2); + ksft_print_dbg_msg("futex: %x\n", f2); if (args->lock) { - info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", - f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", + f2, &f2); futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); } - info("futex: %x\n", f2); - if (args->ret < 0) { - error("FUTEX_CMP_REQUEUE_PI failed\n", errno); - args->ret = RET_ERROR; - break; - } + ksft_print_dbg_msg("futex: %x\n", f2); + if (args->ret < 0) + ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n"); task_count += args->ret; usleep(SIGNAL_PERIOD_US); i++; /* we have to loop at least THREAD_MAX times */ if (i > MAX_WAKE_ITERS + THREAD_MAX) { - error("max signaling iterations (%d) reached, giving up on pending waiters.\n", - 0, MAX_WAKE_ITERS + THREAD_MAX); - args->ret = RET_ERROR; - break; + ksft_exit_fail_msg("max signaling iterations (%d) reached, giving up on pending waiters.\n", + MAX_WAKE_ITERS + THREAD_MAX); } } @@ -251,8 +292,8 @@ void *signal_wakerfn(void *arg) if (args->ret >= 0) args->ret = task_count; - info("Waker: exiting with %d\n", args->ret); - info("Waker: waiters_woken: %d\n", waiters_woken.val); + ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret); + ksft_print_dbg_msg("Waker: waiters_woken: %d\n", waiters_woken.val); pthread_exit((void *)&args->ret); } @@ -269,35 +310,40 @@ void *third_party_blocker(void *arg) ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); out: - if (args->ret || ret2) { - error("third_party_blocker() futex error", 0); - args->ret = RET_ERROR; - } + if (args->ret || ret2) + ksft_exit_fail_msg("third_party_blocker() futex error"); pthread_exit((void *)&args->ret); } -int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) +TEST_F(args, futex_requeue_pi) { - void *(*wakerfn)(void *) = signal_wakerfn; struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER; struct thread_arg waker_arg = THREAD_ARG_INITIALIZER; pthread_t waiter[THREAD_MAX], waker, blocker; - struct timespec ts, *tsp = NULL; + void *(*wakerfn)(void *) = signal_wakerfn; + bool third_party_owner = variant->owner; + long timeout_ns = variant->timeout_ns; + bool broadcast = variant->broadcast; struct thread_arg args[THREAD_MAX]; - int *waiter_ret; - int i, ret = RET_PASS; + struct timespec ts, *tsp = NULL; + bool lock = variant->locked; + int *waiter_ret, i, ret = 0; + + ksft_print_msg( + "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", + broadcast, lock, third_party_owner, timeout_ns); if (timeout_ns) { time_t secs; - info("timeout_ns = %ld\n", timeout_ns); + ksft_print_dbg_msg("timeout_ns = %ld\n", timeout_ns); ret = clock_gettime(CLOCK_MONOTONIC, &ts); secs = (ts.tv_nsec + timeout_ns) / 1000000000; ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000; ts.tv_sec += secs; - info("ts.tv_sec = %ld\n", ts.tv_sec); - info("ts.tv_nsec = %ld\n", ts.tv_nsec); + ksft_print_dbg_msg("ts.tv_sec = %ld\n", ts.tv_sec); + ksft_print_dbg_msg("ts.tv_nsec = %ld\n", ts.tv_nsec); tsp = &ts; } @@ -307,10 +353,7 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) if (third_party_owner) { if (create_rt_thread(&blocker, third_party_blocker, (void *)&blocker_arg, SCHED_FIFO, 1)) { - error("Creating third party blocker thread failed\n", - errno); - ret = RET_ERROR; - goto out; + ksft_exit_fail_msg("Creating third party blocker thread failed\n"); } } @@ -318,20 +361,16 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) for (i = 0; i < THREAD_MAX; i++) { args[i].id = i; args[i].timeout = tsp; - info("Starting thread %d\n", i); + ksft_print_dbg_msg("Starting thread %d\n", i); if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i], SCHED_FIFO, 1)) { - error("Creating waiting thread failed\n", errno); - ret = RET_ERROR; - goto out; + ksft_exit_fail_msg("Creating waiting thread failed\n"); } } waker_arg.lock = lock; if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg, SCHED_FIFO, 1)) { - error("Creating waker thread failed\n", errno); - ret = RET_ERROR; - goto out; + ksft_exit_fail_msg("Creating waker thread failed\n"); } /* Wait for threads to finish */ @@ -345,7 +384,6 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) pthread_join(blocker, NULL); pthread_join(waker, NULL); -out: if (!ret) { if (*waiter_ret) ret = *waiter_ret; @@ -355,66 +393,8 @@ out: ret = blocker_arg.ret; } - return ret; + if (ret) + ksft_test_result_fail("fail"); } -int main(int argc, char *argv[]) -{ - char *test_name; - int c, ret; - - while ((c = getopt(argc, argv, "bchlot:v:")) != -1) { - switch (c) { - case 'b': - broadcast = 1; - break; - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'l': - locked = 1; - break; - case 'o': - owner = 1; - locked = 0; - break; - case 't': - timeout_ns = atoi(optarg); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0])); - ksft_print_msg( - "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", - broadcast, locked, owner, timeout_ns); - - ret = asprintf(&test_name, - "%s broadcast=%d locked=%d owner=%d timeout=%ldns", - TEST_NAME, broadcast, locked, owner, timeout_ns); - if (ret < 0) { - ksft_print_msg("Failed to generate test name\n"); - test_name = TEST_NAME; - } - - /* - * FIXME: unit_test is obsolete now that we parse options and the - * various style of runs are done by run.sh - simplify the code and move - * unit_test into main() - */ - ret = unit_test(broadcast, locked, owner, timeout_ns); - - print_result(test_name, ret); - return ret; -} +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c index d0a4d332ea44..77135a22a583 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c @@ -23,67 +23,32 @@ #include <stdlib.h> #include <string.h> #include <time.h> -#include "futextest.h" -#include "logging.h" -#define TEST_NAME "futex-requeue-pi-mismatched-ops" +#include "futextest.h" +#include "../../kselftest_harness.h" futex_t f1 = FUTEX_INITIALIZER; futex_t f2 = FUTEX_INITIALIZER; int child_ret = 0; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *blocking_child(void *arg) { child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG); if (child_ret < 0) { child_ret = -errno; - error("futex_wait\n", errno); + ksft_exit_fail_msg("futex_wait\n"); } return (void *)&child_ret; } -int main(int argc, char *argv[]) +TEST(requeue_pi_mismatched_ops) { - int ret = RET_PASS; pthread_t child; - int c; + int ret; - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Detect mismatched requeue_pi operations\n", - basename(argv[0])); + if (pthread_create(&child, NULL, blocking_child, NULL)) + ksft_exit_fail_msg("pthread_create\n"); - if (pthread_create(&child, NULL, blocking_child, NULL)) { - error("pthread_create\n", errno); - ret = RET_ERROR; - goto out; - } /* Allow the child to block in the kernel. */ sleep(1); @@ -102,34 +67,27 @@ int main(int argc, char *argv[]) * FUTEX_WAKE. */ ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG); - if (ret == 1) { - ret = RET_PASS; - } else if (ret < 0) { - error("futex_wake\n", errno); - ret = RET_ERROR; - } else { - error("futex_wake did not wake the child\n", 0); - ret = RET_ERROR; - } + if (ret == 1) + ret = 0; + else if (ret < 0) + ksft_exit_fail_msg("futex_wake\n"); + else + ksft_exit_fail_msg("futex_wake did not wake the child\n"); } else { - error("futex_cmp_requeue_pi\n", errno); - ret = RET_ERROR; + ksft_exit_fail_msg("futex_cmp_requeue_pi\n"); } } else if (ret > 0) { - fail("futex_cmp_requeue_pi failed to detect the mismatch\n"); - ret = RET_FAIL; + ksft_test_result_fail("futex_cmp_requeue_pi failed to detect the mismatch\n"); } else { - error("futex_cmp_requeue_pi found no waiters\n", 0); - ret = RET_ERROR; + ksft_exit_fail_msg("futex_cmp_requeue_pi found no waiters\n"); } pthread_join(child, NULL); - if (!ret) - ret = child_ret; - - out: - /* If the kernel crashes, we shouldn't return at all. */ - print_result(TEST_NAME, ret); - return ret; + if (!ret && !child_ret) + ksft_test_result_pass("futex_requeue_pi_mismatched_ops passed\n"); + else + ksft_test_result_pass("futex_requeue_pi_mismatched_ops failed\n"); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c index c6b8f32990c8..e34ee0f9ebcc 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c @@ -24,11 +24,11 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> + #include "atomic.h" #include "futextest.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-requeue-pi-signal-restart" #define DELAY_US 100 futex_t f1 = FUTEX_INITIALIZER; @@ -37,15 +37,6 @@ atomic_t requeued = ATOMIC_INITIALIZER; int waiter_ret = 0; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, int policy, int prio) { @@ -57,35 +48,28 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, memset(&schedp, 0, sizeof(schedp)); ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); - if (ret) { - error("pthread_attr_setinheritsched\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_attr_setinheritsched\n"); ret = pthread_attr_setschedpolicy(&attr, policy); - if (ret) { - error("pthread_attr_setschedpolicy\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_attr_setschedpolicy\n"); schedp.sched_priority = prio; ret = pthread_attr_setschedparam(&attr, &schedp); - if (ret) { - error("pthread_attr_setschedparam\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_attr_setschedparam\n"); ret = pthread_create(pth, &attr, func, arg); - if (ret) { - error("pthread_create\n", ret); - return -1; - } + if (ret) + ksft_exit_fail_msg("pthread_create\n"); + return 0; } void handle_signal(int signo) { - info("signal received %s requeue\n", + ksft_print_dbg_msg("signal received %s requeue\n", requeued.val ? "after" : "prior to"); } @@ -94,78 +78,46 @@ void *waiterfn(void *arg) unsigned int old_val; int res; - waiter_ret = RET_PASS; - - info("Waiter running\n"); - info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("Waiter running\n"); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); old_val = f1; res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL, FUTEX_PRIVATE_FLAG); if (!requeued.val || errno != EWOULDBLOCK) { - fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n", + ksft_test_result_fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n", res, strerror(errno)); - info("w2:futex: %x\n", f2); + ksft_print_dbg_msg("w2:futex: %x\n", f2); if (!res) futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); - waiter_ret = RET_FAIL; } - info("Waiter exiting with %d\n", waiter_ret); pthread_exit(NULL); } -int main(int argc, char *argv[]) +TEST(futex_requeue_pi_signal_restart) { unsigned int old_val; struct sigaction sa; pthread_t waiter; - int c, res, ret = RET_PASS; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Test signal handling during requeue_pi\n", - basename(argv[0])); - ksft_print_msg("\tArguments: <none>\n"); + int res; sa.sa_handler = handle_signal; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; - if (sigaction(SIGUSR1, &sa, NULL)) { - error("sigaction\n", errno); - exit(1); - } + if (sigaction(SIGUSR1, &sa, NULL)) + ksft_exit_fail_msg("sigaction\n"); - info("m1:f2: %x\n", f2); - info("Creating waiter\n"); + ksft_print_dbg_msg("m1:f2: %x\n", f2); + ksft_print_dbg_msg("Creating waiter\n"); res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1); - if (res) { - error("Creating waiting thread failed", res); - ret = RET_ERROR; - goto out; - } + if (res) + ksft_exit_fail_msg("Creating waiting thread failed"); - info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); - info("m2:f2: %x\n", f2); + ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("m2:f2: %x\n", f2); futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG); - info("m3:f2: %x\n", f2); + ksft_print_dbg_msg("m3:f2: %x\n", f2); while (1) { /* @@ -173,11 +125,11 @@ int main(int argc, char *argv[]) * restart futex_wait_requeue_pi() in the kernel. Wait for the * waiter to block on f1 again. */ - info("Issuing SIGUSR1 to waiter\n"); + ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n"); pthread_kill(waiter, SIGUSR1); usleep(DELAY_US); - info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n"); + ksft_print_dbg_msg("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n"); old_val = f1; res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0, FUTEX_PRIVATE_FLAG); @@ -191,12 +143,10 @@ int main(int argc, char *argv[]) atomic_set(&requeued, 1); break; } else if (res < 0) { - error("FUTEX_CMP_REQUEUE_PI failed\n", errno); - ret = RET_ERROR; - break; + ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n"); } } - info("m4:f2: %x\n", f2); + ksft_print_dbg_msg("m4:f2: %x\n", f2); /* * Signal the waiter after requeue, waiter should return from @@ -204,19 +154,14 @@ int main(int argc, char *argv[]) * futex_unlock_pi() can't happen before the signal wakeup is detected * in the kernel. */ - info("Issuing SIGUSR1 to waiter\n"); + ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n"); pthread_kill(waiter, SIGUSR1); - info("Waiting for waiter to return\n"); + ksft_print_dbg_msg("Waiting for waiter to return\n"); pthread_join(waiter, NULL); - info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2); + ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2); futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); - info("m5:f2: %x\n", f2); - - out: - if (ret == RET_PASS && waiter_ret) - ret = waiter_ret; - - print_result(TEST_NAME, ret); - return ret; + ksft_print_dbg_msg("m5:f2: %x\n", f2); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c index 685140d9b93d..152ca4612886 100644 --- a/tools/testing/selftests/futex/functional/futex_wait.c +++ b/tools/testing/selftests/futex/functional/futex_wait.c @@ -9,25 +9,16 @@ #include <sys/shm.h> #include <sys/mman.h> #include <fcntl.h> -#include "logging.h" + #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait" #define timeout_ns 30000000 #define WAKE_WAIT_US 10000 #define SHM_PATH "futex_shm_file" void *futex; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - static void *waiterfn(void *arg) { struct timespec to; @@ -45,53 +36,37 @@ static void *waiterfn(void *arg) return NULL; } -int main(int argc, char *argv[]) +TEST(private_futex) { - int res, ret = RET_PASS, fd, c, shm_id; - u_int32_t f_private = 0, *shared_data; unsigned int flags = FUTEX_PRIVATE_FLAG; + u_int32_t f_private = 0; pthread_t waiter; - void *shm; + int res; futex = &f_private; - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(3); - ksft_print_msg("%s: Test futex_wait\n", basename(argv[0])); - /* Testing a private futex */ - info("Calling private futex_wait on futex: %p\n", futex); + ksft_print_dbg_msg("Calling private futex_wait on futex: %p\n", futex); if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Calling private futex_wake on futex: %p\n", futex); + ksft_print_dbg_msg("Calling private futex_wake on futex: %p\n", futex); res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG); if (res != 1) { ksft_test_result_fail("futex_wake private returned: %d %s\n", errno, strerror(errno)); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wake private succeeds\n"); } +} + +TEST(anon_page) +{ + u_int32_t *shared_data; + pthread_t waiter; + int res, shm_id; /* Testing an anon page shared memory */ shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666); @@ -105,67 +80,65 @@ int main(int argc, char *argv[]) *shared_data = 0; futex = shared_data; - info("Calling shared (page anon) futex_wait on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (page anon) futex_wait on futex: %p\n", futex); if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Calling shared (page anon) futex_wake on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (page anon) futex_wake on futex: %p\n", futex); res = futex_wake(futex, 1, 0); if (res != 1) { ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n", errno, strerror(errno)); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wake shared (page anon) succeeds\n"); } + shmdt(shared_data); +} + +TEST(file_backed) +{ + u_int32_t f_private = 0; + pthread_t waiter; + int res, fd; + void *shm; /* Testing a file backed shared memory */ fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); - if (fd < 0) { - perror("open"); - exit(1); - } + if (fd < 0) + ksft_exit_fail_msg("open"); - if (ftruncate(fd, sizeof(f_private))) { - perror("ftruncate"); - exit(1); - } + if (ftruncate(fd, sizeof(f_private))) + ksft_exit_fail_msg("ftruncate"); shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (shm == MAP_FAILED) { - perror("mmap"); - exit(1); - } + if (shm == MAP_FAILED) + ksft_exit_fail_msg("mmap"); memcpy(shm, &f_private, sizeof(f_private)); futex = shm; - info("Calling shared (file backed) futex_wait on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (file backed) futex_wait on futex: %p\n", futex); if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); - info("Calling shared (file backed) futex_wake on futex: %p\n", futex); + ksft_print_dbg_msg("Calling shared (file backed) futex_wake on futex: %p\n", futex); res = futex_wake(shm, 1, 0); if (res != 1) { ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n", errno, strerror(errno)); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wake shared (file backed) succeeds\n"); } - /* Freeing resources */ - shmdt(shared_data); munmap(shm, sizeof(f_private)); remove(SHM_PATH); close(fd); - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c index fb4148f23fa3..8952ebda14ab 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c +++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c @@ -27,10 +27,9 @@ #include <libgen.h> #include <signal.h> -#include "logging.h" #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait-private-mapped-file" #define PAGE_SZ 4096 char pad[PAGE_SZ] = {1}; @@ -40,86 +39,44 @@ char pad2[PAGE_SZ] = {1}; #define WAKE_WAIT_US 3000000 struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0}; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *thr_futex_wait(void *arg) { int ret; - info("futex wait\n"); + ksft_print_dbg_msg("futex wait\n"); ret = futex_wait(&val, 1, &wait_timeout, 0); - if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) { - error("futex error.\n", errno); - print_result(TEST_NAME, RET_ERROR); - exit(RET_ERROR); - } + if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) + ksft_exit_fail_msg("futex error.\n"); if (ret && errno == ETIMEDOUT) - fail("waiter timedout\n"); + ksft_exit_fail_msg("waiter timedout\n"); - info("futex_wait: ret = %d, errno = %d\n", ret, errno); + ksft_print_dbg_msg("futex_wait: ret = %d, errno = %d\n", ret, errno); return NULL; } -int main(int argc, char **argv) +TEST(wait_private_mapped_file) { pthread_t thr; - int ret = RET_PASS; int res; - int c; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg( - "%s: Test the futex value of private file mappings in FUTEX_WAIT\n", - basename(argv[0])); - - ret = pthread_create(&thr, NULL, thr_futex_wait, NULL); - if (ret < 0) { - fprintf(stderr, "pthread_create error\n"); - ret = RET_ERROR; - goto out; - } - - info("wait a while\n"); + + res = pthread_create(&thr, NULL, thr_futex_wait, NULL); + if (res < 0) + ksft_exit_fail_msg("pthread_create error\n"); + + ksft_print_dbg_msg("wait a while\n"); usleep(WAKE_WAIT_US); val = 2; res = futex_wake(&val, 1, 0); - info("futex_wake %d\n", res); - if (res != 1) { - fail("FUTEX_WAKE didn't find the waiting thread.\n"); - ret = RET_FAIL; - } + ksft_print_dbg_msg("futex_wake %d\n", res); + if (res != 1) + ksft_exit_fail_msg("FUTEX_WAKE didn't find the waiting thread.\n"); - info("join\n"); + ksft_print_dbg_msg("join\n"); pthread_join(thr, NULL); - out: - print_result(TEST_NAME, ret); - return ret; + ksft_test_result_pass("wait_private_mapped_file"); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c index d183f878360b..0c8766aced2e 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -16,26 +16,15 @@ *****************************************************************************/ #include <pthread.h> + #include "futextest.h" #include "futex2test.h" -#include "logging.h" - -#define TEST_NAME "futex-wait-timeout" +#include "../../kselftest_harness.h" static long timeout_ns = 100000; /* 100us default timeout */ static futex_t futex_pi; static pthread_barrier_t barrier; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -t N Timeout in nanoseconds (default: 100,000)\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - /* * Get a PI lock and hold it forever, so the main thread lock_pi will block * and we can test the timeout @@ -47,13 +36,13 @@ void *get_pi_lock(void *arg) ret = futex_lock_pi(&futex_pi, NULL, 0, 0); if (ret != 0) - error("futex_lock_pi failed\n", ret); + ksft_exit_fail_msg("futex_lock_pi failed\n"); pthread_barrier_wait(&barrier); /* Blocks forever */ ret = futex_wait(&lock, 0, NULL, 0); - error("futex_wait failed\n", ret); + ksft_exit_fail_msg("futex_wait failed\n"); return NULL; } @@ -61,12 +50,11 @@ void *get_pi_lock(void *arg) /* * Check if the function returned the expected error */ -static void test_timeout(int res, int *ret, char *test_name, int err) +static void test_timeout(int res, char *test_name, int err) { if (!res || errno != err) { ksft_test_result_fail("%s returned %d\n", test_name, res < 0 ? errno : res); - *ret = RET_FAIL; } else { ksft_test_result_pass("%s succeeds\n", test_name); } @@ -78,10 +66,8 @@ static void test_timeout(int res, int *ret, char *test_name, int err) static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to, long timeout_ns) { - if (clock_gettime(clockid, to)) { - error("clock_gettime failed\n", errno); - return errno; - } + if (clock_gettime(clockid, to)) + ksft_exit_fail_msg("clock_gettime failed\n"); to->tv_nsec += timeout_ns; @@ -93,83 +79,66 @@ static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to, return 0; } -int main(int argc, char *argv[]) +TEST(wait_bitset) { futex_t f1 = FUTEX_INITIALIZER; - int res, ret = RET_PASS; struct timespec to; - pthread_t thread; - int c; - struct futex_waitv waitv = { - .uaddr = (uintptr_t)&f1, - .val = f1, - .flags = FUTEX_32, - .__reserved = 0 - }; - - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 't': - timeout_ns = atoi(optarg); - break; - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(9); - ksft_print_msg("%s: Block on a futex and wait for timeout\n", - basename(argv[0])); - ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); - - pthread_barrier_init(&barrier, NULL, 2); - pthread_create(&thread, NULL, get_pi_lock, NULL); + int res; /* initialize relative timeout */ to.tv_sec = 0; to.tv_nsec = timeout_ns; res = futex_wait(&f1, f1, &to, 0); - test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT); + test_timeout(res, "futex_wait relative", ETIMEDOUT); /* FUTEX_WAIT_BITSET with CLOCK_REALTIME */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME); - test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT); + test_timeout(res, "futex_wait_bitset realtime", ETIMEDOUT); /* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_bitset(&f1, f1, &to, 1, 0); - test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT); + test_timeout(res, "futex_wait_bitset monotonic", ETIMEDOUT); +} + +TEST(requeue_pi) +{ + futex_t f1 = FUTEX_INITIALIZER; + struct timespec to; + int res; /* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME); - test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT); + test_timeout(res, "futex_wait_requeue_pi realtime", ETIMEDOUT); /* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0); - test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT); + test_timeout(res, "futex_wait_requeue_pi monotonic", ETIMEDOUT); + +} + +TEST(lock_pi) +{ + struct timespec to; + pthread_t thread; + int res; + + /* Create a thread that will lock forever so any waiter will timeout */ + pthread_barrier_init(&barrier, NULL, 2); + pthread_create(&thread, NULL, get_pi_lock, NULL); /* Wait until the other thread calls futex_lock_pi() */ pthread_barrier_wait(&barrier); pthread_barrier_destroy(&barrier); + /* * FUTEX_LOCK_PI with CLOCK_REALTIME * Due to historical reasons, FUTEX_LOCK_PI supports only realtime @@ -181,26 +150,38 @@ int main(int argc, char *argv[]) * smaller than realtime and the syscall will timeout immediately. */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_lock_pi(&futex_pi, &to, 0, 0); - test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT); + test_timeout(res, "futex_lock_pi realtime", ETIMEDOUT); /* Test operations that don't support FUTEX_CLOCK_REALTIME */ res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME); - test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS); + test_timeout(res, "futex_lock_pi invalid timeout flag", ENOSYS); +} + +TEST(waitv) +{ + futex_t f1 = FUTEX_INITIALIZER; + struct futex_waitv waitv = { + .uaddr = (uintptr_t)&f1, + .val = f1, + .flags = FUTEX_32, + .__reserved = 0, + }; + struct timespec to; + int res; /* futex_waitv with CLOCK_MONOTONIC */ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); - test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT); + test_timeout(res, "futex_waitv monotonic", ETIMEDOUT); /* futex_waitv with CLOCK_REALTIME */ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns)) - return RET_FAIL; + ksft_test_result_error("get_time error"); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME); - test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT); - - ksft_print_cnts(); - return ret; + test_timeout(res, "futex_waitv realtime", ETIMEDOUT); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c index ed9cd07e31c1..ce2301500d83 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c +++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c @@ -29,95 +29,55 @@ #include <linux/futex.h> #include <libgen.h> -#include "logging.h" #include "futextest.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait-uninitialized-heap" #define WAIT_US 5000000 static int child_blocked = 1; -static int child_ret; +static bool child_ret; void *buf; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *wait_thread(void *arg) { int res; - child_ret = RET_PASS; + child_ret = true; res = futex_wait(buf, 1, NULL, 0); child_blocked = 0; if (res != 0 && errno != EWOULDBLOCK) { - error("futex failure\n", errno); - child_ret = RET_ERROR; + ksft_exit_fail_msg("futex failure\n"); + child_ret = false; } pthread_exit(NULL); } -int main(int argc, char **argv) +TEST(futex_wait_uninitialized_heap) { - int c, ret = RET_PASS; long page_size; pthread_t thr; - - while ((c = getopt(argc, argv, "chv:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } + int ret; page_size = sysconf(_SC_PAGESIZE); buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); - if (buf == (void *)-1) { - error("mmap\n", errno); - exit(1); - } - - ksft_print_header(); - ksft_set_plan(1); - ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n", - basename(argv[0])); - + if (buf == (void *)-1) + ksft_exit_fail_msg("mmap\n"); ret = pthread_create(&thr, NULL, wait_thread, NULL); - if (ret) { - error("pthread_create\n", errno); - ret = RET_ERROR; - goto out; - } + if (ret) + ksft_exit_fail_msg("pthread_create\n"); - info("waiting %dus for child to return\n", WAIT_US); + ksft_print_dbg_msg("waiting %dus for child to return\n", WAIT_US); usleep(WAIT_US); - ret = child_ret; - if (child_blocked) { - fail("child blocked in kernel\n"); - ret = RET_FAIL; - } + if (child_blocked) + ksft_test_result_fail("child blocked in kernel\n"); - out: - print_result(TEST_NAME, ret); - return ret; + if (!child_ret) + ksft_test_result_fail("child error\n"); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 2d8230da9064..36b7a54a4085 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -21,72 +21,44 @@ #include <stdlib.h> #include <string.h> #include <time.h> + #include "futextest.h" #include "futex2test.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait-wouldblock" #define timeout_ns 100000 -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - -int main(int argc, char *argv[]) +TEST(futex_wait_wouldblock) { struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; futex_t f1 = FUTEX_INITIALIZER; - int res, ret = RET_PASS; - int c; - struct futex_waitv waitv = { - .uaddr = (uintptr_t)&f1, - .val = f1+1, - .flags = FUTEX_32, - .__reserved = 0 - }; + int res; - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(2); - ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", - basename(argv[0])); - - info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + ksft_print_dbg_msg("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG); if (!res || errno != EWOULDBLOCK) { ksft_test_result_fail("futex_wait returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_wait\n"); } +} - if (clock_gettime(CLOCK_MONOTONIC, &to)) { - error("clock_gettime failed\n", errno); - return errno; - } +TEST(futex_waitv_wouldblock) +{ + struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + futex_t f1 = FUTEX_INITIALIZER; + struct futex_waitv waitv = { + .uaddr = (uintptr_t)&f1, + .val = f1 + 1, + .flags = FUTEX_32, + .__reserved = 0, + }; + int res; + + if (clock_gettime(CLOCK_MONOTONIC, &to)) + ksft_exit_fail_msg("clock_gettime failed %d\n", errno); to.tv_nsec += timeout_ns; @@ -95,17 +67,15 @@ int main(int argc, char *argv[]) to.tv_nsec -= 1000000000; } - info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + ksft_print_dbg_msg("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); if (!res || errno != EWOULDBLOCK) { ksft_test_result_fail("futex_waitv returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv\n"); } - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c index a94337f677e1..c684b10eb76e 100644 --- a/tools/testing/selftests/futex/functional/futex_waitv.c +++ b/tools/testing/selftests/futex/functional/futex_waitv.c @@ -15,25 +15,16 @@ #include <pthread.h> #include <stdint.h> #include <sys/shm.h> + #include "futextest.h" #include "futex2test.h" -#include "logging.h" +#include "../../kselftest_harness.h" -#define TEST_NAME "futex-wait" #define WAKE_WAIT_US 10000 #define NR_FUTEXES 30 static struct futex_waitv waitv[NR_FUTEXES]; u_int32_t futexes[NR_FUTEXES] = {0}; -void usage(char *prog) -{ - printf("Usage: %s\n", prog); - printf(" -c Use color\n"); - printf(" -h Display this help message\n"); - printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", - VQUIET, VCRITICAL, VINFO); -} - void *waiterfn(void *arg) { struct timespec to; @@ -41,7 +32,7 @@ void *waiterfn(void *arg) /* setting absolute timeout for futex2 */ if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -57,34 +48,10 @@ void *waiterfn(void *arg) return NULL; } -int main(int argc, char *argv[]) +TEST(private_waitv) { pthread_t waiter; - int res, ret = RET_PASS; - struct timespec to; - int c, i; - - while ((c = getopt(argc, argv, "cht:v:")) != -1) { - switch (c) { - case 'c': - log_color(1); - break; - case 'h': - usage(basename(argv[0])); - exit(0); - case 'v': - log_verbosity(atoi(optarg)); - break; - default: - usage(basename(argv[0])); - exit(1); - } - } - - ksft_print_header(); - ksft_set_plan(7); - ksft_print_msg("%s: Test FUTEX_WAITV\n", - basename(argv[0])); + int res, i; for (i = 0; i < NR_FUTEXES; i++) { waitv[i].uaddr = (uintptr_t)&futexes[i]; @@ -95,7 +62,7 @@ int main(int argc, char *argv[]) /* Private waitv */ if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); @@ -104,10 +71,15 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_wake private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv private\n"); } +} + +TEST(shared_waitv) +{ + pthread_t waiter; + int res, i; /* Shared waitv */ for (i = 0; i < NR_FUTEXES; i++) { @@ -128,7 +100,7 @@ int main(int argc, char *argv[]) } if (pthread_create(&waiter, NULL, waiterfn, NULL)) - error("pthread_create failed\n", errno); + ksft_exit_fail_msg("pthread_create failed\n"); usleep(WAKE_WAIT_US); @@ -137,19 +109,24 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_wake shared returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv shared\n"); } for (i = 0; i < NR_FUTEXES; i++) shmdt(u64_to_ptr(waitv[i].uaddr)); +} + +TEST(invalid_flag) +{ + struct timespec to; + int res; /* Testing a waiter without FUTEX_32 flag */ waitv[0].flags = FUTEX_PRIVATE_FLAG; if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -158,17 +135,22 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv without FUTEX_32\n"); } +} + +TEST(unaligned_address) +{ + struct timespec to; + int res; /* Testing a waiter with an unaligned address */ waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32; waitv[0].uaddr = 1; if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -177,16 +159,21 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_wake private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv with an unaligned address\n"); } +} + +TEST(null_address) +{ + struct timespec to; + int res; /* Testing a NULL address for waiters.uaddr */ waitv[0].uaddr = 0x00000000; if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -195,14 +182,13 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n"); } /* Testing a NULL address for *waiters */ if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -211,14 +197,19 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv NULL address in *waiters\n"); } +} + +TEST(invalid_clockid) +{ + struct timespec to; + int res; /* Testing an invalid clockid */ if (clock_gettime(CLOCK_MONOTONIC, &to)) - error("gettime64 failed\n", errno); + ksft_exit_fail_msg("gettime64 failed\n"); to.tv_sec++; @@ -227,11 +218,9 @@ int main(int argc, char *argv[]) ksft_test_result_fail("futex_waitv private returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); - ret = RET_FAIL; } else { ksft_test_result_pass("futex_waitv invalid clockid\n"); } - - ksft_print_cnts(); - return ret; } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 81739849f299..e88545c06d57 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -18,74 +18,36 @@ # ############################################################################### -# Test for a color capable console -if [ -z "$USE_COLOR" ]; then - tput setf 7 || tput setaf 7 - if [ $? -eq 0 ]; then - USE_COLOR=1 - tput sgr0 - fi -fi -if [ "$USE_COLOR" -eq 1 ]; then - COLOR="-c" -fi - - echo -# requeue pi testing -# without timeouts -./futex_requeue_pi $COLOR -./futex_requeue_pi $COLOR -b -./futex_requeue_pi $COLOR -b -l -./futex_requeue_pi $COLOR -b -o -./futex_requeue_pi $COLOR -l -./futex_requeue_pi $COLOR -o -# with timeouts -./futex_requeue_pi $COLOR -b -l -t 5000 -./futex_requeue_pi $COLOR -l -t 5000 -./futex_requeue_pi $COLOR -b -l -t 500000 -./futex_requeue_pi $COLOR -l -t 500000 -./futex_requeue_pi $COLOR -b -t 5000 -./futex_requeue_pi $COLOR -t 5000 -./futex_requeue_pi $COLOR -b -t 500000 -./futex_requeue_pi $COLOR -t 500000 -./futex_requeue_pi $COLOR -b -o -t 5000 -./futex_requeue_pi $COLOR -l -t 5000 -./futex_requeue_pi $COLOR -b -o -t 500000 -./futex_requeue_pi $COLOR -l -t 500000 -# with long timeout -./futex_requeue_pi $COLOR -b -l -t 2000000000 -./futex_requeue_pi $COLOR -l -t 2000000000 - +./futex_requeue_pi echo -./futex_requeue_pi_mismatched_ops $COLOR +./futex_requeue_pi_mismatched_ops echo -./futex_requeue_pi_signal_restart $COLOR +./futex_requeue_pi_signal_restart echo -./futex_wait_timeout $COLOR +./futex_wait_timeout echo -./futex_wait_wouldblock $COLOR +./futex_wait_wouldblock echo -./futex_wait_uninitialized_heap $COLOR -./futex_wait_private_mapped_file $COLOR +./futex_wait_uninitialized_heap +./futex_wait_private_mapped_file echo -./futex_wait $COLOR +./futex_wait echo -./futex_requeue $COLOR +./futex_requeue echo -./futex_waitv $COLOR +./futex_waitv echo -./futex_priv_hash $COLOR -./futex_priv_hash -g $COLOR +./futex_priv_hash echo -./futex_numa_mpol $COLOR +./futex_numa_mpol diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h index 7a5fd1d5355e..3d48e9789d9f 100644 --- a/tools/testing/selftests/futex/include/futextest.h +++ b/tools/testing/selftests/futex/include/futextest.h @@ -58,6 +58,17 @@ typedef volatile u_int32_t futex_t; #define SYS_futex SYS_futex_time64 #endif +/* + * On 32bit systems if we use "-D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64" or if + * we are using a newer compiler then the size of the timestamps will be 64bit, + * however, the SYS_futex will still point to the 32bit futex system call. + */ +#if __SIZEOF_POINTER__ == 4 && defined(SYS_futex_time64) && \ + defined(_TIME_BITS) && _TIME_BITS == 64 +# undef SYS_futex +# define SYS_futex SYS_futex_time64 +#endif + /** * futex() - SYS_futex syscall wrapper * @uaddr: address of first futex diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h deleted file mode 100644 index 874c69ce5cce..000000000000 --- a/tools/testing/selftests/futex/include/logging.h +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/****************************************************************************** - * - * Copyright © International Business Machines Corp., 2009 - * - * DESCRIPTION - * Glibc independent futex library for testing kernel functionality. - * - * AUTHOR - * Darren Hart <dvhart@linux.intel.com> - * - * HISTORY - * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com> - * - *****************************************************************************/ - -#ifndef _LOGGING_H -#define _LOGGING_H - -#include <stdio.h> -#include <string.h> -#include <unistd.h> -#include <linux/futex.h> -#include "kselftest.h" - -/* - * Define PASS, ERROR, and FAIL strings with and without color escape - * sequences, default to no color. - */ -#define ESC 0x1B, '[' -#define BRIGHT '1' -#define GREEN '3', '2' -#define YELLOW '3', '3' -#define RED '3', '1' -#define ESCEND 'm' -#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND -#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND -#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND -#define RESET_COLOR ESC, '0', 'm' -static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S', - RESET_COLOR, 0}; -static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R', - RESET_COLOR, 0}; -static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L', - RESET_COLOR, 0}; -static const char INFO_NORMAL[] = " INFO"; -static const char PASS_NORMAL[] = " PASS"; -static const char ERROR_NORMAL[] = "ERROR"; -static const char FAIL_NORMAL[] = " FAIL"; -const char *INFO = INFO_NORMAL; -const char *PASS = PASS_NORMAL; -const char *ERROR = ERROR_NORMAL; -const char *FAIL = FAIL_NORMAL; - -/* Verbosity setting for INFO messages */ -#define VQUIET 0 -#define VCRITICAL 1 -#define VINFO 2 -#define VMAX VINFO -int _verbose = VCRITICAL; - -/* Functional test return codes */ -#define RET_PASS 0 -#define RET_ERROR -1 -#define RET_FAIL -2 - -/** - * log_color() - Use colored output for PASS, ERROR, and FAIL strings - * @use_color: use color (1) or not (0) - */ -void log_color(int use_color) -{ - if (use_color) { - PASS = PASS_COLOR; - ERROR = ERROR_COLOR; - FAIL = FAIL_COLOR; - } else { - PASS = PASS_NORMAL; - ERROR = ERROR_NORMAL; - FAIL = FAIL_NORMAL; - } -} - -/** - * log_verbosity() - Set verbosity of test output - * @verbose: Enable (1) verbose output or not (0) - * - * Currently setting verbose=1 will enable INFO messages and 0 will disable - * them. FAIL and ERROR messages are always displayed. - */ -void log_verbosity(int level) -{ - if (level > VMAX) - level = VMAX; - else if (level < 0) - level = 0; - _verbose = level; -} - -/** - * print_result() - Print standard PASS | ERROR | FAIL results - * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL - * - * print_result() is primarily intended for functional tests. - */ -void print_result(const char *test_name, int ret) -{ - switch (ret) { - case RET_PASS: - ksft_test_result_pass("%s\n", test_name); - ksft_print_cnts(); - return; - case RET_ERROR: - ksft_test_result_error("%s\n", test_name); - ksft_print_cnts(); - return; - case RET_FAIL: - ksft_test_result_fail("%s\n", test_name); - ksft_print_cnts(); - return; - } -} - -/* log level macros */ -#define info(message, vargs...) \ -do { \ - if (_verbose >= VINFO) \ - fprintf(stderr, "\t%s: "message, INFO, ##vargs); \ -} while (0) - -#define error(message, err, args...) \ -do { \ - if (_verbose >= VCRITICAL) {\ - if (err) \ - fprintf(stderr, "\t%s: %s: "message, \ - ERROR, strerror(err), ##args); \ - else \ - fprintf(stderr, "\t%s: "message, ERROR, ##args); \ - } \ -} while (0) - -#define fail(message, args...) \ -do { \ - if (_verbose >= VCRITICAL) \ - fprintf(stderr, "\t%s: "message, FAIL, ##args); \ -} while (0) - -#endif diff --git a/tools/testing/selftests/hid/hid_common.h b/tools/testing/selftests/hid/hid_common.h index f77f69c6657d..8085519c47cb 100644 --- a/tools/testing/selftests/hid/hid_common.h +++ b/tools/testing/selftests/hid/hid_common.h @@ -230,6 +230,12 @@ static int uhid_event(struct __test_metadata *_metadata, int fd) break; case UHID_SET_REPORT: UHID_LOG("UHID_SET_REPORT from uhid-dev"); + + answer.type = UHID_SET_REPORT_REPLY; + answer.u.set_report_reply.id = ev.u.set_report.id; + answer.u.set_report_reply.err = 0; /* success */ + + uhid_write(_metadata, fd, &answer); break; default: TH_LOG("Invalid event from uhid-dev: %u", ev.type); diff --git a/tools/testing/selftests/hid/hidraw.c b/tools/testing/selftests/hid/hidraw.c index 821db37ba4bb..d625772f8b7c 100644 --- a/tools/testing/selftests/hid/hidraw.c +++ b/tools/testing/selftests/hid/hidraw.c @@ -2,6 +2,9 @@ /* Copyright (c) 2022-2024 Red Hat */ #include "hid_common.h" +#include <linux/input.h> +#include <string.h> +#include <sys/ioctl.h> /* for older kernels */ #ifndef HIDIOCREVOKE @@ -215,6 +218,476 @@ TEST_F(hidraw, write_event_revoked) pthread_mutex_unlock(&uhid_output_mtx); } +/* + * Test HIDIOCGRDESCSIZE ioctl to get report descriptor size + */ +TEST_F(hidraw, ioctl_rdescsize) +{ + int desc_size = 0; + int err; + + /* call HIDIOCGRDESCSIZE ioctl */ + err = ioctl(self->hidraw_fd, HIDIOCGRDESCSIZE, &desc_size); + ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESCSIZE ioctl failed"); + + /* verify the size matches our test report descriptor */ + ASSERT_EQ(desc_size, sizeof(rdesc)) + TH_LOG("expected size %zu, got %d", sizeof(rdesc), desc_size); +} + +/* + * Test HIDIOCGRDESC ioctl to get report descriptor data + */ +TEST_F(hidraw, ioctl_rdesc) +{ + struct hidraw_report_descriptor desc; + int err; + + /* get the full report descriptor */ + desc.size = sizeof(rdesc); + err = ioctl(self->hidraw_fd, HIDIOCGRDESC, &desc); + ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESC ioctl failed"); + + /* verify the descriptor data matches our test descriptor */ + ASSERT_EQ(memcmp(desc.value, rdesc, sizeof(rdesc)), 0) + TH_LOG("report descriptor data mismatch"); +} + +/* + * Test HIDIOCGRDESC ioctl with smaller buffer size + */ +TEST_F(hidraw, ioctl_rdesc_small_buffer) +{ + struct hidraw_report_descriptor desc; + int err; + size_t small_size = sizeof(rdesc) / 2; /* request half the descriptor size */ + + /* get partial report descriptor */ + desc.size = small_size; + err = ioctl(self->hidraw_fd, HIDIOCGRDESC, &desc); + ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRDESC ioctl failed with small buffer"); + + /* verify we got the first part of the descriptor */ + ASSERT_EQ(memcmp(desc.value, rdesc, small_size), 0) + TH_LOG("partial report descriptor data mismatch"); +} + +/* + * Test HIDIOCGRAWINFO ioctl to get device information + */ +TEST_F(hidraw, ioctl_rawinfo) +{ + struct hidraw_devinfo devinfo; + int err; + + /* get device info */ + err = ioctl(self->hidraw_fd, HIDIOCGRAWINFO, &devinfo); + ASSERT_EQ(err, 0) TH_LOG("HIDIOCGRAWINFO ioctl failed"); + + /* verify device info matches our test setup */ + ASSERT_EQ(devinfo.bustype, BUS_USB) + TH_LOG("expected bustype 0x03, got 0x%x", devinfo.bustype); + ASSERT_EQ(devinfo.vendor, 0x0001) + TH_LOG("expected vendor 0x0001, got 0x%x", devinfo.vendor); + ASSERT_EQ(devinfo.product, 0x0a37) + TH_LOG("expected product 0x0a37, got 0x%x", devinfo.product); +} + +/* + * Test HIDIOCGFEATURE ioctl to get feature report + */ +TEST_F(hidraw, ioctl_gfeature) +{ + __u8 buf[10] = {0}; + int err; + + /* set report ID 1 in first byte */ + buf[0] = 1; + + /* get feature report */ + err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); + ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGFEATURE ioctl failed, got %d", err); + + /* verify we got the expected feature data */ + ASSERT_EQ(buf[0], feature_data[0]) + TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]); + ASSERT_EQ(buf[1], feature_data[1]) + TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]); +} + +/* + * Test HIDIOCGFEATURE ioctl with invalid report ID + */ +TEST_F(hidraw, ioctl_gfeature_invalid) +{ + __u8 buf[10] = {0}; + int err; + + /* set invalid report ID (not 1) */ + buf[0] = 2; + + /* try to get feature report */ + err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); + ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE should have failed with invalid report ID"); + ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno); +} + +/* + * Test ioctl with incorrect nr bits + */ +TEST_F(hidraw, ioctl_invalid_nr) +{ + char buf[256] = {0}; + int err; + unsigned int bad_cmd; + + /* + * craft an ioctl command with wrong _IOC_NR bits + */ + bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x00, sizeof(buf)); /* 0 is not valid */ + + /* test the ioctl */ + err = ioctl(self->hidraw_fd, bad_cmd, buf); + ASSERT_LT(err, 0) TH_LOG("ioctl read-write with wrong _IOC_NR (0) should have failed"); + ASSERT_EQ(errno, ENOTTY) + TH_LOG("expected ENOTTY for wrong read-write _IOC_NR (0), got errno %d", errno); + + /* + * craft an ioctl command with wrong _IOC_NR bits + */ + bad_cmd = _IOC(_IOC_READ, 'H', 0x00, sizeof(buf)); /* 0 is not valid */ + + /* test the ioctl */ + err = ioctl(self->hidraw_fd, bad_cmd, buf); + ASSERT_LT(err, 0) TH_LOG("ioctl read-only with wrong _IOC_NR (0) should have failed"); + ASSERT_EQ(errno, ENOTTY) + TH_LOG("expected ENOTTY for wrong read-only _IOC_NR (0), got errno %d", errno); + + /* also test with bigger number */ + bad_cmd = _IOC(_IOC_READ, 'H', 0x42, sizeof(buf)); /* 0x42 is not valid as well */ + + err = ioctl(self->hidraw_fd, bad_cmd, buf); + ASSERT_LT(err, 0) TH_LOG("ioctl read-only with wrong _IOC_NR (0x42) should have failed"); + ASSERT_EQ(errno, ENOTTY) + TH_LOG("expected ENOTTY for wrong read-only _IOC_NR (0x42), got errno %d", errno); + + /* also test with bigger number: 0x42 is not valid as well */ + bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x42, sizeof(buf)); + + err = ioctl(self->hidraw_fd, bad_cmd, buf); + ASSERT_LT(err, 0) TH_LOG("ioctl read-write with wrong _IOC_NR (0x42) should have failed"); + ASSERT_EQ(errno, ENOTTY) + TH_LOG("expected ENOTTY for wrong read-write _IOC_NR (0x42), got errno %d", errno); +} + +/* + * Test ioctl with incorrect type bits + */ +TEST_F(hidraw, ioctl_invalid_type) +{ + char buf[256] = {0}; + int err; + unsigned int bad_cmd; + + /* + * craft an ioctl command with wrong _IOC_TYPE bits + */ + bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'I', 0x01, sizeof(buf)); /* 'I' should be 'H' */ + + /* test the ioctl */ + err = ioctl(self->hidraw_fd, bad_cmd, buf); + ASSERT_LT(err, 0) TH_LOG("ioctl with wrong _IOC_TYPE (I) should have failed"); + ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_NR, got errno %d", errno); +} + +/* + * Test HIDIOCGFEATURE ioctl with incorrect _IOC_DIR bits + */ +TEST_F(hidraw, ioctl_gfeature_invalid_dir) +{ + __u8 buf[10] = {0}; + int err; + unsigned int bad_cmd; + + /* set report ID 1 in first byte */ + buf[0] = 1; + + /* + * craft an ioctl command with wrong _IOC_DIR bits + * HIDIOCGFEATURE should have _IOC_WRITE|_IOC_READ, let's use only _IOC_WRITE + */ + bad_cmd = _IOC(_IOC_WRITE, 'H', 0x07, sizeof(buf)); /* should be _IOC_WRITE|_IOC_READ */ + + /* try to get feature report with wrong direction bits */ + err = ioctl(self->hidraw_fd, bad_cmd, buf); + ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE with wrong _IOC_DIR should have failed"); + ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno); + + /* also test with only _IOC_READ */ + bad_cmd = _IOC(_IOC_READ, 'H', 0x07, sizeof(buf)); /* should be _IOC_WRITE|_IOC_READ */ + + err = ioctl(self->hidraw_fd, bad_cmd, buf); + ASSERT_LT(err, 0) TH_LOG("HIDIOCGFEATURE with wrong _IOC_DIR should have failed"); + ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno); +} + +/* + * Test read-only ioctl with incorrect _IOC_DIR bits + */ +TEST_F(hidraw, ioctl_readonly_invalid_dir) +{ + char buf[256] = {0}; + int err; + unsigned int bad_cmd; + + /* + * craft an ioctl command with wrong _IOC_DIR bits + * HIDIOCGRAWNAME should have _IOC_READ, let's use _IOC_WRITE + */ + bad_cmd = _IOC(_IOC_WRITE, 'H', 0x04, sizeof(buf)); /* should be _IOC_READ */ + + /* try to get device name with wrong direction bits */ + err = ioctl(self->hidraw_fd, bad_cmd, buf); + ASSERT_LT(err, 0) TH_LOG("HIDIOCGRAWNAME with wrong _IOC_DIR should have failed"); + ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno); + + /* also test with _IOC_WRITE|_IOC_READ */ + bad_cmd = _IOC(_IOC_WRITE|_IOC_READ, 'H', 0x04, sizeof(buf)); /* should be only _IOC_READ */ + + err = ioctl(self->hidraw_fd, bad_cmd, buf); + ASSERT_LT(err, 0) TH_LOG("HIDIOCGRAWNAME with wrong _IOC_DIR should have failed"); + ASSERT_EQ(errno, EINVAL) TH_LOG("expected EINVAL for wrong _IOC_DIR, got errno %d", errno); +} + +/* + * Test HIDIOCSFEATURE ioctl to set feature report + */ +TEST_F(hidraw, ioctl_sfeature) +{ + __u8 buf[10] = {0}; + int err; + + /* prepare feature report data */ + buf[0] = 1; /* report ID */ + buf[1] = 0x42; + buf[2] = 0x24; + + /* set feature report */ + err = ioctl(self->hidraw_fd, HIDIOCSFEATURE(3), buf); + ASSERT_EQ(err, 3) TH_LOG("HIDIOCSFEATURE ioctl failed, got %d", err); + + /* + * Note: The uhid mock doesn't validate the set report data, + * so we just verify the ioctl succeeds + */ +} + +/* + * Test HIDIOCGINPUT ioctl to get input report + */ +TEST_F(hidraw, ioctl_ginput) +{ + __u8 buf[10] = {0}; + int err; + + /* set report ID 1 in first byte */ + buf[0] = 1; + + /* get input report */ + err = ioctl(self->hidraw_fd, HIDIOCGINPUT(sizeof(buf)), buf); + ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGINPUT ioctl failed, got %d", err); + + /* verify we got the expected input data */ + ASSERT_EQ(buf[0], feature_data[0]) + TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]); + ASSERT_EQ(buf[1], feature_data[1]) + TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]); +} + +/* + * Test HIDIOCGINPUT ioctl with invalid report ID + */ +TEST_F(hidraw, ioctl_ginput_invalid) +{ + __u8 buf[10] = {0}; + int err; + + /* set invalid report ID (not 1) */ + buf[0] = 2; + + /* try to get input report */ + err = ioctl(self->hidraw_fd, HIDIOCGINPUT(sizeof(buf)), buf); + ASSERT_LT(err, 0) TH_LOG("HIDIOCGINPUT should have failed with invalid report ID"); + ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno); +} + +/* + * Test HIDIOCSINPUT ioctl to set input report + */ +TEST_F(hidraw, ioctl_sinput) +{ + __u8 buf[10] = {0}; + int err; + + /* prepare input report data */ + buf[0] = 1; /* report ID */ + buf[1] = 0x55; + buf[2] = 0xAA; + + /* set input report */ + err = ioctl(self->hidraw_fd, HIDIOCSINPUT(3), buf); + ASSERT_EQ(err, 3) TH_LOG("HIDIOCSINPUT ioctl failed, got %d", err); + + /* + * Note: The uhid mock doesn't validate the set report data, + * so we just verify the ioctl succeeds + */ +} + +/* + * Test HIDIOCGOUTPUT ioctl to get output report + */ +TEST_F(hidraw, ioctl_goutput) +{ + __u8 buf[10] = {0}; + int err; + + /* set report ID 1 in first byte */ + buf[0] = 1; + + /* get output report */ + err = ioctl(self->hidraw_fd, HIDIOCGOUTPUT(sizeof(buf)), buf); + ASSERT_EQ(err, sizeof(feature_data)) TH_LOG("HIDIOCGOUTPUT ioctl failed, got %d", err); + + /* verify we got the expected output data */ + ASSERT_EQ(buf[0], feature_data[0]) + TH_LOG("expected feature_data[0] = %d, got %d", feature_data[0], buf[0]); + ASSERT_EQ(buf[1], feature_data[1]) + TH_LOG("expected feature_data[1] = %d, got %d", feature_data[1], buf[1]); +} + +/* + * Test HIDIOCGOUTPUT ioctl with invalid report ID + */ +TEST_F(hidraw, ioctl_goutput_invalid) +{ + __u8 buf[10] = {0}; + int err; + + /* set invalid report ID (not 1) */ + buf[0] = 2; + + /* try to get output report */ + err = ioctl(self->hidraw_fd, HIDIOCGOUTPUT(sizeof(buf)), buf); + ASSERT_LT(err, 0) TH_LOG("HIDIOCGOUTPUT should have failed with invalid report ID"); + ASSERT_EQ(errno, EIO) TH_LOG("expected EIO, got errno %d", errno); +} + +/* + * Test HIDIOCSOUTPUT ioctl to set output report + */ +TEST_F(hidraw, ioctl_soutput) +{ + __u8 buf[10] = {0}; + int err; + + /* prepare output report data */ + buf[0] = 1; /* report ID */ + buf[1] = 0x33; + buf[2] = 0xCC; + + /* set output report */ + err = ioctl(self->hidraw_fd, HIDIOCSOUTPUT(3), buf); + ASSERT_EQ(err, 3) TH_LOG("HIDIOCSOUTPUT ioctl failed, got %d", err); + + /* + * Note: The uhid mock doesn't validate the set report data, + * so we just verify the ioctl succeeds + */ +} + +/* + * Test HIDIOCGRAWNAME ioctl to get device name string + */ +TEST_F(hidraw, ioctl_rawname) +{ + char name[256] = {0}; + char expected_name[64]; + int err; + + /* get device name */ + err = ioctl(self->hidraw_fd, HIDIOCGRAWNAME(sizeof(name)), name); + ASSERT_GT(err, 0) TH_LOG("HIDIOCGRAWNAME ioctl failed, got %d", err); + + /* construct expected name based on device id */ + snprintf(expected_name, sizeof(expected_name), "test-uhid-device-%d", self->hid.dev_id); + + /* verify the name matches expected pattern */ + ASSERT_EQ(strcmp(name, expected_name), 0) + TH_LOG("expected name '%s', got '%s'", expected_name, name); +} + +/* + * Test HIDIOCGRAWPHYS ioctl to get device physical address string + */ +TEST_F(hidraw, ioctl_rawphys) +{ + char phys[256] = {0}; + char expected_phys[64]; + int err; + + /* get device physical address */ + err = ioctl(self->hidraw_fd, HIDIOCGRAWPHYS(sizeof(phys)), phys); + ASSERT_GT(err, 0) TH_LOG("HIDIOCGRAWPHYS ioctl failed, got %d", err); + + /* construct expected phys based on device id */ + snprintf(expected_phys, sizeof(expected_phys), "%d", self->hid.dev_id); + + /* verify the phys matches expected value */ + ASSERT_EQ(strcmp(phys, expected_phys), 0) + TH_LOG("expected phys '%s', got '%s'", expected_phys, phys); +} + +/* + * Test HIDIOCGRAWUNIQ ioctl to get device unique identifier string + */ +TEST_F(hidraw, ioctl_rawuniq) +{ + char uniq[256] = {0}; + int err; + + /* get device unique identifier */ + err = ioctl(self->hidraw_fd, HIDIOCGRAWUNIQ(sizeof(uniq)), uniq); + ASSERT_GE(err, 0) TH_LOG("HIDIOCGRAWUNIQ ioctl failed, got %d", err); + + /* uniq is typically empty in our test setup */ + ASSERT_EQ(strlen(uniq), 0) TH_LOG("expected empty uniq, got '%s'", uniq); +} + +/* + * Test device string ioctls with small buffer sizes + */ +TEST_F(hidraw, ioctl_strings_small_buffer) +{ + char small_buf[8] = {0}; + char expected_name[64]; + int err; + + /* test HIDIOCGRAWNAME with small buffer */ + err = ioctl(self->hidraw_fd, HIDIOCGRAWNAME(sizeof(small_buf)), small_buf); + ASSERT_EQ(err, sizeof(small_buf)) + TH_LOG("HIDIOCGRAWNAME with small buffer failed, got %d", err); + + /* construct expected truncated name */ + snprintf(expected_name, sizeof(expected_name), "test-uhid-device-%d", self->hid.dev_id); + + /* verify we got truncated name (first 8 chars, no null terminator guaranteed) */ + ASSERT_EQ(strncmp(small_buf, expected_name, sizeof(small_buf)), 0) + TH_LOG("expected truncated name to match first %zu chars", sizeof(small_buf)); + + /* Note: hidraw driver doesn't guarantee null termination when buffer is too small */ +} + int main(int argc, char **argv) { return test_harness_run(argc, argv); diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh index db534e9099a8..ecbd57f775a0 100755 --- a/tools/testing/selftests/hid/vmtest.sh +++ b/tools/testing/selftests/hid/vmtest.sh @@ -1,296 +1,474 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Red Hat +# Copyright (c) 2025 Meta Platforms, Inc. and affiliates +# +# Dependencies: +# * virtme-ng +# * busybox-static (used by virtme-ng) +# * qemu (used by virtme-ng) + +readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" +readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../) + +source "${SCRIPT_DIR}"/../kselftest/ktap_helpers.sh + +readonly HID_BPF_TEST="${SCRIPT_DIR}"/hid_bpf +readonly HIDRAW_TEST="${SCRIPT_DIR}"/hidraw +readonly HID_BPF_PROGS="${KERNEL_CHECKOUT}/drivers/hid/bpf/progs" +readonly SSH_GUEST_PORT=22 +readonly WAIT_PERIOD=3 +readonly WAIT_PERIOD_MAX=60 +readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX )) +readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_hid_vmtest_XXXX.pid) + +readonly QEMU_OPTS="\ + --pidfile ${QEMU_PIDFILE} \ +" +readonly KERNEL_CMDLINE="" +readonly LOG=$(mktemp /tmp/hid_vmtest_XXXX.log) +readonly TEST_NAMES=(vm_hid_bpf vm_hidraw vm_pytest) +readonly TEST_DESCS=( + "Run hid_bpf tests in the VM." + "Run hidraw tests in the VM." + "Run the hid-tools test-suite in the VM." +) + +VERBOSE=0 +SHELL_MODE=0 +BUILD_HOST="" +BUILD_HOST_PODMAN_CONTAINER_NAME="" + +usage() { + local name + local desc + local i + + echo + echo "$0 [OPTIONS] [TEST]... [-- tests-args]" + echo "If no TEST argument is given, all tests will be run." + echo + echo "Options" + echo " -b: build the kernel from the current source tree and use it for guest VMs" + echo " -H: hostname for remote build host (used with -b)" + echo " -p: podman container name for remote build host (used with -b)" + echo " Example: -H beefyserver -p vng" + echo " -q: set the path to or name of qemu binary" + echo " -s: start a shell in the VM instead of running tests" + echo " -v: more verbose output (can be repeated multiple times)" + echo + echo "Available tests" + + for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do + name=${TEST_NAMES[${i}]} + desc=${TEST_DESCS[${i}]} + printf "\t%-35s%-35s\n" "${name}" "${desc}" + done + echo -set -u -set -e - -# This script currently only works for x86_64 -ARCH="$(uname -m)" -case "${ARCH}" in -x86_64) - QEMU_BINARY=qemu-system-x86_64 - BZIMAGE="arch/x86/boot/bzImage" - ;; -*) - echo "Unsupported architecture" exit 1 - ;; -esac -SCRIPT_DIR="$(dirname $(realpath $0))" -OUTPUT_DIR="$SCRIPT_DIR/results" -KCONFIG_REL_PATHS=("${SCRIPT_DIR}/config" "${SCRIPT_DIR}/config.common" "${SCRIPT_DIR}/config.${ARCH}") -B2C_URL="https://gitlab.freedesktop.org/gfx-ci/boot2container/-/raw/main/vm2c.py" -NUM_COMPILE_JOBS="$(nproc)" -LOG_FILE_BASE="$(date +"hid_selftests.%Y-%m-%d_%H-%M-%S")" -LOG_FILE="${LOG_FILE_BASE}.log" -EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status" -CONTAINER_IMAGE="registry.freedesktop.org/bentiss/hid/fedora/39:2023-11-22.1" - -TARGETS="${TARGETS:=$(basename ${SCRIPT_DIR})}" -DEFAULT_COMMAND="pip3 install hid-tools; make -C tools/testing/selftests TARGETS=${TARGETS} run_tests" - -usage() -{ - cat <<EOF -Usage: $0 [-j N] [-s] [-b] [-d <output_dir>] -- [<command>] - -<command> is the command you would normally run when you are in -the source kernel direcory. e.g: - - $0 -- ./tools/testing/selftests/hid/hid_bpf - -If no command is specified and a debug shell (-s) is not requested, -"${DEFAULT_COMMAND}" will be run by default. - -If you build your kernel using KBUILD_OUTPUT= or O= options, these -can be passed as environment variables to the script: - - O=<kernel_build_path> $0 -- ./tools/testing/selftests/hid/hid_bpf - -or - - KBUILD_OUTPUT=<kernel_build_path> $0 -- ./tools/testing/selftests/hid/hid_bpf - -Options: - - -u) Update the boot2container script to a newer version. - -d) Update the output directory (default: ${OUTPUT_DIR}) - -b) Run only the build steps for the kernel and the selftests - -j) Number of jobs for compilation, similar to -j in make - (default: ${NUM_COMPILE_JOBS}) - -s) Instead of powering off the VM, start an interactive - shell. If <command> is specified, the shell runs after - the command finishes executing -EOF } -download() -{ - local file="$1" +die() { + echo "$*" >&2 + exit "${KSFT_FAIL}" +} - echo "Downloading $file..." >&2 - curl -Lsf "$file" -o "${@:2}" +vm_ssh() { + # vng --ssh-client keeps shouting "Warning: Permanently added 'virtme-ng%22' + # (ED25519) to the list of known hosts.", + # So replace the command with what's actually called and add the "-q" option + stdbuf -oL ssh -q \ + -F ${HOME}/.cache/virtme-ng/.ssh/virtme-ng-ssh.conf \ + -l root virtme-ng%${SSH_GUEST_PORT} \ + "$@" + return $? } -recompile_kernel() -{ - local kernel_checkout="$1" - local make_command="$2" +cleanup() { + if [[ -s "${QEMU_PIDFILE}" ]]; then + pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1 + fi - cd "${kernel_checkout}" + # If failure occurred during or before qemu start up, then we need + # to clean this up ourselves. + if [[ -e "${QEMU_PIDFILE}" ]]; then + rm "${QEMU_PIDFILE}" + fi +} + +check_args() { + local found - ${make_command} olddefconfig - ${make_command} headers - ${make_command} + for arg in "$@"; do + found=0 + for name in "${TEST_NAMES[@]}"; do + if [[ "${name}" = "${arg}" ]]; then + found=1 + break + fi + done + + if [[ "${found}" -eq 0 ]]; then + echo "${arg} is not an available test" >&2 + usage + fi + done + + for arg in "$@"; do + if ! command -v > /dev/null "test_${arg}"; then + echo "Test ${arg} not found" >&2 + usage + fi + done +} + +check_deps() { + for dep in vng ${QEMU} busybox pkill ssh pytest; do + if [[ ! -x $(command -v "${dep}") ]]; then + echo -e "skip: dependency ${dep} not found!\n" + exit "${KSFT_SKIP}" + fi + done + + if [[ ! -x $(command -v "${HID_BPF_TEST}") ]]; then + printf "skip: %s not found!" "${HID_BPF_TEST}" + printf " Please build the kselftest hid_bpf target.\n" + exit "${KSFT_SKIP}" + fi + + if [[ ! -x $(command -v "${HIDRAW_TEST}") ]]; then + printf "skip: %s not found!" "${HIDRAW_TEST}" + printf " Please build the kselftest hidraw target.\n" + exit "${KSFT_SKIP}" + fi } -update_selftests() -{ - local kernel_checkout="$1" - local selftests_dir="${kernel_checkout}/tools/testing/selftests/hid" +check_vng() { + local tested_versions + local version + local ok - cd "${selftests_dir}" - ${make_command} + tested_versions=("1.36" "1.37") + version="$(vng --version)" + + ok=0 + for tv in "${tested_versions[@]}"; do + if [[ "${version}" == *"${tv}"* ]]; then + ok=1 + break + fi + done + + if [[ ! "${ok}" -eq 1 ]]; then + printf "warning: vng version '%s' has not been tested and may " "${version}" >&2 + printf "not function properly.\n\tThe following versions have been tested: " >&2 + echo "${tested_versions[@]}" >&2 + fi } -run_vm() -{ - local run_dir="$1" - local b2c="$2" - local kernel_bzimage="$3" - local command="$4" - local post_command="" - - cd "${run_dir}" - - if ! which "${QEMU_BINARY}" &> /dev/null; then - cat <<EOF -Could not find ${QEMU_BINARY} -Please install qemu or set the QEMU_BINARY environment variable. -EOF +handle_build() { + if [[ ! "${BUILD}" -eq 1 ]]; then + return + fi + + if [[ ! -d "${KERNEL_CHECKOUT}" ]]; then + echo "-b requires vmtest.sh called from the kernel source tree" >&2 exit 1 fi - # alpine (used in post-container requires the PATH to have /bin - export PATH=$PATH:/bin + pushd "${KERNEL_CHECKOUT}" &>/dev/null - if [[ "${debug_shell}" != "yes" ]] - then - touch ${OUTPUT_DIR}/${LOG_FILE} - command="mount bpffs -t bpf /sys/fs/bpf/; set -o pipefail ; ${command} 2>&1 | tee ${OUTPUT_DIR}/${LOG_FILE}" - post_command="cat ${OUTPUT_DIR}/${LOG_FILE}" - else - command="mount bpffs -t bpf /sys/fs/bpf/; ${command}" + if ! vng --kconfig --config "${SCRIPT_DIR}"/config; then + die "failed to generate .config for kernel source tree (${KERNEL_CHECKOUT})" fi - set +e - $b2c --command "${command}" \ - --kernel ${kernel_bzimage} \ - --workdir ${OUTPUT_DIR} \ - --image ${CONTAINER_IMAGE} + local vng_args=("-v" "--config" "${SCRIPT_DIR}/config" "--build") - echo $? > ${OUTPUT_DIR}/${EXIT_STATUS_FILE} + if [[ -n "${BUILD_HOST}" ]]; then + vng_args+=("--build-host" "${BUILD_HOST}") + fi - set -e + if [[ -n "${BUILD_HOST_PODMAN_CONTAINER_NAME}" ]]; then + vng_args+=("--build-host-exec-prefix" \ + "podman exec -ti ${BUILD_HOST_PODMAN_CONTAINER_NAME}") + fi - ${post_command} -} + if ! vng "${vng_args[@]}"; then + die "failed to build kernel from source tree (${KERNEL_CHECKOUT})" + fi -is_rel_path() -{ - local path="$1" + if ! make -j$(nproc) -C "${HID_BPF_PROGS}"; then + die "failed to build HID bpf objects from source tree (${HID_BPF_PROGS})" + fi - [[ ${path:0:1} != "/" ]] + if ! make -j$(nproc) -C "${SCRIPT_DIR}"; then + die "failed to build HID selftests from source tree (${SCRIPT_DIR})" + fi + + popd &>/dev/null } -do_update_kconfig() -{ - local kernel_checkout="$1" - local kconfig_file="$2" +vm_start() { + local logfile=/dev/null + local verbose_opt="" + local kernel_opt="" + local qemu - rm -f "$kconfig_file" 2> /dev/null + qemu=$(command -v "${QEMU}") - for config in "${KCONFIG_REL_PATHS[@]}"; do - local kconfig_src="${config}" - cat "$kconfig_src" >> "$kconfig_file" - done -} + if [[ "${VERBOSE}" -eq 2 ]]; then + verbose_opt="--verbose" + logfile=/dev/stdout + fi -update_kconfig() -{ - local kernel_checkout="$1" - local kconfig_file="$2" - - if [[ -f "${kconfig_file}" ]]; then - local local_modified="$(stat -c %Y "${kconfig_file}")" - - for config in "${KCONFIG_REL_PATHS[@]}"; do - local kconfig_src="${config}" - local src_modified="$(stat -c %Y "${kconfig_src}")" - # Only update the config if it has been updated after the - # previously cached config was created. This avoids - # unnecessarily compiling the kernel and selftests. - if [[ "${src_modified}" -gt "${local_modified}" ]]; then - do_update_kconfig "$kernel_checkout" "$kconfig_file" - # Once we have found one outdated configuration - # there is no need to check other ones. - break - fi - done - else - do_update_kconfig "$kernel_checkout" "$kconfig_file" + # If we are running from within the kernel source tree, use the kernel source tree + # as the kernel to boot, otherwise use the currently running kernel. + if [[ "$(realpath "$(pwd)")" == "${KERNEL_CHECKOUT}"* ]]; then + kernel_opt="${KERNEL_CHECKOUT}" fi -} -main() -{ - local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" - local kernel_checkout=$(realpath "${script_dir}"/../../../../) - # By default the script searches for the kernel in the checkout directory but - # it also obeys environment variables O= and KBUILD_OUTPUT= - local kernel_bzimage="${kernel_checkout}/${BZIMAGE}" - local command="${DEFAULT_COMMAND}" - local update_b2c="no" - local debug_shell="no" - local build_only="no" - - while getopts ':hsud:j:b' opt; do - case ${opt} in - u) - update_b2c="yes" - ;; - d) - OUTPUT_DIR="$OPTARG" - ;; - j) - NUM_COMPILE_JOBS="$OPTARG" - ;; - s) - command="/bin/sh" - debug_shell="yes" - ;; - b) - build_only="yes" - ;; - h) - usage - exit 0 - ;; - \? ) - echo "Invalid Option: -$OPTARG" - usage - exit 1 - ;; - : ) - echo "Invalid Option: -$OPTARG requires an argument" - usage - exit 1 - ;; - esac - done - shift $((OPTIND -1)) - - # trap 'catch "$?"' EXIT - if [[ "${build_only}" == "no" && "${debug_shell}" == "no" ]]; then - if [[ $# -eq 0 ]]; then - echo "No command specified, will run ${DEFAULT_COMMAND} in the vm" - else - command="$@" - - if [[ "${command}" == "/bin/bash" || "${command}" == "bash" ]] - then - debug_shell="yes" - fi + vng \ + --run \ + ${kernel_opt} \ + ${verbose_opt} \ + --qemu-opts="${QEMU_OPTS}" \ + --qemu="${qemu}" \ + --user root \ + --append "${KERNEL_CMDLINE}" \ + --ssh "${SSH_GUEST_PORT}" \ + --rw &> ${logfile} & + + local vng_pid=$! + local elapsed=0 + + while [[ ! -s "${QEMU_PIDFILE}" ]]; do + if ! kill -0 "${vng_pid}" 2>/dev/null; then + echo "vng process (PID ${vng_pid}) exited early, check logs for details" >&2 + die "failed to boot VM" fi - fi - local kconfig_file="${OUTPUT_DIR}/latest.config" - local make_command="make -j ${NUM_COMPILE_JOBS} KCONFIG_CONFIG=${kconfig_file}" + if [[ ${elapsed} -ge ${WAIT_TOTAL} ]]; then + echo "Timed out after ${WAIT_TOTAL} seconds waiting for VM to boot" >&2 + die "failed to boot VM" + fi - # Figure out where the kernel is being built. - # O takes precedence over KBUILD_OUTPUT. - if [[ "${O:=""}" != "" ]]; then - if is_rel_path "${O}"; then - O="$(realpath "${PWD}/${O}")" + sleep 1 + elapsed=$((elapsed + 1)) + done +} + +vm_wait_for_ssh() { + local i + + i=0 + while true; do + if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then + die "Timed out waiting for guest ssh" fi - kernel_bzimage="${O}/${BZIMAGE}" - make_command="${make_command} O=${O}" - elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then - if is_rel_path "${KBUILD_OUTPUT}"; then - KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")" + if vm_ssh -- true; then + break fi - kernel_bzimage="${KBUILD_OUTPUT}/${BZIMAGE}" - make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}" + i=$(( i + 1 )) + sleep ${WAIT_PERIOD} + done +} + +vm_mount_bpffs() { + vm_ssh -- mount bpffs -t bpf /sys/fs/bpf +} + +__log_stdin() { + stdbuf -oL awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0; fflush() }' +} + +__log_args() { + echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }' +} + +log() { + local verbose="$1" + shift + + local prefix="$1" + + shift + local redirect= + if [[ ${verbose} -le 0 ]]; then + redirect=/dev/null + else + redirect=/dev/stdout + fi + + if [[ "$#" -eq 0 ]]; then + __log_stdin | tee -a "${LOG}" > ${redirect} + else + __log_args "$@" | tee -a "${LOG}" > ${redirect} fi +} - local b2c="${OUTPUT_DIR}/vm2c.py" +log_setup() { + log $((VERBOSE-1)) "setup" "$@" +} - echo "Output directory: ${OUTPUT_DIR}" +log_host() { + local testname=$1 - mkdir -p "${OUTPUT_DIR}" - update_kconfig "${kernel_checkout}" "${kconfig_file}" + shift + log $((VERBOSE-1)) "test:${testname}:host" "$@" +} - recompile_kernel "${kernel_checkout}" "${make_command}" - update_selftests "${kernel_checkout}" "${make_command}" +log_guest() { + local testname=$1 - if [[ "${build_only}" == "no" ]]; then - if [[ "${update_b2c}" == "no" && ! -f "${b2c}" ]]; then - echo "vm2c script not found in ${b2c}" - update_b2c="yes" - fi + shift + log ${VERBOSE} "# test:${testname}" "$@" +} - if [[ "${update_b2c}" == "yes" ]]; then - download $B2C_URL $b2c - chmod +x $b2c - fi +test_vm_hid_bpf() { + local testname="${FUNCNAME[0]#test_}" - run_vm "${kernel_checkout}" $b2c "${kernel_bzimage}" "${command}" - if [[ "${debug_shell}" != "yes" ]]; then - echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}" - fi + vm_ssh -- "${HID_BPF_TEST}" \ + 2>&1 | log_guest "${testname}" + + return ${PIPESTATUS[0]} +} + +test_vm_hidraw() { + local testname="${FUNCNAME[0]#test_}" + + vm_ssh -- "${HIDRAW_TEST}" \ + 2>&1 | log_guest "${testname}" + + return ${PIPESTATUS[0]} +} + +test_vm_pytest() { + local testname="${FUNCNAME[0]#test_}" - exit $(cat ${OUTPUT_DIR}/${EXIT_STATUS_FILE}) + shift + + vm_ssh -- pytest ${SCRIPT_DIR}/tests --color=yes "$@" \ + 2>&1 | log_guest "${testname}" + + return ${PIPESTATUS[0]} +} + +run_test() { + local vm_oops_cnt_before + local vm_warn_cnt_before + local vm_oops_cnt_after + local vm_warn_cnt_after + local name + local rc + + vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops') + vm_error_cnt_before=$(vm_ssh -- dmesg --level=err | wc -l) + + name=$(echo "${1}" | awk '{ print $1 }') + eval test_"${name}" "$@" + rc=$? + + vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l) + if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then + echo "FAIL: kernel oops detected on vm" | log_host "${name}" + rc=$KSFT_FAIL + fi + + vm_error_cnt_after=$(vm_ssh -- dmesg --level=err | wc -l) + if [[ ${vm_error_cnt_after} -gt ${vm_error_cnt_before} ]]; then + echo "FAIL: kernel error detected on vm" | log_host "${name}" + vm_ssh -- dmesg --level=err | log_host "${name}" + rc=$KSFT_FAIL fi + + return "${rc}" } -main "$@" +QEMU="qemu-system-$(uname -m)" + +while getopts :hvsbq:H:p: o +do + case $o in + v) VERBOSE=$((VERBOSE+1));; + s) SHELL_MODE=1;; + b) BUILD=1;; + q) QEMU=$OPTARG;; + H) BUILD_HOST=$OPTARG;; + p) BUILD_HOST_PODMAN_CONTAINER_NAME=$OPTARG;; + h|*) usage;; + esac +done +shift $((OPTIND-1)) + +trap cleanup EXIT + +PARAMS="" + +if [[ ${#} -eq 0 ]]; then + ARGS=("${TEST_NAMES[@]}") +else + ARGS=() + COUNT=0 + for arg in $@; do + COUNT=$((COUNT+1)) + if [[ x"$arg" == x"--" ]]; then + break + fi + ARGS+=($arg) + done + shift $COUNT + PARAMS="$@" +fi + +if [[ "${SHELL_MODE}" -eq 0 ]]; then + check_args "${ARGS[@]}" + echo "1..${#ARGS[@]}" +fi +check_deps +check_vng +handle_build + +log_setup "Booting up VM" +vm_start +vm_wait_for_ssh +vm_mount_bpffs +log_setup "VM booted up" + +if [[ "${SHELL_MODE}" -eq 1 ]]; then + log_setup "Starting interactive shell in VM" + echo "Starting shell in VM. Use 'exit' to quit and shutdown the VM." + CURRENT_DIR="$(pwd)" + vm_ssh -t -- "cd '${CURRENT_DIR}' && exec bash -l" + exit "$KSFT_PASS" +fi + +cnt_pass=0 +cnt_fail=0 +cnt_skip=0 +cnt_total=0 +for arg in "${ARGS[@]}"; do + run_test "${arg}" "${PARAMS}" + rc=$? + if [[ ${rc} -eq $KSFT_PASS ]]; then + cnt_pass=$(( cnt_pass + 1 )) + echo "ok ${cnt_total} ${arg}" + elif [[ ${rc} -eq $KSFT_SKIP ]]; then + cnt_skip=$(( cnt_skip + 1 )) + echo "ok ${cnt_total} ${arg} # SKIP" + elif [[ ${rc} -eq $KSFT_FAIL ]]; then + cnt_fail=$(( cnt_fail + 1 )) + echo "not ok ${cnt_total} ${arg} # exit=$rc" + fi + cnt_total=$(( cnt_total + 1 )) +done + +echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}" +echo "Log: ${LOG}" + +if [ $((cnt_pass + cnt_skip)) -eq ${cnt_total} ]; then + exit "$KSFT_PASS" +else + exit "$KSFT_FAIL" +fi diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index 651fc9f13c08..45c14323a618 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -113,7 +113,7 @@ static bool fail_nth_next(struct __test_metadata *_metadata, * necessarily mean a test failure, just that the limit has to be made * bigger. */ - ASSERT_GT(400, nth_state->iteration); + ASSERT_GT(1000, nth_state->iteration); if (nth_state->iteration != 0) { ssize_t res; ssize_t res2; diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 3c3e08b8c90e..772ca1db6e59 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -1042,15 +1042,13 @@ static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents) .dev_id = dev_id, }, }; - int ret; while (nvevents--) { - ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), - &trigger_vevent_cmd); - if (ret < 0) + if (!ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), + &trigger_vevent_cmd)) return -1; } - return ret; + return 0; } #define test_cmd_trigger_vevents(dev_id, nvevents) \ diff --git a/tools/testing/selftests/kexec/.gitignore b/tools/testing/selftests/kexec/.gitignore new file mode 100644 index 000000000000..5f3d9e089ae8 --- /dev/null +++ b/tools/testing/selftests/kexec/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +test_kexec_jump diff --git a/tools/testing/selftests/kho/init.c b/tools/testing/selftests/kho/init.c index 8034e24c6bf6..6d9e91d55d68 100644 --- a/tools/testing/selftests/kho/init.c +++ b/tools/testing/selftests/kho/init.c @@ -1,22 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 -#ifndef NOLIBC -#include <errno.h> #include <stdio.h> #include <unistd.h> #include <fcntl.h> -#include <syscall.h> +#include <sys/syscall.h> #include <sys/mount.h> #include <sys/reboot.h> -#endif +#include <linux/kexec.h> /* from arch/x86/include/asm/setup.h */ #define COMMAND_LINE_SIZE 2048 -/* from include/linux/kexex.h */ -#define KEXEC_FILE_NO_INITRAMFS 0x00000004 - -#define KHO_FINILIZE "/debugfs/kho/out/finalize" +#define KHO_FINALIZE "/debugfs/kho/out/finalize" #define KERNEL_IMAGE "/kernel" static int mount_filesystems(void) @@ -32,7 +27,7 @@ static int kho_enable(void) const char enable[] = "1"; int fd; - fd = open(KHO_FINILIZE, O_RDWR); + fd = open(KHO_FINALIZE, O_RDWR); if (fd < 0) return -1; diff --git a/tools/testing/selftests/kho/vmtest.sh b/tools/testing/selftests/kho/vmtest.sh index ec70a17bd476..3f6c17166846 100755 --- a/tools/testing/selftests/kho/vmtest.sh +++ b/tools/testing/selftests/kho/vmtest.sh @@ -10,7 +10,6 @@ kernel_dir=$(realpath "$test_dir/../../../..") tmp_dir=$(mktemp -d /tmp/kho-test.XXXXXXXX) headers_dir="$tmp_dir/usr" -initrd_dir="$tmp_dir/initrd" initrd="$tmp_dir/initrd.cpio" source "$test_dir/../kselftest/ktap_helpers.sh" @@ -81,19 +80,22 @@ EOF function mkinitrd() { local kernel=$1 - mkdir -p "$initrd_dir"/{dev,debugfs,proc} - sudo mknod "$initrd_dir/dev/console" c 5 1 - - "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -I"$headers_dir/include" \ - -fno-asynchronous-unwind-tables -fno-ident -nostdlib \ - -include "$test_dir/../../../include/nolibc/nolibc.h" \ - -o "$initrd_dir/init" "$test_dir/init.c" \ - - cp "$kernel" "$initrd_dir/kernel" + "$CROSS_COMPILE"gcc -s -static -Os -nostdinc -nostdlib \ + -fno-asynchronous-unwind-tables -fno-ident \ + -I "$headers_dir/include" \ + -I "$kernel_dir/tools/include/nolibc" \ + -o "$tmp_dir/init" "$test_dir/init.c" + + cat > "$tmp_dir/cpio_list" <<EOF +dir /dev 0755 0 0 +dir /proc 0755 0 0 +dir /debugfs 0755 0 0 +nod /dev/console 0600 0 0 c 5 1 +file /init $tmp_dir/init 0755 0 0 +file /kernel $kernel 0644 0 0 +EOF - pushd "$initrd_dir" &>/dev/null - find . | cpio -H newc --create > "$initrd" 2>/dev/null - popd &>/dev/null + "$build_dir/usr/gen_init_cpio" "$tmp_dir/cpio_list" > "$initrd" } function run_qemu() { diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index c3b6d2604b1e..afbcf8412ae5 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -54,6 +54,7 @@ #include <stdlib.h> #include <unistd.h> #include <stdarg.h> +#include <stdbool.h> #include <string.h> #include <stdio.h> #include <sys/utsname.h> @@ -92,6 +93,14 @@ #endif #define __printf(a, b) __attribute__((format(printf, a, b))) +#ifndef __always_unused +#define __always_unused __attribute__((__unused__)) +#endif + +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + /* counters */ struct ksft_count { unsigned int ksft_pass; @@ -104,6 +113,7 @@ struct ksft_count { static struct ksft_count ksft_cnt; static unsigned int ksft_plan; +static bool ksft_debug_enabled; static inline unsigned int ksft_test_num(void) { @@ -175,6 +185,18 @@ static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...) va_end(args); } +static inline void ksft_print_dbg_msg(const char *msg, ...) +{ + va_list args; + + if (!ksft_debug_enabled) + return; + + va_start(args, msg); + ksft_print_msg(msg, args); + va_end(args); +} + static inline void ksft_perror(const char *msg) { ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno); diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 8516e8434bc4..3f66e862e83e 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -1091,7 +1091,7 @@ static int test_harness_argv_check(int argc, char **argv) { int opt; - while ((opt = getopt(argc, argv, "hlF:f:V:v:t:T:r:")) != -1) { + while ((opt = getopt(argc, argv, "dhlF:f:V:v:t:T:r:")) != -1) { switch (opt) { case 'f': case 'F': @@ -1104,12 +1104,16 @@ static int test_harness_argv_check(int argc, char **argv) case 'l': test_harness_list_tests(); return KSFT_SKIP; + case 'd': + ksft_debug_enabled = true; + break; case 'h': default: fprintf(stderr, - "Usage: %s [-h|-l] [-t|-T|-v|-V|-f|-F|-r name]\n" + "Usage: %s [-h|-l|-d] [-t|-T|-v|-V|-f|-F|-r name]\n" "\t-h print help\n" "\t-l list all tests\n" + "\t-d enable debug prints\n" "\n" "\t-t name include test\n" "\t-T name exclude test\n" @@ -1142,8 +1146,9 @@ static bool test_enabled(int argc, char **argv, int opt; optind = 1; - while ((opt = getopt(argc, argv, "F:f:V:v:t:T:r:")) != -1) { - has_positive |= islower(opt); + while ((opt = getopt(argc, argv, "dF:f:V:v:t:T:r:")) != -1) { + if (opt != 'd') + has_positive |= islower(opt); switch (tolower(opt)) { case 't': diff --git a/tools/testing/selftests/kselftest_harness/Makefile b/tools/testing/selftests/kselftest_harness/Makefile index 0617535a6ce4..d2369c01701a 100644 --- a/tools/testing/selftests/kselftest_harness/Makefile +++ b/tools/testing/selftests/kselftest_harness/Makefile @@ -2,6 +2,7 @@ TEST_GEN_PROGS_EXTENDED := harness-selftest TEST_PROGS := harness-selftest.sh +TEST_FILES := harness-selftest.expected EXTRA_CLEAN := harness-selftest.seen include ../lib.mk diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 41b40c676d7f..148d427ff24b 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -87,6 +87,7 @@ TEST_GEN_PROGS_x86 += x86/kvm_clock_test TEST_GEN_PROGS_x86 += x86/kvm_pv_test TEST_GEN_PROGS_x86 += x86/kvm_buslock_test TEST_GEN_PROGS_x86 += x86/monitor_mwait_test +TEST_GEN_PROGS_x86 += x86/msrs_test TEST_GEN_PROGS_x86 += x86/nested_emulation_test TEST_GEN_PROGS_x86 += x86/nested_exceptions_test TEST_GEN_PROGS_x86 += x86/platform_info_test @@ -156,6 +157,7 @@ TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions +TEST_GEN_PROGS_arm64 += arm64/hello_el2 TEST_GEN_PROGS_arm64 += arm64/host_sve TEST_GEN_PROGS_arm64 += arm64/hypercalls TEST_GEN_PROGS_arm64 += arm64/external_aborts @@ -175,6 +177,7 @@ TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test TEST_GEN_PROGS_arm64 += dirty_log_perf_test TEST_GEN_PROGS_arm64 += get-reg-list +TEST_GEN_PROGS_arm64 += guest_memfd_test TEST_GEN_PROGS_arm64 += memslot_modification_stress_test TEST_GEN_PROGS_arm64 += memslot_perf_test TEST_GEN_PROGS_arm64 += mmu_stress_test @@ -196,9 +199,15 @@ TEST_GEN_PROGS_s390 += rseq_test TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test +TEST_GEN_PROGS_riscv += access_tracking_perf_test TEST_GEN_PROGS_riscv += arch_timer TEST_GEN_PROGS_riscv += coalesced_io_test +TEST_GEN_PROGS_riscv += dirty_log_perf_test TEST_GEN_PROGS_riscv += get-reg-list +TEST_GEN_PROGS_riscv += memslot_modification_stress_test +TEST_GEN_PROGS_riscv += memslot_perf_test +TEST_GEN_PROGS_riscv += mmu_stress_test +TEST_GEN_PROGS_riscv += rseq_test TEST_GEN_PROGS_riscv += steal_time TEST_GEN_PROGS_loongarch += coalesced_io_test diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index c9de66537ec3..b058f27b2141 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -50,6 +50,7 @@ #include "memstress.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" #include "cgroup_util.h" #include "lru_gen_util.h" diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c index eeba1cc87ff8..d592a4515399 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer.c @@ -165,10 +165,8 @@ static void guest_code(void) static void test_init_timer_irq(struct kvm_vm *vm) { /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */ - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + ptimer_irq = vcpu_get_ptimer_irq(vcpus[0]); + vtimer_irq = vcpu_get_vtimer_irq(vcpus[0]); sync_global_to_guest(vm, ptimer_irq); sync_global_to_guest(vm, vtimer_irq); @@ -176,14 +174,14 @@ static void test_init_timer_irq(struct kvm_vm *vm) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } -static int gic_fd; - struct kvm_vm *test_vm_create(void) { struct kvm_vm *vm; unsigned int i; int nr_vcpus = test_args.nr_vcpus; + TEST_REQUIRE(kvm_supports_vgic_v3()); + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); vm_init_descriptor_tables(vm); @@ -204,8 +202,6 @@ struct kvm_vm *test_vm_create(void) vcpu_init_descriptor_tables(vcpus[i]); test_init_timer_irq(vm); - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); /* Make all the test's cmdline args visible to the guest */ sync_global_to_guest(vm, test_args); @@ -215,6 +211,5 @@ struct kvm_vm *test_vm_create(void) void test_vm_cleanup(struct kvm_vm *vm) { - close(gic_fd); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index ce74d069cb7b..91906414a474 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -924,10 +924,8 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { - vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); - vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + ptimer_irq = vcpu_get_ptimer_irq(vcpu); + vtimer_irq = vcpu_get_vtimer_irq(vcpu); sync_global_to_guest(vm, ptimer_irq); sync_global_to_guest(vm, vtimer_irq); @@ -935,8 +933,6 @@ static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } -static int gic_fd; - static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, enum arch_timer timer) { @@ -951,8 +947,6 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, vcpu_args_set(*vcpu, 1, timer); test_init_timer_irq(*vm, *vcpu); - gic_fd = vgic_v3_setup(*vm, 1, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); sync_global_to_guest(*vm, test_args); sync_global_to_guest(*vm, CVAL_MAX); @@ -961,7 +955,6 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, static void test_vm_cleanup(struct kvm_vm *vm) { - close(gic_fd); kvm_vm_free(vm); } @@ -1042,6 +1035,8 @@ int main(int argc, char *argv[]) /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); + TEST_REQUIRE(kvm_supports_vgic_v3()); + if (!parse_args(argc, argv)) exit(KSFT_SKIP); diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c index 062bf84cced1..592b26ded779 100644 --- a/tools/testing/selftests/kvm/arm64/external_aborts.c +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -250,6 +250,47 @@ static void test_serror(void) kvm_vm_free(vm); } +static void expect_sea_s1ptw_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3)); + + GUEST_DONE(); +} + +static noinline void test_s1ptw_abort_guest(void) +{ + extern char test_s1ptw_abort_insn; + + WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn); + + asm volatile("test_s1ptw_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("Load on S1PTW abort should not retire"); +} + +static void test_s1ptw_abort(void) +{ + struct kvm_vcpu *vcpu; + u64 *ptep, bad_pa; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest, + expect_sea_s1ptw_handler); + + ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2); + bad_pa = BIT(vm->pa_bits) - vm->page_size; + + *ptep &= ~GENMASK(47, 12); + *ptep |= bad_pa; + + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + static void test_serror_emulated_guest(void) { GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); @@ -327,4 +368,5 @@ int main(void) test_serror_masked(); test_serror_emulated(); test_mmio_ease(); + test_s1ptw_abort(); } diff --git a/tools/testing/selftests/kvm/arm64/hello_el2.c b/tools/testing/selftests/kvm/arm64/hello_el2.c new file mode 100644 index 000000000000..bbe6862c6ab1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/hello_el2.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * hello_el2 - Basic KVM selftest for VM running at EL2 with E2H=RES1 + * + * Copyright 2025 Google LLC + */ +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include "ucall.h" + +#include <asm/sysreg.h> + +static void guest_code(void) +{ + u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1); + u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); + u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1); + u8 e2h0 = SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4); + + GUEST_ASSERT_EQ(get_current_el(), 2); + GUEST_ASSERT(read_sysreg(hcr_el2) & HCR_EL2_E2H); + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1), + ID_AA64MMFR1_EL1_VH_IMP); + + /* + * Traps of the complete ID register space are IMPDEF without FEAT_FGT, + * which is really annoying to deal with in KVM describing E2H as RES1. + * + * If the implementation doesn't honor the trap then expect the register + * to return all zeros. + */ + if (e2h0 == ID_AA64MMFR4_EL1_E2H0_IMP) + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR0_EL1, FGT, mmfr0), + ID_AA64MMFR0_EL1_FGT_NI); + else + GUEST_ASSERT_EQ(e2h0, ID_AA64MMFR4_EL1_E2H0_NI_NV1); + + GUEST_DONE(); +} + +int main(void) +{ + struct kvm_vcpu_init init; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2)); + + vm = vm_create(1); + + kvm_get_default_vcpu_target(vm, &init); + init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unhandled ucall: %ld\n", uc.cmd); + } + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c index 44cfcf8a7f46..bf038a0371f4 100644 --- a/tools/testing/selftests/kvm/arm64/hypercalls.c +++ b/tools/testing/selftests/kvm/arm64/hypercalls.c @@ -108,7 +108,7 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info) for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) { memset(&res, 0, sizeof(res)); - smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); + do_smccc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); switch (stage) { case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c index af9581b860f1..b5be9133535a 100644 --- a/tools/testing/selftests/kvm/arm64/kvm-uuid.c +++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c @@ -25,7 +25,7 @@ static void guest_code(void) { struct arm_smccc_res res = {}; - smccc_hvc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); + do_smccc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 && res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 && diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c index f222538e6084..152c34776981 100644 --- a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c +++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c @@ -163,6 +163,8 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; uint64_t pfr0; + test_disable_default_vgic(); + vm = vm_create_with_one_vcpu(&vcpu, NULL); pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0), diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c index ab491ee9e5f7..98e49f710aef 100644 --- a/tools/testing/selftests/kvm/arm64/psci_test.c +++ b/tools/testing/selftests/kvm/arm64/psci_test.c @@ -27,7 +27,7 @@ static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, { struct arm_smccc_res res; - smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, + do_smccc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, 0, 0, 0, 0, &res); return res.a0; @@ -38,7 +38,7 @@ static uint64_t psci_affinity_info(uint64_t target_affinity, { struct arm_smccc_res res; - smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, + do_smccc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, 0, 0, 0, 0, 0, &res); return res.a0; @@ -48,7 +48,7 @@ static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, + do_smccc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, 0, 0, 0, 0, 0, &res); return res.a0; @@ -58,7 +58,7 @@ static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); + do_smccc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); return res.a0; } @@ -67,7 +67,7 @@ static uint64_t psci_features(uint32_t func_id) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); + do_smccc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); return res.a0; } @@ -89,12 +89,13 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, vm = vm_create(2); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); *source = aarch64_vcpu_add(vm, 0, &init, guest_code); *target = aarch64_vcpu_add(vm, 1, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 189321e96925..8ff1e853f7f8 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -15,8 +15,6 @@ #include "test_util.h" #include <linux/bitfield.h> -bool have_cap_arm_mte; - enum ftr_type { FTR_EXACT, /* Use a predefined safe value */ FTR_LOWER_SAFE, /* Smaller value is safe */ @@ -125,6 +123,13 @@ static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64isar3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FPRCVT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSFE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FAMINMAX, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), @@ -165,7 +170,9 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HCX, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TWED, 0), REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0), @@ -221,6 +228,7 @@ static struct test_feature_reg test_regs[] = { TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1), TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1), TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1), + TEST_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3_el1), TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1), TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1), TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), @@ -239,6 +247,7 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); @@ -568,7 +577,9 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) uint64_t mte_frac; int idx, err; - if (!have_cap_arm_mte) { + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); + if (!mte) { ksft_test_result_skip("MTE capability not supported, nothing to test\n"); return; } @@ -593,9 +604,6 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) * from unsupported (0xF) to supported (0). * */ - val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); - - mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); if (mte != ID_AA64PFR1_EL1_MTE_MTE2 || mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) { @@ -750,28 +758,23 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) ksft_test_result_pass("%s\n", __func__); } -void kvm_arch_vm_post_create(struct kvm_vm *vm) -{ - if (vm_check_cap(vm, KVM_CAP_ARM_MTE)) { - vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); - have_cap_arm_mte = true; - } -} - int main(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; bool aarch64_only; uint64_t val, el0; - int test_cnt; + int test_cnt, i, j; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS)); + test_wants_mte(); + vm = vm_create(1); vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0); vcpu = vm_vcpu_add(vm, 0, guest_code); + kvm_arch_vm_finalize_vcpus(vm); /* Check for AARCH64 only system */ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); @@ -780,13 +783,10 @@ int main(void) ksft_print_header(); - test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + - ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + - ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + - ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr3_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - - ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; + test_cnt = 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; + for (i = 0; i < ARRAY_SIZE(test_regs); i++) + for (j = 0; test_regs[i].ftr_bits[j].type != FTR_END; j++) + test_cnt++; ksft_set_plan(test_cnt); diff --git a/tools/testing/selftests/kvm/arm64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c index 2d189f3da228..1763b9d45400 100644 --- a/tools/testing/selftests/kvm/arm64/smccc_filter.c +++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c @@ -22,8 +22,20 @@ enum smccc_conduit { SMC_INSN, }; +static bool test_runs_at_el2(void) +{ + struct kvm_vm *vm = vm_create(1); + struct kvm_vcpu_init init; + + kvm_get_default_vcpu_target(vm, &init); + kvm_vm_free(vm); + + return init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); +} + #define for_each_conduit(conduit) \ - for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++) + for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \ + conduit <= SMC_INSN; conduit++) static void guest_main(uint32_t func_id, enum smccc_conduit conduit) { @@ -64,7 +76,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) struct kvm_vm *vm; vm = vm_create(1); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vm, &init); /* * Enable in-kernel emulation of PSCI to ensure that calls are denied @@ -73,6 +85,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main); + kvm_arch_vm_finalize_vcpus(vm); return vm; } diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index a8e0f46bc0ab..8d6d3a4ae4db 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -994,6 +994,8 @@ int main(int ac, char **av) int pa_bits; int cnt_impl = 0; + test_disable_default_vgic(); + pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; max_phys_size = 1ULL << pa_bits; diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index a09dd423c2d7..6338f5bbdb70 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -752,7 +752,6 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) vcpu_args_set(vcpu, 1, args_gva); gic_fd = vgic_v3_setup(vm, 1, nr_irqs); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping"); vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handlers[args.eoi_split][args.level_sensitive]); @@ -802,6 +801,9 @@ int main(int argc, char **argv) int opt; bool eoi_split = false; + TEST_REQUIRE(kvm_supports_vgic_v3()); + test_disable_default_vgic(); + while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { switch (opt) { case 'n': diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index fc4fe52fb6f8..87922a89b134 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -27,7 +27,7 @@ static vm_paddr_t gpa_base; static struct kvm_vm *vm; static struct kvm_vcpu **vcpus; -static int gic_fd, its_fd; +static int its_fd; static struct test_data { bool request_vcpus_stop; @@ -214,9 +214,6 @@ static void setup_test_data(void) static void setup_gic(void) { - gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3"); - its_fd = vgic_its_setup(vm); } @@ -355,7 +352,6 @@ static void setup_vm(void) static void destroy_vm(void) { close(its_fd); - close(gic_fd); kvm_vm_free(vm); free(vcpus); } @@ -374,6 +370,8 @@ int main(int argc, char **argv) u32 nr_threads; int c; + TEST_REQUIRE(kvm_supports_vgic_v3()); + while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) { switch (c) { case 'v': diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c index a0c4ab839155..ae36325c022f 100644 --- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -28,7 +28,6 @@ struct vpmu_vm { struct kvm_vm *vm; struct kvm_vcpu *vcpu; - int gic_fd; }; static struct vpmu_vm vpmu_vm; @@ -45,11 +44,6 @@ static uint64_t get_pmcr_n(uint64_t pmcr) return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); } -static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n) -{ - u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N); -} - static uint64_t get_counters_mask(uint64_t n) { uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX); @@ -415,10 +409,6 @@ static void create_vpmu_vm(void *guest_code) .attr = KVM_ARM_VCPU_PMU_V3_IRQ, .addr = (uint64_t)&irq, }; - struct kvm_device_attr init_attr = { - .group = KVM_ARM_VCPU_PMU_V3_CTRL, - .attr = KVM_ARM_VCPU_PMU_V3_INIT, - }; /* The test creates the vpmu_vm multiple times. Ensure a clean state */ memset(&vpmu_vm, 0, sizeof(vpmu_vm)); @@ -431,13 +421,12 @@ static void create_vpmu_vm(void *guest_code) } /* Create vCPU with PMUv3 */ - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code); vcpu_init_descriptor_tables(vpmu_vm.vcpu); - vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64); - __TEST_REQUIRE(vpmu_vm.gic_fd >= 0, - "Failed to create vgic-v3, skipping"); + + kvm_arch_vm_finalize_vcpus(vpmu_vm.vm); /* Make sure that PMUv3 support is indicated in the ID register */ dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); @@ -446,14 +435,11 @@ static void create_vpmu_vm(void *guest_code) pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); - /* Initialize vPMU */ vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr); - vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr); } static void destroy_vpmu_vm(void) { - close(vpmu_vm.gic_fd); kvm_vm_free(vpmu_vm.vm); } @@ -475,33 +461,28 @@ static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n) } } -static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) +static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool expect_fail) { struct kvm_vcpu *vcpu; - uint64_t pmcr, pmcr_orig; + unsigned int prev; + int ret; create_vpmu_vm(guest_code); vcpu = vpmu_vm.vcpu; - pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); - pmcr = pmcr_orig; + prev = get_pmcr_n(vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0))); - /* - * Setting a larger value of PMCR.N should not modify the field, and - * return a success. - */ - set_pmcr_n(&pmcr, pmcr_n); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr); - pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); + ret = __vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters); if (expect_fail) - TEST_ASSERT(pmcr_orig == pmcr, - "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx", - pmcr, pmcr_n); + TEST_ASSERT(ret && errno == EINVAL, + "Setting more PMU counters (%u) than available (%u) unexpectedly succeeded", + nr_counters, prev); else - TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr), - "Failed to update PMCR.N to %lu (received: %lu)", - pmcr_n, get_pmcr_n(pmcr)); + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); + + vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, NULL); } /* @@ -516,11 +497,11 @@ static void run_access_test(uint64_t pmcr_n) pr_debug("Test with pmcr_n %lu\n", pmcr_n); - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); vcpu = vpmu_vm.vcpu; /* Save the initial sp to restore them later to run the guest again */ - sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1)); + sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1)); run_vcpu(vcpu, pmcr_n); @@ -528,11 +509,11 @@ static void run_access_test(uint64_t pmcr_n) * Reset and re-initialize the vCPU, and run the guest code again to * check if PMCR_EL0.N is preserved. */ - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); aarch64_vcpu_setup(vcpu, &init); vcpu_init_descriptor_tables(vcpu); - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp); vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); run_vcpu(vcpu, pmcr_n); @@ -557,7 +538,7 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) uint64_t set_reg_id, clr_reg_id, reg_val; uint64_t valid_counters_mask, max_counters_mask; - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); vcpu = vpmu_vm.vcpu; valid_counters_mask = get_counters_mask(pmcr_n); @@ -611,7 +592,7 @@ static void run_error_test(uint64_t pmcr_n) { pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n); - test_create_vpmu_vm_with_pmcr_n(pmcr_n, true); + test_create_vpmu_vm_with_nr_counters(pmcr_n, true); destroy_vpmu_vm(); } @@ -629,11 +610,25 @@ static uint64_t get_pmcr_n_limit(void) return get_pmcr_n(pmcr); } +static bool kvm_supports_nr_counters_attr(void) +{ + bool supported; + + create_vpmu_vm(NULL); + supported = !__vcpu_has_device_attr(vpmu_vm.vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS); + destroy_vpmu_vm(); + + return supported; +} + int main(void) { uint64_t i, pmcr_n; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3)); + TEST_REQUIRE(kvm_supports_vgic_v3()); + TEST_REQUIRE(kvm_supports_nr_counters_attr()); pmcr_n = get_pmcr_n_limit(); for (i = 0; i <= pmcr_n; i++) { diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index e79817bd0e29..0a1ea1d1e2d8 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -20,38 +20,6 @@ #include "guest_modes.h" #include "ucall_common.h" -#ifdef __aarch64__ -#include "arm64/vgic.h" - -static int gic_fd; - -static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) -{ - /* - * The test can still run even if hardware does not support GICv3, as it - * is only an optimization to reduce guest exits. - */ - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); -} - -static void arch_cleanup_vm(struct kvm_vm *vm) -{ - if (gic_fd > 0) - close(gic_fd); -} - -#else /* __aarch64__ */ - -static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) -{ -} - -static void arch_cleanup_vm(struct kvm_vm *vm) -{ -} - -#endif - /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL @@ -166,8 +134,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, dirty_log_manual_caps); - arch_setup_vm(vm, nr_vcpus); - /* Start the iterations */ iteration = 0; host_quit = false; @@ -285,7 +251,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) } memstress_free_bitmaps(bitmaps, p->slots); - arch_cleanup_vm(vm); memstress_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 23593d9eeba9..d58a641b0e6a 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -585,6 +585,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, log_mode_create_vm_done(vm); *vcpu = vm_vcpu_add(vm, 0, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c index 91f05f78e824..f4644c9d2d3b 100644 --- a/tools/testing/selftests/kvm/get-reg-list.c +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -116,10 +116,13 @@ void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) } #ifdef __aarch64__ -static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init) +static void prepare_vcpu_init(struct kvm_vm *vm, struct vcpu_reg_list *c, + struct kvm_vcpu_init *init) { struct vcpu_reg_sublist *s; + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, init); + for_each_sublist(c, s) if (s->capability) init->features[s->feature / 32] |= 1 << (s->feature % 32); @@ -127,10 +130,10 @@ static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *ini static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm) { - struct kvm_vcpu_init init = { .target = -1, }; + struct kvm_vcpu_init init; struct kvm_vcpu *vcpu; - prepare_vcpu_init(c, &init); + prepare_vcpu_init(vm, c, &init); vcpu = __vm_vcpu_add(vm, 0); aarch64_vcpu_setup(vcpu, &init); diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index ce687f8d248f..b3ca6737f304 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -13,12 +13,16 @@ #include <linux/bitmap.h> #include <linux/falloc.h> +#include <linux/sizes.h> +#include <setjmp.h> +#include <signal.h> #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> #include "kvm_util.h" #include "test_util.h" +#include "ucall_common.h" static void test_file_read_write(int fd) { @@ -34,12 +38,83 @@ static void test_file_read_write(int fd) "pwrite on a guest_mem fd should fail"); } -static void test_mmap(int fd, size_t page_size) +static void test_mmap_supported(int fd, size_t page_size, size_t total_size) +{ + const char val = 0xaa; + char *mem; + size_t i; + int ret; + + mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); + + mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + + memset(mem, val, total_size); + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, + page_size); + TEST_ASSERT(!ret, "fallocate the first page should succeed."); + + for (i = 0; i < page_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00); + for (; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + memset(mem, val, page_size); + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + ret = munmap(mem, total_size); + TEST_ASSERT(!ret, "munmap() should succeed."); +} + +static sigjmp_buf jmpbuf; +void fault_sigbus_handler(int signum) +{ + siglongjmp(jmpbuf, 1); +} + +static void test_fault_overflow(int fd, size_t page_size, size_t total_size) +{ + struct sigaction sa_old, sa_new = { + .sa_handler = fault_sigbus_handler, + }; + size_t map_size = total_size * 4; + const char val = 0xaa; + char *mem; + size_t i; + int ret; + + mem = mmap(NULL, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() for guest_memfd should succeed."); + + sigaction(SIGBUS, &sa_new, &sa_old); + if (sigsetjmp(jmpbuf, 1) == 0) { + memset(mem, 0xaa, map_size); + TEST_ASSERT(false, "memset() should have triggered SIGBUS."); + } + sigaction(SIGBUS, &sa_old, NULL); + + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + ret = munmap(mem, map_size); + TEST_ASSERT(!ret, "munmap() should succeed."); +} + +static void test_mmap_not_supported(int fd, size_t page_size, size_t total_size) { char *mem; mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); TEST_ASSERT_EQ(mem, MAP_FAILED); + + mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT_EQ(mem, MAP_FAILED); } static void test_file_size(int fd, size_t page_size, size_t total_size) @@ -120,80 +195,187 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) } } -static void test_create_guest_memfd_invalid(struct kvm_vm *vm) +static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm, + uint64_t guest_memfd_flags, + size_t page_size) { - size_t page_size = getpagesize(); - uint64_t flag; size_t size; int fd; for (size = 1; size < page_size; size++) { - fd = __vm_create_guest_memfd(vm, size, 0); - TEST_ASSERT(fd == -1 && errno == EINVAL, + fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags); + TEST_ASSERT(fd < 0 && errno == EINVAL, "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL", size); } - - for (flag = BIT(0); flag; flag <<= 1) { - fd = __vm_create_guest_memfd(vm, page_size, flag); - TEST_ASSERT(fd == -1 && errno == EINVAL, - "guest_memfd() with flag '0x%lx' should fail with EINVAL", - flag); - } } static void test_create_guest_memfd_multiple(struct kvm_vm *vm) { int fd1, fd2, ret; struct stat st1, st2; + size_t page_size = getpagesize(); - fd1 = __vm_create_guest_memfd(vm, 4096, 0); + fd1 = __vm_create_guest_memfd(vm, page_size, 0); TEST_ASSERT(fd1 != -1, "memfd creation should succeed"); ret = fstat(fd1, &st1); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st1.st_size == 4096, "memfd st_size should match requested size"); + TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size"); - fd2 = __vm_create_guest_memfd(vm, 8192, 0); + fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0); TEST_ASSERT(fd2 != -1, "memfd creation should succeed"); ret = fstat(fd2, &st2); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size"); + TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size"); ret = fstat(fd1, &st1); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size"); + TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size"); TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers"); close(fd2); close(fd1); } -int main(int argc, char *argv[]) +static void test_guest_memfd_flags(struct kvm_vm *vm, uint64_t valid_flags) { - size_t page_size; - size_t total_size; + size_t page_size = getpagesize(); + uint64_t flag; int fd; - struct kvm_vm *vm; - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + for (flag = BIT(0); flag; flag <<= 1) { + fd = __vm_create_guest_memfd(vm, page_size, flag); + if (flag & valid_flags) { + TEST_ASSERT(fd >= 0, + "guest_memfd() with flag '0x%lx' should succeed", + flag); + close(fd); + } else { + TEST_ASSERT(fd < 0 && errno == EINVAL, + "guest_memfd() with flag '0x%lx' should fail with EINVAL", + flag); + } + } +} + +static void test_guest_memfd(unsigned long vm_type) +{ + uint64_t flags = 0; + struct kvm_vm *vm; + size_t total_size; + size_t page_size; + int fd; page_size = getpagesize(); total_size = page_size * 4; - vm = vm_create_barebones(); + vm = vm_create_barebones_type(vm_type); + + if (vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP)) + flags |= GUEST_MEMFD_FLAG_MMAP; - test_create_guest_memfd_invalid(vm); test_create_guest_memfd_multiple(vm); + test_create_guest_memfd_invalid_sizes(vm, flags, page_size); - fd = vm_create_guest_memfd(vm, total_size, 0); + fd = vm_create_guest_memfd(vm, total_size, flags); test_file_read_write(fd); - test_mmap(fd, page_size); + + if (flags & GUEST_MEMFD_FLAG_MMAP) { + test_mmap_supported(fd, page_size, total_size); + test_fault_overflow(fd, page_size, total_size); + } else { + test_mmap_not_supported(fd, page_size, total_size); + } + test_file_size(fd, page_size, total_size); test_fallocate(fd, page_size, total_size); test_invalid_punch_hole(fd, page_size, total_size); + test_guest_memfd_flags(vm, flags); + close(fd); + kvm_vm_free(vm); +} + +static void guest_code(uint8_t *mem, uint64_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + __GUEST_ASSERT(mem[i] == 0xaa, + "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]); + + memset(mem, 0xff, size); + GUEST_DONE(); +} + +static void test_guest_memfd_guest(void) +{ + /* + * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back + * the guest's code, stack, and page tables, and low memory contains + * the PCI hole and other MMIO regions that need to be avoided. + */ + const uint64_t gpa = SZ_4G; + const int slot = 1; + + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint8_t *mem; + size_t size; + int fd, i; + + if (!kvm_has_cap(KVM_CAP_GUEST_MEMFD_MMAP)) + return; + + vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code); + + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_MMAP), + "Default VM type should always support guest_memfd mmap()"); + + size = vm->page_size; + fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP); + vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed"); + memset(mem, 0xaa, size); + munmap(mem, size); + + virt_pg_map(vm, gpa, gpa); + vcpu_args_set(vcpu, 2, gpa, size); + vcpu_run(vcpu); + + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() on guest_memfd failed"); + for (i = 0; i < size; i++) + TEST_ASSERT_EQ(mem[i], 0xff); + + close(fd); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + unsigned long vm_types, vm_type; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + + /* + * Not all architectures support KVM_CAP_VM_TYPES. However, those that + * support guest_memfd have that support for the default VM type. + */ + vm_types = kvm_check_cap(KVM_CAP_VM_TYPES); + if (!vm_types) + vm_types = BIT(VM_TYPE_DEFAULT); + + for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types)) + test_guest_memfd(vm_type); + + test_guest_memfd_guest(); } diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h index bf461de34785..e2c4e9f0010f 100644 --- a/tools/testing/selftests/kvm/include/arm64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h @@ -155,4 +155,28 @@ static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec) timer_set_tval(timer, msec_to_cycles(msec)); } +static inline u32 vcpu_get_vtimer_irq(struct kvm_vcpu *vcpu) +{ + u32 intid; + u64 attr; + + attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HVTIMER : + KVM_ARM_VCPU_TIMER_IRQ_VTIMER; + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); + + return intid; +} + +static inline u32 vcpu_get_ptimer_irq(struct kvm_vcpu *vcpu) +{ + u32 intid; + u64 attr; + + attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HPTIMER : + KVM_ARM_VCPU_TIMER_IRQ_PTIMER; + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); + + return intid; +} + #endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h index e43a57d99b56..b973bb2c64a6 100644 --- a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h @@ -2,6 +2,9 @@ #ifndef SELFTEST_KVM_UTIL_ARCH_H #define SELFTEST_KVM_UTIL_ARCH_H -struct kvm_vm_arch {}; +struct kvm_vm_arch { + bool has_gic; + int gic_fd; +}; #endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 255fed769a8a..6f481475c135 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -175,6 +175,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level); uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); static inline void cpu_relax(void) @@ -300,4 +301,77 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, /* Execute a Wait For Interrupt instruction. */ void wfi(void); +void test_wants_mte(void); +void test_disable_default_vgic(void); + +bool vm_supports_el2(struct kvm_vm *vm); +static bool vcpu_has_el2(struct kvm_vcpu *vcpu) +{ + return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); +} + +#define MAPPED_EL2_SYSREG(el2, el1) \ + case SYS_##el1: \ + if (vcpu_has_el2(vcpu)) \ + alias = SYS_##el2; \ + break + + +static __always_inline u64 ctxt_reg_alias(struct kvm_vcpu *vcpu, u32 encoding) +{ + u32 alias = encoding; + + BUILD_BUG_ON(!__builtin_constant_p(encoding)); + + switch (encoding) { + MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1); + MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1); + MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1); + MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1); + MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1); + MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1); + MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1); + MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1); + MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1); + MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1); + MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1); + MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1); + MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1); + MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1); + MAPPED_EL2_SYSREG(POR_EL2, POR_EL1); + MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1); + MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1); + MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1); + MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1); + MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1); + MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1); + MAPPED_EL2_SYSREG(CNTHCTL_EL2, CNTKCTL_EL1); + case SYS_SP_EL1: + if (!vcpu_has_el2(vcpu)) + return ARM64_CORE_REG(sp_el1); + + alias = SYS_SP_EL2; + break; + default: + BUILD_BUG(); + } + + return KVM_ARM64_SYS_REG(alias); +} + +void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init); + +static inline unsigned int get_current_el(void) +{ + return (read_sysreg(CurrentEL) >> 2) & 0x3; +} + +#define do_smccc(...) \ +do { \ + if (get_current_el() == 2) \ + smccc_smc(__VA_ARGS__); \ + else \ + smccc_hvc(__VA_ARGS__); \ +} while (0) + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h index c481d0c00a5d..688beccc9436 100644 --- a/tools/testing/selftests/kvm/include/arm64/vgic.h +++ b/tools/testing/selftests/kvm/include/arm64/vgic.h @@ -16,6 +16,9 @@ ((uint64_t)(flags) << 12) | \ index) +bool kvm_supports_vgic_v3(void); +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); +void __vgic_v3_init(int fd); int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); #define VGIC_MAX_RESERVED 1023 diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 23a506d7eca3..26cc30290e76 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -64,6 +64,9 @@ struct kvm_vcpu { #ifdef __x86_64__ struct kvm_cpuid2 *cpuid; #endif +#ifdef __aarch64__ + struct kvm_vcpu_init init; +#endif struct kvm_binary_stats stats; struct kvm_dirty_gfn *dirty_gfns; uint32_t fetch_index; @@ -260,13 +263,18 @@ int __open_path_or_exit(const char *path, int flags, const char *enoent_help); int open_path_or_exit(const char *path, int flags); int open_kvm_dev_path_or_exit(void); -bool get_kvm_param_bool(const char *param); -bool get_kvm_intel_param_bool(const char *param); -bool get_kvm_amd_param_bool(const char *param); +int kvm_get_module_param_integer(const char *module_name, const char *param); +bool kvm_get_module_param_bool(const char *module_name, const char *param); + +static inline bool get_kvm_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm", param); +} -int get_kvm_param_integer(const char *param); -int get_kvm_intel_param_integer(const char *param); -int get_kvm_amd_param_integer(const char *param); +static inline int get_kvm_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm", param); +} unsigned int kvm_check_cap(long cap); @@ -1257,7 +1265,9 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) */ void kvm_selftest_arch_init(void); -void kvm_arch_vm_post_create(struct kvm_vm *vm); +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus); +void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm); +void kvm_arch_vm_release(struct kvm_vm *vm); bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index 162f303d9daa..e58282488beb 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -9,6 +9,7 @@ #include <linux/stringify.h> #include <asm/csr.h> +#include <asm/vdso/processor.h> #include "kvm_util.h" #define INSN_OPCODE_MASK 0x007c diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h index 3c10c4dc0ae8..72575eadb63a 100644 --- a/tools/testing/selftests/kvm/include/x86/pmu.h +++ b/tools/testing/selftests/kvm/include/x86/pmu.h @@ -5,8 +5,11 @@ #ifndef SELFTEST_KVM_PMU_H #define SELFTEST_KVM_PMU_H +#include <stdbool.h> #include <stdint.h> +#include <linux/bits.h> + #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 /* @@ -61,6 +64,11 @@ #define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00) #define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00) #define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01) +#define INTEL_ARCH_TOPDOWN_BE_BOUND RAW_EVENT(0xa4, 0x02) +#define INTEL_ARCH_TOPDOWN_BAD_SPEC RAW_EVENT(0x73, 0x00) +#define INTEL_ARCH_TOPDOWN_FE_BOUND RAW_EVENT(0x9c, 0x01) +#define INTEL_ARCH_TOPDOWN_RETIRING RAW_EVENT(0xc2, 0x02) +#define INTEL_ARCH_LBR_INSERTS RAW_EVENT(0xe4, 0x01) #define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00) #define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00) @@ -80,6 +88,11 @@ enum intel_pmu_architectural_events { INTEL_ARCH_BRANCHES_RETIRED_INDEX, INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX, INTEL_ARCH_TOPDOWN_SLOTS_INDEX, + INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX, + INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX, + INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX, + INTEL_ARCH_TOPDOWN_RETIRING_INDEX, + INTEL_ARCH_LBR_INSERTS_INDEX, NR_INTEL_ARCH_EVENTS, }; @@ -94,4 +107,17 @@ enum amd_pmu_zen_events { extern const uint64_t intel_pmu_arch_events[]; extern const uint64_t amd_pmu_zen_events[]; +enum pmu_errata { + INSTRUCTIONS_RETIRED_OVERCOUNT, + BRANCHES_RETIRED_OVERCOUNT, +}; +extern uint64_t pmu_errata_mask; + +void kvm_init_pmu_errata(void); + +static inline bool this_pmu_has_errata(enum pmu_errata errata) +{ + return pmu_errata_mask & BIT_ULL(errata); +} + #endif /* SELFTEST_KVM_PMU_H */ diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 2efb05c2f2fb..51cd84b9ca66 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -34,6 +34,8 @@ extern uint64_t guest_tsc_khz; #define NMI_VECTOR 0x02 +const char *ex_str(int vector); + #define X86_EFLAGS_FIXED (1u << 1) #define X86_CR4_VME (1ul << 0) @@ -265,7 +267,7 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) #define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23) #define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) -#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7) +#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 12) #define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31) #define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4) #define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12) @@ -332,6 +334,11 @@ struct kvm_x86_pmu_feature { #define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5) #define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6) #define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7) +#define X86_PMU_FEATURE_TOPDOWN_BE_BOUND KVM_X86_PMU_FEATURE(EBX, 8) +#define X86_PMU_FEATURE_TOPDOWN_BAD_SPEC KVM_X86_PMU_FEATURE(EBX, 9) +#define X86_PMU_FEATURE_TOPDOWN_FE_BOUND KVM_X86_PMU_FEATURE(EBX, 10) +#define X86_PMU_FEATURE_TOPDOWN_RETIRING KVM_X86_PMU_FEATURE(EBX, 11) +#define X86_PMU_FEATURE_LBR_INSERTS KVM_X86_PMU_FEATURE(EBX, 12) #define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0) #define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1) @@ -1179,6 +1186,12 @@ struct idt_entry { void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); +/* + * Exception fixup morphs #DE to an arbitrary magic vector so that '0' can be + * used to signal "no expcetion". + */ +#define KVM_MAGIC_DE_VECTOR 0xff + /* If a toddler were to say "abracadabra". */ #define KVM_EXCEPTION_MAGIC 0xabacadabaULL @@ -1314,6 +1327,26 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value) bool kvm_is_tdp_enabled(void); +static inline bool get_kvm_intel_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_intel", param); +} + +static inline bool get_kvm_amd_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_amd", param); +} + +static inline int get_kvm_intel_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_intel", param); +} + +static inline int get_kvm_amd_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_amd", param); +} + static inline bool kvm_is_pmu_enabled(void) { return get_kvm_param_bool("enable_pmu"); @@ -1329,6 +1362,11 @@ static inline bool kvm_is_unrestricted_guest_enabled(void) return get_kvm_intel_param_bool("unrestricted_guest"); } +static inline bool kvm_is_ignore_msrs(void) +{ + return get_kvm_param_bool("ignore_msrs"); +} + uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level); uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index eb115123d741..369a4c87dd8f 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -12,6 +12,7 @@ #include "kvm_util.h" #include "processor.h" #include "ucall_common.h" +#include "vgic.h" #include <linux/bitfield.h> #include <linux/sizes.h> @@ -185,7 +186,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) _virt_pg_map(vm, vaddr, paddr, attr_idx); } -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level) { uint64_t *ptep; @@ -195,17 +196,23 @@ uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 0) + return ptep; switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 1) + break; /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 2) + break; /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; @@ -223,6 +230,11 @@ unmapped_gva: exit(EXIT_FAILURE); } +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +{ + return virt_get_pte_hva_at_level(vm, gva, 3); +} + vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep = virt_get_pte_hva(vm, gva); @@ -266,31 +278,49 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } +bool vm_supports_el2(struct kvm_vm *vm) +{ + const char *value = getenv("NV"); + + if (value && *value == '0') + return false; + + return vm_check_cap(vm, KVM_CAP_ARM_EL2) && vm->arch.has_gic; +} + +void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init) +{ + struct kvm_vcpu_init preferred = {}; + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); + if (vm_supports_el2(vm)) + preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + + *init = preferred; +} + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; struct kvm_vm *vm = vcpu->vm; uint64_t sctlr_el1, tcr_el1, ttbr0_el1; - if (!init) + if (!init) { + kvm_get_default_vcpu_target(vm, &default_init); init = &default_init; - - if (init->target == -1) { - struct kvm_vcpu_init preferred; - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); - init->target = preferred.target; } vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init); + vcpu->init = *init; /* * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 * registers, which the variable argument list macros do. */ - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20); - sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1)); - tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1)); + sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1)); + tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1)); /* Configure base granule size */ switch (vm->mode) { @@ -357,11 +387,17 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) if (use_lpa2_pte_format(vm)) tcr_el1 |= TCR_DS; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_MAIR_EL1), DEFAULT_MAIR_EL1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id); + + if (!vcpu_has_el2(vcpu)) + return; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), + HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H); } void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) @@ -395,7 +431,7 @@ static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, aarch64_vcpu_setup(vcpu, init); - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_vaddr + stack_size); return vcpu; } @@ -465,7 +501,7 @@ void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) { extern char vectors; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (uint64_t)&vectors); } void route_exception(struct ex_regs *regs, int vector) @@ -653,3 +689,39 @@ void wfi(void) { asm volatile("wfi"); } + +static bool request_mte; +static bool request_vgic = true; + +void test_wants_mte(void) +{ + request_mte = true; +} + +void test_disable_default_vgic(void) +{ + request_vgic = false; +} + +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ + if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE)) + vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); + + if (request_vgic && kvm_supports_vgic_v3()) { + vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64); + vm->arch.has_gic = true; + } +} + +void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + __vgic_v3_init(vm->arch.gic_fd); +} + +void kvm_arch_vm_release(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + close(vm->arch.gic_fd); +} diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c index 4427f43f73ea..d0f7bd0984b8 100644 --- a/tools/testing/selftests/kvm/lib/arm64/vgic.c +++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c @@ -15,6 +15,17 @@ #include "gic.h" #include "gic_v3.h" +bool kvm_supports_vgic_v3(void) +{ + struct kvm_vm *vm = vm_create_barebones(); + int r; + + r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + kvm_vm_free(vm); + + return !r; +} + /* * vGIC-v3 default host setup * @@ -30,24 +41,11 @@ * redistributor regions of the guest. Since it depends on the number of * vCPUs for the VM, it must be called after all the vCPUs have been created. */ -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) { int gic_fd; uint64_t attr; - struct list_head *iter; - unsigned int nr_gic_pages, nr_vcpus_created = 0; - - TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); - - /* - * Make sure that the caller is infact calling this - * function after all the vCPUs are added. - */ - list_for_each(iter, &vm->vcpus) - nr_vcpus_created++; - TEST_ASSERT(nr_vcpus == nr_vcpus_created, - "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", - nr_vcpus, nr_vcpus_created); + unsigned int nr_gic_pages; /* Distributor setup */ gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); @@ -56,9 +54,6 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - attr = GICD_BASE_GPA; kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST, &attr); @@ -73,10 +68,39 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + return gic_fd; +} + +void __vgic_v3_init(int fd) +{ + kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); +} - return gic_fd; +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +{ + unsigned int nr_vcpus_created = 0; + struct list_head *iter; + int fd; + + TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); + + /* + * Make sure that the caller is infact calling this + * function after all the vCPUs are added. + */ + list_for_each(iter, &vm->vcpus) + nr_vcpus_created++; + TEST_ASSERT(nr_vcpus == nr_vcpus_created, + "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", + nr_vcpus, nr_vcpus_created); + + fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs); + if (fd < 0) + return fd; + + __vgic_v3_init(fd); + return fd; } /* should only work for level sensitive interrupts */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index c3f5142b0a54..6743fbd9bd67 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -24,7 +24,7 @@ uint32_t guest_random_seed; struct guest_random_state guest_rng; static uint32_t last_guest_seed; -static int vcpu_mmap_sz(void); +static size_t vcpu_mmap_sz(void); int __open_path_or_exit(const char *path, int flags, const char *enoent_help) { @@ -95,7 +95,7 @@ static ssize_t get_module_param(const char *module_name, const char *param, return bytes_read; } -static int get_module_param_integer(const char *module_name, const char *param) +int kvm_get_module_param_integer(const char *module_name, const char *param) { /* * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the @@ -119,7 +119,7 @@ static int get_module_param_integer(const char *module_name, const char *param) return atoi_paranoid(value); } -static bool get_module_param_bool(const char *module_name, const char *param) +bool kvm_get_module_param_bool(const char *module_name, const char *param) { char value; ssize_t r; @@ -135,36 +135,6 @@ static bool get_module_param_bool(const char *module_name, const char *param) TEST_FAIL("Unrecognized value '%c' for boolean module param", value); } -bool get_kvm_param_bool(const char *param) -{ - return get_module_param_bool("kvm", param); -} - -bool get_kvm_intel_param_bool(const char *param) -{ - return get_module_param_bool("kvm_intel", param); -} - -bool get_kvm_amd_param_bool(const char *param) -{ - return get_module_param_bool("kvm_amd", param); -} - -int get_kvm_param_integer(const char *param) -{ - return get_module_param_integer("kvm", param); -} - -int get_kvm_intel_param_integer(const char *param) -{ - return get_module_param_integer("kvm_intel", param); -} - -int get_kvm_amd_param_integer(const char *param) -{ - return get_module_param_integer("kvm_amd", param); -} - /* * Capability * @@ -517,7 +487,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, guest_rng = new_guest_random_state(guest_random_seed); sync_global_to_guest(vm, guest_rng); - kvm_arch_vm_post_create(vm); + kvm_arch_vm_post_create(vm, nr_runnable_vcpus); return vm; } @@ -555,6 +525,7 @@ struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, for (i = 0; i < nr_vcpus; ++i) vcpus[i] = vm_vcpu_add(vm, i, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } @@ -805,6 +776,8 @@ void kvm_vm_release(struct kvm_vm *vmp) /* Free cached stats metadata and close FD */ kvm_stats_release(&vmp->stats); + + kvm_arch_vm_release(vmp); } static void __vm_mem_region_delete(struct kvm_vm *vm, @@ -1321,14 +1294,14 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, } /* Returns the size of a vCPU's kvm_run structure. */ -static int vcpu_mmap_sz(void) +static size_t vcpu_mmap_sz(void) { int dev_fd, ret; dev_fd = open_kvm_dev_path_or_exit(); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); - TEST_ASSERT(ret >= sizeof(struct kvm_run), + TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run), KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); close(dev_fd); @@ -1369,7 +1342,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm); TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " - "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", + "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->run)); vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); @@ -2330,7 +2303,15 @@ void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, TEST_FAIL("Unable to find stat '%s'", name); } -__weak void kvm_arch_vm_post_create(struct kvm_vm *vm) +__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ +} + +__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ +} + +__weak void kvm_arch_vm_release(struct kvm_vm *vm) { } diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c index f31f0427c17c..34cb57d1d671 100644 --- a/tools/testing/selftests/kvm/lib/x86/pmu.c +++ b/tools/testing/selftests/kvm/lib/x86/pmu.c @@ -8,6 +8,7 @@ #include <linux/kernel.h> #include "kvm_util.h" +#include "processor.h" #include "pmu.h" const uint64_t intel_pmu_arch_events[] = { @@ -19,6 +20,11 @@ const uint64_t intel_pmu_arch_events[] = { INTEL_ARCH_BRANCHES_RETIRED, INTEL_ARCH_BRANCHES_MISPREDICTED, INTEL_ARCH_TOPDOWN_SLOTS, + INTEL_ARCH_TOPDOWN_BE_BOUND, + INTEL_ARCH_TOPDOWN_BAD_SPEC, + INTEL_ARCH_TOPDOWN_FE_BOUND, + INTEL_ARCH_TOPDOWN_RETIRING, + INTEL_ARCH_LBR_INSERTS, }; kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS); @@ -29,3 +35,46 @@ const uint64_t amd_pmu_zen_events[] = { AMD_ZEN_BRANCHES_MISPREDICTED, }; kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS); + +/* + * For Intel Atom CPUs, the PMU events "Instruction Retired" or + * "Branch Instruction Retired" may be overcounted for some certain + * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD + * and complex SGX/SMX/CSTATE instructions/flows. + * + * The detailed information can be found in the errata (section SRF7): + * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/ + * + * For the Atom platforms before Sierra Forest (including Sierra Forest), + * Both 2 events "Instruction Retired" and "Branch Instruction Retired" would + * be overcounted on these certain instructions, but for Clearwater Forest + * only "Instruction Retired" event is overcounted on these instructions. + */ +static uint64_t get_pmu_errata(void) +{ + if (!this_cpu_is_intel()) + return 0; + + if (this_cpu_family() != 0x6) + return 0; + + switch (this_cpu_model()) { + case 0xDD: /* Clearwater Forest */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT); + case 0xAF: /* Sierra Forest */ + case 0x4D: /* Avaton, Rangely */ + case 0x5F: /* Denverton */ + case 0x86: /* Jacobsville */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) | + BIT_ULL(BRANCHES_RETIRED_OVERCOUNT); + default: + return 0; + } +} + +uint64_t pmu_errata_mask; + +void kvm_init_pmu_errata(void) +{ + pmu_errata_mask = get_pmu_errata(); +} diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index d4c19ac885a9..c748cd9b2eef 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -6,6 +6,7 @@ #include "linux/bitmap.h" #include "test_util.h" #include "kvm_util.h" +#include "pmu.h" #include "processor.h" #include "sev.h" @@ -23,6 +24,39 @@ bool host_cpu_is_intel; bool is_forced_emulation_enabled; uint64_t guest_tsc_khz; +const char *ex_str(int vector) +{ + switch (vector) { +#define VEC_STR(v) case v##_VECTOR: return "#" #v + case DE_VECTOR: return "no exception"; + case KVM_MAGIC_DE_VECTOR: return "#DE"; + VEC_STR(DB); + VEC_STR(NMI); + VEC_STR(BP); + VEC_STR(OF); + VEC_STR(BR); + VEC_STR(UD); + VEC_STR(NM); + VEC_STR(DF); + VEC_STR(TS); + VEC_STR(NP); + VEC_STR(SS); + VEC_STR(GP); + VEC_STR(PF); + VEC_STR(MF); + VEC_STR(AC); + VEC_STR(MC); + VEC_STR(XM); + VEC_STR(VE); + VEC_STR(CP); + VEC_STR(HV); + VEC_STR(VC); + VEC_STR(SX); + default: return "#??"; +#undef VEC_STR + } +} + static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) { fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " @@ -557,7 +591,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs) return false; if (regs->vector == DE_VECTOR) - return false; + regs->vector = KVM_MAGIC_DE_VECTOR; regs->rip = regs->r11; regs->r9 = regs->vector; @@ -625,7 +659,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) REPORT_GUEST_ASSERT(uc); } -void kvm_arch_vm_post_create(struct kvm_vm *vm) +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) { int r; @@ -638,6 +672,7 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm) sync_global_to_guest(vm, host_cpu_is_intel); sync_global_to_guest(vm, host_cpu_is_amd); sync_global_to_guest(vm, is_forced_emulation_enabled); + sync_global_to_guest(vm, pmu_errata_mask); if (is_sev_vm(vm)) { struct kvm_sev_init init = { 0 }; @@ -1269,6 +1304,8 @@ void kvm_selftest_arch_init(void) host_cpu_is_intel = this_cpu_is_intel(); host_cpu_is_amd = this_cpu_is_amd(); is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); + + kvm_init_pmu_errata(); } bool sys_clocksource_is_based_on_tsc(void) diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index c81a84990eab..3cdfa3b19b85 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -22,6 +22,7 @@ #include "processor.h" #include "test_util.h" #include "guest_modes.h" +#include "ucall_common.h" #define DUMMY_MEMSLOT_INDEX 7 diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index e3711beff7f3..5087d082c4b0 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -25,6 +25,7 @@ #include <test_util.h> #include <kvm_util.h> #include <processor.h> +#include <ucall_common.h> #define MEM_EXTRA_SIZE SZ_64K diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index a0b7dabb5040..705ab3d7778b 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -80,9 +80,11 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFBFMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR: @@ -103,6 +105,8 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFMIN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFWMA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB: @@ -128,6 +132,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR: return true; @@ -255,6 +260,8 @@ static const char *config_id_to_str(const char *prefix, __u64 id) return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)"; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)"; + case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): + return "KVM_REG_RISCV_CONFIG_REG(zicbop_block_size)"; case KVM_REG_RISCV_CONFIG_REG(mvendorid): return "KVM_REG_RISCV_CONFIG_REG(mvendorid)"; case KVM_REG_RISCV_CONFIG_REG(marchid): @@ -532,9 +539,11 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZCF), KVM_ISA_EXT_ARR(ZCMOP), KVM_ISA_EXT_ARR(ZFA), + KVM_ISA_EXT_ARR(ZFBFMIN), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOP), KVM_ISA_EXT_ARR(ZICBOZ), KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), @@ -555,6 +564,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZTSO), KVM_ISA_EXT_ARR(ZVBB), KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFBFMIN), + KVM_ISA_EXT_ARR(ZVFBFWMA), KVM_ISA_EXT_ARR(ZVFH), KVM_ISA_EXT_ARR(ZVFHMIN), KVM_ISA_EXT_ARR(ZVKB), @@ -627,6 +638,7 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off) KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR), }; @@ -683,6 +695,19 @@ static const char *sbi_sta_id_to_str(__u64 reg_off) return strdup_printf("KVM_REG_RISCV_SBI_STA | %lld /* UNKNOWN */", reg_off); } +static const char *sbi_fwft_id_to_str(__u64 reg_off) +{ + switch (reg_off) { + case 0: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable)"; + case 1: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags)"; + case 2: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value)"; + case 3: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable)"; + case 4: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags)"; + case 5: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value)"; + } + return strdup_printf("KVM_REG_RISCV_SBI_FWFT | %lld /* UNKNOWN */", reg_off); +} + static const char *sbi_id_to_str(const char *prefix, __u64 id) { __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_STATE); @@ -695,6 +720,8 @@ static const char *sbi_id_to_str(const char *prefix, __u64 id) switch (reg_subtype) { case KVM_REG_RISCV_SBI_STA: return sbi_sta_id_to_str(reg_off); + case KVM_REG_RISCV_SBI_FWFT: + return sbi_fwft_id_to_str(reg_off); } return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); @@ -780,10 +807,13 @@ void print_reg(const char *prefix, __u64 id) */ static __u64 base_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp), @@ -859,11 +889,26 @@ static __u64 sbi_sta_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi), }; +static __u64 sbi_fwft_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value), +}; + static __u64 zicbom_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM, }; +static __u64 zicbop_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP, +}; + static __u64 zicboz_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ, @@ -1010,8 +1055,13 @@ static __u64 vector_regs[] = { #define SUBLIST_SBI_STA \ {"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \ .regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),} +#define SUBLIST_SBI_FWFT \ + {"sbi-fwft", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_FWFT, \ + .regs = sbi_fwft_regs, .regs_n = ARRAY_SIZE(sbi_fwft_regs),} #define SUBLIST_ZICBOM \ {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),} +#define SUBLIST_ZICBOP \ + {"zicbop", .feature = KVM_RISCV_ISA_EXT_ZICBOP, .regs = zicbop_regs, .regs_n = ARRAY_SIZE(zicbop_regs),} #define SUBLIST_ZICBOZ \ {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),} #define SUBLIST_AIA \ @@ -1092,6 +1142,7 @@ KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA); KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU); KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN); KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP); +KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT); KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); @@ -1127,9 +1178,11 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD); KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF); KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); +KVM_ISA_EXT_SIMPLE_CONFIG(zfbfmin, ZFBFMIN); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); +KVM_ISA_EXT_SUBLIST_CONFIG(zicbop, ZICBOP); KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE); KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR); @@ -1150,6 +1203,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT); KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO); KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB); KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfmin, ZVFBFMIN); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfwma, ZVFBFWMA); KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH); KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN); KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB); @@ -1167,6 +1222,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_sbi_pmu, &config_sbi_dbcn, &config_sbi_susp, + &config_sbi_fwft, &config_aia, &config_fp_f, &config_fp_d, @@ -1201,9 +1257,11 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zcf, &config_zcmop, &config_zfa, + &config_zfbfmin, &config_zfh, &config_zfhmin, &config_zicbom, + &config_zicbop, &config_zicboz, &config_ziccrse, &config_zicntr, @@ -1224,6 +1282,8 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_ztso, &config_zvbb, &config_zvbc, + &config_zvfbfmin, + &config_zvfbfwma, &config_zvfh, &config_zvfhmin, &config_zvkb, diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c index 85cc8c18d6e7..e39a724fe860 100644 --- a/tools/testing/selftests/kvm/s390/cmma_test.c +++ b/tools/testing/selftests/kvm/s390/cmma_test.c @@ -145,7 +145,7 @@ static void finish_vm_setup(struct kvm_vm *vm) slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); - kvm_arch_vm_post_create(vm); + kvm_arch_vm_post_create(vm, 0); } static struct kvm_vm *create_vm_two_memslots(void) diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c index 27255880dabd..aded795d42be 100644 --- a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c +++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c @@ -291,7 +291,7 @@ int main(int argc, char *argv[]) ksft_test_result_pass("%s\n", testlist[idx].subfunc_name); free(array); } else { - ksft_test_result_skip("%s feature is not avaialable\n", + ksft_test_result_skip("%s feature is not available\n", testlist[idx].subfunc_name); } } diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index cce2520af720..8edc1fca345b 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -118,7 +118,7 @@ static int64_t smccc(uint32_t func, uint64_t arg) { struct arm_smccc_res res; - smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res); + do_smccc(func, arg, 0, 0, 0, 0, 0, 0, &res); return res.a0; } diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c index 2ac89d6c1e46..8926cfe0e209 100644 --- a/tools/testing/selftests/kvm/x86/fastops_test.c +++ b/tools/testing/selftests/kvm/x86/fastops_test.c @@ -8,14 +8,21 @@ * to set RFLAGS.CF based on whether or not the input is even or odd, so that * instructions like ADC and SBB are deterministic. */ +#define fastop(__insn) \ + "bt $0, %[bt_val]\n\t" \ + __insn "\n\t" \ + "pushfq\n\t" \ + "pop %[flags]\n\t" + +#define flags_constraint(flags_val) [flags]"=r"(flags_val) +#define bt_constraint(__bt_val) [bt_val]"rm"((uint32_t)__bt_val) + #define guest_execute_fastop_1(FEP, insn, __val, __flags) \ ({ \ - __asm__ __volatile__("bt $0, %[val]\n\t" \ - FEP insn " %[val]\n\t" \ - "pushfq\n\t" \ - "pop %[flags]\n\t" \ - : [val]"+r"(__val), [flags]"=r"(__flags) \ - : : "cc", "memory"); \ + __asm__ __volatile__(fastop(FEP insn " %[val]") \ + : [val]"+r"(__val), flags_constraint(__flags) \ + : bt_constraint(__val) \ + : "cc", "memory"); \ }) #define guest_test_fastop_1(insn, type_t, __val) \ @@ -36,12 +43,10 @@ #define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \ ({ \ - __asm__ __volatile__("bt $0, %[output]\n\t" \ - FEP insn " %[input], %[output]\n\t" \ - "pushfq\n\t" \ - "pop %[flags]\n\t" \ - : [output]"+r"(__output), [flags]"=r"(__flags) \ - : [input]"r"(__input) : "cc", "memory"); \ + __asm__ __volatile__(fastop(FEP insn " %[input], %[output]") \ + : [output]"+r"(__output), flags_constraint(__flags) \ + : [input]"r"(__input), bt_constraint(__output) \ + : "cc", "memory"); \ }) #define guest_test_fastop_2(insn, type_t, __val1, __val2) \ @@ -63,12 +68,10 @@ #define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \ ({ \ - __asm__ __volatile__("bt $0, %[output]\n\t" \ - FEP insn " %%cl, %[output]\n\t" \ - "pushfq\n\t" \ - "pop %[flags]\n\t" \ - : [output]"+r"(__output), [flags]"=r"(__flags) \ - : "c"(__shift) : "cc", "memory"); \ + __asm__ __volatile__(fastop(FEP insn " %%cl, %[output]") \ + : [output]"+r"(__output), flags_constraint(__flags) \ + : "c"(__shift), bt_constraint(__output) \ + : "cc", "memory"); \ }) #define guest_test_fastop_cl(insn, type_t, __val1, __val2) \ @@ -89,6 +92,42 @@ ex_flags, insn, shift, (uint64_t)input, flags); \ }) +#define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags) \ +({ \ + uint64_t ign_error_code; \ + uint8_t vector; \ + \ + __asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]")) \ + : "+a"(__a), "+d"(__d), flags_constraint(__flags), \ + KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : [denom]"rm"(__rm), bt_constraint(__rm) \ + : "cc", "memory", KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define guest_test_fastop_div(insn, type_t, __val1, __val2) \ +({ \ + type_t _a = __val1, _d = __val1, rm = __val2; \ + type_t a = _a, d = _d, ex_a = _a, ex_d = _d; \ + uint64_t flags, ex_flags; \ + uint8_t v, ex_v; \ + \ + ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags); \ + v = guest_execute_fastop_div(KVM_ASM_SAFE_FEP, insn, a, d, rm, flags); \ + \ + GUEST_ASSERT_EQ(v, ex_v); \ + __GUEST_ASSERT(v == ex_v, \ + "Wanted vector 0x%x for '%s 0x%lx:0x%lx/0x%lx', got 0x%x", \ + ex_v, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, v); \ + __GUEST_ASSERT(a == ex_a && d == ex_d, \ + "Wanted 0x%lx:0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx:0x%lx",\ + (uint64_t)ex_a, (uint64_t)ex_d, insn, (uint64_t)_a, \ + (uint64_t)_d, (uint64_t)rm, (uint64_t)a, (uint64_t)d); \ + __GUEST_ASSERT(v || ex_v || (flags == ex_flags), \ + "Wanted flags 0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx", \ + ex_flags, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, flags);\ +}) + static const uint64_t vals[] = { 0, 1, @@ -115,14 +154,16 @@ do { \ guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \ +if (sizeof(type_t) != 1) { \ guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \ - guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \ +} \ + guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \ @@ -136,12 +177,15 @@ do { \ guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \ guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \ + \ + guest_test_fastop_div("div" suffix, type_t, vals[i], vals[j]); \ } \ } \ } while (0) static void guest_code(void) { + guest_test_fastops(uint8_t, "b"); guest_test_fastops(uint16_t, "w"); guest_test_fastops(uint32_t, "l"); guest_test_fastops(uint64_t, "q"); diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index c863a689aa98..3c21af811d8f 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -45,7 +45,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) TEST_ASSERT((entry->function >= 0x40000000) && (entry->function <= 0x40000082), - "function %x is our of supported range", + "function %x is out of supported range", entry->function); TEST_ASSERT(entry->index == 0, diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 068e9c69710d..99d327084172 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -54,12 +54,12 @@ static void guest_msr(struct msr_data *msr) if (msr->fault_expected) __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on %sMSR(0x%x), got vector '0x%x'", - msr->write ? "WR" : "RD", msr->idx, vector); + "Expected #GP on %sMSR(0x%x), got %s", + msr->write ? "WR" : "RD", msr->idx, ex_str(vector)); else __GUEST_ASSERT(!vector, - "Expected success on %sMSR(0x%x), got vector '0x%x'", - msr->write ? "WR" : "RD", msr->idx, vector); + "Expected success on %sMSR(0x%x), got %s", + msr->write ? "WR" : "RD", msr->idx, ex_str(vector)); if (vector || is_write_only_msr(msr->idx)) goto done; @@ -102,12 +102,12 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { __GUEST_ASSERT(vector == UD_VECTOR, - "Expected #UD for control '%lu', got vector '0x%x'", - hcall->control, vector); + "Expected #UD for control '%lu', got %s", + hcall->control, ex_str(vector)); } else { __GUEST_ASSERT(!vector, - "Expected no exception for control '%lu', got vector '0x%x'", - hcall->control, vector); + "Expected no exception for control '%lu', got %s", + hcall->control, ex_str(vector)); GUEST_ASSERT_EQ(res, hcall->expect); } diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 0eb371c62ab8..e45c028d2a7e 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -30,12 +30,12 @@ do { \ \ if (fault_wanted) \ __GUEST_ASSERT((vector) == UD_VECTOR, \ - "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \ - testcase, vector); \ + "Expected #UD on " insn " for testcase '0x%x', got %s", \ + testcase, ex_str(vector)); \ else \ __GUEST_ASSERT(!(vector), \ - "Expected success on " insn " for testcase '0x%x', got '0x%x'", \ - testcase, vector); \ + "Expected success on " insn " for testcase '0x%x', got %s", \ + testcase, ex_str(vector)); \ } while (0) static void guest_monitor_wait(void *arg) diff --git a/tools/testing/selftests/kvm/x86/msrs_test.c b/tools/testing/selftests/kvm/x86/msrs_test.c new file mode 100644 index 000000000000..40d918aedce6 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/msrs_test.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <asm/msr-index.h> + +#include <stdint.h> + +#include "kvm_util.h" +#include "processor.h" + +/* Use HYPERVISOR for MSRs that are emulated unconditionally (as is HYPERVISOR). */ +#define X86_FEATURE_NONE X86_FEATURE_HYPERVISOR + +struct kvm_msr { + const struct kvm_x86_cpu_feature feature; + const struct kvm_x86_cpu_feature feature2; + const char *name; + const u64 reset_val; + const u64 write_val; + const u64 rsvd_val; + const u32 index; + const bool is_kvm_defined; +}; + +#define ____MSR_TEST(msr, str, val, rsvd, reset, feat, f2, is_kvm) \ +{ \ + .index = msr, \ + .name = str, \ + .write_val = val, \ + .rsvd_val = rsvd, \ + .reset_val = reset, \ + .feature = X86_FEATURE_ ##feat, \ + .feature2 = X86_FEATURE_ ##f2, \ + .is_kvm_defined = is_kvm, \ +} + +#define __MSR_TEST(msr, str, val, rsvd, reset, feat) \ + ____MSR_TEST(msr, str, val, rsvd, reset, feat, feat, false) + +#define MSR_TEST_NON_ZERO(msr, val, rsvd, reset, feat) \ + __MSR_TEST(msr, #msr, val, rsvd, reset, feat) + +#define MSR_TEST(msr, val, rsvd, feat) \ + __MSR_TEST(msr, #msr, val, rsvd, 0, feat) + +#define MSR_TEST2(msr, val, rsvd, feat, f2) \ + ____MSR_TEST(msr, #msr, val, rsvd, 0, feat, f2, false) + +/* + * Note, use a page aligned value for the canonical value so that the value + * is compatible with MSRs that use bits 11:0 for things other than addresses. + */ +static const u64 canonical_val = 0x123456789000ull; + +/* + * Arbitrary value with bits set in every byte, but not all bits set. This is + * also a non-canonical value, but that's coincidental (any 64-bit value with + * an alternating 0s/1s pattern will be non-canonical). + */ +static const u64 u64_val = 0xaaaa5555aaaa5555ull; + +#define MSR_TEST_CANONICAL(msr, feat) \ + __MSR_TEST(msr, #msr, canonical_val, NONCANONICAL, 0, feat) + +#define MSR_TEST_KVM(msr, val, rsvd, feat) \ + ____MSR_TEST(KVM_REG_ ##msr, #msr, val, rsvd, 0, feat, feat, true) + +/* + * The main struct must be scoped to a function due to the use of structures to + * define features. For the global structure, allocate enough space for the + * foreseeable future without getting too ridiculous, to minimize maintenance + * costs (bumping the array size every time an MSR is added is really annoying). + */ +static struct kvm_msr msrs[128]; +static int idx; + +static bool ignore_unsupported_msrs; + +static u64 fixup_rdmsr_val(u32 msr, u64 want) +{ + /* + * AMD CPUs drop bits 63:32 on some MSRs that Intel CPUs support. KVM + * is supposed to emulate that behavior based on guest vendor model + * (which is the same as the host vendor model for this test). + */ + if (!host_cpu_is_amd) + return want; + + switch (msr) { + case MSR_IA32_SYSENTER_ESP: + case MSR_IA32_SYSENTER_EIP: + case MSR_TSC_AUX: + return want & GENMASK_ULL(31, 0); + default: + return want; + } +} + +static void __rdmsr(u32 msr, u64 want) +{ + u64 val; + u8 vec; + + vec = rdmsr_safe(msr, &val); + __GUEST_ASSERT(!vec, "Unexpected %s on RDMSR(0x%x)", ex_str(vec), msr); + + __GUEST_ASSERT(val == want, "Wanted 0x%lx from RDMSR(0x%x), got 0x%lx", + want, msr, val); +} + +static void __wrmsr(u32 msr, u64 val) +{ + u8 vec; + + vec = wrmsr_safe(msr, val); + __GUEST_ASSERT(!vec, "Unexpected %s on WRMSR(0x%x, 0x%lx)", + ex_str(vec), msr, val); + __rdmsr(msr, fixup_rdmsr_val(msr, val)); +} + +static void guest_test_supported_msr(const struct kvm_msr *msr) +{ + __rdmsr(msr->index, msr->reset_val); + __wrmsr(msr->index, msr->write_val); + GUEST_SYNC(fixup_rdmsr_val(msr->index, msr->write_val)); + + __rdmsr(msr->index, msr->reset_val); +} + +static void guest_test_unsupported_msr(const struct kvm_msr *msr) +{ + u64 val; + u8 vec; + + /* + * KVM's ABI with respect to ignore_msrs is a mess and largely beyond + * repair, just skip the unsupported MSR tests. + */ + if (ignore_unsupported_msrs) + goto skip_wrmsr_gp; + + /* + * {S,U}_CET exist if IBT or SHSTK is supported, but with bits that are + * writable only if their associated feature is supported. Skip the + * RDMSR #GP test if the secondary feature is supported, but perform + * the WRMSR #GP test as the to-be-written value is tied to the primary + * feature. For all other MSRs, simply do nothing. + */ + if (this_cpu_has(msr->feature2)) { + if (msr->index != MSR_IA32_U_CET && + msr->index != MSR_IA32_S_CET) + goto skip_wrmsr_gp; + + goto skip_rdmsr_gp; + } + + vec = rdmsr_safe(msr->index, &val); + __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on RDMSR(0x%x), got %s", + msr->index, ex_str(vec)); + +skip_rdmsr_gp: + vec = wrmsr_safe(msr->index, msr->write_val); + __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s", + msr->index, msr->write_val, ex_str(vec)); + +skip_wrmsr_gp: + GUEST_SYNC(0); +} + +void guest_test_reserved_val(const struct kvm_msr *msr) +{ + /* Skip reserved value checks as well, ignore_msrs is trully a mess. */ + if (ignore_unsupported_msrs) + return; + + /* + * If the CPU will truncate the written value (e.g. SYSENTER on AMD), + * expect success and a truncated value, not #GP. + */ + if (!this_cpu_has(msr->feature) || + msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) { + u8 vec = wrmsr_safe(msr->index, msr->rsvd_val); + + __GUEST_ASSERT(vec == GP_VECTOR, + "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s", + msr->index, msr->rsvd_val, ex_str(vec)); + } else { + __wrmsr(msr->index, msr->rsvd_val); + __wrmsr(msr->index, msr->reset_val); + } +} + +static void guest_main(void) +{ + for (;;) { + const struct kvm_msr *msr = &msrs[READ_ONCE(idx)]; + + if (this_cpu_has(msr->feature)) + guest_test_supported_msr(msr); + else + guest_test_unsupported_msr(msr); + + if (msr->rsvd_val) + guest_test_reserved_val(msr); + + GUEST_SYNC(msr->reset_val); + } +} + +static bool has_one_reg; +static bool use_one_reg; + +#define KVM_X86_MAX_NR_REGS 1 + +static bool vcpu_has_reg(struct kvm_vcpu *vcpu, u64 reg) +{ + struct { + struct kvm_reg_list list; + u64 regs[KVM_X86_MAX_NR_REGS]; + } regs = {}; + int r, i; + + /* + * If KVM_GET_REG_LIST succeeds with n=0, i.e. there are no supported + * regs, then the vCPU obviously doesn't support the reg. + */ + r = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, ®s.list); + if (!r) + return false; + + TEST_ASSERT_EQ(errno, E2BIG); + + /* + * KVM x86 is expected to support enumerating a relative small number + * of regs. The majority of registers supported by KVM_{G,S}ET_ONE_REG + * are enumerated via other ioctls, e.g. KVM_GET_MSR_INDEX_LIST. For + * simplicity, hardcode the maximum number of regs and manually update + * the test as necessary. + */ + TEST_ASSERT(regs.list.n <= KVM_X86_MAX_NR_REGS, + "KVM reports %llu regs, test expects at most %u regs, stale test?", + regs.list.n, KVM_X86_MAX_NR_REGS); + + vcpu_ioctl(vcpu, KVM_GET_REG_LIST, ®s.list); + for (i = 0; i < regs.list.n; i++) { + if (regs.regs[i] == reg) + return true; + } + + return false; +} + +static void host_test_kvm_reg(struct kvm_vcpu *vcpu) +{ + bool has_reg = vcpu_cpuid_has(vcpu, msrs[idx].feature); + u64 reset_val = msrs[idx].reset_val; + u64 write_val = msrs[idx].write_val; + u64 rsvd_val = msrs[idx].rsvd_val; + u32 reg = msrs[idx].index; + u64 val; + int r; + + if (!use_one_reg) + return; + + TEST_ASSERT_EQ(vcpu_has_reg(vcpu, KVM_X86_REG_KVM(reg)), has_reg); + + if (!has_reg) { + r = __vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg), &val); + TEST_ASSERT(r && errno == EINVAL, + "Expected failure on get_reg(0x%x)", reg); + rsvd_val = 0; + goto out; + } + + val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg)); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + reset_val, reg, val); + + vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), write_val); + val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg)); + TEST_ASSERT(val == write_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + write_val, reg, val); + +out: + r = __vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), rsvd_val); + TEST_ASSERT(r, "Expected failure on set_reg(0x%x, 0x%lx)", reg, rsvd_val); +} + +static void host_test_msr(struct kvm_vcpu *vcpu, u64 guest_val) +{ + u64 reset_val = msrs[idx].reset_val; + u32 msr = msrs[idx].index; + u64 val; + + if (!kvm_cpu_has(msrs[idx].feature)) + return; + + val = vcpu_get_msr(vcpu, msr); + TEST_ASSERT(val == guest_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx", + guest_val, msr, val); + + if (use_one_reg) + vcpu_set_reg(vcpu, KVM_X86_REG_MSR(msr), reset_val); + else + vcpu_set_msr(vcpu, msr, reset_val); + + val = vcpu_get_msr(vcpu, msr); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx", + reset_val, msr, val); + + if (!has_one_reg) + return; + + val = vcpu_get_reg(vcpu, KVM_X86_REG_MSR(msr)); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + reset_val, msr, val); +} + +static void do_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + for (;;) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + host_test_msr(vcpu, uc.args[1]); + return; + case UCALL_PRINTF: + pr_info("%s", uc.buffer); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_DONE: + TEST_FAIL("Unexpected UCALL_DONE"); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } +} + +static void vcpus_run(struct kvm_vcpu **vcpus, const int NR_VCPUS) +{ + int i; + + for (i = 0; i < NR_VCPUS; i++) + do_vcpu_run(vcpus[i]); +} + +#define MISC_ENABLES_RESET_VAL (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL) + +static void test_msrs(void) +{ + const struct kvm_msr __msrs[] = { + MSR_TEST_NON_ZERO(MSR_IA32_MISC_ENABLE, + MISC_ENABLES_RESET_VAL | MSR_IA32_MISC_ENABLE_FAST_STRING, + MSR_IA32_MISC_ENABLE_FAST_STRING, MISC_ENABLES_RESET_VAL, NONE), + MSR_TEST_NON_ZERO(MSR_IA32_CR_PAT, 0x07070707, 0, 0x7040600070406, NONE), + + /* + * TSC_AUX is supported if RDTSCP *or* RDPID is supported. Add + * entries for each features so that TSC_AUX doesn't exists for + * the "unsupported" vCPU, and obviously to test both cases. + */ + MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDTSCP, RDPID), + MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDPID, RDTSCP), + + MSR_TEST(MSR_IA32_SYSENTER_CS, 0x1234, 0, NONE), + /* + * SYSENTER_{ESP,EIP} are technically non-canonical on Intel, + * but KVM doesn't emulate that behavior on emulated writes, + * i.e. this test will observe different behavior if the MSR + * writes are handed by hardware vs. KVM. KVM's behavior is + * intended (though far from ideal), so don't bother testing + * non-canonical values. + */ + MSR_TEST(MSR_IA32_SYSENTER_ESP, canonical_val, 0, NONE), + MSR_TEST(MSR_IA32_SYSENTER_EIP, canonical_val, 0, NONE), + + MSR_TEST_CANONICAL(MSR_FS_BASE, LM), + MSR_TEST_CANONICAL(MSR_GS_BASE, LM), + MSR_TEST_CANONICAL(MSR_KERNEL_GS_BASE, LM), + MSR_TEST_CANONICAL(MSR_LSTAR, LM), + MSR_TEST_CANONICAL(MSR_CSTAR, LM), + MSR_TEST(MSR_SYSCALL_MASK, 0xffffffff, 0, LM), + + MSR_TEST2(MSR_IA32_S_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT), + MSR_TEST2(MSR_IA32_S_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK), + MSR_TEST2(MSR_IA32_U_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT), + MSR_TEST2(MSR_IA32_U_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL0_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL0_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL1_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL1_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL2_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL2_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL3_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL3_SSP, canonical_val, canonical_val | 1, SHSTK), + + MSR_TEST_KVM(GUEST_SSP, canonical_val, NONCANONICAL, SHSTK), + }; + + const struct kvm_x86_cpu_feature feat_none = X86_FEATURE_NONE; + const struct kvm_x86_cpu_feature feat_lm = X86_FEATURE_LM; + + /* + * Create three vCPUs, but run them on the same task, to validate KVM's + * context switching of MSR state. Don't pin the task to a pCPU to + * also validate KVM's handling of cross-pCPU migration. Use the full + * set of features for the first two vCPUs, but clear all features in + * third vCPU in order to test both positive and negative paths. + */ + const int NR_VCPUS = 3; + struct kvm_vcpu *vcpus[NR_VCPUS]; + struct kvm_vm *vm; + int i; + + kvm_static_assert(sizeof(__msrs) <= sizeof(msrs)); + kvm_static_assert(ARRAY_SIZE(__msrs) <= ARRAY_SIZE(msrs)); + memcpy(msrs, __msrs, sizeof(__msrs)); + + ignore_unsupported_msrs = kvm_is_ignore_msrs(); + + vm = vm_create_with_vcpus(NR_VCPUS, guest_main, vcpus); + + sync_global_to_guest(vm, msrs); + sync_global_to_guest(vm, ignore_unsupported_msrs); + + /* + * Clear features in the "unsupported features" vCPU. This needs to be + * done before the first vCPU run as KVM's ABI is that guest CPUID is + * immutable once the vCPU has been run. + */ + for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) { + /* + * Don't clear LM; selftests are 64-bit only, and KVM doesn't + * honor LM=0 for MSRs that are supposed to exist if and only + * if the vCPU is a 64-bit model. Ditto for NONE; clearing a + * fake feature flag will result in false failures. + */ + if (memcmp(&msrs[idx].feature, &feat_lm, sizeof(feat_lm)) && + memcmp(&msrs[idx].feature, &feat_none, sizeof(feat_none))) + vcpu_clear_cpuid_feature(vcpus[2], msrs[idx].feature); + } + + for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) { + struct kvm_msr *msr = &msrs[idx]; + + if (msr->is_kvm_defined) { + for (i = 0; i < NR_VCPUS; i++) + host_test_kvm_reg(vcpus[i]); + continue; + } + + /* + * Verify KVM_GET_SUPPORTED_CPUID and KVM_GET_MSR_INDEX_LIST + * are consistent with respect to MSRs whose existence is + * enumerated via CPUID. Skip the check for FS/GS.base MSRs, + * as they aren't reported in the save/restore list since their + * state is managed via SREGS. + */ + TEST_ASSERT(msr->index == MSR_FS_BASE || msr->index == MSR_GS_BASE || + kvm_msr_is_in_save_restore_list(msr->index) == + (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)), + "%s %s in save/restore list, but %s according to CPUID", msr->name, + kvm_msr_is_in_save_restore_list(msr->index) ? "is" : "isn't", + (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)) ? + "supported" : "unsupported"); + + sync_global_to_guest(vm, idx); + + vcpus_run(vcpus, NR_VCPUS); + vcpus_run(vcpus, NR_VCPUS); + } + + kvm_vm_free(vm); +} + +int main(void) +{ + has_one_reg = kvm_has_cap(KVM_CAP_ONE_REG); + + test_msrs(); + + if (has_one_reg) { + use_one_reg = true; + test_msrs(); + } +} diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c index 8aaaf25b6111..3eaa216b96c0 100644 --- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c @@ -14,10 +14,10 @@ #define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) /* - * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, - * 1 LOOP. + * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP, + * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP. */ -#define NUM_INSNS_PER_LOOP 4 +#define NUM_INSNS_PER_LOOP 6 /* * Number of "extra" instructions that will be counted, i.e. the number of @@ -75,6 +75,11 @@ static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx) [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, + [INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL }, }; kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS); @@ -158,10 +163,18 @@ static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr switch (idx) { case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ + if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT)) + GUEST_ASSERT(count >= NUM_INSNS_RETIRED); + else + GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); break; case INTEL_ARCH_BRANCHES_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ + if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT)) + GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED); + else + GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); break; case INTEL_ARCH_LLC_REFERENCES_INDEX: case INTEL_ARCH_LLC_MISSES_INDEX: @@ -171,9 +184,12 @@ static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr fallthrough; case INTEL_ARCH_CPU_CYCLES_INDEX: case INTEL_ARCH_REFERENCE_CYCLES_INDEX: + case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX: + case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX: GUEST_ASSERT_NE(count, 0); break; case INTEL_ARCH_TOPDOWN_SLOTS_INDEX: + case INTEL_ARCH_TOPDOWN_RETIRING_INDEX: __GUEST_ASSERT(count >= NUM_INSNS_RETIRED, "Expected top-down slots >= %u, got count = %lu", NUM_INSNS_RETIRED, count); @@ -210,9 +226,11 @@ do { \ __asm__ __volatile__("wrmsr\n\t" \ " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ "1:\n\t" \ + FEP "enter $0, $0\n\t" \ clflush "\n\t" \ "mfence\n\t" \ "mov %[m], %%eax\n\t" \ + FEP "leave\n\t" \ FEP "loop 1b\n\t" \ FEP "mov %%edi, %%ecx\n\t" \ FEP "xor %%eax, %%eax\n\t" \ @@ -311,7 +329,7 @@ static void guest_test_arch_events(void) } static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, - uint8_t length, uint8_t unavailable_mask) + uint8_t length, uint32_t unavailable_mask) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -320,6 +338,9 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, if (!pmu_version) return; + unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit, + X86_PROPERTY_PMU_EVENTS_MASK.lo_bit); + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events, pmu_version, perf_capabilities); @@ -344,8 +365,8 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, #define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ - "Expected %s on " #insn "(0x%x), got vector %u", \ - expect_gp ? "#GP" : "no fault", msr, vector) \ + "Expected %s on " #insn "(0x%x), got %s", \ + expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \ #define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \ __GUEST_ASSERT(val == expected, \ @@ -575,6 +596,26 @@ static void test_intel_counters(void) }; /* + * To keep the total runtime reasonable, test only a handful of select, + * semi-arbitrary values for the mask of unavailable PMU events. Test + * 0 (all events available) and all ones (no events available) as well + * as alternating bit sequencues, e.g. to detect if KVM is checking the + * wrong bit(s). + */ + const uint32_t unavailable_masks[] = { + 0x0, + 0xffffffffu, + 0xaaaaaaaau, + 0x55555555u, + 0xf0f0f0f0u, + 0x0f0f0f0fu, + 0xa0a0a0a0u, + 0x0a0a0a0au, + 0x50505050u, + 0x05050505u, + }; + + /* * Test up to PMU v5, which is the current maximum version defined by * Intel, i.e. is the last version that is guaranteed to be backwards * compatible with KVM's existing behavior. @@ -611,16 +652,7 @@ static void test_intel_counters(void) pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n", v, perf_caps[i]); - /* - * To keep the total runtime reasonable, test every - * possible non-zero, non-reserved bitmap combination - * only with the native PMU version and the full bit - * vector length. - */ - if (v == pmu_version) { - for (k = 1; k < (BIT(NR_INTEL_ARCH_EVENTS) - 1); k++) - test_arch_events(v, perf_caps[i], NR_INTEL_ARCH_EVENTS, k); - } + /* * Test single bits for all PMU version and lengths up * the number of events +1 (to verify KVM doesn't do @@ -629,11 +661,8 @@ static void test_intel_counters(void) * ones i.e. all events being available and unavailable. */ for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) { - test_arch_events(v, perf_caps[i], j, 0); - test_arch_events(v, perf_caps[i], j, 0xff); - - for (k = 0; k < NR_INTEL_ARCH_EVENTS; k++) - test_arch_events(v, perf_caps[i], j, BIT(k)); + for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++) + test_arch_events(v, perf_caps[i], j, unavailable_masks[k]); } pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n", diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c index c15513cd74d1..1c5b7611db24 100644 --- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c @@ -214,8 +214,10 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) do { \ uint64_t br = pmc_results.branches_retired; \ uint64_t ir = pmc_results.instructions_retired; \ + bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? \ + br >= NUM_BRANCHES : br == NUM_BRANCHES; \ \ - if (br && br != NUM_BRANCHES) \ + if (br && !br_matched) \ pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \ __func__, br, NUM_BRANCHES); \ TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \ diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c index a1f5ff45d518..7ff6f62e20a3 100644 --- a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c @@ -29,7 +29,7 @@ static union perf_capabilities { u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; - u64 anythread_deprecated:1; + u64 pebs_timing_info:1; }; u64 capabilities; } host_cap; @@ -44,6 +44,7 @@ static const union perf_capabilities immutable_caps = { .pebs_arch_reg = 1, .pebs_format = -1, .pebs_baseline = 1, + .pebs_timing_info = 1, }; static const union perf_capabilities format_caps = { @@ -56,8 +57,8 @@ static void guest_test_perf_capabilities_gp(uint64_t val) uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP for value '0x%lx', got vector '0x%x'", - val, vector); + "Expected #GP for value '0x%lx', got %s", + val, ex_str(vector)); } static void guest_code(uint64_t current_val) diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c index fdebff1165c7..3b4814c55722 100644 --- a/tools/testing/selftests/kvm/x86/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c @@ -120,8 +120,8 @@ static void test_icr(struct xapic_vcpu *x) __test_icr(x, icr | i); /* - * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of - * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff. + * Send all flavors of IPIs to non-existent vCPUs. Arbitrarily use + * vector 0xff. */ icr = APIC_INT_ASSERT | 0xff; for (i = 0; i < 0xff; i++) { diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c index c8a5c5e51661..d038c1571729 100644 --- a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c @@ -81,13 +81,13 @@ static void guest_code(void) vector = xsetbv_safe(0, XFEATURE_MASK_FP); __GUEST_ASSERT(!vector, - "Expected success on XSETBV(FP), got vector '0x%x'", - vector); + "Expected success on XSETBV(FP), got %s", + ex_str(vector)); vector = xsetbv_safe(0, supported_xcr0); __GUEST_ASSERT(!vector, - "Expected success on XSETBV(0x%lx), got vector '0x%x'", - supported_xcr0, vector); + "Expected success on XSETBV(0x%lx), got %s", + supported_xcr0, ex_str(vector)); for (i = 0; i < 64; i++) { if (supported_xcr0 & BIT_ULL(i)) @@ -95,8 +95,8 @@ static void guest_code(void) vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i)); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'", - BIT_ULL(i), supported_xcr0, vector); + "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got %s", + BIT_ULL(i), supported_xcr0, ex_str(vector)); } GUEST_DONE(); diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index b16986aa6442..02fd1393947a 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -20,14 +20,12 @@ #include <sys/time.h> #include <unistd.h> +#include "../kselftest.h" + #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif -#ifndef __maybe_unused -#define __maybe_unused __attribute__((__unused__)) -#endif - #define REGEX_LANDLOCK_PREFIX "^audit([0-9.:]\\+): domain=\\([0-9a-f]\\+\\)" struct audit_filter { diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 88a3c78f5d98..9acecae36f51 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -22,10 +22,6 @@ #define TMP_DIR "tmp" -#ifndef __maybe_unused -#define __maybe_unused __attribute__((__unused__)) -#endif - /* TEST_F_FORK() should not be used for new tests. */ #define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 530390033929..a448fae57831 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -228,7 +228,10 @@ $(OUTPUT)/%:%.S $(LINK.S) $^ $(LDLIBS) -o $@ endif +# Extract the expected header directory +khdr_output := $(patsubst %/usr/include,%,$(filter %/usr/include,$(KHDR_INCLUDES))) + headers: - $(Q)$(MAKE) -C $(top_srcdir) headers + $(Q)$(MAKE) -f $(top_srcdir)/Makefile -C $(khdr_output) headers .PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir headers diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index e7b23a8a05fe..c2a8586e51a1 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -58,3 +58,5 @@ pkey_sighandler_tests_32 pkey_sighandler_tests_64 guard-regions merge +prctl_thp_disable +rmap diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index d13b3cef2a2b..eaf9312097f7 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -34,6 +34,7 @@ endif MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +CFLAGS += -Wunreachable-code LDLIBS = -lrt -lpthread -lm # Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is @@ -86,6 +87,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += pfnmap TEST_GEN_FILES += process_madv +TEST_GEN_FILES += prctl_thp_disable TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress @@ -101,6 +103,7 @@ TEST_GEN_FILES += hugetlb_dio TEST_GEN_FILES += droppable TEST_GEN_FILES += guard-regions TEST_GEN_FILES += merge +TEST_GEN_FILES += rmap ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty @@ -228,6 +231,8 @@ $(OUTPUT)/ksm_tests: LDLIBS += -lnuma $(OUTPUT)/migration: LDLIBS += -lnuma +$(OUTPUT)/rmap: LDLIBS += -lnuma + local_config.mk local_config.h: check_config.sh /bin/sh ./check_config.sh $(CC) diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index c744c603d688..6560c26f47d1 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -41,11 +41,6 @@ static size_t hugetlbsizes[10]; static int gup_fd; static bool has_huge_zeropage; -static int sz2ord(size_t size) -{ - return __builtin_ctzll(size / pagesize); -} - static int detect_thp_sizes(size_t sizes[], int max) { int count = 0; @@ -57,7 +52,7 @@ static int detect_thp_sizes(size_t sizes[], int max) if (!pmdsize) return 0; - orders = 1UL << sz2ord(pmdsize); + orders = 1UL << sz2ord(pmdsize, pagesize); orders |= thp_supported_orders(); for (i = 0; orders && count < max; i++) { @@ -1216,8 +1211,8 @@ static void run_anon_test_case(struct test_case const *test_case) size_t size = thpsizes[i]; struct thp_settings settings = *thp_current_settings(); - settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER; - settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_NEVER; + settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS; thp_push_settings(&settings); if (size == pmdsize) { @@ -1868,7 +1863,7 @@ int main(int argc, char **argv) if (pmdsize) { /* Only if THP is supported. */ thp_read_settings(&default_settings); - default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT; + default_settings.hugepages[sz2ord(pmdsize, pagesize)].enabled = THP_INHERIT; thp_save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c index bdeaac67ff9a..8900b840c17a 100644 --- a/tools/testing/selftests/mm/gup_test.c +++ b/tools/testing/selftests/mm/gup_test.c @@ -139,6 +139,8 @@ int main(int argc, char **argv) break; case 'n': nr_pages = atoi(optarg); + if (nr_pages < 0) + nr_pages = size / psize(); break; case 't': thp = 1; diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index 141bf63cbe05..15aadaf24a66 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -2027,11 +2027,10 @@ TEST_F(hmm, hmm_cow_in_device) if (pid == -1) ASSERT_EQ(pid, 0); if (!pid) { - /* Child process waitd for SIGTERM from the parent. */ + /* Child process waits for SIGTERM from the parent. */ while (1) { } - perror("Should not reach this\n"); - exit(0); + /* Should not reach this */ } /* Parent process writes to COW pages(s) and gets a * new copy in system. In case of device private pages, diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index c463d1c09c9b..2bd1dac75c3f 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -65,10 +65,20 @@ static void register_region_with_uffd(char *addr, size_t len) struct uffdio_api uffdio_api; /* Create and enable userfaultfd object. */ - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - if (uffd == -1) - ksft_exit_fail_msg("userfaultfd: %s\n", strerror(errno)); + if (uffd == -1) { + switch (errno) { + case EPERM: + ksft_exit_skip("Insufficient permissions, try running as root.\n"); + break; + case ENOSYS: + ksft_exit_skip("userfaultfd is not supported/not enabled.\n"); + break; + default: + ksft_exit_fail_msg("userfaultfd failed with %s\n", strerror(errno)); + break; + } + } uffdio_api.api = UFFD_API; uffdio_api.features = 0; diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index a18c50d51141..3fe7ef04ac62 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -394,7 +394,7 @@ static void *file_setup_area(int nr_hpages) perror("open()"); exit(EXIT_FAILURE); } - p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC, + p = mmap(BASE_ADDR, size, PROT_READ, MAP_PRIVATE, finfo.fd, 0); if (p == MAP_FAILED || p != BASE_ADDR) { perror("mmap()"); diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index d8bd1911dfc0..ac136f04b8d6 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -38,14 +38,11 @@ enum ksm_merge_mode { }; static int mem_fd; -static int ksm_fd; -static int ksm_full_scans_fd; -static int proc_self_ksm_stat_fd; -static int proc_self_ksm_merging_pages_fd; -static int ksm_use_zero_pages_fd; static int pagemap_fd; static size_t pagesize; +static void init_global_file_handles(void); + static bool range_maps_duplicates(char *addr, unsigned long size) { unsigned long offs_a, offs_b, pfn_a, pfn_b; @@ -73,88 +70,6 @@ static bool range_maps_duplicates(char *addr, unsigned long size) return false; } -static long get_my_ksm_zero_pages(void) -{ - char buf[200]; - char *substr_ksm_zero; - size_t value_pos; - ssize_t read_size; - unsigned long my_ksm_zero_pages; - - if (!proc_self_ksm_stat_fd) - return 0; - - read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0); - if (read_size < 0) - return -errno; - - buf[read_size] = 0; - - substr_ksm_zero = strstr(buf, "ksm_zero_pages"); - if (!substr_ksm_zero) - return 0; - - value_pos = strcspn(substr_ksm_zero, "0123456789"); - my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10); - - return my_ksm_zero_pages; -} - -static long get_my_merging_pages(void) -{ - char buf[10]; - ssize_t ret; - - if (proc_self_ksm_merging_pages_fd < 0) - return proc_self_ksm_merging_pages_fd; - - ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0); - if (ret <= 0) - return -errno; - buf[ret] = 0; - - return strtol(buf, NULL, 10); -} - -static long ksm_get_full_scans(void) -{ - char buf[10]; - ssize_t ret; - - ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0); - if (ret <= 0) - return -errno; - buf[ret] = 0; - - return strtol(buf, NULL, 10); -} - -static int ksm_merge(void) -{ - long start_scans, end_scans; - - /* Wait for two full scans such that any possible merging happened. */ - start_scans = ksm_get_full_scans(); - if (start_scans < 0) - return start_scans; - if (write(ksm_fd, "1", 1) != 1) - return -errno; - do { - end_scans = ksm_get_full_scans(); - if (end_scans < 0) - return end_scans; - } while (end_scans < start_scans + 2); - - return 0; -} - -static int ksm_unmerge(void) -{ - if (write(ksm_fd, "2", 1) != 1) - return -errno; - return 0; -} - static char *__mmap_and_merge_range(char val, unsigned long size, int prot, enum ksm_merge_mode mode) { @@ -163,12 +78,12 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot, int ret; /* Stabilize accounting by disabling KSM completely. */ - if (ksm_unmerge()) { + if (ksm_stop() < 0) { ksft_print_msg("Disabling (unmerging) KSM failed\n"); return err_map; } - if (get_my_merging_pages() > 0) { + if (ksm_get_self_merging_pages() > 0) { ksft_print_msg("Still pages merged\n"); return err_map; } @@ -218,7 +133,7 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot, } /* Run KSM to trigger merging and wait. */ - if (ksm_merge()) { + if (ksm_start() < 0) { ksft_print_msg("Running KSM failed\n"); goto unmap; } @@ -227,7 +142,7 @@ static char *__mmap_and_merge_range(char val, unsigned long size, int prot, * Check if anything was merged at all. Ignore the zero page that is * accounted differently (depending on kernel support). */ - if (val && !get_my_merging_pages()) { + if (val && !ksm_get_self_merging_pages()) { ksft_print_msg("No pages got merged\n"); goto unmap; } @@ -274,6 +189,7 @@ static void test_unmerge(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -286,15 +202,12 @@ static void test_unmerge_zero_pages(void) ksft_print_msg("[RUN] %s\n", __func__); - if (proc_self_ksm_stat_fd < 0) { - ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n"); - return; - } - if (ksm_use_zero_pages_fd < 0) { - ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); + if (ksm_get_self_zero_pages() < 0) { + ksft_test_result_skip("accessing \"/proc/self/ksm_stat\" failed\n"); return; } - if (write(ksm_use_zero_pages_fd, "1", 1) != 1) { + + if (ksm_use_zero_pages() < 0) { ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); return; } @@ -306,7 +219,7 @@ static void test_unmerge_zero_pages(void) /* Check if ksm_zero_pages is updated correctly after KSM merging */ pages_expected = size / pagesize; - if (pages_expected != get_my_ksm_zero_pages()) { + if (pages_expected != ksm_get_self_zero_pages()) { ksft_test_result_fail("'ksm_zero_pages' updated after merging\n"); goto unmap; } @@ -319,7 +232,7 @@ static void test_unmerge_zero_pages(void) /* Check if ksm_zero_pages is updated correctly after unmerging */ pages_expected /= 2; - if (pages_expected != get_my_ksm_zero_pages()) { + if (pages_expected != ksm_get_self_zero_pages()) { ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n"); goto unmap; } @@ -329,7 +242,7 @@ static void test_unmerge_zero_pages(void) *((unsigned int *)&map[offs]) = offs; /* Now we should have no zeropages remaining. */ - if (get_my_ksm_zero_pages()) { + if (ksm_get_self_zero_pages()) { ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n"); goto unmap; } @@ -338,6 +251,7 @@ static void test_unmerge_zero_pages(void) ksft_test_result(!range_maps_duplicates(map, size), "KSM zero pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -366,6 +280,7 @@ static void test_unmerge_discarded(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -452,6 +367,7 @@ static void test_unmerge_uffd_wp(void) close_uffd: close(uffd); unmap: + ksm_stop(); munmap(map, size); } #endif @@ -506,27 +422,30 @@ static int test_child_ksm(void) /* Test if KSM is enabled for the process. */ if (prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) != 1) - return -1; + return 1; /* Test if merge could really happen. */ map = __mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_NONE); if (map == MAP_MERGE_FAIL) - return -2; + return 2; else if (map == MAP_MERGE_SKIP) - return -3; + return 3; + ksm_stop(); munmap(map, size); return 0; } static void test_child_ksm_err(int status) { - if (status == -1) + if (status == 1) ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n"); - else if (status == -2) + else if (status == 2) ksft_test_result_fail("Merge in child failed\n"); - else if (status == -3) + else if (status == 3) ksft_test_result_skip("Merge in child skipped\n"); + else if (status == 4) + ksft_test_result_fail("Binary not found\n"); } /* Verify that prctl ksm flag is inherited. */ @@ -548,6 +467,7 @@ static void test_prctl_fork(void) child_pid = fork(); if (!child_pid) { + init_global_file_handles(); exit(test_child_ksm()); } else if (child_pid < 0) { ksft_test_result_fail("fork() failed\n"); @@ -595,10 +515,10 @@ static void test_prctl_fork_exec(void) return; } else if (child_pid == 0) { char *prg_name = "./ksm_functional_tests"; - char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME }; + char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME, NULL }; execv(prg_name, argv_for_program); - return; + exit(4); } if (waitpid(child_pid, &status, 0) > 0) { @@ -644,6 +564,7 @@ static void test_prctl_unmerge(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); munmap(map, size); } @@ -677,6 +598,47 @@ static void test_prot_none(void) ksft_test_result(!range_maps_duplicates(map, size), "Pages were unmerged\n"); unmap: + ksm_stop(); + munmap(map, size); +} + +static void test_fork_ksm_merging_page_count(void) +{ + const unsigned int size = 2 * MiB; + char *map; + pid_t child_pid; + int status; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE); + if (map == MAP_FAILED) + return; + + child_pid = fork(); + if (!child_pid) { + init_global_file_handles(); + exit(ksm_get_self_merging_pages()); + } else if (child_pid < 0) { + ksft_test_result_fail("fork() failed\n"); + goto unmap; + } + + if (waitpid(child_pid, &status, 0) < 0) { + ksft_test_result_fail("waitpid() failed\n"); + goto unmap; + } + + status = WEXITSTATUS(status); + if (status) { + ksft_test_result_fail("ksm_merging_page in child: %d\n", status); + goto unmap; + } + + ksft_test_result_pass("ksm_merging_pages is not inherited after fork\n"); + +unmap: + ksm_stop(); munmap(map, size); } @@ -685,24 +647,20 @@ static void init_global_file_handles(void) mem_fd = open("/proc/self/mem", O_RDWR); if (mem_fd < 0) ksft_exit_fail_msg("opening /proc/self/mem failed\n"); - ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); - if (ksm_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); - ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); - if (ksm_full_scans_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); + if (ksm_stop() < 0) + ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/run\") failed\n"); + if (ksm_get_full_scans() < 0) + ksft_exit_skip("accessing \"/sys/kernel/mm/ksm/full_scans\") failed\n"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); - proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); - proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", - O_RDONLY); - ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); + if (ksm_get_self_merging_pages() < 0) + ksft_exit_skip("accessing \"/proc/self/ksm_merging_pages\") failed\n"); } int main(int argc, char **argv) { - unsigned int tests = 8; + unsigned int tests = 9; int err; if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { @@ -734,6 +692,7 @@ int main(int argc, char **argv) test_prctl_fork(); test_prctl_fork_exec(); test_prctl_unmerge(); + test_fork_ksm_merging_page_count(); err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c index b6fabd5c27ed..d8d11bc67ddc 100644 --- a/tools/testing/selftests/mm/madv_populate.c +++ b/tools/testing/selftests/mm/madv_populate.c @@ -264,23 +264,6 @@ static void test_softdirty(void) munmap(addr, SIZE); } -static int system_has_softdirty(void) -{ - /* - * There is no way to check if the kernel supports soft-dirty, other - * than by writing to a page and seeing if the bit was set. But the - * tests are intended to check that the bit gets set when it should, so - * doing that check would turn a potentially legitimate fail into a - * skip. Fortunately, we know for sure that arm64 does not support - * soft-dirty. So for now, let's just use the arch as a corse guide. - */ -#if defined(__aarch64__) - return 0; -#else - return 1; -#endif -} - int main(int argc, char **argv) { int nr_tests = 16; @@ -288,7 +271,7 @@ int main(int argc, char **argv) pagesize = getpagesize(); - if (system_has_softdirty()) + if (softdirty_supported()) nr_tests += 5; ksft_print_header(); @@ -300,7 +283,7 @@ int main(int argc, char **argv) test_holes(); test_populate_read(); test_populate_write(); - if (system_has_softdirty()) + if (softdirty_supported()) test_softdirty(); err = ksft_get_fail_cnt(); diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 5bd52a951cbd..bf2863b102e3 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -846,7 +846,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp, } if (err != EFAULT) { errno = err; - perror("mrmeap() unexpected error"); + perror("mremap() unexpected error"); success = false; goto out_unmap; } @@ -899,7 +899,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp, } if (err != EFAULT) { errno = err; - perror("mrmeap() unexpected error"); + perror("mremap() unexpected error"); success = false; goto out_unmap; } @@ -948,7 +948,7 @@ static void mremap_move_multi_invalid_vmas(FILE *maps_fp, } if (err != EFAULT) { errno = err; - perror("mrmeap() unexpected error"); + perror("mremap() unexpected error"); success = false; goto out_unmap; } diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index e6face7c0166..4fc8e578ec7c 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -209,7 +209,7 @@ int userfaultfd_tests(void) wp_addr_range(mem, mem_size); vec_size = mem_size/page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); @@ -247,11 +247,11 @@ int sanity_tests_sd(void) vec_size = num_pages/2; mem_size = num_pages * page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); - vec2 = malloc(sizeof(struct page_region) * vec_size); + vec2 = calloc(vec_size, sizeof(struct page_region)); if (!vec2) ksft_exit_fail_msg("error nomem\n"); @@ -436,7 +436,7 @@ int sanity_tests_sd(void) mem_size = 1050 * page_size; vec_size = mem_size/(page_size*2); - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); @@ -491,7 +491,7 @@ int sanity_tests_sd(void) mem_size = 10000 * page_size; vec_size = 50; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); @@ -541,7 +541,7 @@ int sanity_tests_sd(void) vec_size = 1000; mem_size = vec_size * page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); if (!vec) ksft_exit_fail_msg("error nomem\n"); @@ -695,8 +695,8 @@ int base_tests(char *prefix, char *mem, unsigned long long mem_size, int skip) } vec_size = mem_size/page_size; - vec = malloc(sizeof(struct page_region) * vec_size); - vec2 = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); + vec2 = calloc(vec_size, sizeof(struct page_region)); /* 1. all new pages must be not be written (dirty) */ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, @@ -807,8 +807,8 @@ int hpage_unit_tests(void) unsigned long long vec_size = map_size/page_size; struct page_region *vec, *vec2; - vec = malloc(sizeof(struct page_region) * vec_size); - vec2 = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); + vec2 = calloc(vec_size, sizeof(struct page_region)); if (!vec || !vec2) ksft_exit_fail_msg("malloc failed\n"); @@ -997,7 +997,7 @@ int unmapped_region_tests(void) void *start = (void *)0x10000000; int written, len = 0x00040000; long vec_size = len / page_size; - struct page_region *vec = malloc(sizeof(struct page_region) * vec_size); + struct page_region *vec = calloc(vec_size, sizeof(struct page_region)); /* 1. Get written pages */ written = pagemap_ioctl(start, len, vec, vec_size, 0, 0, @@ -1062,7 +1062,7 @@ int sanity_tests(void) mem_size = 10 * page_size; vec_size = mem_size / page_size; - vec = malloc(sizeof(struct page_region) * vec_size); + vec = calloc(vec_size, sizeof(struct page_region)); mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED || vec == MAP_FAILED) ksft_exit_fail_msg("error nomem\n"); diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c index 866ac023baf5..88659f0a90ea 100644 --- a/tools/testing/selftests/mm/pfnmap.c +++ b/tools/testing/selftests/mm/pfnmap.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem' + * Basic VM_PFNMAP tests relying on mmap() of input file provided. + * Use '/dev/mem' as default. * * Copyright 2025, Red Hat, Inc. * @@ -25,6 +26,7 @@ #include "vm_util.h" static sigjmp_buf sigjmp_buf_env; +static char *file = "/dev/mem"; static void signal_handler(int sig) { @@ -51,7 +53,7 @@ static int test_read_access(char *addr, size_t size, size_t pagesize) return ret; } -static int find_ram_target(off_t *phys_addr, +static int find_ram_target(off_t *offset, unsigned long long pagesize) { unsigned long long start, end; @@ -91,7 +93,7 @@ static int find_ram_target(off_t *phys_addr, /* We need two pages. */ if (end > start + 2 * pagesize) { fclose(file); - *phys_addr = start; + *offset = start; return 0; } } @@ -100,7 +102,7 @@ static int find_ram_target(off_t *phys_addr, FIXTURE(pfnmap) { - off_t phys_addr; + off_t offset; size_t pagesize; int dev_mem_fd; char *addr1; @@ -113,23 +115,31 @@ FIXTURE_SETUP(pfnmap) { self->pagesize = getpagesize(); - /* We'll require two physical pages throughout our tests ... */ - if (find_ram_target(&self->phys_addr, self->pagesize)) - SKIP(return, "Cannot find ram target in '/proc/iomem'\n"); + if (strncmp(file, "/dev/mem", strlen("/dev/mem")) == 0) { + /* We'll require two physical pages throughout our tests ... */ + if (find_ram_target(&self->offset, self->pagesize)) + SKIP(return, + "Cannot find ram target in '/proc/iomem'\n"); + } else { + self->offset = 0; + } - self->dev_mem_fd = open("/dev/mem", O_RDONLY); + self->dev_mem_fd = open(file, O_RDONLY); if (self->dev_mem_fd < 0) - SKIP(return, "Cannot open '/dev/mem'\n"); + SKIP(return, "Cannot open '%s'\n", file); self->size1 = self->pagesize * 2; self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, - self->dev_mem_fd, self->phys_addr); + self->dev_mem_fd, self->offset); if (self->addr1 == MAP_FAILED) - SKIP(return, "Cannot mmap '/dev/mem'\n"); + SKIP(return, "Cannot mmap '%s'\n", file); + + if (!check_vmflag_pfnmap(self->addr1)) + SKIP(return, "Invalid file: '%s'. Not pfnmap'ed\n", file); /* ... and want to be able to read from them. */ if (test_read_access(self->addr1, self->size1, self->pagesize)) - SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n"); + SKIP(return, "Cannot read-access mmap'ed '%s'\n", file); self->size2 = 0; self->addr2 = MAP_FAILED; @@ -182,7 +192,7 @@ TEST_F(pfnmap, munmap_split) */ self->size2 = self->pagesize; self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, - self->dev_mem_fd, self->phys_addr); + self->dev_mem_fd, self->offset); ASSERT_NE(self->addr2, MAP_FAILED); } @@ -246,4 +256,14 @@ TEST_F(pfnmap, fork) ASSERT_EQ(ret, 0); } -TEST_HARNESS_MAIN +int main(int argc, char **argv) +{ + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--") == 0) { + if (i + 1 < argc && strlen(argv[i + 1]) > 0) + file = argv[i + 1]; + return test_harness_run(i, argv); + } + } + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index ea404f80e6cb..fa15f006fa68 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -84,9 +84,6 @@ extern void abort_hooks(void); #ifndef noinline # define noinline __attribute__((noinline)) #endif -#ifndef __maybe_unused -# define __maybe_unused __attribute__((__unused__)) -#endif int sys_pkey_alloc(unsigned long flags, unsigned long init_val); int sys_pkey_free(unsigned long pkey); diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c index b5e076a564c9..302fef54049c 100644 --- a/tools/testing/selftests/mm/pkey_sighandler_tests.c +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -41,7 +41,7 @@ static siginfo_t siginfo = {0}; * syscall will attempt to access the PLT in order to call a library function * which is protected by MPK 0 which we don't have access to. */ -static inline __always_inline +static __always_inline long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6) { unsigned long ret; diff --git a/tools/testing/selftests/mm/prctl_thp_disable.c b/tools/testing/selftests/mm/prctl_thp_disable.c new file mode 100644 index 000000000000..84b4a4b345af --- /dev/null +++ b/tools/testing/selftests/mm/prctl_thp_disable.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Basic tests for PR_GET/SET_THP_DISABLE prctl calls + * + * Author(s): Usama Arif <usamaarif642@gmail.com> + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <linux/mman.h> +#include <sys/prctl.h> +#include <sys/wait.h> + +#include "../kselftest_harness.h" +#include "thp_settings.h" +#include "vm_util.h" + +#ifndef PR_THP_DISABLE_EXCEPT_ADVISED +#define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1) +#endif + +enum thp_collapse_type { + THP_COLLAPSE_NONE, + THP_COLLAPSE_MADV_NOHUGEPAGE, + THP_COLLAPSE_MADV_HUGEPAGE, /* MADV_HUGEPAGE before access */ + THP_COLLAPSE_MADV_COLLAPSE, /* MADV_COLLAPSE after access */ +}; + +/* + * Function to mmap a buffer, fault it in, madvise it appropriately (before + * page fault for MADV_HUGE, and after for MADV_COLLAPSE), and check if the + * mmap region is huge. + * Returns: + * 0 if test doesn't give hugepage + * 1 if test gives a hugepage + * -errno if mmap fails + */ +static int test_mmap_thp(enum thp_collapse_type madvise_buf, size_t pmdsize) +{ + char *mem, *mmap_mem; + size_t mmap_size; + int ret; + + /* For alignment purposes, we need twice the THP size. */ + mmap_size = 2 * pmdsize; + mmap_mem = (char *)mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mmap_mem == MAP_FAILED) + return -errno; + + /* We need a THP-aligned memory area. */ + mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); + + if (madvise_buf == THP_COLLAPSE_MADV_HUGEPAGE) + madvise(mem, pmdsize, MADV_HUGEPAGE); + else if (madvise_buf == THP_COLLAPSE_MADV_NOHUGEPAGE) + madvise(mem, pmdsize, MADV_NOHUGEPAGE); + + /* Ensure memory is allocated */ + memset(mem, 1, pmdsize); + + if (madvise_buf == THP_COLLAPSE_MADV_COLLAPSE) + madvise(mem, pmdsize, MADV_COLLAPSE); + + /* HACK: make sure we have a separate VMA that we can check reliably. */ + mprotect(mem, pmdsize, PROT_READ); + + ret = check_huge_anon(mem, 1, pmdsize); + munmap(mmap_mem, mmap_size); + return ret; +} + +static void prctl_thp_disable_completely_test(struct __test_metadata *const _metadata, + size_t pmdsize, + enum thp_enabled thp_policy) +{ + ASSERT_EQ(prctl(PR_GET_THP_DISABLE, NULL, NULL, NULL, NULL), 1); + + /* tests after prctl overrides global policy */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 0); + + /* Reset to global policy */ + ASSERT_EQ(prctl(PR_SET_THP_DISABLE, 0, NULL, NULL, NULL), 0); + + /* tests after prctl is cleared, and only global policy is effective */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), + thp_policy == THP_ALWAYS ? 1 : 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), + thp_policy == THP_NEVER ? 0 : 1); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1); +} + +FIXTURE(prctl_thp_disable_completely) +{ + struct thp_settings settings; + size_t pmdsize; +}; + +FIXTURE_VARIANT(prctl_thp_disable_completely) +{ + enum thp_enabled thp_policy; +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, never) +{ + .thp_policy = THP_NEVER, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, madvise) +{ + .thp_policy = THP_MADVISE, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_completely, always) +{ + .thp_policy = THP_ALWAYS, +}; + +FIXTURE_SETUP(prctl_thp_disable_completely) +{ + if (!thp_available()) + SKIP(return, "Transparent Hugepages not available\n"); + + self->pmdsize = read_pmd_pagesize(); + if (!self->pmdsize) + SKIP(return, "Unable to read PMD size\n"); + + if (prctl(PR_SET_THP_DISABLE, 1, NULL, NULL, NULL)) + SKIP(return, "Unable to disable THPs completely for the process\n"); + + thp_save_settings(); + thp_read_settings(&self->settings); + self->settings.thp_enabled = variant->thp_policy; + self->settings.hugepages[sz2ord(self->pmdsize, getpagesize())].enabled = THP_INHERIT; + thp_write_settings(&self->settings); +} + +FIXTURE_TEARDOWN(prctl_thp_disable_completely) +{ + thp_restore_settings(); +} + +TEST_F(prctl_thp_disable_completely, nofork) +{ + prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy); +} + +TEST_F(prctl_thp_disable_completely, fork) +{ + int ret = 0; + pid_t pid; + + /* Make sure prctl changes are carried across fork */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (!pid) { + prctl_thp_disable_completely_test(_metadata, self->pmdsize, variant->thp_policy); + return; + } + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + ASSERT_EQ(ret, 0); +} + +static void prctl_thp_disable_except_madvise_test(struct __test_metadata *const _metadata, + size_t pmdsize, + enum thp_enabled thp_policy) +{ + ASSERT_EQ(prctl(PR_GET_THP_DISABLE, NULL, NULL, NULL, NULL), 3); + + /* tests after prctl overrides global policy */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), + thp_policy == THP_NEVER ? 0 : 1); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1); + + /* Reset to global policy */ + ASSERT_EQ(prctl(PR_SET_THP_DISABLE, 0, NULL, NULL, NULL), 0); + + /* tests after prctl is cleared, and only global policy is effective */ + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_NONE, pmdsize), + thp_policy == THP_ALWAYS ? 1 : 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_NOHUGEPAGE, pmdsize), 0); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_HUGEPAGE, pmdsize), + thp_policy == THP_NEVER ? 0 : 1); + + ASSERT_EQ(test_mmap_thp(THP_COLLAPSE_MADV_COLLAPSE, pmdsize), 1); +} + +FIXTURE(prctl_thp_disable_except_madvise) +{ + struct thp_settings settings; + size_t pmdsize; +}; + +FIXTURE_VARIANT(prctl_thp_disable_except_madvise) +{ + enum thp_enabled thp_policy; +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, never) +{ + .thp_policy = THP_NEVER, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, madvise) +{ + .thp_policy = THP_MADVISE, +}; + +FIXTURE_VARIANT_ADD(prctl_thp_disable_except_madvise, always) +{ + .thp_policy = THP_ALWAYS, +}; + +FIXTURE_SETUP(prctl_thp_disable_except_madvise) +{ + if (!thp_available()) + SKIP(return, "Transparent Hugepages not available\n"); + + self->pmdsize = read_pmd_pagesize(); + if (!self->pmdsize) + SKIP(return, "Unable to read PMD size\n"); + + if (prctl(PR_SET_THP_DISABLE, 1, PR_THP_DISABLE_EXCEPT_ADVISED, NULL, NULL)) + SKIP(return, "Unable to set PR_THP_DISABLE_EXCEPT_ADVISED\n"); + + thp_save_settings(); + thp_read_settings(&self->settings); + self->settings.thp_enabled = variant->thp_policy; + self->settings.hugepages[sz2ord(self->pmdsize, getpagesize())].enabled = THP_INHERIT; + thp_write_settings(&self->settings); +} + +FIXTURE_TEARDOWN(prctl_thp_disable_except_madvise) +{ + thp_restore_settings(); +} + +TEST_F(prctl_thp_disable_except_madvise, nofork) +{ + prctl_thp_disable_except_madvise_test(_metadata, self->pmdsize, variant->thp_policy); +} + +TEST_F(prctl_thp_disable_except_madvise, fork) +{ + int ret = 0; + pid_t pid; + + /* Make sure prctl changes are carried across fork */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (!pid) { + prctl_thp_disable_except_madvise_test(_metadata, self->pmdsize, + variant->thp_policy); + return; + } + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + ASSERT_EQ(ret, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 23ebec367015..2085982dba69 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -557,13 +557,11 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, int nr_iterations = random() % 100; int ret; - while (0) { + while (nr_iterations-- >= 0) { int rpkey = alloc_random_pkey(); ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", ptr, size, orig_prot, pkey, ret); - if (nr_iterations-- < 0) - break; dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx" " shadow: 0x%016llx\n", @@ -1304,7 +1302,7 @@ static void test_mprotect_with_pkey_0(int *ptr, u16 pkey) static void test_ptrace_of_child(int *ptr, u16 pkey) { - __attribute__((__unused__)) int peek_result; + __always_unused int peek_result; pid_t child_pid; void *ignored = 0; long ret; diff --git a/tools/testing/selftests/mm/rmap.c b/tools/testing/selftests/mm/rmap.c new file mode 100644 index 000000000000..13f7bccfd0a9 --- /dev/null +++ b/tools/testing/selftests/mm/rmap.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RMAP functional tests + * + * Author(s): Wei Yang <richard.weiyang@gmail.com> + */ + +#include "../kselftest_harness.h" +#include <strings.h> +#include <pthread.h> +#include <numa.h> +#include <numaif.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <signal.h> +#include <time.h> +#include <sys/sem.h> +#include <unistd.h> +#include <fcntl.h> + +#include "vm_util.h" + +#define TOTAL_LEVEL 5 +#define MAX_CHILDREN 3 + +#define FAIL_ON_CHECK (1 << 0) +#define FAIL_ON_WORK (1 << 1) + +struct sembuf sem_wait = {0, -1, 0}; +struct sembuf sem_signal = {0, 1, 0}; + +enum backend_type { + ANON, + SHM, + NORM_FILE, +}; + +#define PREFIX "kst_rmap" +#define MAX_FILENAME_LEN 256 +const char *suffixes[] = { + "", + "_shm", + "_file", +}; + +struct global_data; +typedef int (*work_fn)(struct global_data *data); +typedef int (*check_fn)(struct global_data *data); +typedef void (*prepare_fn)(struct global_data *data); + +struct global_data { + int worker_level; + + int semid; + int pipefd[2]; + + unsigned int mapsize; + unsigned int rand_seed; + char *region; + + prepare_fn do_prepare; + work_fn do_work; + check_fn do_check; + + enum backend_type backend; + char filename[MAX_FILENAME_LEN]; + + unsigned long *expected_pfn; +}; + +/* + * Create a process tree with TOTAL_LEVEL height and at most MAX_CHILDREN + * children for each. + * + * It will randomly select one process as 'worker' process which will + * 'do_work' until all processes are created. And all other processes will + * wait until 'worker' finish its work. + */ +void propagate_children(struct __test_metadata *_metadata, struct global_data *data) +{ + pid_t root_pid, pid; + unsigned int num_child; + int status; + int ret = 0; + int curr_child, worker_child; + int curr_level = 1; + bool is_worker = true; + + root_pid = getpid(); +repeat: + num_child = rand_r(&data->rand_seed) % MAX_CHILDREN + 1; + worker_child = is_worker ? rand_r(&data->rand_seed) % num_child : -1; + + for (curr_child = 0; curr_child < num_child; curr_child++) { + pid = fork(); + + if (pid < 0) { + perror("Error: fork\n"); + } else if (pid == 0) { + curr_level++; + + if (curr_child != worker_child) + is_worker = false; + + if (curr_level == TOTAL_LEVEL) + break; + + data->rand_seed += curr_child; + goto repeat; + } + } + + if (data->do_prepare) + data->do_prepare(data); + + close(data->pipefd[1]); + + if (is_worker && curr_level == data->worker_level) { + /* This is the worker process, first wait last process created */ + char buf; + + while (read(data->pipefd[0], &buf, 1) > 0) + ; + + if (data->do_work) + ret = data->do_work(data); + + /* Kick others */ + semctl(data->semid, 0, IPC_RMID); + } else { + /* Wait worker finish */ + semop(data->semid, &sem_wait, 1); + if (data->do_check) + ret = data->do_check(data); + } + + /* Wait all child to quit */ + while (wait(&status) > 0) { + if (WIFEXITED(status)) + ret |= WEXITSTATUS(status); + } + + if (getpid() == root_pid) { + if (ret & FAIL_ON_WORK) + SKIP(return, "Failed in worker"); + + ASSERT_EQ(ret, 0); + } else { + exit(ret); + } +} + +FIXTURE(migrate) +{ + struct global_data data; +}; + +FIXTURE_SETUP(migrate) +{ + struct global_data *data = &self->data; + + if (numa_available() < 0) + SKIP(return, "NUMA not available"); + if (numa_bitmask_weight(numa_all_nodes_ptr) <= 1) + SKIP(return, "Not enough NUMA nodes available"); + + data->mapsize = getpagesize(); + + data->expected_pfn = mmap(0, sizeof(unsigned long), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(data->expected_pfn, MAP_FAILED); + + /* Prepare semaphore */ + data->semid = semget(IPC_PRIVATE, 1, 0666 | IPC_CREAT); + ASSERT_NE(data->semid, -1); + ASSERT_NE(semctl(data->semid, 0, SETVAL, 0), -1); + + /* Prepare pipe */ + ASSERT_NE(pipe(data->pipefd), -1); + + data->rand_seed = time(NULL); + srand(data->rand_seed); + + data->worker_level = rand() % TOTAL_LEVEL + 1; + + data->do_prepare = NULL; + data->do_work = NULL; + data->do_check = NULL; + + data->backend = ANON; +}; + +FIXTURE_TEARDOWN(migrate) +{ + struct global_data *data = &self->data; + + if (data->region != MAP_FAILED) + munmap(data->region, data->mapsize); + data->region = MAP_FAILED; + if (data->expected_pfn != MAP_FAILED) + munmap(data->expected_pfn, sizeof(unsigned long)); + data->expected_pfn = MAP_FAILED; + semctl(data->semid, 0, IPC_RMID); + data->semid = -1; + + close(data->pipefd[0]); + + switch (data->backend) { + case ANON: + break; + case SHM: + shm_unlink(data->filename); + break; + case NORM_FILE: + unlink(data->filename); + break; + } +} + +void access_region(struct global_data *data) +{ + /* + * Force read "region" to make sure page fault in. + */ + FORCE_READ(*data->region); +} + +int try_to_move_page(char *region) +{ + int ret; + int node; + int status = 0; + int failures = 0; + + ret = move_pages(0, 1, (void **)®ion, NULL, &status, MPOL_MF_MOVE_ALL); + if (ret != 0) { + perror("Failed to get original numa"); + return FAIL_ON_WORK; + } + + /* Pick up a different target node */ + for (node = 0; node <= numa_max_node(); node++) { + if (numa_bitmask_isbitset(numa_all_nodes_ptr, node) && node != status) + break; + } + + if (node > numa_max_node()) { + ksft_print_msg("Couldn't find available numa node for testing\n"); + return FAIL_ON_WORK; + } + + while (1) { + ret = move_pages(0, 1, (void **)®ion, &node, &status, MPOL_MF_MOVE_ALL); + + /* migrate successfully */ + if (!ret) + break; + + /* error happened */ + if (ret < 0) { + ksft_perror("Failed to move pages"); + return FAIL_ON_WORK; + } + + /* migration is best effort; try again */ + if (++failures >= 100) + return FAIL_ON_WORK; + } + + return 0; +} + +int move_region(struct global_data *data) +{ + int ret; + int pagemap_fd; + + ret = try_to_move_page(data->region); + if (ret != 0) + return ret; + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd == -1) + return FAIL_ON_WORK; + *data->expected_pfn = pagemap_get_pfn(pagemap_fd, data->region); + + return 0; +} + +int has_same_pfn(struct global_data *data) +{ + unsigned long pfn; + int pagemap_fd; + + if (data->region == MAP_FAILED) + return 0; + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd == -1) + return FAIL_ON_CHECK; + + pfn = pagemap_get_pfn(pagemap_fd, data->region); + if (pfn != *data->expected_pfn) + return FAIL_ON_CHECK; + + return 0; +} + +TEST_F(migrate, anon) +{ + struct global_data *data = &self->data; + + /* Map an area and fault in */ + data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(data->region, MAP_FAILED); + memset(data->region, 0xcf, data->mapsize); + + data->do_prepare = access_region; + data->do_work = move_region; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +TEST_F(migrate, shm) +{ + int shm_fd; + struct global_data *data = &self->data; + + snprintf(data->filename, MAX_FILENAME_LEN, "%s%s", PREFIX, suffixes[SHM]); + shm_fd = shm_open(data->filename, O_CREAT | O_RDWR, 0666); + ASSERT_NE(shm_fd, -1); + ftruncate(shm_fd, data->mapsize); + data->backend = SHM; + + /* Map a shared area and fault in */ + data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE, + MAP_SHARED, shm_fd, 0); + ASSERT_NE(data->region, MAP_FAILED); + memset(data->region, 0xcf, data->mapsize); + close(shm_fd); + + data->do_prepare = access_region; + data->do_work = move_region; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +TEST_F(migrate, file) +{ + int fd; + struct global_data *data = &self->data; + + snprintf(data->filename, MAX_FILENAME_LEN, "%s%s", PREFIX, suffixes[NORM_FILE]); + fd = open(data->filename, O_CREAT | O_RDWR | O_EXCL, 0666); + ASSERT_NE(fd, -1); + ftruncate(fd, data->mapsize); + data->backend = NORM_FILE; + + /* Map a shared area and fault in */ + data->region = mmap(0, data->mapsize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + ASSERT_NE(data->region, MAP_FAILED); + memset(data->region, 0xcf, data->mapsize); + close(fd); + + data->do_prepare = access_region; + data->do_work = move_region; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +void prepare_local_region(struct global_data *data) +{ + /* Allocate range and set the same data */ + data->region = mmap(NULL, data->mapsize, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, 0); + if (data->region == MAP_FAILED) + return; + + memset(data->region, 0xcf, data->mapsize); +} + +int merge_and_migrate(struct global_data *data) +{ + int pagemap_fd; + int ret = 0; + + if (data->region == MAP_FAILED) + return FAIL_ON_WORK; + + if (ksm_start() < 0) + return FAIL_ON_WORK; + + ret = try_to_move_page(data->region); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd == -1) + return FAIL_ON_WORK; + *data->expected_pfn = pagemap_get_pfn(pagemap_fd, data->region); + + return ret; +} + +TEST_F(migrate, ksm) +{ + int ret; + struct global_data *data = &self->data; + + if (ksm_stop() < 0) + SKIP(return, "accessing \"/sys/kernel/mm/ksm/run\") failed"); + if (ksm_get_full_scans() < 0) + SKIP(return, "accessing \"/sys/kernel/mm/ksm/full_scan\") failed"); + + ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); + if (ret < 0 && errno == EINVAL) + SKIP(return, "PR_SET_MEMORY_MERGE not supported"); + else if (ret) + ksft_exit_fail_perror("PR_SET_MEMORY_MERGE=1 failed"); + + data->do_prepare = prepare_local_region; + data->do_work = merge_and_migrate; + data->do_check = has_same_pfn; + + propagate_children(_metadata, data); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 471e539d82b8..d9173f2312b7 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -85,6 +85,8 @@ separated by spaces: test handling of page fragment allocation and freeing - vma_merge test VMA merge cases behave as expected +- rmap + test rmap behaves as expected example: ./run_vmtests.sh -t "hmm mmap ksm" EOF @@ -136,7 +138,7 @@ run_gup_matrix() { # -n: How many pages to fetch together? 512 is special # because it's default thp size (or 2M on x86), 123 to # just test partial gup when hit a huge in whatever form - for num in "-n 1" "-n 512" "-n 123"; do + for num in "-n 1" "-n 512" "-n 123" "-n -1"; do CATEGORY="gup_test" run_test ./gup_test \ $huge $test_cmd $write $share $num done @@ -172,13 +174,13 @@ fi # set proper nr_hugepages if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then - nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) + orig_nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) needpgs=$((needmem_KB / hpgsize_KB)) tries=2 while [ "$tries" -gt 0 ] && [ "$freepgs" -lt "$needpgs" ]; do lackpgs=$((needpgs - freepgs)) echo 3 > /proc/sys/vm/drop_caches - if ! echo $((lackpgs + nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then + if ! echo $((lackpgs + orig_nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then echo "Please run this test as root" exit $ksft_skip fi @@ -189,6 +191,7 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then done < /proc/meminfo tries=$((tries - 1)) done + nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) if [ "$freepgs" -lt "$needpgs" ]; then printf "Not enough huge pages available (%d < %d)\n" \ "$freepgs" "$needpgs" @@ -311,9 +314,11 @@ if $RUN_ALL; then run_gup_matrix else # get_user_pages_fast() benchmark - CATEGORY="gup_test" run_test ./gup_test -u + CATEGORY="gup_test" run_test ./gup_test -u -n 1 + CATEGORY="gup_test" run_test ./gup_test -u -n -1 # pin_user_pages_fast() benchmark - CATEGORY="gup_test" run_test ./gup_test -a + CATEGORY="gup_test" run_test ./gup_test -a -n 1 + CATEGORY="gup_test" run_test ./gup_test -a -n -1 fi # Dump pages 0, 19, and 4096, using pin_user_pages: CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000 @@ -322,11 +327,15 @@ CATEGORY="gup_test" run_test ./gup_longterm CATEGORY="userfaultfd" run_test ./uffd-unit-tests uffd_stress_bin=./uffd-stress CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16 -# Hugetlb tests require source and destination huge pages. Pass in half -# the size of the free pages we have, which is used for *each*. +# Hugetlb tests require source and destination huge pages. Pass in almost half +# the size of the free pages we have, which is used for *each*. An adjustment +# of (nr_parallel - 1) is done (see nr_parallel in uffd-stress.c) to have some +# extra hugepages - this is done to prevent the test from failing by racily +# reserving more hugepages than strictly required. # uffd-stress expects a region expressed in MiB, so we adjust # half_ufd_size_MB accordingly. -half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2)) +adjustment=$(( (31 < (nr_cpus - 1)) ? 31 : (nr_cpus - 1) )) +half_ufd_size_MB=$((((freepgs - adjustment) * hpgsize_KB) / 1024 / 2)) CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 @@ -532,6 +541,12 @@ CATEGORY="page_frag" run_test ./test_page_frag.sh aligned CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned +CATEGORY="rmap" run_test ./rmap + +if [ "${HAVE_HUGEPAGES}" = 1 ]; then + echo "$orig_nr_hugepgs" > /proc/sys/vm/nr_hugepages +fi + echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix echo "1..${count_total}" | tap_output diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index 8a3f2b4b2186..4ee4db3750c1 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -200,8 +200,11 @@ int main(int argc, char **argv) int pagesize; ksft_print_header(); - ksft_set_plan(15); + if (!softdirty_supported()) + ksft_exit_skip("soft-dirty is not support\n"); + + ksft_set_plan(15); pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY); if (pagemap_fd < 0) ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH); diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index 44a3f8a58806..743af3c05190 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -25,6 +25,8 @@ uint64_t pagesize; unsigned int pageshift; uint64_t pmd_pagesize; +unsigned int pmd_order; +int *expected_orders; #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" #define SMAP_PATH "/proc/self/smaps" @@ -34,28 +36,225 @@ uint64_t pmd_pagesize; #define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d" #define PATH_FMT "%s,0x%lx,0x%lx,%d" -#define PFN_MASK ((1UL<<55)-1) -#define KPF_THP (1UL<<22) +const char *pagemap_proc = "/proc/self/pagemap"; +const char *kpageflags_proc = "/proc/kpageflags"; +int pagemap_fd; +int kpageflags_fd; -int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) +static bool is_backed_by_folio(char *vaddr, int order, int pagemap_fd, + int kpageflags_fd) { - uint64_t paddr; - uint64_t page_flags; + const uint64_t folio_head_flags = KPF_THP | KPF_COMPOUND_HEAD; + const uint64_t folio_tail_flags = KPF_THP | KPF_COMPOUND_TAIL; + const unsigned long nr_pages = 1UL << order; + unsigned long pfn_head; + uint64_t pfn_flags; + unsigned long pfn; + unsigned long i; + + pfn = pagemap_get_pfn(pagemap_fd, vaddr); + + /* non present page */ + if (pfn == -1UL) + return false; + + if (pageflags_get(pfn, kpageflags_fd, &pfn_flags)) + goto fail; + + /* check for order-0 pages */ + if (!order) { + if (pfn_flags & (folio_head_flags | folio_tail_flags)) + return false; + return true; + } + + /* non THP folio */ + if (!(pfn_flags & KPF_THP)) + return false; + + pfn_head = pfn & ~(nr_pages - 1); + + if (pageflags_get(pfn_head, kpageflags_fd, &pfn_flags)) + goto fail; + + /* head PFN has no compound_head flag set */ + if ((pfn_flags & folio_head_flags) != folio_head_flags) + return false; + + /* check all tail PFN flags */ + for (i = 1; i < nr_pages; i++) { + if (pageflags_get(pfn_head + i, kpageflags_fd, &pfn_flags)) + goto fail; + if ((pfn_flags & folio_tail_flags) != folio_tail_flags) + return false; + } + + /* + * check the PFN after this folio, but if its flags cannot be obtained, + * assume this folio has the expected order + */ + if (pageflags_get(pfn_head + nr_pages, kpageflags_fd, &pfn_flags)) + return true; + + /* If we find another tail page, then the folio is larger. */ + return (pfn_flags & folio_tail_flags) != folio_tail_flags; +fail: + ksft_exit_fail_msg("Failed to get folio info\n"); + return false; +} + +static int vaddr_pageflags_get(char *vaddr, int pagemap_fd, int kpageflags_fd, + uint64_t *flags) +{ + unsigned long pfn; + + pfn = pagemap_get_pfn(pagemap_fd, vaddr); + + /* non-present PFN */ + if (pfn == -1UL) + return 1; + + if (pageflags_get(pfn, kpageflags_fd, flags)) + return -1; + + return 0; +} + +/* + * gather_after_split_folio_orders - scan through [vaddr_start, len) and record + * folio orders + * + * @vaddr_start: start vaddr + * @len: range length + * @pagemap_fd: file descriptor to /proc/<pid>/pagemap + * @kpageflags_fd: file descriptor to /proc/kpageflags + * @orders: output folio order array + * @nr_orders: folio order array size + * + * gather_after_split_folio_orders() scan through [vaddr_start, len) and check + * all folios within the range and record their orders. All order-0 pages will + * be recorded. Non-present vaddr is skipped. + * + * NOTE: the function is used to check folio orders after a split is performed, + * so it assumes [vaddr_start, len) fully maps to after-split folios within that + * range. + * + * Return: 0 - no error, -1 - unhandled cases + */ +static int gather_after_split_folio_orders(char *vaddr_start, size_t len, + int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders) +{ + uint64_t page_flags = 0; + int cur_order = -1; + char *vaddr; + + if (pagemap_fd == -1 || kpageflags_fd == -1) + return -1; + if (!orders) + return -1; + if (nr_orders <= 0) + return -1; + + for (vaddr = vaddr_start; vaddr < vaddr_start + len;) { + char *next_folio_vaddr; + int status; + + status = vaddr_pageflags_get(vaddr, pagemap_fd, kpageflags_fd, + &page_flags); + if (status < 0) + return -1; + + /* skip non present vaddr */ + if (status == 1) { + vaddr += psize(); + continue; + } - if (pagemap_file) { - pread(pagemap_file, &paddr, sizeof(paddr), - ((long)vaddr >> pageshift) * sizeof(paddr)); + /* all order-0 pages with possible false postive (non folio) */ + if (!(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { + orders[0]++; + vaddr += psize(); + continue; + } - if (kpageflags_file) { - pread(kpageflags_file, &page_flags, sizeof(page_flags), - (paddr & PFN_MASK) * sizeof(page_flags)); + /* skip non thp compound pages */ + if (!(page_flags & KPF_THP)) { + vaddr += psize(); + continue; + } - return !!(page_flags & KPF_THP); + /* vpn points to part of a THP at this point */ + if (page_flags & KPF_COMPOUND_HEAD) + cur_order = 1; + else { + vaddr += psize(); + continue; } + + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); + + if (next_folio_vaddr >= vaddr_start + len) + break; + + while ((status = vaddr_pageflags_get(next_folio_vaddr, + pagemap_fd, kpageflags_fd, + &page_flags)) >= 0) { + /* + * non present vaddr, next compound head page, or + * order-0 page + */ + if (status == 1 || + (page_flags & KPF_COMPOUND_HEAD) || + !(page_flags & (KPF_COMPOUND_HEAD | KPF_COMPOUND_TAIL))) { + if (cur_order < nr_orders) { + orders[cur_order]++; + cur_order = -1; + vaddr = next_folio_vaddr; + } + break; + } + + cur_order++; + next_folio_vaddr = vaddr + (1UL << (cur_order + pshift())); + } + + if (status < 0) + return status; } + if (cur_order > 0 && cur_order < nr_orders) + orders[cur_order]++; return 0; } +static int check_after_split_folio_orders(char *vaddr_start, size_t len, + int pagemap_fd, int kpageflags_fd, int orders[], int nr_orders) +{ + int *vaddr_orders; + int status; + int i; + + vaddr_orders = (int *)malloc(sizeof(int) * nr_orders); + + if (!vaddr_orders) + ksft_exit_fail_msg("Cannot allocate memory for vaddr_orders"); + + memset(vaddr_orders, 0, sizeof(int) * nr_orders); + status = gather_after_split_folio_orders(vaddr_start, len, pagemap_fd, + kpageflags_fd, vaddr_orders, nr_orders); + if (status) + ksft_exit_fail_msg("gather folio info failed\n"); + + for (i = 0; i < nr_orders; i++) + if (vaddr_orders[i] != orders[i]) { + ksft_print_msg("order %d: expected: %d got %d\n", i, + orders[i], vaddr_orders[i]); + status = -1; + } + + free(vaddr_orders); + return status; +} + static void write_file(const char *path, const char *buf, size_t buflen) { int fd; @@ -111,7 +310,7 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp unsigned long rss_anon_before, rss_anon_after; size_t i; - if (!check_huge_anon(one_page, 4, pmd_pagesize)) + if (!check_huge_anon(one_page, nr_hpages, pmd_pagesize)) ksft_exit_fail_msg("No THP is allocated\n"); rss_anon_before = rss_anon(); @@ -135,7 +334,7 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp rss_anon_before, rss_anon_after); } -void split_pmd_zero_pages(void) +static void split_pmd_zero_pages(void) { char *one_page; int nr_hpages = 4; @@ -147,7 +346,7 @@ void split_pmd_zero_pages(void) free(one_page); } -void split_pmd_thp_to_order(int order) +static void split_pmd_thp_to_order(int order) { char *one_page; size_t len = 4 * pmd_pagesize; @@ -173,6 +372,13 @@ void split_pmd_thp_to_order(int order) if (one_page[i] != (char)i) ksft_exit_fail_msg("%ld byte corrupted\n", i); + memset(expected_orders, 0, sizeof(int) * (pmd_order + 1)); + expected_orders[order] = 4 << (pmd_order - order); + + if (check_after_split_folio_orders(one_page, len, pagemap_fd, + kpageflags_fd, expected_orders, + (pmd_order + 1))) + ksft_exit_fail_msg("Unexpected THP split\n"); if (!check_huge_anon(one_page, 0, pmd_pagesize)) ksft_exit_fail_msg("Still AnonHugePages not split\n"); @@ -181,90 +387,97 @@ void split_pmd_thp_to_order(int order) free(one_page); } -void split_pte_mapped_thp(void) +static void split_pte_mapped_thp(void) { - char *one_page, *pte_mapped, *pte_mapped2; - size_t len = 4 * pmd_pagesize; - uint64_t thp_size; + const size_t nr_thps = 4; + const size_t thp_area_size = nr_thps * pmd_pagesize; + const size_t page_area_size = nr_thps * pagesize; + char *thp_area, *tmp, *page_area = MAP_FAILED; size_t i; - const char *pagemap_template = "/proc/%d/pagemap"; - const char *kpageflags_proc = "/proc/kpageflags"; - char pagemap_proc[255]; - int pagemap_fd; - int kpageflags_fd; - if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) - ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno)); + thp_area = mmap((void *)(1UL << 30), thp_area_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (thp_area == MAP_FAILED) { + ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno)); + return; + } - pagemap_fd = open(pagemap_proc, O_RDONLY); - if (pagemap_fd == -1) - ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); + madvise(thp_area, thp_area_size, MADV_HUGEPAGE); - kpageflags_fd = open(kpageflags_proc, O_RDONLY); - if (kpageflags_fd == -1) - ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); + for (i = 0; i < thp_area_size; i++) + thp_area[i] = (char)i; - one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (one_page == MAP_FAILED) - ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); + if (!check_huge_anon(thp_area, nr_thps, pmd_pagesize)) { + ksft_test_result_skip("Not all THPs allocated\n"); + goto out; + } - madvise(one_page, len, MADV_HUGEPAGE); + /* + * To challenge spitting code, we will mremap a single page of each + * THP (page[i] of thp[i]) in the thp_area into page_area. This will + * replace the PMD mappings in the thp_area by PTE mappings first, + * but leaving the THP unsplit, to then create a page-sized hole in + * the thp_area. + * We will then manually trigger splitting of all THPs through the + * single mremap'ed pages of each THP in the page_area. + */ + page_area = mmap(NULL, page_area_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (page_area == MAP_FAILED) { + ksft_test_result_fail("Fail to allocate memory: %s\n", strerror(errno)); + goto out; + } - for (i = 0; i < len; i++) - one_page[i] = (char)i; + for (i = 0; i < nr_thps; i++) { + tmp = mremap(thp_area + pmd_pagesize * i + pagesize * i, + pagesize, pagesize, MREMAP_MAYMOVE|MREMAP_FIXED, + page_area + pagesize * i); + if (tmp != MAP_FAILED) + continue; + ksft_test_result_fail("mremap failed: %s\n", strerror(errno)); + goto out; + } - if (!check_huge_anon(one_page, 4, pmd_pagesize)) - ksft_exit_fail_msg("No THP is allocated\n"); + /* + * Verify that our THPs were not split yet. Note that + * check_huge_anon() cannot be used as it checks for PMD mappings. + */ + for (i = 0; i < nr_thps; i++) { + if (is_backed_by_folio(page_area + i * pagesize, pmd_order, + pagemap_fd, kpageflags_fd)) + continue; + ksft_test_result_fail("THP %zu missing after mremap\n", i); + goto out; + } - /* remap the first pagesize of first THP */ - pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); - - /* remap the Nth pagesize of Nth THP */ - for (i = 1; i < 4; i++) { - pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, - pagesize, pagesize, - MREMAP_MAYMOVE|MREMAP_FIXED, - pte_mapped + pagesize * i); - if (pte_mapped2 == MAP_FAILED) - ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno)); - } - - /* smap does not show THPs after mremap, use kpageflags instead */ - thp_size = 0; - for (i = 0; i < pagesize * 4; i++) - if (i % pagesize == 0 && - is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) - thp_size++; - - if (thp_size != 4) - ksft_exit_fail_msg("Some THPs are missing during mremap\n"); - - /* split all remapped THPs */ - write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, - (uint64_t)pte_mapped + pagesize * 4, 0); - - /* smap does not show THPs after mremap, use kpageflags instead */ - thp_size = 0; - for (i = 0; i < pagesize * 4; i++) { - if (pte_mapped[i] != (char)i) - ksft_exit_fail_msg("%ld byte corrupted\n", i); + /* Split all THPs through the remapped pages. */ + write_debugfs(PID_FMT, getpid(), (uint64_t)page_area, + (uint64_t)page_area + page_area_size, 0); - if (i % pagesize == 0 && - is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) - thp_size++; + /* Corruption during mremap or split? */ + for (i = 0; i < page_area_size; i++) { + if (page_area[i] == (char)i) + continue; + ksft_test_result_fail("%zu byte corrupted\n", i); + goto out; } - if (thp_size) - ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size); + /* Split failed? */ + for (i = 0; i < nr_thps; i++) { + if (is_backed_by_folio(page_area + i * pagesize, 0, + pagemap_fd, kpageflags_fd)) + continue; + ksft_test_result_fail("THP %zu not split\n", i); + } ksft_test_result_pass("Split PTE-mapped huge pages successful\n"); - munmap(one_page, len); - close(pagemap_fd); - close(kpageflags_fd); +out: + munmap(thp_area, thp_area_size); + if (page_area != MAP_FAILED) + munmap(page_area, page_area_size); } -void split_file_backed_thp(int order) +static void split_file_backed_thp(int order) { int status; int fd; @@ -297,7 +510,7 @@ void split_file_backed_thp(int order) status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); if (status >= INPUT_MAX) { - ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); + ksft_print_msg("Fail to create file-backed THP split testing file\n"); goto cleanup; } @@ -366,7 +579,7 @@ out: ksft_exit_fail_msg("Error occurred\n"); } -bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, +static bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, const char **thp_fs_loc) { if (xfs_path) { @@ -382,7 +595,7 @@ bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template, return true; } -void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) +static void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) { int status; @@ -395,8 +608,8 @@ void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp) strerror(errno)); } -int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, - char **addr) +static int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, + int *fd, char **addr) { size_t i; unsigned char buf[1024]; @@ -462,10 +675,11 @@ err_out_unlink: return -1; } -void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, - int order, int offset) +static void split_thp_in_pagecache_to_order_at(size_t fd_size, + const char *fs_loc, int order, int offset) { int fd; + char *split_addr; char *addr; size_t i; char testfile[INPUT_MAX]; @@ -479,14 +693,33 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr); if (err) return; + err = 0; - if (offset == -1) - write_debugfs(PID_FMT, getpid(), (uint64_t)addr, - (uint64_t)addr + fd_size, order); - else - write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr, - (uint64_t)addr + fd_size, order, offset); + memset(expected_orders, 0, sizeof(int) * (pmd_order + 1)); + /* + * use [split_addr, split_addr + pagesize) range to split THPs, since + * the debugfs function always split a range with pagesize step and + * providing a full [addr, addr + fd_size) range can trigger multiple + * splits, complicating after-split result checking. + */ + if (offset == -1) { + for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize) + write_debugfs(PID_FMT, getpid(), (uint64_t)split_addr, + (uint64_t)split_addr + pagesize, order); + + expected_orders[order] = fd_size / (pagesize << order); + } else { + int times = fd_size / pmd_pagesize; + + for (split_addr = addr; split_addr < addr + fd_size; split_addr += pmd_pagesize) + write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)split_addr, + (uint64_t)split_addr + pagesize, order, offset); + + for (i = order + 1; i < pmd_order; i++) + expected_orders[i] = times; + expected_orders[order] = 2 * times; + } for (i = 0; i < fd_size; i++) if (*(addr + i) != (char)i) { @@ -495,6 +728,14 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, goto out; } + if (check_after_split_folio_orders(addr, fd_size, pagemap_fd, + kpageflags_fd, expected_orders, + (pmd_order + 1))) { + ksft_print_msg("Unexpected THP split\n"); + err = 1; + goto out; + } + if (!check_huge_file(addr, 0, pmd_pagesize)) { ksft_print_msg("Still FilePmdMapped not split\n"); err = EXIT_FAILURE; @@ -525,6 +766,8 @@ int main(int argc, char **argv) const char *fs_loc; bool created_tmp; int offset; + unsigned int nr_pages; + unsigned int tests; ksft_print_header(); @@ -536,38 +779,58 @@ int main(int argc, char **argv) if (argc > 1) optional_xfs_path = argv[1]; - ksft_set_plan(1+8+1+9+9+8*4+2); - pagesize = getpagesize(); pageshift = ffs(pagesize) - 1; pmd_pagesize = read_pmd_pagesize(); if (!pmd_pagesize) ksft_exit_fail_msg("Reading PMD pagesize failed\n"); + nr_pages = pmd_pagesize / pagesize; + pmd_order = sz2ord(pmd_pagesize, pagesize); + + expected_orders = (int *)malloc(sizeof(int) * (pmd_order + 1)); + if (!expected_orders) + ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno)); + + tests = 2 + (pmd_order - 1) + (2 * pmd_order) + (pmd_order - 1) * 4 + 2; + ksft_set_plan(tests); + + pagemap_fd = open(pagemap_proc, O_RDONLY); + if (pagemap_fd == -1) + ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno)); + + kpageflags_fd = open(kpageflags_proc, O_RDONLY); + if (kpageflags_fd == -1) + ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno)); + fd_size = 2 * pmd_pagesize; split_pmd_zero_pages(); - for (i = 0; i < 9; i++) + for (i = 0; i < pmd_order; i++) if (i != 1) split_pmd_thp_to_order(i); split_pte_mapped_thp(); - for (i = 0; i < 9; i++) + for (i = 0; i < pmd_order; i++) split_file_backed_thp(i); created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, &fs_loc); - for (i = 8; i >= 0; i--) + for (i = pmd_order - 1; i >= 0; i--) split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1); - for (i = 0; i < 9; i++) + for (i = 0; i < pmd_order; i++) for (offset = 0; - offset < pmd_pagesize / pagesize; - offset += MAX(pmd_pagesize / pagesize / 4, 1 << i)) + offset < nr_pages; + offset += MAX(nr_pages / 4, 1 << i)) split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset); cleanup_thp_fs(fs_loc, created_tmp); + close(pagemap_fd); + close(kpageflags_fd); + free(expected_orders); + ksft_finished(); return 0; diff --git a/tools/testing/selftests/mm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh index d73b846736f1..d39096723fca 100755 --- a/tools/testing/selftests/mm/test_vmalloc.sh +++ b/tools/testing/selftests/mm/test_vmalloc.sh @@ -47,14 +47,14 @@ check_test_requirements() fi } -run_perfformance_check() +run_performance_check() { echo "Run performance tests to evaluate how fast vmalloc allocation is." echo "It runs all test cases on one single CPU with sequential order." modprobe $DRIVER $PERF_PARAM > /dev/null 2>&1 echo "Done." - echo "Ccheck the kernel message buffer to see the summary." + echo "Check the kernel message buffer to see the summary." } run_stability_check() @@ -160,7 +160,7 @@ function run_test() usage else if [[ "$1" = "performance" ]]; then - run_perfformance_check + run_performance_check elif [[ "$1" = "stress" ]]; then run_stability_check elif [[ "$1" = "smoke" ]]; then diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index bad60ac52874..574bd0f8ae48 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -382,10 +382,17 @@ unsigned long thp_shmem_supported_orders(void) return __thp_supported_orders(true); } -bool thp_is_enabled(void) +bool thp_available(void) { if (access(THP_SYSFS, F_OK) != 0) return false; + return true; +} + +bool thp_is_enabled(void) +{ + if (!thp_available()) + return false; int mode = thp_read_string("enabled", thp_enabled_strings); diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index 6c07f70beee9..76eeb712e5f1 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -84,6 +84,7 @@ void thp_set_read_ahead_path(char *path); unsigned long thp_supported_orders(void); unsigned long thp_shmem_supported_orders(void); +bool thp_available(void); bool thp_is_enabled(void); #endif /* __THP_SETTINGS_H__ */ diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 8e2b08dc5762..4f5e290ff1a6 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -177,13 +177,16 @@ void find_pagesizes(void) globfree(&g); read_sysfs("/proc/sys/kernel/shmmax", &shmmax_val); - if (shmmax_val < NUM_PAGES * largest) - ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax", - largest * NUM_PAGES); + if (shmmax_val < NUM_PAGES * largest) { + ksft_print_msg("WARNING: shmmax is too small to run this test.\n"); + ksft_print_msg("Please run the following command to increase shmmax:\n"); + ksft_print_msg("echo %lu > /proc/sys/kernel/shmmax\n", largest * NUM_PAGES); + ksft_exit_skip("Test skipped due to insufficient shmmax value.\n"); + } #if defined(__x86_64__) if (largest != 1U<<30) { - ksft_exit_fail_msg("No GB pages available on x86-64\n" + ksft_exit_skip("No GB pages available on x86-64\n" "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES); } #endif diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index a37088a23ffe..994fe8c03923 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -7,18 +7,29 @@ #include "uffd-common.h" -#define BASE_PMD_ADDR ((void *)(1UL << 30)) - -volatile bool test_uffdio_copy_eexist = true; -unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; -char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; -int uffd = -1, uffd_flags, finished, *pipefd, test_type; -bool map_shared; -bool test_uffdio_wp = true; -unsigned long long *count_verify; uffd_test_ops_t *uffd_test_ops; uffd_test_case_ops_t *uffd_test_case_ops; -atomic_bool ready_for_fork; + +#define BASE_PMD_ADDR ((void *)(1UL << 30)) + +/* pthread_mutex_t starts at page offset 0 */ +pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts) +{ + return (pthread_mutex_t *) (area + nr * gopts->page_size); +} + +/* + * count is placed in the page after pthread_mutex_t naturally aligned + * to avoid non alignment faults on non-x86 archs. + */ +volatile unsigned long long *area_count(char *area, unsigned long nr, + uffd_global_test_opts_t *gopts) +{ + return (volatile unsigned long long *) + ((unsigned long)(area + nr * gopts->page_size + + sizeof(pthread_mutex_t) + sizeof(unsigned long long) - 1) & + ~(unsigned long)(sizeof(unsigned long long) - 1)); +} static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) { @@ -40,15 +51,15 @@ static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) return mem_fd; } -static void anon_release_pages(char *rel_area) +static void anon_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); } -static int anon_allocate_area(void **alloc_area, bool is_src) +static int anon_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { - *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + *alloc_area = mmap(NULL, gopts->nr_pages * gopts->page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (*alloc_area == MAP_FAILED) { *alloc_area = NULL; @@ -57,31 +68,32 @@ static int anon_allocate_area(void **alloc_area, bool is_src) return 0; } -static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) +static void noop_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { } -static void hugetlb_release_pages(char *rel_area) +static void hugetlb_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (!map_shared) { - if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) + if (!gopts->map_shared) { + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); } else { - if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) err("madvise(MADV_REMOVE) failed"); } } -static int hugetlb_allocate_area(void **alloc_area, bool is_src) +static int hugetlb_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { - off_t size = nr_pages * page_size; + off_t size = gopts->nr_pages * gopts->page_size; off_t offset = is_src ? 0 : size; void *area_alias = NULL; char **alloc_area_alias; int mem_fd = uffd_mem_fd_create(size * 2, true); *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE, - (map_shared ? MAP_SHARED : MAP_PRIVATE) | + (gopts->map_shared ? MAP_SHARED : MAP_PRIVATE) | (is_src ? 0 : MAP_NORESERVE), mem_fd, offset); if (*alloc_area == MAP_FAILED) { @@ -89,7 +101,7 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) return -errno; } - if (map_shared) { + if (gopts->map_shared) { area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, offset); if (area_alias == MAP_FAILED) @@ -97,9 +109,9 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) } if (is_src) { - alloc_area_alias = &area_src_alias; + alloc_area_alias = &gopts->area_src_alias; } else { - alloc_area_alias = &area_dst_alias; + alloc_area_alias = &gopts->area_dst_alias; } if (area_alias) *alloc_area_alias = area_alias; @@ -108,24 +120,25 @@ static int hugetlb_allocate_area(void **alloc_area, bool is_src) return 0; } -static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset) +static void hugetlb_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { - if (!map_shared) + if (!gopts->map_shared) return; - *start = (unsigned long) area_dst_alias + offset; + *start = (unsigned long) gopts->area_dst_alias + offset; } -static void shmem_release_pages(char *rel_area) +static void shmem_release_pages(uffd_global_test_opts_t *gopts, char *rel_area) { - if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) + if (madvise(rel_area, gopts->nr_pages * gopts->page_size, MADV_REMOVE)) err("madvise(MADV_REMOVE) failed"); } -static int shmem_allocate_area(void **alloc_area, bool is_src) +static int shmem_allocate_area(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src) { void *area_alias = NULL; - size_t bytes = nr_pages * page_size, hpage_size = read_pmd_pagesize(); + size_t bytes = gopts->nr_pages * gopts->page_size, hpage_size = read_pmd_pagesize(); unsigned long offset = is_src ? 0 : bytes; char *p = NULL, *p_alias = NULL; int mem_fd = uffd_mem_fd_create(bytes * 2, false); @@ -159,22 +172,23 @@ static int shmem_allocate_area(void **alloc_area, bool is_src) err("mmap of anonymous memory failed at %p", p_alias); if (is_src) - area_src_alias = area_alias; + gopts->area_src_alias = area_alias; else - area_dst_alias = area_alias; + gopts->area_dst_alias = area_alias; close(mem_fd); return 0; } -static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset) +static void shmem_alias_mapping(uffd_global_test_opts_t *gopts, __u64 *start, + size_t len, unsigned long offset) { - *start = (unsigned long)area_dst_alias + offset; + *start = (unsigned long)gopts->area_dst_alias + offset; } -static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages) +static void shmem_check_pmd_mapping(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages) { - if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, + if (!check_huge_shmem(gopts->area_dst_alias, expect_nr_hpages, read_pmd_pagesize())) err("Did not find expected %d number of hugepages", expect_nr_hpages); @@ -234,18 +248,18 @@ void uffd_stats_report(struct uffd_args *args, int n_cpus) printf("\n"); } -int userfaultfd_open(uint64_t *features) +int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features) { struct uffdio_api uffdio_api; - uffd = uffd_open(UFFD_FLAGS); - if (uffd < 0) + gopts->uffd = uffd_open(UFFD_FLAGS); + if (gopts->uffd < 0) return -1; - uffd_flags = fcntl(uffd, F_GETFD, NULL); + gopts->uffd_flags = fcntl(gopts->uffd, F_GETFD, NULL); uffdio_api.api = UFFD_API; uffdio_api.features = *features; - if (ioctl(uffd, UFFDIO_API, &uffdio_api)) + if (ioctl(gopts->uffd, UFFDIO_API, &uffdio_api)) /* Probably lack of CAP_PTRACE? */ return -1; if (uffdio_api.api != UFFD_API) @@ -255,59 +269,63 @@ int userfaultfd_open(uint64_t *features) return 0; } -static inline void munmap_area(void **area) +static inline void munmap_area(uffd_global_test_opts_t *gopts, void **area) { if (*area) - if (munmap(*area, nr_pages * page_size)) + if (munmap(*area, gopts->nr_pages * gopts->page_size)) err("munmap"); *area = NULL; } -void uffd_test_ctx_clear(void) +void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts) { size_t i; - if (pipefd) { - for (i = 0; i < nr_parallel * 2; ++i) { - if (close(pipefd[i])) + if (gopts->pipefd) { + for (i = 0; i < gopts->nr_parallel * 2; ++i) { + if (close(gopts->pipefd[i])) err("close pipefd"); } - free(pipefd); - pipefd = NULL; + free(gopts->pipefd); + gopts->pipefd = NULL; } - if (count_verify) { - free(count_verify); - count_verify = NULL; + if (gopts->count_verify) { + free(gopts->count_verify); + gopts->count_verify = NULL; } - if (uffd != -1) { - if (close(uffd)) + if (gopts->uffd != -1) { + if (close(gopts->uffd)) err("close uffd"); - uffd = -1; + gopts->uffd = -1; } - munmap_area((void **)&area_src); - munmap_area((void **)&area_src_alias); - munmap_area((void **)&area_dst); - munmap_area((void **)&area_dst_alias); - munmap_area((void **)&area_remap); + munmap_area(gopts, (void **)&gopts->area_src); + munmap_area(gopts, (void **)&gopts->area_src_alias); + munmap_area(gopts, (void **)&gopts->area_dst); + munmap_area(gopts, (void **)&gopts->area_dst_alias); + munmap_area(gopts, (void **)&gopts->area_remap); } -int uffd_test_ctx_init(uint64_t features, const char **errmsg) +int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg) { unsigned long nr, cpu; int ret; + gopts->area_src_alias = NULL; + gopts->area_dst_alias = NULL; + gopts->area_remap = NULL; + if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) { - ret = uffd_test_case_ops->pre_alloc(errmsg); + ret = uffd_test_case_ops->pre_alloc(gopts, errmsg); if (ret) return ret; } - ret = uffd_test_ops->allocate_area((void **)&area_src, true); - ret |= uffd_test_ops->allocate_area((void **)&area_dst, false); + ret = uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_src, true); + ret |= uffd_test_ops->allocate_area(gopts, (void **) &gopts->area_dst, false); if (ret) { if (errmsg) *errmsg = "memory allocation failed"; @@ -315,26 +333,26 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) } if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) { - ret = uffd_test_case_ops->post_alloc(errmsg); + ret = uffd_test_case_ops->post_alloc(gopts, errmsg); if (ret) return ret; } - ret = userfaultfd_open(&features); + ret = userfaultfd_open(gopts, &features); if (ret) { if (errmsg) *errmsg = "possible lack of privilege"; return ret; } - count_verify = malloc(nr_pages * sizeof(unsigned long long)); - if (!count_verify) + gopts->count_verify = malloc(gopts->nr_pages * sizeof(unsigned long long)); + if (!gopts->count_verify) err("count_verify"); - for (nr = 0; nr < nr_pages; nr++) { - *area_mutex(area_src, nr) = + for (nr = 0; nr < gopts->nr_pages; nr++) { + *area_mutex(gopts->area_src, nr, gopts) = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER; - count_verify[nr] = *area_count(area_src, nr) = 1; + gopts->count_verify[nr] = *area_count(gopts->area_src, nr, gopts) = 1; /* * In the transition between 255 to 256, powerpc will * read out of order in my_bcmp and see both bytes as @@ -342,7 +360,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) * after the count, to avoid my_bcmp to trigger false * positives. */ - *(area_count(area_src, nr) + 1) = 1; + *(area_count(gopts->area_src, nr, gopts) + 1) = 1; } /* @@ -363,13 +381,13 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) * proactively split the thp and drop any accidentally initialized * pages within area_dst. */ - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - pipefd = malloc(sizeof(int) * nr_parallel * 2); - if (!pipefd) + gopts->pipefd = malloc(sizeof(int) * gopts->nr_parallel * 2); + if (!gopts->pipefd) err("pipefd"); - for (cpu = 0; cpu < nr_parallel; cpu++) - if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) + if (pipe2(&gopts->pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) err("pipe"); return 0; @@ -416,9 +434,9 @@ static void continue_range(int ufd, __u64 start, __u64 len, bool wp) ret, (int64_t) req.mapped); } -int uffd_read_msg(int ufd, struct uffd_msg *msg) +int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg) { - int ret = read(uffd, msg, sizeof(*msg)); + int ret = read(gopts->uffd, msg, sizeof(*msg)); if (ret != sizeof(*msg)) { if (ret < 0) { @@ -433,7 +451,8 @@ int uffd_read_msg(int ufd, struct uffd_msg *msg) return 0; } -void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) +void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, + struct uffd_args *args) { unsigned long offset; @@ -442,7 +461,7 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { /* Write protect page faults */ - wp_range(uffd, msg->arg.pagefault.address, page_size, false); + wp_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, false); args->wp_faults++; } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) { uint8_t *area; @@ -460,12 +479,12 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) * (UFFD-registered). */ - area = (uint8_t *)(area_dst + - ((char *)msg->arg.pagefault.address - - area_dst_alias)); - for (b = 0; b < page_size; ++b) + area = (uint8_t *)(gopts->area_dst + + ((char *)msg->arg.pagefault.address - + gopts->area_dst_alias)); + for (b = 0; b < gopts->page_size; ++b) area[b] = ~area[b]; - continue_range(uffd, msg->arg.pagefault.address, page_size, + continue_range(gopts->uffd, msg->arg.pagefault.address, gopts->page_size, args->apply_wp); args->minor_faults++; } else { @@ -493,10 +512,10 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) err("unexpected write fault"); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; - offset &= ~(page_size-1); + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; + offset &= ~(gopts->page_size-1); - if (copy_page(uffd, offset, args->apply_wp)) + if (copy_page(gopts, offset, args->apply_wp)) args->missing_faults++; } } @@ -504,6 +523,7 @@ void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args) void *uffd_poll_thread(void *arg) { struct uffd_args *args = (struct uffd_args *)arg; + uffd_global_test_opts_t *gopts = args->gopts; unsigned long cpu = args->cpu; struct pollfd pollfd[2]; struct uffd_msg msg; @@ -514,12 +534,12 @@ void *uffd_poll_thread(void *arg) if (!args->handle_fault) args->handle_fault = uffd_handle_page_fault; - pollfd[0].fd = uffd; + pollfd[0].fd = gopts->uffd; pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd[cpu*2]; + pollfd[1].fd = gopts->pipefd[cpu*2]; pollfd[1].events = POLLIN; - ready_for_fork = true; + gopts->ready_for_fork = true; for (;;) { ret = poll(pollfd, 2, -1); @@ -537,30 +557,30 @@ void *uffd_poll_thread(void *arg) } if (!(pollfd[0].revents & POLLIN)) err("pollfd[0].revents %d", pollfd[0].revents); - if (uffd_read_msg(uffd, &msg)) + if (uffd_read_msg(gopts, &msg)) continue; switch (msg.event) { default: err("unexpected msg event %u\n", msg.event); break; case UFFD_EVENT_PAGEFAULT: - args->handle_fault(&msg, args); + args->handle_fault(gopts, &msg, args); break; case UFFD_EVENT_FORK: - close(uffd); - uffd = msg.arg.fork.ufd; - pollfd[0].fd = uffd; + close(gopts->uffd); + gopts->uffd = msg.arg.fork.ufd; + pollfd[0].fd = gopts->uffd; break; case UFFD_EVENT_REMOVE: uffd_reg.range.start = msg.arg.remove.start; uffd_reg.range.len = msg.arg.remove.end - msg.arg.remove.start; - if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) + if (ioctl(gopts->uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) err("remove failure"); break; case UFFD_EVENT_REMAP: - area_remap = area_dst; /* save for later unmap */ - area_dst = (char *)(unsigned long)msg.arg.remap.to; + gopts->area_remap = gopts->area_dst; /* save for later unmap */ + gopts->area_dst = (char *)(unsigned long)msg.arg.remap.to; break; } } @@ -568,17 +588,18 @@ void *uffd_poll_thread(void *arg) return NULL; } -static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, +static void retry_copy_page(uffd_global_test_opts_t *gopts, struct uffdio_copy *uffdio_copy, unsigned long offset) { - uffd_test_ops->alias_mapping(&uffdio_copy->dst, + uffd_test_ops->alias_mapping(gopts, + &uffdio_copy->dst, uffdio_copy->len, offset); - if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { + if (ioctl(gopts->uffd, UFFDIO_COPY, uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy->copy != -EEXIST) err("UFFDIO_COPY retry error: %"PRId64, - (int64_t)uffdio_copy->copy); + (int64_t)uffdio_copy->copy); } else { err("UFFDIO_COPY retry unexpected: %"PRId64, (int64_t)uffdio_copy->copy); @@ -597,60 +618,60 @@ static void wake_range(int ufd, unsigned long addr, unsigned long len) addr), exit(1); } -int __copy_page(int ufd, unsigned long offset, bool retry, bool wp) +int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp) { struct uffdio_copy uffdio_copy; - if (offset >= nr_pages * page_size) + if (offset >= gopts->nr_pages * gopts->page_size) err("unexpected offset %lu\n", offset); - uffdio_copy.dst = (unsigned long) area_dst + offset; - uffdio_copy.src = (unsigned long) area_src + offset; - uffdio_copy.len = page_size; + uffdio_copy.dst = (unsigned long) gopts->area_dst + offset; + uffdio_copy.src = (unsigned long) gopts->area_src + offset; + uffdio_copy.len = gopts->page_size; if (wp) uffdio_copy.mode = UFFDIO_COPY_MODE_WP; else uffdio_copy.mode = 0; uffdio_copy.copy = 0; - if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { + if (ioctl(gopts->uffd, UFFDIO_COPY, &uffdio_copy)) { /* real retval in ufdio_copy.copy */ if (uffdio_copy.copy != -EEXIST) err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); - wake_range(ufd, uffdio_copy.dst, page_size); - } else if (uffdio_copy.copy != page_size) { + wake_range(gopts->uffd, uffdio_copy.dst, gopts->page_size); + } else if (uffdio_copy.copy != gopts->page_size) { err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy); } else { - if (test_uffdio_copy_eexist && retry) { - test_uffdio_copy_eexist = false; - retry_copy_page(ufd, &uffdio_copy, offset); + if (gopts->test_uffdio_copy_eexist && retry) { + gopts->test_uffdio_copy_eexist = false; + retry_copy_page(gopts, &uffdio_copy, offset); } return 1; } return 0; } -int copy_page(int ufd, unsigned long offset, bool wp) +int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp) { - return __copy_page(ufd, offset, false, wp); + return __copy_page(gopts, offset, false, wp); } -int move_page(int ufd, unsigned long offset, unsigned long len) +int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len) { struct uffdio_move uffdio_move; - if (offset + len > nr_pages * page_size) + if (offset + len > gopts->nr_pages * gopts->page_size) err("unexpected offset %lu and length %lu\n", offset, len); - uffdio_move.dst = (unsigned long) area_dst + offset; - uffdio_move.src = (unsigned long) area_src + offset; + uffdio_move.dst = (unsigned long) gopts->area_dst + offset; + uffdio_move.src = (unsigned long) gopts->area_src + offset; uffdio_move.len = len; uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES; uffdio_move.move = 0; - if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) { + if (ioctl(gopts->uffd, UFFDIO_MOVE, &uffdio_move)) { /* real retval in uffdio_move.move */ if (uffdio_move.move != -EEXIST) err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); - wake_range(ufd, uffdio_move.dst, len); + wake_range(gopts->uffd, uffdio_move.dst, len); } else if (uffdio_move.move != len) { err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); } else diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index 7700cbfa3975..37d3ca55905f 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -56,20 +56,17 @@ #define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__) -/* pthread_mutex_t starts at page offset 0 */ -#define area_mutex(___area, ___nr) \ - ((pthread_mutex_t *) ((___area) + (___nr)*page_size)) -/* - * count is placed in the page after pthread_mutex_t naturally aligned - * to avoid non alignment faults on non-x86 archs. - */ -#define area_count(___area, ___nr) \ - ((volatile unsigned long long *) ((unsigned long) \ - ((___area) + (___nr)*page_size + \ - sizeof(pthread_mutex_t) + \ - sizeof(unsigned long long) - 1) & \ - ~(unsigned long)(sizeof(unsigned long long) \ - - 1))) +struct uffd_global_test_opts { + unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; + char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; + int uffd, uffd_flags, finished, *pipefd, test_type; + bool map_shared; + bool test_uffdio_wp; + unsigned long long *count_verify; + volatile bool test_uffdio_copy_eexist; + atomic_bool ready_for_fork; +}; +typedef struct uffd_global_test_opts uffd_global_test_opts_t; /* Userfaultfd test statistics */ struct uffd_args { @@ -79,50 +76,55 @@ struct uffd_args { unsigned long missing_faults; unsigned long wp_faults; unsigned long minor_faults; + struct uffd_global_test_opts *gopts; /* A custom fault handler; defaults to uffd_handle_page_fault. */ - void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args); + void (*handle_fault)(struct uffd_global_test_opts *gopts, + struct uffd_msg *msg, + struct uffd_args *args); }; struct uffd_test_ops { - int (*allocate_area)(void **alloc_area, bool is_src); - void (*release_pages)(char *rel_area); - void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); - void (*check_pmd_mapping)(void *p, int expect_nr_hpages); + int (*allocate_area)(uffd_global_test_opts_t *gopts, void **alloc_area, bool is_src); + void (*release_pages)(uffd_global_test_opts_t *gopts, char *rel_area); + void (*alias_mapping)(uffd_global_test_opts_t *gopts, + __u64 *start, + size_t len, + unsigned long offset); + void (*check_pmd_mapping)(uffd_global_test_opts_t *gopts, void *p, int expect_nr_hpages); }; typedef struct uffd_test_ops uffd_test_ops_t; struct uffd_test_case_ops { - int (*pre_alloc)(const char **errmsg); - int (*post_alloc)(const char **errmsg); + int (*pre_alloc)(uffd_global_test_opts_t *gopts, const char **errmsg); + int (*post_alloc)(uffd_global_test_opts_t *gopts, const char **errmsg); }; typedef struct uffd_test_case_ops uffd_test_case_ops_t; -extern unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; -extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; -extern int uffd, uffd_flags, finished, *pipefd, test_type; -extern bool map_shared; -extern bool test_uffdio_wp; -extern unsigned long long *count_verify; -extern volatile bool test_uffdio_copy_eexist; -extern atomic_bool ready_for_fork; - +extern uffd_global_test_opts_t *uffd_gtest_opts; extern uffd_test_ops_t anon_uffd_test_ops; extern uffd_test_ops_t shmem_uffd_test_ops; extern uffd_test_ops_t hugetlb_uffd_test_ops; extern uffd_test_ops_t *uffd_test_ops; extern uffd_test_case_ops_t *uffd_test_case_ops; +pthread_mutex_t *area_mutex(char *area, unsigned long nr, uffd_global_test_opts_t *gopts); +volatile unsigned long long *area_count(char *area, + unsigned long nr, + uffd_global_test_opts_t *gopts); + void uffd_stats_report(struct uffd_args *args, int n_cpus); -int uffd_test_ctx_init(uint64_t features, const char **errmsg); -void uffd_test_ctx_clear(void); -int userfaultfd_open(uint64_t *features); -int uffd_read_msg(int ufd, struct uffd_msg *msg); +int uffd_test_ctx_init(uffd_global_test_opts_t *gopts, uint64_t features, const char **errmsg); +void uffd_test_ctx_clear(uffd_global_test_opts_t *gopts); +int userfaultfd_open(uffd_global_test_opts_t *gopts, uint64_t *features); +int uffd_read_msg(uffd_global_test_opts_t *gopts, struct uffd_msg *msg); void wp_range(int ufd, __u64 start, __u64 len, bool wp); -void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args); -int __copy_page(int ufd, unsigned long offset, bool retry, bool wp); -int copy_page(int ufd, unsigned long offset, bool wp); -int move_page(int ufd, unsigned long offset, unsigned long len); +void uffd_handle_page_fault(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args); +int __copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool retry, bool wp); +int copy_page(uffd_global_test_opts_t *gopts, unsigned long offset, bool wp); +int move_page(uffd_global_test_opts_t *gopts, unsigned long offset, unsigned long len); void *uffd_poll_thread(void *arg); int uffd_open_dev(unsigned int flags); diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 40af7f67c407..b51c89e1cd1a 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -44,6 +44,12 @@ uint64_t features; #define BOUNCE_VERIFY (1<<2) #define BOUNCE_POLL (1<<3) static int bounces; +/* defined globally for this particular test as the sigalrm handler + * depends on test_uffdio_*_eexist. + * XXX: define gopts in main() when we figure out a way to deal with + * test_uffdio_*_eexist. + */ +static uffd_global_test_opts_t *gopts; /* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ #define ALARM_INTERVAL_SECS 10 @@ -51,7 +57,7 @@ static char *zeropage; pthread_attr_t attr; #define swap(a, b) \ - do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + do { __auto_type __tmp = (a); (a) = (b); (b) = __tmp; } while (0) const char *examples = "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" @@ -76,54 +82,58 @@ static void usage(void) exit(1); } -static void uffd_stats_reset(struct uffd_args *args, unsigned long n_cpus) +static void uffd_stats_reset(uffd_global_test_opts_t *gopts, struct uffd_args *args, + unsigned long n_cpus) { int i; for (i = 0; i < n_cpus; i++) { args[i].cpu = i; - args[i].apply_wp = test_uffdio_wp; + args[i].apply_wp = gopts->test_uffdio_wp; args[i].missing_faults = 0; args[i].wp_faults = 0; args[i].minor_faults = 0; + args[i].gopts = gopts; } } static void *locking_thread(void *arg) { - unsigned long cpu = (unsigned long) arg; + struct uffd_args *args = (struct uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; unsigned long page_nr; unsigned long long count; if (!(bounces & BOUNCE_RANDOM)) { page_nr = -bounces; if (!(bounces & BOUNCE_RACINGFAULTS)) - page_nr += cpu * nr_pages_per_cpu; + page_nr += cpu * gopts->nr_pages_per_cpu; } - while (!finished) { + while (!gopts->finished) { if (bounces & BOUNCE_RANDOM) { if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr)) err("getrandom failed"); } else page_nr += 1; - page_nr %= nr_pages; - pthread_mutex_lock(area_mutex(area_dst, page_nr)); - count = *area_count(area_dst, page_nr); - if (count != count_verify[page_nr]) + page_nr %= gopts->nr_pages; + pthread_mutex_lock(area_mutex(gopts->area_dst, page_nr, gopts)); + count = *area_count(gopts->area_dst, page_nr, gopts); + if (count != gopts->count_verify[page_nr]) err("page_nr %lu memory corruption %llu %llu", - page_nr, count, count_verify[page_nr]); + page_nr, count, gopts->count_verify[page_nr]); count++; - *area_count(area_dst, page_nr) = count_verify[page_nr] = count; - pthread_mutex_unlock(area_mutex(area_dst, page_nr)); + *area_count(gopts->area_dst, page_nr, gopts) = gopts->count_verify[page_nr] = count; + pthread_mutex_unlock(area_mutex(gopts->area_dst, page_nr, gopts)); } return NULL; } -static int copy_page_retry(int ufd, unsigned long offset) +static int copy_page_retry(uffd_global_test_opts_t *gopts, unsigned long offset) { - return __copy_page(ufd, offset, true, test_uffdio_wp); + return __copy_page(gopts, offset, true, gopts->test_uffdio_wp); } pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -131,15 +141,16 @@ pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; static void *uffd_read_thread(void *arg) { struct uffd_args *args = (struct uffd_args *)arg; + uffd_global_test_opts_t *gopts = args->gopts; struct uffd_msg msg; pthread_mutex_unlock(&uffd_read_mutex); /* from here cancellation is ok */ for (;;) { - if (uffd_read_msg(uffd, &msg)) + if (uffd_read_msg(gopts, &msg)) continue; - uffd_handle_page_fault(&msg, args); + uffd_handle_page_fault(gopts, &msg, args); } return NULL; @@ -147,32 +158,34 @@ static void *uffd_read_thread(void *arg) static void *background_thread(void *arg) { - unsigned long cpu = (unsigned long) arg; + struct uffd_args *args = (struct uffd_args *) arg; + uffd_global_test_opts_t *gopts = args->gopts; + unsigned long cpu = (unsigned long) args->cpu; unsigned long page_nr, start_nr, mid_nr, end_nr; - start_nr = cpu * nr_pages_per_cpu; - end_nr = (cpu+1) * nr_pages_per_cpu; + start_nr = cpu * gopts->nr_pages_per_cpu; + end_nr = (cpu+1) * gopts->nr_pages_per_cpu; mid_nr = (start_nr + end_nr) / 2; /* Copy the first half of the pages */ for (page_nr = start_nr; page_nr < mid_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); + copy_page_retry(gopts, page_nr * gopts->page_size); /* * If we need to test uffd-wp, set it up now. Then we'll have * at least the first half of the pages mapped already which * can be write-protected for testing */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst + start_nr * page_size, - nr_pages_per_cpu * page_size, true); + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst + start_nr * gopts->page_size, + gopts->nr_pages_per_cpu * gopts->page_size, true); /* * Continue the 2nd half of the page copying, handling write * protection faults if any */ for (page_nr = mid_nr; page_nr < end_nr; page_nr++) - copy_page_retry(uffd, page_nr * page_size); + copy_page_retry(gopts, page_nr * gopts->page_size); return NULL; } @@ -180,17 +193,21 @@ static void *background_thread(void *arg) static int stress(struct uffd_args *args) { unsigned long cpu; - pthread_t locking_threads[nr_parallel]; - pthread_t uffd_threads[nr_parallel]; - pthread_t background_threads[nr_parallel]; + uffd_global_test_opts_t *gopts = args->gopts; + pthread_t locking_threads[gopts->nr_parallel]; + pthread_t uffd_threads[gopts->nr_parallel]; + pthread_t background_threads[gopts->nr_parallel]; - finished = 0; - for (cpu = 0; cpu < nr_parallel; cpu++) { + gopts->finished = 0; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { if (pthread_create(&locking_threads[cpu], &attr, - locking_thread, (void *)cpu)) + locking_thread, (void *)&args[cpu])) return 1; if (bounces & BOUNCE_POLL) { - if (pthread_create(&uffd_threads[cpu], &attr, uffd_poll_thread, &args[cpu])) + if (pthread_create(&uffd_threads[cpu], + &attr, + uffd_poll_thread, + (void *) &args[cpu])) err("uffd_poll_thread create"); } else { if (pthread_create(&uffd_threads[cpu], &attr, @@ -200,10 +217,10 @@ static int stress(struct uffd_args *args) pthread_mutex_lock(&uffd_read_mutex); } if (pthread_create(&background_threads[cpu], &attr, - background_thread, (void *)cpu)) + background_thread, (void *)&args[cpu])) return 1; } - for (cpu = 0; cpu < nr_parallel; cpu++) + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) if (pthread_join(background_threads[cpu], NULL)) return 1; @@ -216,17 +233,17 @@ static int stress(struct uffd_args *args) * UFFDIO_COPY without writing zero pages into area_dst * because the background threads already completed). */ - uffd_test_ops->release_pages(area_src); + uffd_test_ops->release_pages(gopts, gopts->area_src); - finished = 1; - for (cpu = 0; cpu < nr_parallel; cpu++) + gopts->finished = 1; + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) if (pthread_join(locking_threads[cpu], NULL)) return 1; - for (cpu = 0; cpu < nr_parallel; cpu++) { + for (cpu = 0; cpu < gopts->nr_parallel; cpu++) { char c; if (bounces & BOUNCE_POLL) { - if (write(pipefd[cpu*2+1], &c, 1) != 1) + if (write(gopts->pipefd[cpu*2+1], &c, 1) != 1) err("pipefd write error"); if (pthread_join(uffd_threads[cpu], (void *)&args[cpu])) @@ -242,26 +259,26 @@ static int stress(struct uffd_args *args) return 0; } -static int userfaultfd_stress(void) +static int userfaultfd_stress(uffd_global_test_opts_t *gopts) { void *area; unsigned long nr; - struct uffd_args args[nr_parallel]; - uint64_t mem_size = nr_pages * page_size; + struct uffd_args args[gopts->nr_parallel]; + uint64_t mem_size = gopts->nr_pages * gopts->page_size; int flags = 0; - memset(args, 0, sizeof(struct uffd_args) * nr_parallel); + memset(args, 0, sizeof(struct uffd_args) * gopts->nr_parallel); - if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) + if (features & UFFD_FEATURE_WP_UNPOPULATED && gopts->test_type == TEST_ANON) flags = UFFD_FEATURE_WP_UNPOPULATED; - if (uffd_test_ctx_init(flags, NULL)) + if (uffd_test_ctx_init(gopts, flags, NULL)) err("context init failed"); - if (posix_memalign(&area, page_size, page_size)) + if (posix_memalign(&area, gopts->page_size, gopts->page_size)) err("out of memory"); zeropage = area; - bzero(zeropage, page_size); + bzero(zeropage, gopts->page_size); pthread_mutex_lock(&uffd_read_mutex); @@ -284,18 +301,18 @@ static int userfaultfd_stress(void) fflush(stdout); if (bounces & BOUNCE_POLL) - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); else - fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags & ~O_NONBLOCK); /* register */ - if (uffd_register(uffd, area_dst, mem_size, - true, test_uffdio_wp, false)) + if (uffd_register(gopts->uffd, gopts->area_dst, mem_size, + true, gopts->test_uffdio_wp, false)) err("register failure"); - if (area_dst_alias) { - if (uffd_register(uffd, area_dst_alias, mem_size, - true, test_uffdio_wp, false)) + if (gopts->area_dst_alias) { + if (uffd_register(gopts->uffd, gopts->area_dst_alias, mem_size, + true, gopts->test_uffdio_wp, false)) err("register failure alias"); } @@ -323,87 +340,88 @@ static int userfaultfd_stress(void) * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's * required to MADV_DONTNEED here. */ - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - uffd_stats_reset(args, nr_parallel); + uffd_stats_reset(gopts, args, gopts->nr_parallel); /* bounce pass */ if (stress(args)) { - uffd_test_ctx_clear(); + uffd_test_ctx_clear(gopts); return 1; } /* Clear all the write protections if there is any */ - if (test_uffdio_wp) - wp_range(uffd, (unsigned long)area_dst, - nr_pages * page_size, false); + if (gopts->test_uffdio_wp) + wp_range(gopts->uffd, (unsigned long)gopts->area_dst, + gopts->nr_pages * gopts->page_size, false); /* unregister */ - if (uffd_unregister(uffd, area_dst, mem_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, mem_size)) err("unregister failure"); - if (area_dst_alias) { - if (uffd_unregister(uffd, area_dst_alias, mem_size)) + if (gopts->area_dst_alias) { + if (uffd_unregister(gopts->uffd, gopts->area_dst_alias, mem_size)) err("unregister failure alias"); } /* verification */ if (bounces & BOUNCE_VERIFY) - for (nr = 0; nr < nr_pages; nr++) - if (*area_count(area_dst, nr) != count_verify[nr]) + for (nr = 0; nr < gopts->nr_pages; nr++) + if (*area_count(gopts->area_dst, nr, gopts) != + gopts->count_verify[nr]) err("error area_count %llu %llu %lu\n", - *area_count(area_src, nr), - count_verify[nr], nr); + *area_count(gopts->area_src, nr, gopts), + gopts->count_verify[nr], nr); /* prepare next bounce */ - swap(area_src, area_dst); + swap(gopts->area_src, gopts->area_dst); - swap(area_src_alias, area_dst_alias); + swap(gopts->area_src_alias, gopts->area_dst_alias); - uffd_stats_report(args, nr_parallel); + uffd_stats_report(args, gopts->nr_parallel); } - uffd_test_ctx_clear(); + uffd_test_ctx_clear(gopts); return 0; } -static void set_test_type(const char *type) +static void set_test_type(uffd_global_test_opts_t *gopts, const char *type) { if (!strcmp(type, "anon")) { - test_type = TEST_ANON; + gopts->test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; } else if (!strcmp(type, "hugetlb")) { - test_type = TEST_HUGETLB; + gopts->test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; - map_shared = true; + gopts->map_shared = true; } else if (!strcmp(type, "hugetlb-private")) { - test_type = TEST_HUGETLB; + gopts->test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; } else if (!strcmp(type, "shmem")) { - map_shared = true; - test_type = TEST_SHMEM; + gopts->map_shared = true; + gopts->test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } else if (!strcmp(type, "shmem-private")) { - test_type = TEST_SHMEM; + gopts->test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } } -static void parse_test_type_arg(const char *raw_type) +static void parse_test_type_arg(uffd_global_test_opts_t *gopts, const char *raw_type) { - set_test_type(raw_type); + set_test_type(gopts, raw_type); - if (!test_type) + if (!gopts->test_type) err("failed to parse test type argument: '%s'", raw_type); - if (test_type == TEST_HUGETLB) - page_size = default_huge_page_size(); + if (gopts->test_type == TEST_HUGETLB) + gopts->page_size = default_huge_page_size(); else - page_size = sysconf(_SC_PAGE_SIZE); + gopts->page_size = sysconf(_SC_PAGE_SIZE); - if (!page_size) + if (!gopts->page_size) err("Unable to determine page size"); - if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 - > page_size) + if ((unsigned long) area_count(NULL, 0, gopts) + sizeof(unsigned long long) * 2 + > gopts->page_size) err("Impossible to run this test"); /* @@ -415,21 +433,21 @@ static void parse_test_type_arg(const char *raw_type) if (uffd_get_features(&features) && errno == ENOENT) ksft_exit_skip("failed to get available features (%d)\n", errno); - test_uffdio_wp = test_uffdio_wp && + gopts->test_uffdio_wp = gopts->test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); - if (test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) - test_uffdio_wp = false; + if (gopts->test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) + gopts->test_uffdio_wp = false; - close(uffd); - uffd = -1; + close(gopts->uffd); + gopts->uffd = -1; } static void sigalrm(int sig) { if (sig != SIGALRM) abort(); - test_uffdio_copy_eexist = true; + gopts->test_uffdio_copy_eexist = true; alarm(ALARM_INTERVAL_SECS); } @@ -438,6 +456,8 @@ int main(int argc, char **argv) unsigned long nr_cpus; size_t bytes; + gopts = (uffd_global_test_opts_t *) malloc(sizeof(uffd_global_test_opts_t)); + if (argc < 4) usage(); @@ -445,29 +465,34 @@ int main(int argc, char **argv) err("failed to arm SIGALRM"); alarm(ALARM_INTERVAL_SECS); - parse_test_type_arg(argv[1]); + parse_test_type_arg(gopts, argv[1]); bytes = atol(argv[2]) * 1024 * 1024; - if (test_type == TEST_HUGETLB && - get_free_hugepages() < bytes / page_size) { - printf("skip: Skipping userfaultfd... not enough hugepages\n"); - return KSFT_SKIP; - } - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); if (nr_cpus > 32) { /* Don't let calculation below go to zero. */ ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n", nr_cpus); - nr_parallel = 32; + gopts->nr_parallel = 32; } else { - nr_parallel = nr_cpus; + gopts->nr_parallel = nr_cpus; + } + + /* + * src and dst each require bytes / page_size number of hugepages. + * Ensure nr_parallel - 1 hugepages on top of that to account + * for racy extra reservation of hugepages. + */ + if (gopts->test_type == TEST_HUGETLB && + get_free_hugepages() < 2 * (bytes / gopts->page_size) + gopts->nr_parallel - 1) { + printf("skip: Skipping userfaultfd... not enough hugepages\n"); + return KSFT_SKIP; } - nr_pages_per_cpu = bytes / page_size / nr_parallel; - if (!nr_pages_per_cpu) { + gopts->nr_pages_per_cpu = bytes / gopts->page_size / gopts->nr_parallel; + if (!gopts->nr_pages_per_cpu) { _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)", - bytes, page_size, nr_parallel); + bytes, gopts->page_size, gopts->nr_parallel); usage(); } @@ -476,11 +501,11 @@ int main(int argc, char **argv) _err("invalid bounces"); usage(); } - nr_pages = nr_pages_per_cpu * nr_parallel; + gopts->nr_pages = gopts->nr_pages_per_cpu * gopts->nr_parallel; printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", - nr_pages, nr_pages_per_cpu); - return userfaultfd_stress(); + gopts->nr_pages, gopts->nr_pages_per_cpu); + return userfaultfd_stress(gopts); } #else /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 50501b38e34e..9e3be2ee7f1b 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -76,7 +76,7 @@ struct uffd_test_args { typedef struct uffd_test_args uffd_test_args_t; /* Returns: UFFD_TEST_* */ -typedef void (*uffd_test_fn)(uffd_test_args_t *); +typedef void (*uffd_test_fn)(uffd_global_test_opts_t *, uffd_test_args_t *); typedef struct { const char *name; @@ -181,33 +181,6 @@ out: return 1; } -/* - * This function initializes the global variables. TODO: remove global - * vars and then remove this. - */ -static int -uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, - mem_type_t *mem_type, const char **errmsg) -{ - map_shared = mem_type->shared; - uffd_test_ops = mem_type->mem_ops; - uffd_test_case_ops = test->test_case_ops; - - if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) - page_size = default_huge_page_size(); - else - page_size = psize(); - - /* Ensure we have at least 2 pages */ - nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size; - /* TODO: remove this global var.. it's so ugly */ - nr_parallel = 1; - - /* Initialize test arguments */ - args->mem_type = mem_type; - - return uffd_test_ctx_init(test->uffd_feature_required, errmsg); -} static bool uffd_feature_supported(uffd_test_case_t *test) { @@ -237,7 +210,8 @@ static int pagemap_open(void) } while (0) typedef struct { - int parent_uffd, child_uffd; + uffd_global_test_opts_t *gopts; + int child_uffd; } fork_event_args; static void *fork_event_consumer(void *data) @@ -245,10 +219,10 @@ static void *fork_event_consumer(void *data) fork_event_args *args = data; struct uffd_msg msg = { 0 }; - ready_for_fork = true; + args->gopts->ready_for_fork = true; /* Read until a full msg received */ - while (uffd_read_msg(args->parent_uffd, &msg)); + while (uffd_read_msg(args->gopts, &msg)); if (msg.event != UFFD_EVENT_FORK) err("wrong message: %u\n", msg.event); @@ -304,9 +278,9 @@ static void unpin_pages(pin_args *args) args->pinned = false; } -static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) +static int pagemap_test_fork(uffd_global_test_opts_t *gopts, bool with_event, bool test_pin) { - fork_event_args args = { .parent_uffd = uffd, .child_uffd = -1 }; + fork_event_args args = { .gopts = gopts, .child_uffd = -1 }; pthread_t thread; pid_t child; uint64_t value; @@ -314,10 +288,10 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) /* Prepare a thread to resolve EVENT_FORK */ if (with_event) { - ready_for_fork = false; + gopts->ready_for_fork = false; if (pthread_create(&thread, NULL, fork_event_consumer, &args)) err("pthread_create()"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ } @@ -328,14 +302,14 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) fd = pagemap_open(); - if (test_pin && pin_pages(&args, area_dst, page_size)) + if (test_pin && pin_pages(&args, gopts->area_dst, gopts->page_size)) /* * Normally when reach here we have pinned in * previous tests, so shouldn't fail anymore */ err("pin page failed in child"); - value = pagemap_get_entry(fd, area_dst); + value = pagemap_get_entry(fd, gopts->area_dst); /* * After fork(), we should handle uffd-wp bit differently: * @@ -361,70 +335,70 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) return result; } -static void uffd_wp_unpopulated_test(uffd_test_args_t *args) +static void uffd_wp_unpopulated_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { uint64_t value; int pagemap_fd; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Test applying pte marker to anon unpopulated */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); /* Test unprotect on anon pte marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, false); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Test zap on anon marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - if (madvise(area_dst, page_size, MADV_DONTNEED)) + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); - value = pagemap_get_entry(pagemap_fd, area_dst); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Test fault in after marker removed */ - *area_dst = 1; - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 1; + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Drop it to make pte none again */ - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); /* Test read-zero-page upon pte marker */ - wp_range(uffd, (uint64_t)area_dst, page_size, true); - *(volatile char *)area_dst; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + *(volatile char *)gopts->area_dst; /* Drop it to make pte none again */ - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("madvise(MADV_DONTNEED) failed"); uffd_test_pass(); } -static void uffd_wp_fork_test_common(uffd_test_args_t *args, +static void uffd_wp_fork_test_common(uffd_global_test_opts_t *gopts, uffd_test_args_t *args, bool with_event) { int pagemap_fd; uint64_t value; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ - *area_dst = 1; - wp_range(uffd, (uint64_t)area_dst, page_size, true); - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 1; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in present pte", with_event ? "missing" : "stall"); goto out; @@ -442,79 +416,80 @@ static void uffd_wp_fork_test_common(uffd_test_args_t *args, * to expose pte markers. */ if (args->mem_type->shared) { - if (madvise(area_dst, page_size, MADV_DONTNEED)) + if (madvise(gopts->area_dst, gopts->page_size, MADV_DONTNEED)) err("MADV_DONTNEED"); } else { /* * NOTE: ignore retval because private-hugetlb doesn't yet * support swapping, so it could fail. */ - madvise(area_dst, page_size, MADV_PAGEOUT); + madvise(gopts->area_dst, gopts->page_size, MADV_PAGEOUT); } /* Uffd-wp should persist even swapped out */ - value = pagemap_get_entry(pagemap_fd, area_dst); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, true); - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte", with_event ? "missing" : "stall"); goto out; } /* Unprotect; this tests swap pte modifications */ - wp_range(uffd, (uint64_t)area_dst, page_size, false); - value = pagemap_get_entry(pagemap_fd, area_dst); + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, false); + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); /* Fault in the page from disk */ - *area_dst = 2; - value = pagemap_get_entry(pagemap_fd, area_dst); + *gopts->area_dst = 2; + value = pagemap_get_entry(pagemap_fd, gopts->area_dst); pagemap_check_wp(value, false); uffd_test_pass(); out: - if (uffd_unregister(uffd, area_dst, nr_pages * page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size)) err("unregister failed"); close(pagemap_fd); } -static void uffd_wp_fork_test(uffd_test_args_t *args) +static void uffd_wp_fork_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_test_common(args, false); + uffd_wp_fork_test_common(gopts, args, false); } -static void uffd_wp_fork_with_event_test(uffd_test_args_t *args) +static void uffd_wp_fork_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_test_common(args, true); + uffd_wp_fork_test_common(gopts, args, true); } -static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args, +static void uffd_wp_fork_pin_test_common(uffd_global_test_opts_t *gopts, + uffd_test_args_t *args, bool with_event) { int pagemap_fd; pin_args pin_args = {}; - if (uffd_register(uffd, area_dst, page_size, false, true, false)) + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->page_size, false, true, false)) err("register failed"); pagemap_fd = pagemap_open(); /* Touch the page */ - *area_dst = 1; - wp_range(uffd, (uint64_t)area_dst, page_size, true); + *gopts->area_dst = 1; + wp_range(gopts->uffd, (uint64_t)gopts->area_dst, gopts->page_size, true); /* * 1. First pin, then fork(). This tests fork() special path when * doing early CoW if the page is private. */ - if (pin_pages(&pin_args, area_dst, page_size)) { + if (pin_pages(&pin_args, gopts->area_dst, gopts->page_size)) { uffd_test_skip("Possibly CONFIG_GUP_TEST missing " "or unprivileged"); close(pagemap_fd); - uffd_unregister(uffd, area_dst, page_size); + uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size); return; } - if (pagemap_test_fork(uffd, with_event, false)) { + if (pagemap_test_fork(gopts, with_event, false)) { uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()", with_event ? "missing" : "stall"); unpin_pages(&pin_args); @@ -527,49 +502,50 @@ static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args, * 2. First fork(), then pin (in the child, where test_pin==true). * This tests COR, aka, page unsharing on private memories. */ - if (pagemap_test_fork(uffd, with_event, true)) { + if (pagemap_test_fork(gopts, with_event, true)) { uffd_test_fail("Detected %s uffd-wp bit when RO pin", with_event ? "missing" : "stall"); goto out; } uffd_test_pass(); out: - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("register failed"); close(pagemap_fd); } -static void uffd_wp_fork_pin_test(uffd_test_args_t *args) +static void uffd_wp_fork_pin_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_pin_test_common(args, false); + uffd_wp_fork_pin_test_common(gopts, args, false); } -static void uffd_wp_fork_pin_with_event_test(uffd_test_args_t *args) +static void uffd_wp_fork_pin_with_event_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_wp_fork_pin_test_common(args, true); + uffd_wp_fork_pin_test_common(gopts, args, true); } -static void check_memory_contents(char *p) +static void check_memory_contents(uffd_global_test_opts_t *gopts, char *p) { unsigned long i, j; uint8_t expected_byte; - for (i = 0; i < nr_pages; ++i) { + for (i = 0; i < gopts->nr_pages; ++i) { expected_byte = ~((uint8_t)(i % ((uint8_t)-1))); - for (j = 0; j < page_size; j++) { - uint8_t v = *(uint8_t *)(p + (i * page_size) + j); + for (j = 0; j < gopts->page_size; j++) { + uint8_t v = *(uint8_t *)(p + (i * gopts->page_size) + j); if (v != expected_byte) err("unexpected page contents"); } } } -static void uffd_minor_test_common(bool test_collapse, bool test_wp) +static void uffd_minor_test_common(uffd_global_test_opts_t *gopts, bool test_collapse, bool test_wp) { unsigned long p; pthread_t uffd_mon; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; /* * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing @@ -577,7 +553,7 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) */ assert(!(test_collapse && test_wp)); - if (uffd_register(uffd, area_dst_alias, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, /* NOTE! MADV_COLLAPSE may not work with uffd-wp */ false, test_wp, true)) err("register failure"); @@ -586,9 +562,9 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) * After registering with UFFD, populate the non-UFFD-registered side of * the shared mapping. This should *not* trigger any UFFD minor faults. */ - for (p = 0; p < nr_pages; ++p) - memset(area_dst + (p * page_size), p % ((uint8_t)-1), - page_size); + for (p = 0; p < gopts->nr_pages; ++p) + memset(gopts->area_dst + (p * gopts->page_size), p % ((uint8_t)-1), + gopts->page_size); args.apply_wp = test_wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -600,50 +576,51 @@ static void uffd_minor_test_common(bool test_collapse, bool test_wp) * fault. uffd_poll_thread will resolve the fault by bit-flipping the * page's contents, and then issuing a CONTINUE ioctl. */ - check_memory_contents(area_dst_alias); + check_memory_contents(gopts, gopts->area_dst_alias); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); if (test_collapse) { - if (madvise(area_dst_alias, nr_pages * page_size, + if (madvise(gopts->area_dst_alias, gopts->nr_pages * gopts->page_size, MADV_COLLAPSE)) { /* It's fine to fail for this one... */ uffd_test_skip("MADV_COLLAPSE failed"); return; } - uffd_test_ops->check_pmd_mapping(area_dst, - nr_pages * page_size / + uffd_test_ops->check_pmd_mapping(gopts, + gopts->area_dst, + gopts->nr_pages * gopts->page_size / read_pmd_pagesize()); /* * This won't cause uffd-fault - it purely just makes sure there * was no corruption. */ - check_memory_contents(area_dst_alias); + check_memory_contents(gopts, gopts->area_dst_alias); } - if (args.missing_faults != 0 || args.minor_faults != nr_pages) + if (args.missing_faults != 0 || args.minor_faults != gopts->nr_pages) uffd_test_fail("stats check error"); else uffd_test_pass(); } -void uffd_minor_test(uffd_test_args_t *args) +void uffd_minor_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(false, false); + uffd_minor_test_common(gopts, false, false); } -void uffd_minor_wp_test(uffd_test_args_t *args) +void uffd_minor_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(false, true); + uffd_minor_test_common(gopts, false, true); } -void uffd_minor_collapse_test(uffd_test_args_t *args) +void uffd_minor_collapse_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_minor_test_common(true, false); + uffd_minor_test_common(gopts, true, false); } static sigjmp_buf jbuf, *sigbuf; @@ -678,7 +655,7 @@ static void sighndl(int sig, siginfo_t *siginfo, void *ptr) * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal * feature. Using monitor thread, verify no userfault events are generated. */ -static int faulting_process(int signal_test, bool wp) +static int faulting_process(uffd_global_test_opts_t *gopts, int signal_test, bool wp) { unsigned long nr, i; unsigned long long count; @@ -687,7 +664,7 @@ static int faulting_process(int signal_test, bool wp) struct sigaction act; volatile unsigned long signalled = 0; - split_nr_pages = (nr_pages + 1) / 2; + split_nr_pages = (gopts->nr_pages + 1) / 2; if (signal_test) { sigbuf = &jbuf; @@ -701,7 +678,7 @@ static int faulting_process(int signal_test, bool wp) for (nr = 0; nr < split_nr_pages; nr++) { volatile int steps = 1; - unsigned long offset = nr * page_size; + unsigned long offset = nr * gopts->page_size; if (signal_test) { if (sigsetjmp(*sigbuf, 1) != 0) { @@ -713,15 +690,15 @@ static int faulting_process(int signal_test, bool wp) if (steps == 1) { /* This is a MISSING request */ steps++; - if (copy_page(uffd, offset, wp)) + if (copy_page(gopts, offset, wp)) signalled++; } else { /* This is a WP request */ assert(steps == 2); - wp_range(uffd, - (__u64)area_dst + + wp_range(gopts->uffd, + (__u64)gopts->area_dst + offset, - page_size, false); + gopts->page_size, false); } } else { signalled++; @@ -730,51 +707,53 @@ static int faulting_process(int signal_test, bool wp) } } - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) + count = *area_count(gopts->area_dst, nr, gopts); + if (count != gopts->count_verify[nr]) err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[nr]); + nr, count, gopts->count_verify[nr]); /* * Trigger write protection if there is by writing * the same value back. */ - *area_count(area_dst, nr) = count; + *area_count(gopts->area_dst, nr, gopts) = count; } if (signal_test) return signalled != split_nr_pages; - area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, - MREMAP_MAYMOVE | MREMAP_FIXED, area_src); - if (area_dst == MAP_FAILED) + gopts->area_dst = mremap(gopts->area_dst, gopts->nr_pages * gopts->page_size, + gopts->nr_pages * gopts->page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, + gopts->area_src); + if (gopts->area_dst == MAP_FAILED) err("mremap"); /* Reset area_src since we just clobbered it */ - area_src = NULL; + gopts->area_src = NULL; - for (; nr < nr_pages; nr++) { - count = *area_count(area_dst, nr); - if (count != count_verify[nr]) { + for (; nr < gopts->nr_pages; nr++) { + count = *area_count(gopts->area_dst, nr, gopts); + if (count != gopts->count_verify[nr]) { err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[nr]); + nr, count, gopts->count_verify[nr]); } /* * Trigger write protection if there is by writing * the same value back. */ - *area_count(area_dst, nr) = count; + *area_count(gopts->area_dst, nr, gopts) = count; } - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); - for (nr = 0; nr < nr_pages; nr++) - for (i = 0; i < page_size; i++) - if (*(area_dst + nr * page_size + i) != 0) + for (nr = 0; nr < gopts->nr_pages; nr++) + for (i = 0; i < gopts->page_size; i++) + if (*(gopts->area_dst + nr * gopts->page_size + i) != 0) err("page %lu offset %lu is not zero", nr, i); return 0; } -static void uffd_sigbus_test_common(bool wp) +static void uffd_sigbus_test_common(uffd_global_test_opts_t *gopts, bool wp) { unsigned long userfaults; pthread_t uffd_mon; @@ -782,25 +761,26 @@ static void uffd_sigbus_test_common(bool wp) int err; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; - ready_for_fork = false; + gopts->ready_for_fork = false; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, wp, false)) err("register failure"); - if (faulting_process(1, wp)) + if (faulting_process(gopts, 1, wp)) err("faulting process failed"); - uffd_test_ops->release_pages(area_dst); + uffd_test_ops->release_pages(gopts, gopts->area_dst); args.apply_wp = wp; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); @@ -808,12 +788,12 @@ static void uffd_sigbus_test_common(bool wp) err("fork"); if (!pid) - exit(faulting_process(2, wp)); + exit(faulting_process(gopts, 2, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, (void **)&userfaults)) err("pthread_join()"); @@ -824,28 +804,29 @@ static void uffd_sigbus_test_common(bool wp) uffd_test_pass(); } -static void uffd_sigbus_test(uffd_test_args_t *args) +static void uffd_sigbus_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_sigbus_test_common(false); + uffd_sigbus_test_common(gopts, false); } -static void uffd_sigbus_wp_test(uffd_test_args_t *args) +static void uffd_sigbus_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_sigbus_test_common(true); + uffd_sigbus_test_common(gopts, true); } -static void uffd_events_test_common(bool wp) +static void uffd_events_test_common(uffd_global_test_opts_t *gopts, bool wp) { pthread_t uffd_mon; pid_t pid; int err; char c; struct uffd_args args = { 0 }; + args.gopts = gopts; - ready_for_fork = false; + gopts->ready_for_fork = false; - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, wp, false)) err("register failure"); @@ -853,7 +834,7 @@ static void uffd_events_test_common(bool wp) if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - while (!ready_for_fork) + while (!gopts->ready_for_fork) ; /* Wait for the poll_thread to start executing before forking */ pid = fork(); @@ -861,39 +842,39 @@ static void uffd_events_test_common(bool wp) err("fork"); if (!pid) - exit(faulting_process(0, wp)); + exit(faulting_process(gopts, 0, wp)); waitpid(pid, &err, 0); if (err) err("faulting process failed"); - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); - if (args.missing_faults != nr_pages) + if (args.missing_faults != gopts->nr_pages) uffd_test_fail("Fault counts wrong"); else uffd_test_pass(); } -static void uffd_events_test(uffd_test_args_t *args) +static void uffd_events_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_events_test_common(false); + uffd_events_test_common(gopts, false); } -static void uffd_events_wp_test(uffd_test_args_t *args) +static void uffd_events_wp_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { - uffd_events_test_common(true); + uffd_events_test_common(gopts, true); } -static void retry_uffdio_zeropage(int ufd, +static void retry_uffdio_zeropage(uffd_global_test_opts_t *gopts, struct uffdio_zeropage *uffdio_zeropage) { - uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start, + uffd_test_ops->alias_mapping(gopts, &uffdio_zeropage->range.start, uffdio_zeropage->range.len, 0); - if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { + if (ioctl(gopts->uffd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { if (uffdio_zeropage->zeropage != -EEXIST) err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)uffdio_zeropage->zeropage); @@ -903,16 +884,16 @@ static void retry_uffdio_zeropage(int ufd, } } -static bool do_uffdio_zeropage(int ufd, bool has_zeropage) +static bool do_uffdio_zeropage(uffd_global_test_opts_t *gopts, bool has_zeropage) { struct uffdio_zeropage uffdio_zeropage = { 0 }; int ret; __s64 res; - uffdio_zeropage.range.start = (unsigned long) area_dst; - uffdio_zeropage.range.len = page_size; + uffdio_zeropage.range.start = (unsigned long) gopts->area_dst; + uffdio_zeropage.range.len = gopts->page_size; uffdio_zeropage.mode = 0; - ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage); + ret = ioctl(gopts->uffd, UFFDIO_ZEROPAGE, &uffdio_zeropage); res = uffdio_zeropage.zeropage; if (ret) { /* real retval in ufdio_zeropage.zeropage */ @@ -921,10 +902,10 @@ static bool do_uffdio_zeropage(int ufd, bool has_zeropage) else if (res != -EINVAL) err("UFFDIO_ZEROPAGE not -EINVAL"); } else if (has_zeropage) { - if (res != page_size) + if (res != gopts->page_size) err("UFFDIO_ZEROPAGE unexpected size"); else - retry_uffdio_zeropage(ufd, &uffdio_zeropage); + retry_uffdio_zeropage(gopts, &uffdio_zeropage); return true; } else err("UFFDIO_ZEROPAGE succeeded"); @@ -950,25 +931,29 @@ uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len) } /* exercise UFFDIO_ZEROPAGE */ -static void uffd_zeropage_test(uffd_test_args_t *args) +static void uffd_zeropage_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { bool has_zeropage; int i; - has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size); - if (area_dst_alias) + has_zeropage = uffd_register_detect_zeropage(gopts->uffd, + gopts->area_dst, + gopts->page_size); + if (gopts->area_dst_alias) /* Ignore the retval; we already have it */ - uffd_register_detect_zeropage(uffd, area_dst_alias, page_size); + uffd_register_detect_zeropage(gopts->uffd, gopts->area_dst_alias, gopts->page_size); - if (do_uffdio_zeropage(uffd, has_zeropage)) - for (i = 0; i < page_size; i++) - if (area_dst[i] != 0) + if (do_uffdio_zeropage(gopts, has_zeropage)) + for (i = 0; i < gopts->page_size; i++) + if (gopts->area_dst[i] != 0) err("data non-zero at offset %d\n", i); - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("unregister"); - if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size)) + if (gopts->area_dst_alias && uffd_unregister(gopts->uffd, + gopts->area_dst_alias, + gopts->page_size)) err("unregister"); uffd_test_pass(); @@ -987,26 +972,27 @@ static void uffd_register_poison(int uffd, void *addr, uint64_t len) err("registered area doesn't support COPY and POISON ioctls"); } -static void do_uffdio_poison(int uffd, unsigned long offset) +static void do_uffdio_poison(uffd_global_test_opts_t *gopts, unsigned long offset) { struct uffdio_poison uffdio_poison = { 0 }; int ret; __s64 res; - uffdio_poison.range.start = (unsigned long) area_dst + offset; - uffdio_poison.range.len = page_size; + uffdio_poison.range.start = (unsigned long) gopts->area_dst + offset; + uffdio_poison.range.len = gopts->page_size; uffdio_poison.mode = 0; - ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison); + ret = ioctl(gopts->uffd, UFFDIO_POISON, &uffdio_poison); res = uffdio_poison.updated; if (ret) err("UFFDIO_POISON error: %"PRId64, (int64_t)res); - else if (res != page_size) + else if (res != gopts->page_size) err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res); } -static void uffd_poison_handle_fault( - struct uffd_msg *msg, struct uffd_args *args) +static void uffd_poison_handle_fault(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args) { unsigned long offset; @@ -1017,20 +1003,20 @@ static void uffd_poison_handle_fault( (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR)) err("unexpected fault type %llu", msg->arg.pagefault.flags); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; - offset &= ~(page_size-1); + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; + offset &= ~(gopts->page_size-1); /* Odd pages -> copy zeroed page; even pages -> poison. */ - if (offset & page_size) - copy_page(uffd, offset, false); + if (offset & gopts->page_size) + copy_page(gopts, offset, false); else - do_uffdio_poison(uffd, offset); + do_uffdio_poison(gopts, offset); } /* Make sure to cover odd/even, and minimum duplications */ #define UFFD_POISON_TEST_NPAGES 4 -static void uffd_poison_test(uffd_test_args_t *targs) +static void uffd_poison_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { pthread_t uffd_mon; char c; @@ -1039,15 +1025,17 @@ static void uffd_poison_test(uffd_test_args_t *targs) unsigned long nr_sigbus = 0; unsigned long nr, poison_pages = UFFD_POISON_TEST_NPAGES; - if (nr_pages < poison_pages) { - uffd_test_skip("Too few pages for POISON test"); + if (gopts->nr_pages < poison_pages) { + uffd_test_skip("Too less pages for POISON test"); return; } - fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + args.gopts = gopts; + + fcntl(gopts->uffd, F_SETFL, gopts->uffd_flags | O_NONBLOCK); - uffd_register_poison(uffd, area_dst, poison_pages * page_size); - memset(area_src, 0, poison_pages * page_size); + uffd_register_poison(gopts->uffd, gopts->area_dst, poison_pages * gopts->page_size); + memset(gopts->area_src, 0, poison_pages * gopts->page_size); args.handle_fault = uffd_poison_handle_fault; if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) @@ -1060,8 +1048,8 @@ static void uffd_poison_test(uffd_test_args_t *targs) err("sigaction"); for (nr = 0; nr < poison_pages; ++nr) { - unsigned long offset = nr * page_size; - const char *bytes = (const char *) area_dst + offset; + unsigned long offset = nr * gopts->page_size; + const char *bytes = (const char *) gopts->area_dst + offset; const char *i; if (sigsetjmp(*sigbuf, 1)) { @@ -1074,14 +1062,14 @@ static void uffd_poison_test(uffd_test_args_t *targs) continue; } - for (i = bytes; i < bytes + page_size; ++i) { + for (i = bytes; i < bytes + gopts->page_size; ++i) { if (*i) err("nonzero byte in area_dst (%p) at %p: %u", - area_dst, i, *i); + gopts->area_dst, i, *i); } } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("pthread_join()"); @@ -1094,7 +1082,9 @@ static void uffd_poison_test(uffd_test_args_t *targs) } static void -uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, +uffd_move_handle_fault_common(uffd_global_test_opts_t *gopts, + struct uffd_msg *msg, + struct uffd_args *args, unsigned long len) { unsigned long offset; @@ -1106,28 +1096,32 @@ uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE)) err("unexpected fault type %llu", msg->arg.pagefault.flags); - offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset = (char *)(unsigned long)msg->arg.pagefault.address - gopts->area_dst; offset &= ~(len-1); - if (move_page(uffd, offset, len)) + if (move_page(gopts, offset, len)) args->missing_faults++; } -static void uffd_move_handle_fault(struct uffd_msg *msg, +static void uffd_move_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, struct uffd_args *args) { - uffd_move_handle_fault_common(msg, args, page_size); + uffd_move_handle_fault_common(gopts, msg, args, gopts->page_size); } -static void uffd_move_pmd_handle_fault(struct uffd_msg *msg, +static void uffd_move_pmd_handle_fault(uffd_global_test_opts_t *gopts, struct uffd_msg *msg, struct uffd_args *args) { - uffd_move_handle_fault_common(msg, args, read_pmd_pagesize()); + uffd_move_handle_fault_common(gopts, msg, args, read_pmd_pagesize()); } static void -uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, - void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args)) +uffd_move_test_common(uffd_global_test_opts_t *gopts, + uffd_test_args_t *targs, + unsigned long chunk_size, + void (*handle_fault)(struct uffd_global_test_opts *gopts, + struct uffd_msg *msg, struct uffd_args *args) +) { unsigned long nr; pthread_t uffd_mon; @@ -1139,11 +1133,13 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, unsigned long src_offs = 0; unsigned long dst_offs = 0; + args.gopts = gopts; + /* Prevent source pages from being mapped more than once */ - if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK)) + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_DONTFORK)) err("madvise(MADV_DONTFORK) failure"); - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, false, false)) err("register failure"); @@ -1151,22 +1147,22 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) err("uffd_poll_thread create"); - step_size = chunk_size / page_size; - step_count = nr_pages / step_size; + step_size = chunk_size / gopts->page_size; + step_count = gopts->nr_pages / step_size; - if (chunk_size > page_size) { - char *aligned_src = ALIGN_UP(area_src, chunk_size); - char *aligned_dst = ALIGN_UP(area_dst, chunk_size); + if (chunk_size > gopts->page_size) { + char *aligned_src = ALIGN_UP(gopts->area_src, chunk_size); + char *aligned_dst = ALIGN_UP(gopts->area_dst, chunk_size); - if (aligned_src != area_src || aligned_dst != area_dst) { - src_offs = (aligned_src - area_src) / page_size; - dst_offs = (aligned_dst - area_dst) / page_size; + if (aligned_src != gopts->area_src || aligned_dst != gopts->area_dst) { + src_offs = (aligned_src - gopts->area_src) / gopts->page_size; + dst_offs = (aligned_dst - gopts->area_dst) / gopts->page_size; step_count--; } - orig_area_src = area_src; - orig_area_dst = area_dst; - area_src = aligned_src; - area_dst = aligned_dst; + orig_area_src = gopts->area_src; + orig_area_dst = gopts->area_dst; + gopts->area_src = aligned_src; + gopts->area_dst = aligned_dst; } /* @@ -1180,34 +1176,34 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, /* Check area_src content */ for (i = 0; i < step_size; i++) { - count = *area_count(area_src, nr + i); - if (count != count_verify[src_offs + nr + i]) + count = *area_count(gopts->area_src, nr + i, gopts); + if (count != gopts->count_verify[src_offs + nr + i]) err("nr %lu source memory invalid %llu %llu\n", - nr + i, count, count_verify[src_offs + nr + i]); + nr + i, count, gopts->count_verify[src_offs + nr + i]); } /* Faulting into area_dst should move the page or the huge page */ for (i = 0; i < step_size; i++) { - count = *area_count(area_dst, nr + i); - if (count != count_verify[dst_offs + nr + i]) + count = *area_count(gopts->area_dst, nr + i, gopts); + if (count != gopts->count_verify[dst_offs + nr + i]) err("nr %lu memory corruption %llu %llu\n", - nr, count, count_verify[dst_offs + nr + i]); + nr, count, gopts->count_verify[dst_offs + nr + i]); } /* Re-check area_src content which should be empty */ for (i = 0; i < step_size; i++) { - count = *area_count(area_src, nr + i); + count = *area_count(gopts->area_src, nr + i, gopts); if (count != 0) err("nr %lu move failed %llu %llu\n", - nr, count, count_verify[src_offs + nr + i]); + nr, count, gopts->count_verify[src_offs + nr + i]); } } - if (chunk_size > page_size) { - area_src = orig_area_src; - area_dst = orig_area_dst; + if (chunk_size > gopts->page_size) { + gopts->area_src = orig_area_src; + gopts->area_dst = orig_area_dst; } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + if (write(gopts->pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); if (pthread_join(uffd_mon, NULL)) err("join() failed"); @@ -1218,24 +1214,24 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, uffd_test_pass(); } -static void uffd_move_test(uffd_test_args_t *targs) +static void uffd_move_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - uffd_move_test_common(targs, page_size, uffd_move_handle_fault); + uffd_move_test_common(gopts, targs, gopts->page_size, uffd_move_handle_fault); } -static void uffd_move_pmd_test(uffd_test_args_t *targs) +static void uffd_move_pmd_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE)) + if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) err("madvise(MADV_HUGEPAGE) failure"); - uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_test_common(gopts, targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } -static void uffd_move_pmd_split_test(uffd_test_args_t *targs) +static void uffd_move_pmd_split_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { - if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE)) + if (madvise(gopts->area_dst, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) err("madvise(MADV_NOHUGEPAGE) failure"); - uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_test_common(gopts, targs, read_pmd_pagesize(), uffd_move_pmd_handle_fault); } @@ -1295,6 +1291,11 @@ typedef enum { THR_STATE_UNINTERRUPTIBLE, } thread_state; +typedef struct { + uffd_global_test_opts_t *gopts; + volatile pid_t *pid; +} mmap_changing_thread_args; + static void sleep_short(void) { usleep(1000); @@ -1337,7 +1338,9 @@ static void thread_state_until(pid_t tid, thread_state state) static void *uffd_mmap_changing_thread(void *opaque) { - volatile pid_t *pid = opaque; + mmap_changing_thread_args *args = opaque; + uffd_global_test_opts_t *gopts = args->gopts; + volatile pid_t *pid = args->pid; int ret; /* Unfortunately, it's only fetch-able from the thread itself.. */ @@ -1345,21 +1348,21 @@ static void *uffd_mmap_changing_thread(void *opaque) *pid = syscall(SYS_gettid); /* Inject an event, this will hang solid until the event read */ - ret = madvise(area_dst, page_size, MADV_REMOVE); + ret = madvise(gopts->area_dst, gopts->page_size, MADV_REMOVE); if (ret) err("madvise(MADV_REMOVE) failed"); return NULL; } -static void uffd_consume_message(int fd) +static void uffd_consume_message(uffd_global_test_opts_t *gopts) { struct uffd_msg msg = { 0 }; - while (uffd_read_msg(fd, &msg)); + while (uffd_read_msg(gopts, &msg)); } -static void uffd_mmap_changing_test(uffd_test_args_t *targs) +static void uffd_mmap_changing_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *targs) { /* * This stores the real PID (which can be different from how tid is @@ -1368,13 +1371,14 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) pid_t pid = 0; pthread_t tid; int ret; + mmap_changing_thread_args args = { gopts, &pid }; - if (uffd_register(uffd, area_dst, nr_pages * page_size, + if (uffd_register(gopts->uffd, gopts->area_dst, gopts->nr_pages * gopts->page_size, true, false, false)) err("uffd_register() failed"); /* Create a thread to generate the racy event */ - ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid); + ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &args); if (ret) err("pthread_create() failed"); @@ -1388,26 +1392,26 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) /* Wait until the thread hangs at REMOVE event */ thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE); - if (!uffdio_mmap_changing_test_copy(uffd)) + if (!uffdio_mmap_changing_test_copy(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_zeropage(uffd)) + if (!uffdio_mmap_changing_test_zeropage(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_move(uffd)) + if (!uffdio_mmap_changing_test_move(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_poison(uffd)) + if (!uffdio_mmap_changing_test_poison(gopts->uffd)) return; - if (!uffdio_mmap_changing_test_continue(uffd)) + if (!uffdio_mmap_changing_test_continue(gopts->uffd)) return; /* * All succeeded above! Recycle everything. Start by reading the * event so as to kick the thread roll again.. */ - uffd_consume_message(uffd); + uffd_consume_message(gopts); ret = pthread_join(tid, NULL); assert(ret == 0); @@ -1415,10 +1419,10 @@ static void uffd_mmap_changing_test(uffd_test_args_t *targs) uffd_test_pass(); } -static int prevent_hugepages(const char **errmsg) +static int prevent_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) { /* This should be done before source area is populated */ - if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) { + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_NOHUGEPAGE)) { /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */ if (errno != EINVAL) { if (errmsg) @@ -1429,10 +1433,10 @@ static int prevent_hugepages(const char **errmsg) return 0; } -static int request_hugepages(const char **errmsg) +static int request_hugepages(uffd_global_test_opts_t *gopts, const char **errmsg) { /* This should be done before source area is populated */ - if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) { + if (madvise(gopts->area_src, gopts->nr_pages * gopts->page_size, MADV_HUGEPAGE)) { if (errmsg) { *errmsg = (errno == EINVAL) ? "CONFIG_TRANSPARENT_HUGEPAGE is not set" : @@ -1456,13 +1460,17 @@ struct uffd_test_case_ops uffd_move_test_pmd_case_ops = { * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. */ static void -do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) +do_register_ioctls_test(uffd_global_test_opts_t *gopts, + uffd_test_args_t *args, + bool miss, + bool wp, + bool minor) { uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE); mem_type_t *mem_type = args->mem_type; int ret; - ret = uffd_register_with_ioctls(uffd, area_dst, page_size, + ret = uffd_register_with_ioctls(gopts->uffd, gopts->area_dst, gopts->page_size, miss, wp, minor, &ioctls); /* @@ -1493,18 +1501,18 @@ do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor) "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", " "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls); - if (uffd_unregister(uffd, area_dst, page_size)) + if (uffd_unregister(gopts->uffd, gopts->area_dst, gopts->page_size)) err("unregister"); } -static void uffd_register_ioctls_test(uffd_test_args_t *args) +static void uffd_register_ioctls_test(uffd_global_test_opts_t *gopts, uffd_test_args_t *args) { int miss, wp, minor; for (miss = 0; miss <= 1; miss++) for (wp = 0; wp <= 1; wp++) for (minor = 0; minor <= 1; minor++) - do_register_ioctls_test(args, miss, wp, minor); + do_register_ioctls_test(gopts, args, miss, wp, minor); uffd_test_pass(); } @@ -1742,6 +1750,28 @@ int main(int argc, char *argv[]) } for (j = 0; j < n_mems; j++) { mem_type = &mem_types[j]; + + /* Initialize global test options */ + uffd_global_test_opts_t gopts = { 0 }; + + gopts.map_shared = mem_type->shared; + uffd_test_ops = mem_type->mem_ops; + uffd_test_case_ops = test->test_case_ops; + + if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) + gopts.page_size = default_huge_page_size(); + else + gopts.page_size = psize(); + + /* Ensure we have at least 2 pages */ + gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2) + / gopts.page_size; + + gopts.nr_parallel = 1; + + /* Initialize test arguments */ + args.mem_type = mem_type; + if (!(test->mem_targets & mem_type->mem_flag)) continue; @@ -1756,13 +1786,12 @@ int main(int argc, char *argv[]) uffd_test_skip("feature missing"); continue; } - if (uffd_setup_environment(&args, test, mem_type, - &errmsg)) { + if (uffd_test_ctx_init(&gopts, test->uffd_feature_required, &errmsg)) { uffd_test_skip(errmsg); continue; } - test->uffd_fn(&args); - uffd_test_ctx_clear(); + test->uffd_fn(&gopts, &args); + uffd_test_ctx_clear(&gopts); } } diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c index c2ba7d46c7b4..4e4a591cf527 100644 --- a/tools/testing/selftests/mm/uffd-wp-mremap.c +++ b/tools/testing/selftests/mm/uffd-wp-mremap.c @@ -19,11 +19,6 @@ static size_t thpsizes[20]; static int nr_hugetlbsizes; static size_t hugetlbsizes[10]; -static int sz2ord(size_t size) -{ - return __builtin_ctzll(size / pagesize); -} - static int detect_thp_sizes(size_t sizes[], int max) { int count = 0; @@ -87,9 +82,9 @@ static void *alloc_one_folio(size_t size, bool private, bool hugetlb) struct thp_settings settings = *thp_current_settings(); if (private) - settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + settings.hugepages[sz2ord(size, pagesize)].enabled = THP_ALWAYS; else - settings.shmem_hugepages[sz2ord(size)].enabled = SHMEM_ALWAYS; + settings.shmem_hugepages[sz2ord(size, pagesize)].enabled = SHMEM_ALWAYS; thp_push_settings(&settings); @@ -157,7 +152,8 @@ static bool range_is_swapped(void *addr, size_t size) return true; } -static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb) +static void test_one_folio(uffd_global_test_opts_t *gopts, size_t size, bool private, + bool swapout, bool hugetlb) { struct uffdio_writeprotect wp_prms; uint64_t features = 0; @@ -181,21 +177,21 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb } /* Register range for uffd-wp. */ - if (userfaultfd_open(&features)) { + if (userfaultfd_open(gopts, &features)) { if (errno == ENOENT) ksft_test_result_skip("userfaultfd not available\n"); else ksft_test_result_fail("userfaultfd_open() failed\n"); goto out; } - if (uffd_register(uffd, mem, size, false, true, false)) { + if (uffd_register(gopts->uffd, mem, size, false, true, false)) { ksft_test_result_fail("uffd_register() failed\n"); goto out; } wp_prms.mode = UFFDIO_WRITEPROTECT_MODE_WP; wp_prms.range.start = (uintptr_t)mem; wp_prms.range.len = size; - if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { + if (ioctl(gopts->uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { ksft_test_result_fail("ioctl(UFFDIO_WRITEPROTECT) failed\n"); goto out; } @@ -242,9 +238,9 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb out: if (mem) munmap(mem, size); - if (uffd >= 0) { - close(uffd); - uffd = -1; + if (gopts->uffd >= 0) { + close(gopts->uffd); + gopts->uffd = -1; } } @@ -336,6 +332,7 @@ static const struct testcase testcases[] = { int main(int argc, char **argv) { + uffd_global_test_opts_t gopts = { 0 }; struct thp_settings settings; int i, j, plan = 0; @@ -367,8 +364,8 @@ int main(int argc, char **argv) const struct testcase *tc = &testcases[i]; for (j = 0; j < *tc->nr_sizes; j++) - test_one_folio(tc->sizes[j], tc->private, tc->swapout, - tc->hugetlb); + test_one_folio(&gopts, tc->sizes[j], tc->private, + tc->swapout, tc->hugetlb); } /* If THP is supported, restore original THP settings. */ diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c index 896b3f73fc53..306eba825107 100644 --- a/tools/testing/selftests/mm/va_high_addr_switch.c +++ b/tools/testing/selftests/mm/va_high_addr_switch.c @@ -230,10 +230,10 @@ void testcases_init(void) .msg = "mmap(-1, MAP_HUGETLB) again", }, { - .addr = (void *)(addr_switch_hint - pagesize), + .addr = (void *)(addr_switch_hint - hugepagesize), .size = 2 * hugepagesize, .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(addr_switch_hint - pagesize, 2*hugepagesize, MAP_HUGETLB)", + .msg = "mmap(addr_switch_hint - hugepagesize, 2*hugepagesize, MAP_HUGETLB)", .low_addr_required = 1, .keep_mapped = 1, }, diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 325de53966b6..a7d4b02b21dd 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -9,6 +9,7 @@ # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +orig_nr_hugepages=0 skip() { @@ -76,5 +77,41 @@ check_test_requirements() esac } +save_nr_hugepages() +{ + orig_nr_hugepages=$(cat /proc/sys/vm/nr_hugepages) +} + +restore_nr_hugepages() +{ + echo "$orig_nr_hugepages" > /proc/sys/vm/nr_hugepages +} + +setup_nr_hugepages() +{ + local needpgs=$1 + while read -r name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs="$size" + break + fi + done < /proc/meminfo + if [ "$freepgs" -ge "$needpgs" ]; then + return + fi + local hpgs=$((orig_nr_hugepages + needpgs)) + echo $hpgs > /proc/sys/vm/nr_hugepages + + local nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages) + if [ "$nr_hugepgs" != "$hpgs" ]; then + restore_nr_hugepages + skip "$0: no enough hugepages for testing" + fi +} + check_test_requirements +save_nr_hugepages +# 4 keep_mapped pages, and one for tmp usage +setup_nr_hugepages 5 ./va_high_addr_switch --run-hugetlb +restore_nr_hugepages diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 169dbd692bf5..81b33d8f78f4 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -44,12 +44,18 @@ * On Arm64 the address space is 256TB and support for * high mappings up to 4PB virtual address space has * been added. + * + * On PowerPC64, the address space up to 128TB can be + * mapped without a hint. Addresses beyond 128TB, up to + * 4PB, can be mapped with a hint. + * */ #define NR_CHUNKS_128TB ((128 * SZ_1TB) / MAP_CHUNK_SIZE) /* Number of chunks for 128TB */ #define NR_CHUNKS_256TB (NR_CHUNKS_128TB * 2UL) #define NR_CHUNKS_384TB (NR_CHUNKS_128TB * 3UL) #define NR_CHUNKS_3840TB (NR_CHUNKS_128TB * 30UL) +#define NR_CHUNKS_3968TB (NR_CHUNKS_128TB * 31UL) #define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */ #define ADDR_MARK_256TB (1UL << 48) /* First address beyond 256TB */ @@ -59,6 +65,11 @@ #define HIGH_ADDR_SHIFT 49 #define NR_CHUNKS_LOW NR_CHUNKS_256TB #define NR_CHUNKS_HIGH NR_CHUNKS_3840TB +#elif defined(__PPC64__) +#define HIGH_ADDR_MARK ADDR_MARK_128TB +#define HIGH_ADDR_SHIFT 48 +#define NR_CHUNKS_LOW NR_CHUNKS_128TB +#define NR_CHUNKS_HIGH NR_CHUNKS_3968TB #else #define HIGH_ADDR_MARK ADDR_MARK_128TB #define HIGH_ADDR_SHIFT 48 @@ -227,7 +238,7 @@ int main(int argc, char *argv[]) if (hptr[i] == MAP_FAILED) break; - mark_range(ptr[i], MAP_CHUNK_SIZE); + mark_range(hptr[i], MAP_CHUNK_SIZE); validate_addr(hptr[i], 1); } hchunks = i; diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 9dafa7669ef9..e33cda301dad 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -338,6 +338,19 @@ int detect_hugetlb_page_sizes(size_t sizes[], int max) return count; } +int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags) +{ + size_t count; + + count = pread(kpageflags_fd, flags, sizeof(*flags), + pfn * sizeof(*flags)); + + if (count != sizeof(*flags)) + return -1; + + return 0; +} + /* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls) @@ -402,7 +415,7 @@ unsigned long get_free_hugepages(void) return fhp; } -bool check_vmflag_io(void *addr) +static bool check_vmflag(void *addr, const char *flag) { char buffer[MAX_LINE_LENGTH]; const char *flags; @@ -419,13 +432,40 @@ bool check_vmflag_io(void *addr) if (!flaglen) return false; - if (flaglen == strlen("io") && !memcmp(flags, "io", flaglen)) + if (flaglen == strlen(flag) && !memcmp(flags, flag, flaglen)) return true; flags += flaglen; } } +bool check_vmflag_io(void *addr) +{ + return check_vmflag(addr, "io"); +} + +bool check_vmflag_pfnmap(void *addr) +{ + return check_vmflag(addr, "pf"); +} + +bool softdirty_supported(void) +{ + char *addr; + bool supported = false; + const size_t pagesize = getpagesize(); + + /* New mappings are expected to be marked with VM_SOFTDIRTY (sd). */ + addr = mmap(0, pagesize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (!addr) + ksft_exit_fail_msg("mmap failed\n"); + + supported = check_vmflag(addr, "sd"); + munmap(addr, pagesize); + return supported; +} + /* * Open an fd at /proc/$pid/maps and configure procmap_out ready for * PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise. @@ -555,3 +595,126 @@ bool detect_huge_zeropage(void) close(fd); return enabled; } + +long ksm_get_self_zero_pages(void) +{ + int proc_self_ksm_stat_fd; + char buf[200]; + char *substr_ksm_zero; + size_t value_pos; + ssize_t read_size; + + proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); + if (proc_self_ksm_stat_fd < 0) + return -errno; + + read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0); + close(proc_self_ksm_stat_fd); + if (read_size < 0) + return -errno; + + buf[read_size] = 0; + + substr_ksm_zero = strstr(buf, "ksm_zero_pages"); + if (!substr_ksm_zero) + return 0; + + value_pos = strcspn(substr_ksm_zero, "0123456789"); + return strtol(substr_ksm_zero + value_pos, NULL, 10); +} + +long ksm_get_self_merging_pages(void) +{ + int proc_self_ksm_merging_pages_fd; + char buf[10]; + ssize_t ret; + + proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", + O_RDONLY); + if (proc_self_ksm_merging_pages_fd < 0) + return -errno; + + ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0); + close(proc_self_ksm_merging_pages_fd); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + +long ksm_get_full_scans(void) +{ + int ksm_full_scans_fd; + char buf[10]; + ssize_t ret; + + ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); + if (ksm_full_scans_fd < 0) + return -errno; + + ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0); + close(ksm_full_scans_fd); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + +int ksm_use_zero_pages(void) +{ + int ksm_use_zero_pages_fd; + ssize_t ret; + + ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); + if (ksm_use_zero_pages_fd < 0) + return -errno; + + ret = write(ksm_use_zero_pages_fd, "1", 1); + close(ksm_use_zero_pages_fd); + return ret == 1 ? 0 : -errno; +} + +int ksm_start(void) +{ + int ksm_fd; + ssize_t ret; + long start_scans, end_scans; + + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + return -errno; + + /* Wait for two full scans such that any possible merging happened. */ + start_scans = ksm_get_full_scans(); + if (start_scans < 0) { + close(ksm_fd); + return start_scans; + } + ret = write(ksm_fd, "1", 1); + close(ksm_fd); + if (ret != 1) + return -errno; + do { + end_scans = ksm_get_full_scans(); + if (end_scans < 0) + return end_scans; + } while (end_scans < start_scans + 2); + + return 0; +} + +int ksm_stop(void) +{ + int ksm_fd; + ssize_t ret; + + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + return -errno; + + ret = write(ksm_fd, "2", 1); + close(ksm_fd); + return ret == 1 ? 0 : -errno; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index b55d1809debc..26c30fdc0241 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -18,6 +18,9 @@ #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) +#define KPF_COMPOUND_HEAD BIT_ULL(15) +#define KPF_COMPOUND_TAIL BIT_ULL(16) +#define KPF_THP BIT_ULL(22) /* * Ignore the checkpatch warning, we must read from x but don't want to do * anything with it in order to trigger a read page fault. We therefore must use @@ -85,6 +88,7 @@ bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size); int64_t allocate_transhuge(void *ptr, int pagemap_fd); unsigned long default_huge_page_size(void); int detect_hugetlb_page_sizes(size_t sizes[], int max); +int pageflags_get(unsigned long pfn, int kpageflags_fd, uint64_t *flags); int uffd_register(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor); @@ -93,12 +97,14 @@ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls); unsigned long get_free_hugepages(void); bool check_vmflag_io(void *addr); +bool check_vmflag_pfnmap(void *addr); int open_procmap(pid_t pid, struct procmap_fd *procmap_out); int query_procmap(struct procmap_fd *procmap); bool find_vma_procmap(struct procmap_fd *procmap, void *address); int close_procmap(struct procmap_fd *procmap); int write_sysfs(const char *file_path, unsigned long val); int read_sysfs(const char *file_path, unsigned long *val); +bool softdirty_supported(void); static inline int open_self_procmap(struct procmap_fd *procmap_out) { @@ -126,9 +132,21 @@ static inline void log_test_result(int result) ksft_test_result_report(result, "%s\n", test_name); } +static inline int sz2ord(size_t size, size_t pagesize) +{ + return __builtin_ctzll(size / pagesize); +} + void *sys_mremap(void *old_address, unsigned long old_size, unsigned long new_size, int flags, void *new_address); +long ksm_get_self_zero_pages(void); +long ksm_get_self_merging_pages(void); +long ksm_get_full_scans(void); +int ksm_use_zero_pages(void); +int ksm_start(void); +int ksm_stop(void); + /* * On ppc64 this will only work with radix 2M hugepage size */ diff --git a/tools/testing/selftests/namespaces/.gitignore b/tools/testing/selftests/namespaces/.gitignore new file mode 100644 index 000000000000..ccfb40837a73 --- /dev/null +++ b/tools/testing/selftests/namespaces/.gitignore @@ -0,0 +1,3 @@ +nsid_test +file_handle_test +init_ino_test diff --git a/tools/testing/selftests/namespaces/Makefile b/tools/testing/selftests/namespaces/Makefile new file mode 100644 index 000000000000..5fe4b3dc07d3 --- /dev/null +++ b/tools/testing/selftests/namespaces/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) + +TEST_GEN_PROGS := nsid_test file_handle_test init_ino_test + +include ../lib.mk + diff --git a/tools/testing/selftests/namespaces/config b/tools/testing/selftests/namespaces/config new file mode 100644 index 000000000000..d09836260262 --- /dev/null +++ b/tools/testing/selftests/namespaces/config @@ -0,0 +1,7 @@ +CONFIG_UTS_NS=y +CONFIG_TIME_NS=y +CONFIG_IPC_NS=y +CONFIG_USER_NS=y +CONFIG_PID_NS=y +CONFIG_NET_NS=y +CONFIG_CGROUPS=y diff --git a/tools/testing/selftests/namespaces/file_handle_test.c b/tools/testing/selftests/namespaces/file_handle_test.c new file mode 100644 index 000000000000..f1bc5773f552 --- /dev/null +++ b/tools/testing/selftests/namespaces/file_handle_test.c @@ -0,0 +1,1429 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <grp.h> +#include <limits.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <linux/unistd.h> +#include "../kselftest_harness.h" + +#ifndef FD_NSFS_ROOT +#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */ +#endif + +TEST(nsfs_net_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open a namespace file descriptor */ + ns_fd = open("/proc/self/ns/net", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT as unprivileged user */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + if (fd < 0 && errno == EPERM) { + SKIP(free(handle); close(ns_fd); + return, + "Permission denied for unprivileged user (expected)"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_uts_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open UTS namespace file descriptor */ + ns_fd = open("/proc/self/ns/uts", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_ipc_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open IPC namespace file descriptor */ + ns_fd = open("/proc/self/ns/ipc", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_pid_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open PID namespace file descriptor */ + ns_fd = open("/proc/self/ns/pid", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_mnt_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open mount namespace file descriptor */ + ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_user_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open user namespace file descriptor */ + ns_fd = open("/proc/self/ns/user", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_cgroup_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open cgroup namespace file descriptor */ + ns_fd = open("/proc/self/ns/cgroup", O_RDONLY); + if (ns_fd < 0) { + SKIP(free(handle); return, "cgroup namespace not available"); + } + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_time_handle) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + struct stat st1, st2; + + /* Drop to unprivileged uid/gid */ + ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */ + ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */ + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open time namespace file descriptor */ + ns_fd = open("/proc/self/ns/time", O_RDONLY); + if (ns_fd < 0) { + SKIP(free(handle); return, "time namespace not available"); + } + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Try to open using FD_NSFS_ROOT */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) { + SKIP(free(handle); close(ns_fd); + return, + "open_by_handle_at with FD_NSFS_ROOT not supported"); + } + ASSERT_GE(fd, 0); + + /* Verify we opened the correct namespace */ + ASSERT_EQ(fstat(ns_fd, &st1), 0); + ASSERT_EQ(fstat(fd, &st2), 0); + ASSERT_EQ(st1.st_ino, st2.st_ino); + ASSERT_EQ(st1.st_dev, st2.st_dev); + + close(fd); + close(ns_fd); + free(handle); +} + +TEST(nsfs_user_net_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current network namespace */ + ns_fd = open("/proc/self/ns/net", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new network namespace */ + ret = unshare(CLONE_NEWNET); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create network namespace */ + close(pipefd[1]); + exit(0); + } + + /* Try to open parent's network namespace handle from new user+net namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new network namespace"); + } + + /* Should fail with permission denied since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_uts_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current UTS namespace */ + ns_fd = open("/proc/self/ns/uts", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new UTS namespace */ + ret = unshare(CLONE_NEWUTS); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create UTS namespace */ + close(pipefd[1]); + exit(0); + } + + /* Try to open parent's UTS namespace handle from new user+uts namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new UTS namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_ipc_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current IPC namespace */ + ns_fd = open("/proc/self/ns/ipc", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new IPC namespace */ + ret = unshare(CLONE_NEWIPC); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create IPC namespace */ + close(pipefd[1]); + exit(0); + } + + /* Try to open parent's IPC namespace handle from new user+ipc namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new IPC namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_mnt_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current mount namespace */ + ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new mount namespace */ + ret = unshare(CLONE_NEWNS); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create mount namespace */ + close(pipefd[1]); + exit(0); + } + + /* Try to open parent's mount namespace handle from new user+mnt namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new mount namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_cgroup_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current cgroup namespace */ + ns_fd = open("/proc/self/ns/cgroup", O_RDONLY); + if (ns_fd < 0) { + SKIP(free(handle); close(pipefd[0]); close(pipefd[1]); + return, "cgroup namespace not available"); + } + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new cgroup namespace */ + ret = unshare(CLONE_NEWCGROUP); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create cgroup namespace */ + close(pipefd[1]); + exit(0); + } + + /* Try to open parent's cgroup namespace handle from new user+cgroup namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new cgroup namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_pid_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current PID namespace */ + ns_fd = open("/proc/self/ns/pid", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new PID namespace - requires fork to take effect */ + ret = unshare(CLONE_NEWPID); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create PID namespace */ + close(pipefd[1]); + exit(0); + } + + /* Fork again for PID namespace to take effect */ + pid_t child_pid = fork(); + if (child_pid < 0) { + write(pipefd[1], "N", + 1); /* Unable to fork in PID namespace */ + close(pipefd[1]); + exit(0); + } + + if (child_pid == 0) { + /* Grandchild in new PID namespace */ + /* Try to open parent's PID namespace handle from new user+pid namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", + 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Wait for grandchild */ + waitpid(child_pid, NULL, 0); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new PID namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_user_time_namespace_isolation) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + pid_t pid; + int status; + int pipefd[2]; + char result; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Create pipe for communication */ + ASSERT_EQ(pipe(pipefd), 0); + + /* Get handle for current time namespace */ + ns_fd = open("/proc/self/ns/time", O_RDONLY); + if (ns_fd < 0) { + SKIP(free(handle); close(pipefd[0]); close(pipefd[1]); + return, "time namespace not available"); + } + + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); close(pipefd[0]); + close(pipefd[1]); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + close(ns_fd); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* First create new user namespace to drop privileges */ + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + write(pipefd[1], "U", + 1); /* Unable to create user namespace */ + close(pipefd[1]); + exit(0); + } + + /* Write uid/gid mappings to maintain some capabilities */ + int uid_map_fd = open("/proc/self/uid_map", O_WRONLY); + int gid_map_fd = open("/proc/self/gid_map", O_WRONLY); + int setgroups_fd = open("/proc/self/setgroups", O_WRONLY); + + if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) { + write(pipefd[1], "M", 1); /* Unable to set mappings */ + close(pipefd[1]); + exit(0); + } + + /* Disable setgroups to allow gid mapping */ + write(setgroups_fd, "deny", 4); + close(setgroups_fd); + + /* Map current uid/gid to root in the new namespace */ + char mapping[64]; + snprintf(mapping, sizeof(mapping), "0 %d 1", getuid()); + write(uid_map_fd, mapping, strlen(mapping)); + close(uid_map_fd); + + snprintf(mapping, sizeof(mapping), "0 %d 1", getgid()); + write(gid_map_fd, mapping, strlen(mapping)); + close(gid_map_fd); + + /* Now create new time namespace - requires fork to take effect */ + ret = unshare(CLONE_NEWTIME); + if (ret < 0) { + write(pipefd[1], "N", + 1); /* Unable to create time namespace */ + close(pipefd[1]); + exit(0); + } + + /* Fork again for time namespace to take effect */ + pid_t child_pid = fork(); + if (child_pid < 0) { + write(pipefd[1], "N", + 1); /* Unable to fork in time namespace */ + close(pipefd[1]); + exit(0); + } + + if (child_pid == 0) { + /* Grandchild in new time namespace */ + /* Try to open parent's time namespace handle from new user+time namespace */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY); + + if (fd >= 0) { + /* Should NOT succeed - we're in a different user namespace */ + write(pipefd[1], "S", + 1); /* Unexpected success */ + close(fd); + } else if (errno == ESTALE) { + /* Expected: Stale file handle */ + write(pipefd[1], "P", 1); + } else { + /* Other error */ + write(pipefd[1], "F", 1); + } + + close(pipefd[1]); + exit(0); + } + + /* Wait for grandchild */ + waitpid(child_pid, NULL, 0); + exit(0); + } + + /* Parent process */ + close(pipefd[1]); + ASSERT_EQ(read(pipefd[0], &result, 1), 1); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + if (result == 'U') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new user namespace"); + } + if (result == 'M') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot set uid/gid mappings"); + } + if (result == 'N') { + SKIP(free(handle); close(pipefd[0]); + return, "Cannot create new time namespace"); + } + + /* Should fail with ESTALE since we're in a different user namespace */ + ASSERT_EQ(result, 'P'); + + close(pipefd[0]); + free(handle); +} + +TEST(nsfs_open_flags) +{ + struct file_handle *handle; + int mount_id; + int ret; + int fd; + int ns_fd; + + handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ); + ASSERT_NE(handle, NULL); + + /* Open a namespace file descriptor */ + ns_fd = open("/proc/self/ns/net", O_RDONLY); + ASSERT_GE(ns_fd, 0); + + /* Get handle for the namespace */ + handle->handle_bytes = MAX_HANDLE_SZ; + ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH); + if (ret < 0 && errno == EOPNOTSUPP) { + SKIP(free(handle); close(ns_fd); + return, "nsfs doesn't support file handles"); + } + ASSERT_EQ(ret, 0); + ASSERT_GT(handle->handle_bytes, 0); + + /* Test invalid flags that should fail */ + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_WRONLY); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, EPERM); + + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDWR); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, EPERM); + + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_TRUNC); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, EPERM); + + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_DIRECT); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, EINVAL); + + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_TMPFILE); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, EINVAL); + + fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_DIRECTORY); + ASSERT_LT(fd, 0); + ASSERT_EQ(errno, ENOTDIR); + + close(ns_fd); + free(handle); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/namespaces/init_ino_test.c b/tools/testing/selftests/namespaces/init_ino_test.c new file mode 100644 index 000000000000..5b6993c3740b --- /dev/null +++ b/tools/testing/selftests/namespaces/init_ino_test.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <linux/nsfs.h> + +#include "../kselftest_harness.h" + +struct ns_info { + const char *name; + const char *proc_path; + unsigned int expected_ino; +}; + +static struct ns_info namespaces[] = { + { "ipc", "/proc/1/ns/ipc", IPC_NS_INIT_INO }, + { "uts", "/proc/1/ns/uts", UTS_NS_INIT_INO }, + { "user", "/proc/1/ns/user", USER_NS_INIT_INO }, + { "pid", "/proc/1/ns/pid", PID_NS_INIT_INO }, + { "cgroup", "/proc/1/ns/cgroup", CGROUP_NS_INIT_INO }, + { "time", "/proc/1/ns/time", TIME_NS_INIT_INO }, + { "net", "/proc/1/ns/net", NET_NS_INIT_INO }, + { "mnt", "/proc/1/ns/mnt", MNT_NS_INIT_INO }, +}; + +TEST(init_namespace_inodes) +{ + struct stat st; + + for (int i = 0; i < sizeof(namespaces) / sizeof(namespaces[0]); i++) { + int ret = stat(namespaces[i].proc_path, &st); + + /* Some namespaces might not be available (e.g., time namespace on older kernels) */ + if (ret < 0) { + if (errno == ENOENT) { + ksft_test_result_skip("%s namespace not available\n", + namespaces[i].name); + continue; + } + ASSERT_GE(ret, 0) + TH_LOG("Failed to stat %s: %s", + namespaces[i].proc_path, strerror(errno)); + } + + ASSERT_EQ(st.st_ino, namespaces[i].expected_ino) + TH_LOG("Namespace %s has inode 0x%lx, expected 0x%x", + namespaces[i].name, st.st_ino, namespaces[i].expected_ino); + + ksft_print_msg("Namespace %s: inode 0x%lx matches expected 0x%x\n", + namespaces[i].name, st.st_ino, namespaces[i].expected_ino); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/namespaces/nsid_test.c b/tools/testing/selftests/namespaces/nsid_test.c new file mode 100644 index 000000000000..e28accd74a57 --- /dev/null +++ b/tools/testing/selftests/namespaces/nsid_test.c @@ -0,0 +1,986 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <assert.h> +#include <fcntl.h> +#include <inttypes.h> +#include <libgen.h> +#include <limits.h> +#include <pthread.h> +#include <string.h> +#include <sys/mount.h> +#include <poll.h> +#include <sys/epoll.h> +#include <sys/resource.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <unistd.h> +#include <linux/fs.h> +#include <linux/limits.h> +#include <linux/nsfs.h> +#include "../kselftest_harness.h" + +TEST(nsid_mntns_basic) +{ + __u64 mnt_ns_id = 0; + int fd_mntns; + int ret; + + /* Open the current mount namespace */ + fd_mntns = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(fd_mntns, 0); + + /* Get the mount namespace ID */ + ret = ioctl(fd_mntns, NS_GET_MNTNS_ID, &mnt_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(mnt_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 mnt_ns_id2 = 0; + ret = ioctl(fd_mntns, NS_GET_ID, &mnt_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mnt_ns_id, mnt_ns_id2); + + close(fd_mntns); +} + +TEST(nsid_mntns_separate) +{ + __u64 parent_mnt_ns_id = 0; + __u64 child_mnt_ns_id = 0; + int fd_parent_mntns, fd_child_mntns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's mount namespace ID */ + fd_parent_mntns = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(fd_parent_mntns, 0); + ret = ioctl(fd_parent_mntns, NS_GET_ID, &parent_mnt_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_mnt_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new mount namespace */ + ret = unshare(CLONE_NEWNS); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_mntns); + SKIP(return, "No permission to create mount namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's mount namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/mnt", pid); + fd_child_mntns = open(path, O_RDONLY); + ASSERT_GE(fd_child_mntns, 0); + + /* Get child's mount namespace ID */ + ret = ioctl(fd_child_mntns, NS_GET_ID, &child_mnt_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_mnt_ns_id, 0); + + /* Parent and child should have different mount namespace IDs */ + ASSERT_NE(parent_mnt_ns_id, child_mnt_ns_id); + + close(fd_parent_mntns); + close(fd_child_mntns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_cgroupns_basic) +{ + __u64 cgroup_ns_id = 0; + int fd_cgroupns; + int ret; + + /* Open the current cgroup namespace */ + fd_cgroupns = open("/proc/self/ns/cgroup", O_RDONLY); + ASSERT_GE(fd_cgroupns, 0); + + /* Get the cgroup namespace ID */ + ret = ioctl(fd_cgroupns, NS_GET_ID, &cgroup_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(cgroup_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 cgroup_ns_id2 = 0; + ret = ioctl(fd_cgroupns, NS_GET_ID, &cgroup_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(cgroup_ns_id, cgroup_ns_id2); + + close(fd_cgroupns); +} + +TEST(nsid_cgroupns_separate) +{ + __u64 parent_cgroup_ns_id = 0; + __u64 child_cgroup_ns_id = 0; + int fd_parent_cgroupns, fd_child_cgroupns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's cgroup namespace ID */ + fd_parent_cgroupns = open("/proc/self/ns/cgroup", O_RDONLY); + ASSERT_GE(fd_parent_cgroupns, 0); + ret = ioctl(fd_parent_cgroupns, NS_GET_ID, &parent_cgroup_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_cgroup_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new cgroup namespace */ + ret = unshare(CLONE_NEWCGROUP); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_cgroupns); + SKIP(return, "No permission to create cgroup namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's cgroup namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/cgroup", pid); + fd_child_cgroupns = open(path, O_RDONLY); + ASSERT_GE(fd_child_cgroupns, 0); + + /* Get child's cgroup namespace ID */ + ret = ioctl(fd_child_cgroupns, NS_GET_ID, &child_cgroup_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_cgroup_ns_id, 0); + + /* Parent and child should have different cgroup namespace IDs */ + ASSERT_NE(parent_cgroup_ns_id, child_cgroup_ns_id); + + close(fd_parent_cgroupns); + close(fd_child_cgroupns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_ipcns_basic) +{ + __u64 ipc_ns_id = 0; + int fd_ipcns; + int ret; + + /* Open the current IPC namespace */ + fd_ipcns = open("/proc/self/ns/ipc", O_RDONLY); + ASSERT_GE(fd_ipcns, 0); + + /* Get the IPC namespace ID */ + ret = ioctl(fd_ipcns, NS_GET_ID, &ipc_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(ipc_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 ipc_ns_id2 = 0; + ret = ioctl(fd_ipcns, NS_GET_ID, &ipc_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(ipc_ns_id, ipc_ns_id2); + + close(fd_ipcns); +} + +TEST(nsid_ipcns_separate) +{ + __u64 parent_ipc_ns_id = 0; + __u64 child_ipc_ns_id = 0; + int fd_parent_ipcns, fd_child_ipcns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's IPC namespace ID */ + fd_parent_ipcns = open("/proc/self/ns/ipc", O_RDONLY); + ASSERT_GE(fd_parent_ipcns, 0); + ret = ioctl(fd_parent_ipcns, NS_GET_ID, &parent_ipc_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_ipc_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new IPC namespace */ + ret = unshare(CLONE_NEWIPC); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_ipcns); + SKIP(return, "No permission to create IPC namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's IPC namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/ipc", pid); + fd_child_ipcns = open(path, O_RDONLY); + ASSERT_GE(fd_child_ipcns, 0); + + /* Get child's IPC namespace ID */ + ret = ioctl(fd_child_ipcns, NS_GET_ID, &child_ipc_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_ipc_ns_id, 0); + + /* Parent and child should have different IPC namespace IDs */ + ASSERT_NE(parent_ipc_ns_id, child_ipc_ns_id); + + close(fd_parent_ipcns); + close(fd_child_ipcns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_utsns_basic) +{ + __u64 uts_ns_id = 0; + int fd_utsns; + int ret; + + /* Open the current UTS namespace */ + fd_utsns = open("/proc/self/ns/uts", O_RDONLY); + ASSERT_GE(fd_utsns, 0); + + /* Get the UTS namespace ID */ + ret = ioctl(fd_utsns, NS_GET_ID, &uts_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(uts_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 uts_ns_id2 = 0; + ret = ioctl(fd_utsns, NS_GET_ID, &uts_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(uts_ns_id, uts_ns_id2); + + close(fd_utsns); +} + +TEST(nsid_utsns_separate) +{ + __u64 parent_uts_ns_id = 0; + __u64 child_uts_ns_id = 0; + int fd_parent_utsns, fd_child_utsns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's UTS namespace ID */ + fd_parent_utsns = open("/proc/self/ns/uts", O_RDONLY); + ASSERT_GE(fd_parent_utsns, 0); + ret = ioctl(fd_parent_utsns, NS_GET_ID, &parent_uts_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_uts_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new UTS namespace */ + ret = unshare(CLONE_NEWUTS); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_utsns); + SKIP(return, "No permission to create UTS namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's UTS namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid); + fd_child_utsns = open(path, O_RDONLY); + ASSERT_GE(fd_child_utsns, 0); + + /* Get child's UTS namespace ID */ + ret = ioctl(fd_child_utsns, NS_GET_ID, &child_uts_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_uts_ns_id, 0); + + /* Parent and child should have different UTS namespace IDs */ + ASSERT_NE(parent_uts_ns_id, child_uts_ns_id); + + close(fd_parent_utsns); + close(fd_child_utsns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_userns_basic) +{ + __u64 user_ns_id = 0; + int fd_userns; + int ret; + + /* Open the current user namespace */ + fd_userns = open("/proc/self/ns/user", O_RDONLY); + ASSERT_GE(fd_userns, 0); + + /* Get the user namespace ID */ + ret = ioctl(fd_userns, NS_GET_ID, &user_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(user_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 user_ns_id2 = 0; + ret = ioctl(fd_userns, NS_GET_ID, &user_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(user_ns_id, user_ns_id2); + + close(fd_userns); +} + +TEST(nsid_userns_separate) +{ + __u64 parent_user_ns_id = 0; + __u64 child_user_ns_id = 0; + int fd_parent_userns, fd_child_userns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's user namespace ID */ + fd_parent_userns = open("/proc/self/ns/user", O_RDONLY); + ASSERT_GE(fd_parent_userns, 0); + ret = ioctl(fd_parent_userns, NS_GET_ID, &parent_user_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_user_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new user namespace */ + ret = unshare(CLONE_NEWUSER); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_userns); + SKIP(return, "No permission to create user namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's user namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/user", pid); + fd_child_userns = open(path, O_RDONLY); + ASSERT_GE(fd_child_userns, 0); + + /* Get child's user namespace ID */ + ret = ioctl(fd_child_userns, NS_GET_ID, &child_user_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_user_ns_id, 0); + + /* Parent and child should have different user namespace IDs */ + ASSERT_NE(parent_user_ns_id, child_user_ns_id); + + close(fd_parent_userns); + close(fd_child_userns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_timens_basic) +{ + __u64 time_ns_id = 0; + int fd_timens; + int ret; + + /* Open the current time namespace */ + fd_timens = open("/proc/self/ns/time", O_RDONLY); + if (fd_timens < 0) { + SKIP(return, "Time namespaces not supported"); + } + + /* Get the time namespace ID */ + ret = ioctl(fd_timens, NS_GET_ID, &time_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(time_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 time_ns_id2 = 0; + ret = ioctl(fd_timens, NS_GET_ID, &time_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(time_ns_id, time_ns_id2); + + close(fd_timens); +} + +TEST(nsid_timens_separate) +{ + __u64 parent_time_ns_id = 0; + __u64 child_time_ns_id = 0; + int fd_parent_timens, fd_child_timens; + int ret; + pid_t pid; + int pipefd[2]; + + /* Open the current time namespace */ + fd_parent_timens = open("/proc/self/ns/time", O_RDONLY); + if (fd_parent_timens < 0) { + SKIP(return, "Time namespaces not supported"); + } + + /* Get parent's time namespace ID */ + ret = ioctl(fd_parent_timens, NS_GET_ID, &parent_time_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_time_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new time namespace */ + ret = unshare(CLONE_NEWTIME); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES || errno == EINVAL) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Fork a grandchild to actually enter the new namespace */ + pid_t grandchild = fork(); + if (grandchild == 0) { + /* Grandchild is in the new namespace */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + pause(); + _exit(0); + } else if (grandchild > 0) { + /* Child writes grandchild PID and waits */ + write(pipefd[1], "Y", 1); + write(pipefd[1], &grandchild, sizeof(grandchild)); + close(pipefd[1]); + pause(); /* Keep the parent alive to maintain the grandchild */ + _exit(0); + } else { + _exit(1); + } + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_timens); + close(pipefd[0]); + SKIP(return, "Cannot create time namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + pid_t grandchild_pid; + ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid)); + close(pipefd[0]); + + /* Open grandchild's time namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/time", grandchild_pid); + fd_child_timens = open(path, O_RDONLY); + ASSERT_GE(fd_child_timens, 0); + + /* Get child's time namespace ID */ + ret = ioctl(fd_child_timens, NS_GET_ID, &child_time_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_time_ns_id, 0); + + /* Parent and child should have different time namespace IDs */ + ASSERT_NE(parent_time_ns_id, child_time_ns_id); + + close(fd_parent_timens); + close(fd_child_timens); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_pidns_basic) +{ + __u64 pid_ns_id = 0; + int fd_pidns; + int ret; + + /* Open the current PID namespace */ + fd_pidns = open("/proc/self/ns/pid", O_RDONLY); + ASSERT_GE(fd_pidns, 0); + + /* Get the PID namespace ID */ + ret = ioctl(fd_pidns, NS_GET_ID, &pid_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(pid_ns_id, 0); + + /* Verify we can get the same ID again */ + __u64 pid_ns_id2 = 0; + ret = ioctl(fd_pidns, NS_GET_ID, &pid_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(pid_ns_id, pid_ns_id2); + + close(fd_pidns); +} + +TEST(nsid_pidns_separate) +{ + __u64 parent_pid_ns_id = 0; + __u64 child_pid_ns_id = 0; + int fd_parent_pidns, fd_child_pidns; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's PID namespace ID */ + fd_parent_pidns = open("/proc/self/ns/pid", O_RDONLY); + ASSERT_GE(fd_parent_pidns, 0); + ret = ioctl(fd_parent_pidns, NS_GET_ID, &parent_pid_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_pid_ns_id, 0); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new PID namespace */ + ret = unshare(CLONE_NEWPID); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Fork a grandchild to actually enter the new namespace */ + pid_t grandchild = fork(); + if (grandchild == 0) { + /* Grandchild is in the new namespace */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + pause(); + _exit(0); + } else if (grandchild > 0) { + /* Child writes grandchild PID and waits */ + write(pipefd[1], "Y", 1); + write(pipefd[1], &grandchild, sizeof(grandchild)); + close(pipefd[1]); + pause(); /* Keep the parent alive to maintain the grandchild */ + _exit(0); + } else { + _exit(1); + } + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_pidns); + close(pipefd[0]); + SKIP(return, "No permission to create PID namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + pid_t grandchild_pid; + ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid)); + close(pipefd[0]); + + /* Open grandchild's PID namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/pid", grandchild_pid); + fd_child_pidns = open(path, O_RDONLY); + ASSERT_GE(fd_child_pidns, 0); + + /* Get child's PID namespace ID */ + ret = ioctl(fd_child_pidns, NS_GET_ID, &child_pid_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_pid_ns_id, 0); + + /* Parent and child should have different PID namespace IDs */ + ASSERT_NE(parent_pid_ns_id, child_pid_ns_id); + + close(fd_parent_pidns); + close(fd_child_pidns); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST(nsid_netns_basic) +{ + __u64 net_ns_id = 0; + __u64 netns_cookie = 0; + int fd_netns; + int sock; + socklen_t optlen; + int ret; + + /* Open the current network namespace */ + fd_netns = open("/proc/self/ns/net", O_RDONLY); + ASSERT_GE(fd_netns, 0); + + /* Get the network namespace ID via ioctl */ + ret = ioctl(fd_netns, NS_GET_ID, &net_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(net_ns_id, 0); + + /* Create a socket to get the SO_NETNS_COOKIE */ + sock = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(sock, 0); + + /* Get the network namespace cookie via socket option */ + optlen = sizeof(netns_cookie); + ret = getsockopt(sock, SOL_SOCKET, SO_NETNS_COOKIE, &netns_cookie, &optlen); + ASSERT_EQ(ret, 0); + ASSERT_EQ(optlen, sizeof(netns_cookie)); + + /* The namespace ID and cookie should be identical */ + ASSERT_EQ(net_ns_id, netns_cookie); + + /* Verify we can get the same ID again */ + __u64 net_ns_id2 = 0; + ret = ioctl(fd_netns, NS_GET_ID, &net_ns_id2); + ASSERT_EQ(ret, 0); + ASSERT_EQ(net_ns_id, net_ns_id2); + + close(sock); + close(fd_netns); +} + +TEST(nsid_netns_separate) +{ + __u64 parent_net_ns_id = 0; + __u64 parent_netns_cookie = 0; + __u64 child_net_ns_id = 0; + __u64 child_netns_cookie = 0; + int fd_parent_netns, fd_child_netns; + int parent_sock, child_sock; + socklen_t optlen; + int ret; + pid_t pid; + int pipefd[2]; + + /* Get parent's network namespace ID */ + fd_parent_netns = open("/proc/self/ns/net", O_RDONLY); + ASSERT_GE(fd_parent_netns, 0); + ret = ioctl(fd_parent_netns, NS_GET_ID, &parent_net_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(parent_net_ns_id, 0); + + /* Get parent's network namespace cookie */ + parent_sock = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(parent_sock, 0); + optlen = sizeof(parent_netns_cookie); + ret = getsockopt(parent_sock, SOL_SOCKET, SO_NETNS_COOKIE, &parent_netns_cookie, &optlen); + ASSERT_EQ(ret, 0); + + /* Verify parent's ID and cookie match */ + ASSERT_EQ(parent_net_ns_id, parent_netns_cookie); + + /* Create a pipe for synchronization */ + ASSERT_EQ(pipe(pipefd), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* Child process */ + close(pipefd[0]); + + /* Create new network namespace */ + ret = unshare(CLONE_NEWNET); + if (ret != 0) { + /* Skip test if we don't have permission */ + if (errno == EPERM || errno == EACCES) { + write(pipefd[1], "S", 1); /* Signal skip */ + _exit(0); + } + _exit(1); + } + + /* Signal success */ + write(pipefd[1], "Y", 1); + close(pipefd[1]); + + /* Keep namespace alive */ + pause(); + _exit(0); + } + + /* Parent process */ + close(pipefd[1]); + + char buf; + ASSERT_EQ(read(pipefd[0], &buf, 1), 1); + close(pipefd[0]); + + if (buf == 'S') { + /* Child couldn't create namespace, skip test */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); + close(fd_parent_netns); + close(parent_sock); + SKIP(return, "No permission to create network namespace"); + } + + ASSERT_EQ(buf, 'Y'); + + /* Open child's network namespace */ + char path[256]; + snprintf(path, sizeof(path), "/proc/%d/ns/net", pid); + fd_child_netns = open(path, O_RDONLY); + ASSERT_GE(fd_child_netns, 0); + + /* Get child's network namespace ID */ + ret = ioctl(fd_child_netns, NS_GET_ID, &child_net_ns_id); + ASSERT_EQ(ret, 0); + ASSERT_NE(child_net_ns_id, 0); + + /* Create socket in child's namespace to get cookie */ + ret = setns(fd_child_netns, CLONE_NEWNET); + if (ret == 0) { + child_sock = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_GE(child_sock, 0); + + optlen = sizeof(child_netns_cookie); + ret = getsockopt(child_sock, SOL_SOCKET, SO_NETNS_COOKIE, &child_netns_cookie, &optlen); + ASSERT_EQ(ret, 0); + + /* Verify child's ID and cookie match */ + ASSERT_EQ(child_net_ns_id, child_netns_cookie); + + close(child_sock); + + /* Return to parent namespace */ + setns(fd_parent_netns, CLONE_NEWNET); + } + + /* Parent and child should have different network namespace IDs */ + ASSERT_NE(parent_net_ns_id, child_net_ns_id); + if (child_netns_cookie != 0) { + ASSERT_NE(parent_netns_cookie, child_netns_cookie); + } + + close(fd_parent_netns); + close(fd_child_netns); + close(parent_sock); + + /* Clean up child process */ + kill(pid, SIGTERM); + waitpid(pid, NULL, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 47c293c2962f..439101b518ee 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -16,6 +16,7 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt msg_oob msg_zerocopy @@ -51,6 +52,7 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap +tcp_port_share tfo timestamping tls diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 2b31d4a93ad7..b5127e968108 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,131 +1,207 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for net selftests -CFLAGS += -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../ -TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ - rtnetlink.sh xfrm_policy.sh -TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh -TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh -TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh -TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh -TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh -TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh -TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh -TEST_PROGS += route_localnet.sh -TEST_PROGS += reuseaddr_ports_exhausted.sh -TEST_PROGS += txtimestamp.sh -TEST_PROGS += vrf-xfrm-tests.sh -TEST_PROGS += rxtimestamp.sh -TEST_PROGS += drop_monitor_tests.sh -TEST_PROGS += vrf_route_leaking.sh -TEST_PROGS += bareudp.sh -TEST_PROGS += amt.sh -TEST_PROGS += unicast_extensions.sh -TEST_PROGS += udpgro_fwd.sh -TEST_PROGS += udpgro_frglist.sh -TEST_PROGS += nat6to4.sh -TEST_PROGS += veth.sh -TEST_PROGS += ioam6.sh -TEST_PROGS += gro.sh -TEST_PROGS += gre_gso.sh -TEST_PROGS += gre_ipv6_lladdr.sh -TEST_PROGS += cmsg_so_mark.sh -TEST_PROGS += cmsg_so_priority.sh -TEST_PROGS += test_so_rcv.sh -TEST_PROGS += cmsg_time.sh cmsg_ip.sh -TEST_PROGS += netns-name.sh -TEST_PROGS += link_netns.py -TEST_PROGS += nl_netdev.py -TEST_PROGS += rtnetlink.py -TEST_PROGS += rtnetlink_notification.sh -TEST_PROGS += srv6_end_dt46_l3vpn_test.sh -TEST_PROGS += srv6_end_dt4_l3vpn_test.sh -TEST_PROGS += srv6_end_dt6_l3vpn_test.sh -TEST_PROGS += srv6_hencap_red_l3vpn_test.sh -TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh -TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh -TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh -TEST_PROGS += srv6_end_flavors_test.sh -TEST_PROGS += srv6_end_dx4_netfilter_test.sh -TEST_PROGS += srv6_end_dx6_netfilter_test.sh -TEST_PROGS += vrf_strict_mode_test.sh -TEST_PROGS += arp_ndisc_evict_nocarrier.sh -TEST_PROGS += ndisc_unsolicited_na_test.sh -TEST_PROGS += arp_ndisc_untracked_subnets.sh -TEST_PROGS += stress_reuseport_listen.sh -TEST_PROGS += l2_tos_ttl_inherit.sh -TEST_PROGS += bind_bhash.sh -TEST_PROGS += ip_local_port_range.sh -TEST_PROGS += rps_default_mask.sh -TEST_PROGS += big_tcp.sh -TEST_PROGS += netns-sysctl.sh -TEST_PROGS += netdev-l2addr.sh -TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh -TEST_GEN_FILES = socket nettest -TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any -TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite -TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag -TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie -TEST_GEN_FILES += tcp_fastopen_backup_key -TEST_GEN_FILES += fin_ack_lat -TEST_GEN_FILES += reuseaddr_ports_exhausted -TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp -TEST_GEN_FILES += ipsec -TEST_GEN_FILES += ioam6_parser -TEST_GEN_FILES += gro -TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa -TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll -TEST_GEN_FILES += toeplitz -TEST_GEN_FILES += cmsg_sender -TEST_GEN_FILES += stress_reuseport_listen -TEST_GEN_FILES += so_rcv_listener -TEST_PROGS += test_vxlan_vnifiltering.sh -TEST_GEN_FILES += io_uring_zerocopy_tx -TEST_PROGS += io_uring_zerocopy_tx.sh -TEST_GEN_FILES += bind_bhash -TEST_GEN_PROGS += sk_bind_sendto_listen -TEST_GEN_PROGS += sk_connect_zero_addr -TEST_GEN_PROGS += sk_so_peek_off -TEST_PROGS += test_ingress_egress_chaining.sh -TEST_GEN_PROGS += so_incoming_cpu -TEST_PROGS += sctp_vrf.sh -TEST_GEN_FILES += sctp_hello -TEST_GEN_FILES += ip_local_port_range -TEST_GEN_PROGS += bind_wildcard -TEST_GEN_PROGS += bind_timewait -TEST_PROGS += test_vxlan_mdb.sh -TEST_PROGS += test_bridge_neigh_suppress.sh -TEST_PROGS += test_vxlan_nh.sh -TEST_PROGS += test_vxlan_nolocalbypass.sh -TEST_PROGS += test_bridge_backup_port.sh -TEST_PROGS += test_neigh.sh -TEST_PROGS += fdb_flush.sh fdb_notify.sh -TEST_PROGS += fq_band_pktlimit.sh -TEST_PROGS += vlan_hw_filter.sh -TEST_PROGS += vlan_bridge_binding.sh -TEST_PROGS += bpf_offload.py -TEST_PROGS += ipv6_route_update_soft_lockup.sh -TEST_PROGS += busy_poll_test.sh -TEST_GEN_PROGS += proc_net_pktgen -TEST_PROGS += lwt_dst_cache_ref_loop.sh -TEST_PROGS += skf_net_off.sh -TEST_GEN_FILES += skf_net_off -TEST_GEN_FILES += tfo -TEST_PROGS += tfo_passive.sh -TEST_PROGS += broadcast_ether_dst.sh -TEST_PROGS += broadcast_pmtu.sh -TEST_PROGS += ipv6_force_forwarding.sh +TEST_PROGS := \ + altnames.sh \ + amt.sh \ + arp_ndisc_evict_nocarrier.sh \ + arp_ndisc_untracked_subnets.sh \ + bareudp.sh \ + big_tcp.sh \ + bind_bhash.sh \ + bpf_offload.py \ + broadcast_ether_dst.sh \ + broadcast_pmtu.sh \ + busy_poll_test.sh \ + cmsg_ip.sh \ + cmsg_so_mark.sh \ + cmsg_so_priority.sh \ + cmsg_time.sh \ + drop_monitor_tests.sh \ + fcnal-ipv4.sh \ + fcnal-ipv6.sh \ + fcnal-other.sh \ + fdb_flush.sh \ + fdb_notify.sh \ + fib-onlink-tests.sh \ + fib_nexthop_multiprefix.sh \ + fib_nexthop_nongw.sh \ + fib_nexthops.sh \ + fib_rule_tests.sh \ + fib_tests.sh \ + fin_ack_lat.sh \ + fq_band_pktlimit.sh \ + gre_gso.sh \ + gre_ipv6_lladdr.sh \ + gro.sh \ + icmp.sh \ + icmp_redirect.sh \ + io_uring_zerocopy_tx.sh \ + ioam6.sh \ + ip6_gre_headroom.sh \ + ip_defrag.sh \ + ip_local_port_range.sh \ + ipv6_flowlabel.sh \ + ipv6_force_forwarding.sh \ + ipv6_route_update_soft_lockup.sh \ + l2_tos_ttl_inherit.sh \ + l2tp.sh \ + link_netns.py \ + lwt_dst_cache_ref_loop.sh \ + msg_zerocopy.sh \ + nat6to4.sh \ + ndisc_unsolicited_na_test.sh \ + netdev-l2addr.sh \ + netdevice.sh \ + netns-name.sh \ + netns-sysctl.sh \ + nl_netdev.py \ + pmtu.sh \ + psock_snd.sh \ + reuseaddr_ports_exhausted.sh \ + reuseport_addr_any.sh \ + route_hint.sh \ + route_localnet.sh \ + rps_default_mask.sh \ + rtnetlink.py \ + rtnetlink.sh \ + rtnetlink_notification.sh \ + run_afpackettests \ + run_netsocktests \ + rxtimestamp.sh \ + sctp_vrf.sh \ + skf_net_off.sh \ + so_txtime.sh \ + srv6_end_dt46_l3vpn_test.sh \ + srv6_end_dt4_l3vpn_test.sh \ + srv6_end_dt6_l3vpn_test.sh \ + srv6_end_dx4_netfilter_test.sh \ + srv6_end_dx6_netfilter_test.sh \ + srv6_end_flavors_test.sh \ + srv6_end_next_csid_l3vpn_test.sh \ + srv6_end_x_next_csid_l3vpn_test.sh \ + srv6_hencap_red_l3vpn_test.sh \ + srv6_hl2encap_red_l2vpn_test.sh \ + stress_reuseport_listen.sh \ + tcp_fastopen_backup_key.sh \ + test_bpf.sh \ + test_bridge_backup_port.sh \ + test_bridge_neigh_suppress.sh \ + test_ingress_egress_chaining.sh \ + test_neigh.sh \ + test_so_rcv.sh \ + test_vxlan_fdb_changelink.sh \ + test_vxlan_mdb.sh \ + test_vxlan_nh.sh \ + test_vxlan_nolocalbypass.sh \ + test_vxlan_under_vrf.sh \ + test_vxlan_vnifiltering.sh \ + tfo_passive.sh \ + traceroute.sh \ + txtimestamp.sh \ + udpgro.sh \ + udpgro_bench.sh \ + udpgro_frglist.sh \ + udpgro_fwd.sh \ + udpgso.sh \ + udpgso_bench.sh \ + unicast_extensions.sh \ + veth.sh \ + vlan_bridge_binding.sh \ + vlan_hw_filter.sh \ + vrf-xfrm-tests.sh \ + vrf_route_leaking.sh \ + vrf_strict_mode_test.sh \ + xfrm_policy.sh \ +# end of TEST_PROGS + +TEST_PROGS_EXTENDED := \ + toeplitz.sh \ + toeplitz_client.sh \ + xfrm_policy_add_speed.sh \ +# end of TEST_PROGS_EXTENDED + +TEST_GEN_FILES := \ + bind_bhash \ + cmsg_sender \ + fin_ack_lat \ + gro \ + hwtstamp_config \ + io_uring_zerocopy_tx \ + ioam6_parser \ + ip_defrag \ + ip_local_port_range \ + ipsec \ + ipv6_flowlabel \ + ipv6_flowlabel_mgr \ + msg_zerocopy \ + nettest \ + psock_fanout \ + psock_snd \ + psock_tpacket \ + reuseaddr_ports_exhausted \ + reuseport_addr_any \ + rxtimestamp \ + sctp_hello \ + skf_net_off \ + so_netns_cookie \ + so_rcv_listener \ + so_txtime \ + socket \ + stress_reuseport_listen \ + tcp_fastopen_backup_key \ + tcp_inq \ + tcp_mmap \ + tfo \ + timestamping \ + toeplitz \ + txring_overwrite \ + txtimestamp \ + udpgso \ + udpgso_bench_rx \ + udpgso_bench_tx \ +# end of TEST_GEN_FILES + +TEST_GEN_PROGS := \ + bind_timewait \ + bind_wildcard \ + epoll_busy_poll \ + ipv6_fragmentation \ + proc_net_pktgen \ + reuseaddr_conflict \ + reuseport_bpf \ + reuseport_bpf_cpu \ + reuseport_bpf_numa \ + reuseport_dualstack \ + sk_bind_sendto_listen \ + sk_connect_zero_addr \ + sk_so_peek_off \ + so_incoming_cpu \ + tap \ + tcp_port_share \ + tls \ + tun \ +# end of TEST_GEN_PROGS + +TEST_FILES := \ + fcnal-test.sh \ + in_netns.sh \ + lib.sh \ + settings \ + setup_loopback.sh \ + setup_veth.sh \ +# end of TEST_FILES # YNL files, must be before "include ..lib.mk" -YNL_GEN_FILES := busy_poller netlink-dumps +YNL_GEN_FILES := busy_poller +YNL_GEN_PROGS := netlink-dumps TEST_GEN_FILES += $(YNL_GEN_FILES) - -TEST_FILES := settings -TEST_FILES += in_netns.sh lib.sh setup_loopback.sh setup_veth.sh +TEST_GEN_PROGS += $(YNL_GEN_PROGS) TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index a4b61c6d0290..de805cbbdf69 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,12 @@ -CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect +CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end + +TEST_GEN_PROGS := \ + diag_uid \ + msg_oob \ + scm_inq \ + scm_pidfd \ + scm_rights \ + unix_connect \ +# end of TEST_GEN_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config index 37368567768c..b5429c15a53c 100644 --- a/tools/testing/selftests/net/af_unix/config +++ b/tools/testing/selftests/net/af_unix/config @@ -1,3 +1,3 @@ -CONFIG_UNIX=y CONFIG_AF_UNIX_OOB=y +CONFIG_UNIX=y CONFIG_UNIX_DIAG=m diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c index 9d22561e7b8f..fc467714387e 100644 --- a/tools/testing/selftests/net/af_unix/scm_inq.c +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -11,11 +11,6 @@ #define NR_CHUNKS 100 #define MSG_LEN 256 -struct scm_inq { - struct cmsghdr cmsghdr; - int inq; -}; - FIXTURE(scm_inq) { int fd[2]; @@ -70,35 +65,38 @@ static void send_chunks(struct __test_metadata *_metadata, static void recv_chunks(struct __test_metadata *_metadata, FIXTURE_DATA(scm_inq) *self) { + char cmsg_buf[CMSG_SPACE(sizeof(int))]; struct msghdr msg = {}; struct iovec iov = {}; - struct scm_inq cmsg; + struct cmsghdr *cmsg; char buf[MSG_LEN]; int i, ret; int inq; msg.msg_iov = &iov; msg.msg_iovlen = 1; - msg.msg_control = &cmsg; - msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq)); + msg.msg_control = cmsg_buf; + msg.msg_controllen = sizeof(cmsg_buf); iov.iov_base = buf; iov.iov_len = sizeof(buf); for (i = 0; i < NR_CHUNKS; i++) { memset(buf, 0, sizeof(buf)); - memset(&cmsg, 0, sizeof(cmsg)); + memset(cmsg_buf, 0, sizeof(cmsg_buf)); ret = recvmsg(self->fd[1], &msg, 0); ASSERT_EQ(MSG_LEN, ret); - ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg)); - ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len); - ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level); - ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type); + + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(NULL, cmsg); + ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg->cmsg_type); ret = ioctl(self->fd[1], SIOCINQ, &inq); ASSERT_EQ(0, ret); - ASSERT_EQ(cmsg.inq, inq); + ASSERT_EQ(*(int *)CMSG_DATA(cmsg), inq); } } diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index 37e034874034..ef2921988e5f 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -137,7 +137,6 @@ struct cmsg_data { static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res) { struct cmsghdr *cmsg; - int data = 0; if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { log_err("recvmsg: truncated"); @@ -243,7 +242,6 @@ static int cmsg_check_dead(int fd, int expected_pid) int data = 0; char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = { 0 }; - pid_t client_pid; struct pidfd_info info = { .mask = PIDFD_INFO_EXIT, }; diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index 8b015f16c03d..914f99d153ce 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -271,20 +271,11 @@ void __send_fd(struct __test_metadata *_metadata, { #define MSG "x" #define MSGLEN 1 - struct { - struct cmsghdr cmsghdr; - int fd[2]; - } cmsg = { - .cmsghdr = { - .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)), - .cmsg_level = SOL_SOCKET, - .cmsg_type = SCM_RIGHTS, - }, - .fd = { - self->fd[inflight * 2], - self->fd[inflight * 2], - }, + int fds[2] = { + self->fd[inflight * 2], + self->fd[inflight * 2], }; + char cmsg_buf[CMSG_SPACE(sizeof(fds))]; struct iovec iov = { .iov_base = MSG, .iov_len = MSGLEN, @@ -294,11 +285,18 @@ void __send_fd(struct __test_metadata *_metadata, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, - .msg_control = &cmsg, - .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)), + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf), }; + struct cmsghdr *cmsg; int ret; + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags); if (variant->disabled) { diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index b2c271b79240..c856d266c8f3 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -184,8 +184,8 @@ def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True): progs = [ p for p in progs if not p['orphaned'] ] if expected is not None: if len(progs) != expected: - fail(True, "%d BPF programs loaded, expected %d" % - (len(progs), expected)) + fail(True, "%d BPF programs loaded, expected %d\nLoaded Progs:\n%s" % + (len(progs), expected, pp.pformat(progs))) return progs def bpftool_map_list(expected=None, ns=""): diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index a825e628aee7..ded9b925865e 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -491,7 +491,8 @@ int main(int argc, char *argv[]) if (err) { fprintf(stderr, "Can't resolve address [%s]:%s\n", opt.host, opt.service); - return ERN_SOCK_CREATE; + err = ERN_SOCK_CREATE; + goto err_free_buff; } if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP) @@ -500,8 +501,8 @@ int main(int argc, char *argv[]) fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto); if (fd < 0) { fprintf(stderr, "Can't open socket: %s\n", strerror(errno)); - freeaddrinfo(ai); - return ERN_RESOLVE; + err = ERN_RESOLVE; + goto err_free_info; } if (opt.sock.proto == IPPROTO_ICMP) { @@ -574,6 +575,9 @@ int main(int argc, char *argv[]) err_out: close(fd); +err_free_info: freeaddrinfo(ai); +err_free_buff: + free(buf); return err; } diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index c24417d0047b..1e1f253118f5 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -1,129 +1,130 @@ -CONFIG_USER_NS=y -CONFIG_NET_NS=y +CONFIG_AMT=m +CONFIG_BAREUDP=m CONFIG_BONDING=m CONFIG_BPF_SYSCALL=y -CONFIG_TEST_BPF=m -CONFIG_NUMA=y -CONFIG_RPS=y -CONFIG_SYSFS=y -CONFIG_PROC_SYSCTL=y -CONFIG_NET_VRF=y -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_IPV6=y -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_VETH=y -CONFIG_NET_IPVTI=y -CONFIG_IPV6_VTI=y -CONFIG_DUMMY=y -CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_CAN=m +CONFIG_CAN_DEV=m +CONFIG_CAN_VXCAN=m +CONFIG_CRYPTO_ARIA=y CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_BTF_MODULES=n -CONFIG_VLAN_8021Q=y +CONFIG_DUMMY=y CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y -CONFIG_NET_FOU=y -CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_NETFILTER_XTABLES_LEGACY=y -CONFIG_NF_CONNTRACK=m -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_SIT=y -CONFIG_NF_NAT=m +CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_IPTABLES_LEGACY=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_IPTABLES_LEGACY=m CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_RAW=m -CONFIG_IP_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES_LEGACY=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_SCTP=m +CONFIG_IPV6=y CONFIG_IPV6_GRE=m +CONFIG_IPV6_ILA=m +CONFIG_IPV6_IOAM6_LWTUNNEL=y +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_RPL_LWTUNNEL=y CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPV6_SIT=y +CONFIG_IPV6_VTI=y +CONFIG_IPVLAN=m +CONFIG_KALLSYMS=y +CONFIG_L2TP=m CONFIG_L2TP_ETH=m CONFIG_L2TP_IP=m -CONFIG_L2TP=m CONFIG_L2TP_V3=y CONFIG_MACSEC=m CONFIG_MACVLAN=y CONFIG_MACVTAP=y CONFIG_MPLS=y +CONFIG_MPLS_IPTUNNEL=m +CONFIG_MPLS_ROUTING=m CONFIG_MPTCP=y -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_IPV6=y -CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_NAT=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_TARGET_HL=m -CONFIG_NETFILTER_XT_NAT=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_U32=m -CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NETDEVSIM=m +CONFIG_NET_DROP_MONITOR=m +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_XTABLES_LEGACY=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_NAT=m +CONFIG_NETFILTER_XT_TARGET_HL=m +CONFIG_NET_FOU=y +CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPIP=y +CONFIG_NET_IPVTI=y +CONFIG_NETKIT=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_NS=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_SCH_ETF=m +CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_FQ=m -CONFIG_NET_SCH_ETF=m +CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_NETEM=y CONFIG_NET_SCH_PRIO=m -CONFIG_NFT_COMPAT=m +CONFIG_NET_VRF=y +CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_OVS=y CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_NAT=m +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_COMPAT=m +CONFIG_NFT_NAT=m +CONFIG_NUMA=y CONFIG_OPENVSWITCH=m CONFIG_OPENVSWITCH_GENEVE=m CONFIG_OPENVSWITCH_GRE=m CONFIG_OPENVSWITCH_VXLAN=m +CONFIG_PROC_SYSCTL=y CONFIG_PSAMPLE=m +CONFIG_RPS=y +CONFIG_SYSFS=y CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m -CONFIG_KALLSYMS=y +CONFIG_TEST_BPF=m CONFIG_TLS=m CONFIG_TRACEPOINTS=y -CONFIG_NET_DROP_MONITOR=m -CONFIG_NETDEVSIM=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_CLS_FLOWER=m -CONFIG_NET_ACT_TUNNEL_KEY=m -CONFIG_NET_ACT_MIRRED=m -CONFIG_BAREUDP=m -CONFIG_IPV6_IOAM6_LWTUNNEL=y -CONFIG_CRYPTO_SM4_GENERIC=y -CONFIG_AMT=m CONFIG_TUN=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VLAN_8021Q=y CONFIG_VXLAN=m -CONFIG_IP_SCTP=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IPVLAN=m -CONFIG_CAN=m -CONFIG_CAN_DEV=m -CONFIG_CAN_VXCAN=m -CONFIG_NETKIT=y -CONFIG_NET_PKTGEN=m -CONFIG_IPV6_ILA=m -CONFIG_IPV6_RPL_LWTUNNEL=y diff --git a/tools/testing/selftests/net/fcnal-ipv4.sh b/tools/testing/selftests/net/fcnal-ipv4.sh new file mode 100755 index 000000000000..82f9c867c3e8 --- /dev/null +++ b/tools/testing/selftests/net/fcnal-ipv4.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t ipv4 diff --git a/tools/testing/selftests/net/fcnal-ipv6.sh b/tools/testing/selftests/net/fcnal-ipv6.sh new file mode 100755 index 000000000000..ab1fc7aa3caf --- /dev/null +++ b/tools/testing/selftests/net/fcnal-ipv6.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t ipv6 diff --git a/tools/testing/selftests/net/fcnal-other.sh b/tools/testing/selftests/net/fcnal-other.sh new file mode 100755 index 000000000000..a840cf80b32e --- /dev/null +++ b/tools/testing/selftests/net/fcnal-other.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t other diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 4fcc38907e48..844a580ae74e 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -189,7 +189,7 @@ show_hint() kill_procs() { killall nettest ping ping6 >/dev/null 2>&1 - sleep 1 + slowwait 2 sh -c 'test -z "$(pgrep '"'^(nettest|ping|ping6)$'"')"' } set_ping_group() @@ -424,6 +424,8 @@ create_ns() ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.accept_dad=0 } # create veth pair to connect namespaces and apply addresses. @@ -875,7 +877,7 @@ ipv4_tcp_md5_novrf() # basic use case log_start run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -883,7 +885,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" @@ -891,7 +893,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -899,7 +901,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" @@ -910,7 +912,7 @@ ipv4_tcp_md5_novrf() # client in prefix log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" @@ -918,7 +920,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" @@ -926,7 +928,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -943,7 +945,7 @@ ipv4_tcp_md5() # basic use case log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -951,7 +953,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -959,7 +961,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -967,7 +969,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since server config differs from client" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -978,7 +980,7 @@ ipv4_tcp_md5() # client in prefix log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" @@ -986,7 +988,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -994,7 +996,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -1005,14 +1007,14 @@ ipv4_tcp_md5() log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" @@ -1020,7 +1022,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" @@ -1028,21 +1030,21 @@ ipv4_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" @@ -1050,7 +1052,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" @@ -1058,7 +1060,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" @@ -1082,14 +1084,14 @@ test_ipv4_md5_vrf__vrf_server__no_bind_ifindex() log_start show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection" log_start show_hint "Binding both the socket and the key is not required but it works" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection" } @@ -1103,25 +1105,25 @@ test_ipv4_md5_vrf__global_server__bind_ifindex0() log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection" @@ -1193,7 +1195,7 @@ ipv4_tcp_novrf() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -1201,7 +1203,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1221,13 +1223,13 @@ ipv4_tcp_novrf() do log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" @@ -1249,7 +1251,7 @@ ipv4_tcp_novrf() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -1257,7 +1259,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1266,7 +1268,7 @@ ipv4_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -1274,7 +1276,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Global server, device client, local connection" @@ -1283,7 +1285,7 @@ ipv4_tcp_novrf() log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" done @@ -1291,7 +1293,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -1323,19 +1325,19 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 1 "Global server" log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1352,7 +1354,7 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, local connection" @@ -1374,14 +1376,14 @@ ipv4_tcp_vrf() log_start show_hint "client socket should be bound to VRF" run_cmd nettest -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" log_start show_hint "client socket should be bound to VRF" run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -1396,7 +1398,7 @@ ipv4_tcp_vrf() log_start show_hint "client socket should be bound to device" run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1406,7 +1408,7 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since client is not bound to VRF" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" done @@ -1418,13 +1420,13 @@ ipv4_tcp_vrf() do log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} log_test_addr ${a} $? 0 "Client, VRF bind" log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" @@ -1443,7 +1445,7 @@ ipv4_tcp_vrf() do log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" done @@ -1451,26 +1453,26 @@ ipv4_tcp_vrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" log_start show_hint "Should fail 'No route to host' since client is out of VRF scope" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" } @@ -1509,7 +1511,7 @@ ipv4_udp_novrf() do log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -1522,7 +1524,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1533,31 +1535,31 @@ ipv4_udp_novrf() do log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device bind" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device send via cmsg" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF, with connect()" @@ -1580,7 +1582,7 @@ ipv4_udp_novrf() do log_start run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -1588,7 +1590,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1597,7 +1599,7 @@ ipv4_udp_novrf() log_start show_hint "Should fail 'Connection refused' since address is out of device scope" run_cmd nettest -s -D -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -1605,25 +1607,25 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -C -r ${a} log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" @@ -1636,28 +1638,28 @@ ipv4_udp_novrf() log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 2 "Global server, device client, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" @@ -1667,7 +1669,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -1709,19 +1711,19 @@ ipv4_udp_vrf() log_start show_hint "Fails because ingress is in a VRF and global server is disabled" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 1 "Global server" log_start run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1733,7 +1735,7 @@ ipv4_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is out of scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 1 "Global server, VRF client, local connection" done @@ -1741,26 +1743,26 @@ ipv4_udp_vrf() a=${NSA_IP} log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection" a=${NSA_IP} log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1775,19 +1777,19 @@ ipv4_udp_vrf() do log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1802,13 +1804,13 @@ ipv4_udp_vrf() # log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -d ${VRF} -D -r ${NSB_IP} -1 ${NSA_IP} log_test $? 0 "VRF client" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -d ${NSA_DEV} -D -r ${NSB_IP} -1 ${NSA_IP} log_test $? 0 "Enslaved device client" @@ -1829,31 +1831,31 @@ ipv4_udp_vrf() a=${NSA_IP} log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1861,7 +1863,7 @@ ipv4_udp_vrf() do log_start run_cmd nettest -D -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" done @@ -1870,7 +1872,7 @@ ipv4_udp_vrf() do log_start run_cmd nettest -s -D -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" done @@ -2093,7 +2095,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2107,7 +2109,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2120,7 +2122,7 @@ ipv4_rt() a=${NSA_IP} log_start run_cmd nettest ${varg} -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2134,7 +2136,7 @@ ipv4_rt() # log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP} & sleep 3 run_cmd ip link del ${VRF} @@ -2145,7 +2147,7 @@ ipv4_rt() log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP} & sleep 3 run_cmd ip link del ${VRF} @@ -2161,7 +2163,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2175,7 +2177,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2189,7 +2191,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2200,7 +2202,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2211,7 +2213,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2561,7 +2563,7 @@ ipv6_tcp_md5_novrf() # basic use case log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -2569,7 +2571,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" @@ -2577,7 +2579,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -2585,7 +2587,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" @@ -2596,7 +2598,7 @@ ipv6_tcp_md5_novrf() # client in prefix log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" @@ -2604,7 +2606,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" @@ -2612,7 +2614,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -2629,7 +2631,7 @@ ipv6_tcp_md5() # basic use case log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -2637,7 +2639,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -2645,7 +2647,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -2653,7 +2655,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since server config differs from client" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -2664,7 +2666,7 @@ ipv6_tcp_md5() # client in prefix log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" @@ -2672,7 +2674,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -2680,7 +2682,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -2691,14 +2693,14 @@ ipv6_tcp_md5() log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" @@ -2706,7 +2708,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" @@ -2714,21 +2716,21 @@ ipv6_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" @@ -2736,7 +2738,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" @@ -2744,7 +2746,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" @@ -2772,7 +2774,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -2793,7 +2795,7 @@ ipv6_tcp_novrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 0 "Client" done @@ -2802,7 +2804,7 @@ ipv6_tcp_novrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" done @@ -2822,7 +2824,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -2830,7 +2832,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -2839,7 +2841,7 @@ ipv6_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -6 -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -2847,7 +2849,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" @@ -2856,7 +2858,7 @@ ipv6_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" done @@ -2865,7 +2867,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" done @@ -2898,7 +2900,7 @@ ipv6_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server" done @@ -2907,7 +2909,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -2916,7 +2918,7 @@ ipv6_tcp_vrf() a=${NSA_LINKIP6}%${NSB_DEV} log_start run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2924,7 +2926,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -2943,7 +2945,7 @@ ipv6_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, local connection" @@ -2964,7 +2966,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -2973,7 +2975,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -2982,13 +2984,13 @@ ipv6_tcp_vrf() a=${NSA_LINKIP6}%${NSB_DEV} log_start run_cmd nettest -6 -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2996,7 +2998,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -3016,7 +3018,7 @@ ipv6_tcp_vrf() log_start show_hint "Fails 'Connection refused' since client is not in VRF" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" done @@ -3029,7 +3031,7 @@ ipv6_tcp_vrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} log_test_addr ${a} $? 0 "Client, VRF bind" done @@ -3038,7 +3040,7 @@ ipv6_tcp_vrf() log_start show_hint "Fails since VRF device does not allow linklocal addresses" run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} log_test_addr ${a} $? 1 "Client, VRF bind" @@ -3046,7 +3048,7 @@ ipv6_tcp_vrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" done @@ -3071,7 +3073,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" done @@ -3079,7 +3081,7 @@ ipv6_tcp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" @@ -3087,13 +3089,13 @@ ipv6_tcp_vrf() log_start show_hint "Should fail since unbound client is out of VRF scope" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" @@ -3101,7 +3103,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" done @@ -3141,13 +3143,13 @@ ipv6_udp_novrf() do log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -3155,7 +3157,7 @@ ipv6_udp_novrf() a=${NSA_LO_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -3165,7 +3167,7 @@ ipv6_udp_novrf() #log_start #show_hint "Should fail since loopback address is out of scope" #run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - #sleep 1 + wait_local_port_listen ${NSA} 12345 udp #run_cmd_nsb nettest -6 -D -r ${a} #log_test_addr ${a} $? 1 "Device server" @@ -3185,25 +3187,25 @@ ipv6_udp_novrf() do log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device bind" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device send via cmsg" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device bind via IPV6_UNICAST_IF" @@ -3225,7 +3227,7 @@ ipv6_udp_novrf() do log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -3233,7 +3235,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -3242,7 +3244,7 @@ ipv6_udp_novrf() log_start show_hint "Should fail 'Connection refused' since address is out of device scope" run_cmd nettest -6 -s -D -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Device server, local connection" done @@ -3250,19 +3252,19 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -C -r ${a} log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -S -r ${a} log_test_addr ${a} $? 0 "Global server, device client via IPV6_UNICAST_IF, local connection" @@ -3271,28 +3273,28 @@ ipv6_udp_novrf() log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" done @@ -3300,7 +3302,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3314,7 +3316,7 @@ ipv6_udp_novrf() run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${NSA_IP6} log_test $? 0 "UDP in - LLA to GUA" @@ -3338,7 +3340,7 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Global server" done @@ -3347,7 +3349,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -3356,7 +3358,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" done @@ -3378,7 +3380,7 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 1 "Global server, VRF client, local conn" done @@ -3387,7 +3389,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" done @@ -3396,25 +3398,25 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 1 "Global server, device client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -3429,7 +3431,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -3438,7 +3440,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -3447,7 +3449,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" done @@ -3465,7 +3467,7 @@ ipv6_udp_vrf() # log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6} log_test $? 0 "VRF client" @@ -3476,7 +3478,7 @@ ipv6_udp_vrf() log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6} log_test $? 0 "Enslaved device client" @@ -3491,13 +3493,13 @@ ipv6_udp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" #log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3505,13 +3507,13 @@ ipv6_udp_vrf() a=${VRF_IP6} log_start run_cmd nettest -6 -D -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3527,25 +3529,25 @@ ipv6_udp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3557,7 +3559,7 @@ ipv6_udp_vrf() # link local addresses log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6} log_test $? 0 "Global server, linklocal IP" @@ -3568,7 +3570,7 @@ ipv6_udp_vrf() log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6} log_test $? 0 "Enslaved device client, linklocal IP" @@ -3579,7 +3581,7 @@ ipv6_udp_vrf() log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6} log_test $? 0 "Enslaved device client, local conn - linklocal IP" @@ -3592,7 +3594,7 @@ ipv6_udp_vrf() run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${NSA_IP6} log_test $? 0 "UDP in - LLA to GUA" @@ -3771,7 +3773,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3785,7 +3787,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3799,7 +3801,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3814,7 +3816,7 @@ ipv6_rt() # log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP6} & sleep 3 run_cmd ip link del ${VRF} @@ -3825,7 +3827,7 @@ ipv6_rt() log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP6} & sleep 3 run_cmd ip link del ${VRF} @@ -3842,7 +3844,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3856,7 +3858,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3869,7 +3871,7 @@ ipv6_rt() a=${NSA_IP6} log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3880,7 +3882,7 @@ ipv6_rt() log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3891,7 +3893,7 @@ ipv6_rt() log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3950,7 +3952,7 @@ netfilter_tcp_reset() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" done @@ -3968,7 +3970,7 @@ netfilter_icmp() do log_start run_cmd nettest ${arg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${arg} -r ${a} log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach" done @@ -4007,7 +4009,7 @@ netfilter_tcp6_reset() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" done @@ -4025,7 +4027,7 @@ netfilter_icmp6() do log_start run_cmd nettest -6 -s ${arg} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 ${arg} -r ${a} log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach" done @@ -4221,12 +4223,12 @@ use_case_snat_on_vrf() run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF} run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} & - sleep 1 + wait_local_port_listen ${NSB} ${port} tcp run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port} log_test $? 0 "IPv4 TCP connection over VRF with SNAT" run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} & - sleep 1 + wait_local_port_listen ${NSB} ${port} tcp run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port} log_test $? 0 "IPv6 TCP connection over VRF with SNAT" @@ -4272,6 +4274,7 @@ EOF TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter" TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter" TESTS_OTHER="use_cases" +# note: each TEST_ group needs a dedicated runner, e.g. fcnal-ipv4.sh PAUSE_ON_FAIL=no PAUSE=no @@ -4302,6 +4305,8 @@ elif [ "$TESTS" = "ipv4" ]; then TESTS="$TESTS_IPV4" elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" +elif [ "$TESTS" = "other" ]; then + TESTS="$TESTS_OTHER" fi check_gen_prog "nettest" diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh index c159230c9b62..0b8a2465dd04 100755 --- a/tools/testing/selftests/net/fdb_notify.sh +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -40,16 +40,16 @@ do_test_dup() test_dup_bridge() { - ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add br up type bridge vlan_filtering 1 do_test_dup add "bridge" dev br self do_test_dup del "bridge" dev br self } test_dup_vxlan_self() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_set_master vx br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_set_master vx br do_test_dup add "vxlan" dev vx self dst 192.0.2.1 do_test_dup del "vxlan" dev vx self dst 192.0.2.1 @@ -57,9 +57,9 @@ test_dup_vxlan_self() test_dup_vxlan_master() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_set_master vx br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_set_master vx br do_test_dup add "vxlan master" dev vx master do_test_dup del "vxlan master" dev vx master @@ -67,8 +67,8 @@ test_dup_vxlan_master() test_dup_macvlan_self() { - ip_link_add dd up type dummy - ip_link_add mv up link dd type macvlan mode passthru + adf_ip_link_add dd up type dummy + adf_ip_link_add mv up link dd type macvlan mode passthru do_test_dup add "macvlan self" dev mv self do_test_dup del "macvlan self" dev mv self @@ -76,10 +76,10 @@ test_dup_macvlan_self() test_dup_macvlan_master() { - ip_link_add br up type bridge vlan_filtering 1 - ip_link_add dd up type dummy - ip_link_add mv up link dd type macvlan mode passthru - ip_link_set_master mv br + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add dd up type dummy + adf_ip_link_add mv up link dd type macvlan mode passthru + adf_ip_link_set_master mv br do_test_dup add "macvlan master" dev mv self do_test_dup del "macvlan master" dev mv self diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index b39f748c2572..2b0a90581e2f 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -467,8 +467,8 @@ ipv6_fdb_grp_fcnal() log_test $? 0 "Get Fdb nexthop group by id" # fdb nexthop group can only contain fdb nexthops - run_cmd "$IP nexthop add id 63 via 2001:db8:91::4" - run_cmd "$IP nexthop add id 64 via 2001:db8:91::5" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::5 dev veth1" run_cmd "$IP nexthop add id 103 group 63/64 fdb" log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" @@ -494,6 +494,26 @@ ipv6_fdb_grp_fcnal() run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb" log_test $? 2 "Fdb Nexthop with encap" + # Replace FDB nexthop to non-FDB and vice versa + run_cmd "$IP nexthop add id 70 via 2001:db8:91::2 fdb" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1" + log_test $? 0 "Replace FDB nexthop to non-FDB nexthop" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 fdb" + log_test $? 0 "Replace non-FDB nexthop to FDB nexthop" + + # Replace FDB nexthop address while in a group + run_cmd "$IP nexthop add id 71 group 70 fdb" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::3 fdb" + log_test $? 0 "Replace FDB nexthop address while in a group" + + # Cannot replace FDB nexthop to non-FDB and vice versa while in a group + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1" + log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group" + run_cmd "$IP nexthop add id 72 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 73 group 72" + run_cmd "$IP nexthop replace id 72 via 2001:db8:91::2 fdb" + log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group" + run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100" run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" log_test $? 0 "Fdb mac add with nexthop group" @@ -547,15 +567,15 @@ ipv4_fdb_grp_fcnal() log_test $? 0 "Get Fdb nexthop group by id" # fdb nexthop group can only contain fdb nexthops - run_cmd "$IP nexthop add id 14 via 172.16.1.2" - run_cmd "$IP nexthop add id 15 via 172.16.1.3" + run_cmd "$IP nexthop add id 14 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 15 via 172.16.1.3 dev veth1" run_cmd "$IP nexthop add id 103 group 14/15 fdb" log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" # Non fdb nexthop group can not contain fdb nexthops run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb" run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb" - run_cmd "$IP nexthop add id 104 group 14/15" + run_cmd "$IP nexthop add id 104 group 16/17" log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops" # fdb nexthop cannot have blackhole @@ -574,6 +594,26 @@ ipv4_fdb_grp_fcnal() run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb" log_test $? 2 "Fdb Nexthop with encap" + # Replace FDB nexthop to non-FDB and vice versa + run_cmd "$IP nexthop add id 18 via 172.16.1.2 fdb" + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1" + log_test $? 0 "Replace FDB nexthop to non-FDB nexthop" + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 fdb" + log_test $? 0 "Replace non-FDB nexthop to FDB nexthop" + + # Replace FDB nexthop address while in a group + run_cmd "$IP nexthop add id 19 group 18 fdb" + run_cmd "$IP nexthop replace id 18 via 172.16.1.3 fdb" + log_test $? 0 "Replace FDB nexthop address while in a group" + + # Cannot replace FDB nexthop to non-FDB and vice versa while in a group + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1" + log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group" + run_cmd "$IP nexthop add id 20 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 21 group 20" + run_cmd "$IP nexthop replace id 20 via 172.16.1.2 fdb" + log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group" + run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100" run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" log_test $? 0 "Fdb mac add with nexthop group" @@ -582,7 +622,7 @@ ipv4_fdb_grp_fcnal() run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self" log_test $? 255 "Fdb mac add with nexthop" - run_cmd "$IP ro add 172.16.0.0/22 nhid 15" + run_cmd "$IP ro add 172.16.0.0/22 nhid 16" log_test $? 2 "Route add with fdb nexthop" run_cmd "$IP ro add 172.16.0.0/22 nhid 103" diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index d7bb2e80e88c..ff4a00d91a26 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = bridge_fdb_learning_limit.sh \ +TEST_PROGS := \ + bridge_activity_notify.sh \ + bridge_fdb_learning_limit.sh \ + bridge_fdb_local_vlan_0.sh \ bridge_igmp.sh \ bridge_locked_port.sh \ bridge_mdb.sh \ @@ -18,64 +21,64 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ gre_custom_multipath_hash.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ - gre_multipath_nh_res.sh \ - gre_multipath_nh.sh \ gre_multipath.sh \ + gre_multipath_nh.sh \ + gre_multipath_nh_res.sh \ ip6_forward_instats_vrf.sh \ ip6gre_custom_multipath_hash.sh \ + ip6gre_flat.sh \ ip6gre_flat_key.sh \ ip6gre_flat_keys.sh \ - ip6gre_flat.sh \ + ip6gre_hier.sh \ ip6gre_hier_key.sh \ ip6gre_hier_keys.sh \ - ip6gre_hier.sh \ ip6gre_inner_v4_multipath.sh \ ip6gre_inner_v6_multipath.sh \ + ipip_flat_gre.sh \ ipip_flat_gre_key.sh \ ipip_flat_gre_keys.sh \ - ipip_flat_gre.sh \ + ipip_hier_gre.sh \ ipip_hier_gre_key.sh \ ipip_hier_gre_keys.sh \ - ipip_hier_gre.sh \ lib_sh_test.sh \ local_termination.sh \ min_max_mtu.sh \ + mirror_gre.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ mirror_gre_bridge_1d_vlan.sh \ - mirror_gre_bridge_1q_lag.sh \ mirror_gre_bridge_1q.sh \ + mirror_gre_bridge_1q_lag.sh \ mirror_gre_changes.sh \ mirror_gre_flower.sh \ mirror_gre_lag_lacp.sh \ mirror_gre_neigh.sh \ mirror_gre_nh.sh \ - mirror_gre.sh \ - mirror_gre_vlan_bridge_1q.sh \ mirror_gre_vlan.sh \ + mirror_gre_vlan_bridge_1q.sh \ mirror_vlan.sh \ no_forwarding.sh \ pedit_dsfield.sh \ pedit_ip.sh \ pedit_l4port.sh \ - q_in_vni_ipv6.sh \ q_in_vni.sh \ + q_in_vni_ipv6.sh \ + router.sh \ router_bridge.sh \ router_bridge_1d.sh \ router_bridge_1d_lag.sh \ router_bridge_lag.sh \ + router_bridge_pvid_vlan_upper.sh \ router_bridge_vlan.sh \ router_bridge_vlan_upper.sh \ - router_bridge_pvid_vlan_upper.sh \ router_bridge_vlan_upper_pvid.sh \ router_broadcast.sh \ - router_mpath_nh_res.sh \ router_mpath_nh.sh \ + router_mpath_nh_res.sh \ router_mpath_seed.sh \ router_multicast.sh \ router_multipath.sh \ router_nh.sh \ - router.sh \ router_vid_1.sh \ sch_ets.sh \ sch_red.sh \ @@ -85,32 +88,34 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ skbedit_priority.sh \ tc_actions.sh \ tc_chains.sh \ - tc_flower_router.sh \ tc_flower.sh \ - tc_flower_l2_miss.sh \ tc_flower_cfm.sh \ + tc_flower_l2_miss.sh \ tc_flower_port_range.sh \ + tc_flower_router.sh \ tc_mpls_l2vpn.sh \ tc_police.sh \ tc_shblocks.sh \ tc_tunnel_key.sh \ tc_vlan_modify.sh \ - vxlan_asymmetric_ipv6.sh \ vxlan_asymmetric.sh \ + vxlan_asymmetric_ipv6.sh \ + vxlan_bridge_1d.sh \ vxlan_bridge_1d_ipv6.sh \ - vxlan_bridge_1d_port_8472_ipv6.sh \ vxlan_bridge_1d_port_8472.sh \ - vxlan_bridge_1d.sh \ + vxlan_bridge_1d_port_8472_ipv6.sh \ + vxlan_bridge_1q.sh \ vxlan_bridge_1q_ipv6.sh \ vxlan_bridge_1q_mc_ul.sh \ - vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ - vxlan_bridge_1q.sh \ + vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_reserved.sh \ + vxlan_symmetric.sh \ vxlan_symmetric_ipv6.sh \ - vxlan_symmetric.sh +# end of TEST_PROGS -TEST_FILES := devlink_lib.sh \ +TEST_FILES := \ + devlink_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ ip6gre_lib.sh \ @@ -125,10 +130,12 @@ TEST_FILES := devlink_lib.sh \ sch_ets_tests.sh \ sch_tbf_core.sh \ sch_tbf_etsprio.sh \ - tc_common.sh + tc_common.sh \ +# end of TEST_FILES TEST_INCLUDES := \ + $(wildcard ../lib/sh/*.sh) \ ../lib.sh \ - $(wildcard ../lib/sh/*.sh) +# end of TEST_INCLUDES include ../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README index 7b41cff993ad..392a5a91ed37 100644 --- a/tools/testing/selftests/net/forwarding/README +++ b/tools/testing/selftests/net/forwarding/README @@ -57,6 +57,21 @@ o Code shall be checked using ShellCheck [1] prior to submission. 1. https://www.shellcheck.net/ +Cleanups +-------- + +o lib.sh brings in defer.sh (by way of ../lib.sh) by default. Consider + making use of the defer primitive to schedule automatic cleanups. This + makes it harder to forget to remove a temporary netdevice, kill a running + process or perform other cleanup when the test script is interrupted. + +o When adding a helper that dirties the environment, but schedules all + necessary cleanups through defer, consider prefixing it adf_ for + consistency with lib.sh and ../lib.sh helpers. This serves as an + immediately visible bit of documentation about the helper API. + +o Definitely do the above for any new code in lib.sh, if practical. + Customization ============= diff --git a/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh new file mode 100755 index 000000000000..522a5b1b046c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | 192.0.2.1/28 | | 192.0.2.2/28 | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + new_inactive_test + existing_active_test + norefresh_test +" + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + adf_simple_if_init "$h1" 192.0.2.1/28 +} + +h2_create() +{ + adf_simple_if_init "$h2" 192.0.2.2/28 +} + +switch_create() +{ + adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 \ + ageing_time "$LOW_AGEING_TIME" + adf_ip_link_set_up br1 + + adf_ip_link_set_master "$swp1" br1 + adf_ip_link_set_up "$swp1" + + adf_ip_link_set_master "$swp2" br1 + adf_ip_link_set_up "$swp2" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + adf_vrf_prepare + + h1_create + h2_create + switch_create +} + +fdb_active_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q -v "inactive" +} + +fdb_inactive_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" +} + +new_inactive_test() +{ + local mac="00:11:22:33:44:55" + + # Add a new FDB entry as static and inactive and check that it + # becomes active upon traffic. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static activity_notify inactive + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_err $? "FDB entry not present as \"inactive\" when should" + + $MZ "$h1" -c 1 -p 64 -a "$mac" -b bcast -t ip -q + + busywait "$BUSYWAIT_TIMEOUT" fdb_active_wait "$mac" + check_err $? "FDB entry present as \"inactive\" when should not" + + log_test "Transition from inactive to active" + + bridge fdb del "$mac" dev "$swp1" master +} + +existing_active_test() +{ + local mac="00:11:22:33:44:55" + local ageing_time + + # Enable activity notifications on an existing dynamic FDB entry and + # check that it becomes inactive after the ageing time passed. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master dynamic + bridge fdb replace "$mac" dev "$swp1" master static activity_notify norefresh + + bridge -d fdb get "$mac" br br1 | grep -q "activity_notify" + check_err $? "FDB entry not present as \"activity_notify\" when should" + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_fail $? "FDB entry present as \"inactive\" when should not" + + ageing_time=$(bridge_ageing_time_get br1) + slowwait $((ageing_time * 2)) fdb_inactive_wait "$mac" + check_err $? "FDB entry not present as \"inactive\" when should" + + log_test "Transition from active to inactive" + + bridge fdb del "$mac" dev "$swp1" master +} + +norefresh_test() +{ + local mac="00:11:22:33:44:55" + local updated_time + + # Check that the "updated" time is reset when replacing an FDB entry + # without the "norefresh" keyword and that it is not reset when + # replacing with the "norefresh" keyword. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static + sleep 1 + + bridge fdb replace "$mac" dev "$swp1" master static activity_notify + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -ne 0 ]]; then + check_err 1 "\"updated\" time was not reset when should" + fi + + sleep 1 + bridge fdb replace "$mac" dev "$swp1" master static norefresh + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -eq 0 ]]; then + check_err 1 "\"updated\" time was reset when should not" + fi + + log_test "Resetting of \"updated\" time" + + bridge fdb del "$mac" dev "$swp1" master +} + +if ! bridge fdb help 2>&1 | grep -q "activity_notify"; then + echo "SKIP: iproute2 too old, missing bridge FDB activity notification control" + exit "$ksft_skip" +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh new file mode 100755 index 000000000000..694de8ba97e4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh @@ -0,0 +1,387 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +-----------------------+ +-----------------------+ +# | H1 (vrf) | | H2 (vrf) | | H3 (vrf) | +# | + $h1 | | + $h2 | | + $h3 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | | | 192.0.2.18/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 | +# | | | | | | | | | +# +----|------------------+ +----|------------------+ +----|------------------+ +# | | | +# +----|-------------------------|-------------------------|------------------+ +# | +--|-------------------------|------------------+ | | +# | | + $swp1 + $swp2 | + $swp3 | +# | | | 192.0.2.17/28 | +# | | BR1 (802.1q) | 2001:db8:2::1/64 | +# | | 192.0.2.3/28 | | +# | | 2001:db8:1::3/64 | | +# | +-----------------------------------------------+ SW | +# +---------------------------------------------------------------------------+ +# +#shellcheck disable=SC2317 # SC doesn't see our uses of functions. +#shellcheck disable=SC2034 # ... and global variables + +ALL_TESTS=" + test_d_no_sharing + test_d_sharing + test_q_no_sharing + test_q_sharing + test_addr_set +" + +NUM_NETIFS=6 +source lib.sh + +pMAC=00:11:22:33:44:55 +bMAC=00:11:22:33:44:66 +mMAC=00:11:22:33:44:77 +xMAC=00:11:22:33:44:88 + +host_create() +{ + local h=$1; shift + local ipv4=$1; shift + local ipv6=$1; shift + + adf_simple_if_init "$h" "$ipv4" "$ipv6" + adf_ip_route_add vrf "v$h" 192.0.2.16/28 nexthop via 192.0.2.3 + adf_ip_route_add vrf "v$h" 2001:db8:2::/64 nexthop via 2001:db8:1::3 +} + +h3_create() +{ + adf_simple_if_init "$h3" 192.0.2.18/28 2001:db8:2::2/64 + adf_ip_route_add vrf "v$h3" 192.0.2.0/28 nexthop via 192.0.2.17 + adf_ip_route_add vrf "v$h3" 2001:db8:1::/64 nexthop via 2001:db8:2::1 + + tc qdisc add dev "$h3" clsact + defer tc qdisc del dev "$h3" clsact + + tc filter add dev "$h3" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port 4096 \ + action pass + defer tc filter del dev "$h3" ingress proto ip pref 104 + + tc qdisc add dev "$h2" clsact + defer tc qdisc del dev "$h2" clsact + + tc filter add dev "$h2" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port 4096 \ + action pass + defer tc filter del dev "$h2" ingress proto ip pref 104 +} + +switch_create() +{ + adf_ip_link_set_up "$swp1" + + adf_ip_link_set_up "$swp2" + + adf_ip_addr_add "$swp3" 192.0.2.17/28 + adf_ip_addr_add "$swp3" 2001:db8:2::1/64 + adf_ip_link_set_up "$swp3" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + adf_vrf_prepare + adf_forwarding_enable + + host_create "$h1" 192.0.2.1/28 2001:db8:1::1/64 + host_create "$h2" 192.0.2.2/28 2001:db8:1::2/64 + h3_create + + switch_create +} + +adf_bridge_configure() +{ + local dev + + adf_ip_addr_add br 192.0.2.3/28 + adf_ip_addr_add br 2001:db8:1::3/64 + + adf_bridge_vlan_add dev br vid 1 pvid untagged self + adf_bridge_vlan_add dev br vid 2 self + adf_bridge_vlan_add dev br vid 3 self + + for dev in "$swp1" "$swp2"; do + adf_ip_link_set_master "$dev" br + adf_bridge_vlan_add dev "$dev" vid 1 pvid untagged + adf_bridge_vlan_add dev "$dev" vid 2 + adf_bridge_vlan_add dev "$dev" vid 3 + done +} + +adf_bridge_create() +{ + local mac + + adf_ip_link_add br up type bridge vlan_default_pvid 0 "$@" + mac=$(mac_get br) + adf_bridge_configure + adf_ip_link_set_addr br "$mac" +} + +check_fdb_local_vlan_0_support() +{ + if adf_ip_link_add XXbr up type bridge vlan_filtering 1 \ + fdb_local_vlan_0 1 &>/dev/null; then + return 0 + fi + + log_test_skip "FDB sharing" \ + "iproute 2 or the kernel do not support fdb_local_vlan_0" +} + +check_mac_presence() +{ + local should_fail=$1; shift + local dev=$1; shift + local vlan=$1; shift + local mac + + mac=$(mac_get "$dev") + + if ((vlan == 0)); then + vlan=null + fi + + bridge -j fdb show dev "$dev" | + jq -e --arg mac "$mac" --argjson vlan "$vlan" \ + '.[] | select(.mac == $mac) | select(.vlan == $vlan)' > /dev/null + check_err_fail "$should_fail" $? "FDB dev $dev vid $vlan addr $mac exists" +} + +do_sharing_test() +{ + local should_fail=$1; shift + local what=$1; shift + local dev + + RET=0 + + for dev in "$swp1" "$swp2" br; do + check_mac_presence 0 "$dev" 0 + check_mac_presence "$should_fail" "$dev" 1 + check_mac_presence "$should_fail" "$dev" 2 + check_mac_presence "$should_fail" "$dev" 3 + done + + log_test "$what" +} + +do_end_to_end_test() +{ + local mac=$1; shift + local what=$1; shift + local probe_dev=${1-$h3}; shift + local expect=${1-10}; shift + + local t0 + local t1 + local dd + + RET=0 + + # In mausezahn, use $dev MAC as the destination MAC. In the MAC sharing + # context, that will cause an FDB miss on VLAN 1 and prompt a second + # lookup in VLAN 0. + + t0=$(tc_rule_stats_get "$probe_dev" 104 ingress) + + $MZ "$h1" -c 10 -p 64 -a own -b "$mac" \ + -A 192.0.2.1 -B 192.0.2.18 -t udp "dp=4096,sp=2048" -q + sleep 1 + + t1=$(tc_rule_stats_get "$probe_dev" 104 ingress) + dd=$((t1 - t0)) + + ((dd == expect)) + check_err $? "Expected $expect packets on $probe_dev got $dd" + + log_test "$what" +} + +do_tests() +{ + local should_fail=$1; shift + local what=$1; shift + local swp1_mac + local br_mac + + swp1_mac=$(mac_get "$swp1") + br_mac=$(mac_get br) + + do_sharing_test "$should_fail" "$what" + do_end_to_end_test "$swp1_mac" "$what: end to end, $swp1 MAC" + do_end_to_end_test "$br_mac" "$what: end to end, br MAC" +} + +bridge_standard() +{ + local vlan_filtering=$1; shift + + if ((vlan_filtering)); then + echo 802.1q + else + echo 802.1d + fi +} + +nonexistent_fdb_test() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + # We expect flooding, so $h2 should get the traffic. + do_end_to_end_test "$xMAC" "$standard: Nonexistent FDB" "$h2" +} + +misleading_fdb_test() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + defer_scope_push + # Add an FDB entry on VLAN 0. The lookup on VLAN-aware bridge + # shouldn't pick this up even with fdb_local_vlan_0 enabled, so + # the traffic should be flooded. This all holds on + # vlan_filtering bridge, on non-vlan_filtering one the FDB entry + # is expected to be found as usual, no flooding takes place. + # + # Adding only on VLAN 0 is a bit tricky, because bridge is + # trying to be nice and interprets the request as if the FDB + # should be added on each VLAN. + + bridge fdb add "$mMAC" dev "$swp1" master + bridge fdb del "$mMAC" dev "$swp1" vlan 1 master + bridge fdb del "$mMAC" dev "$swp1" vlan 2 master + bridge fdb del "$mMAC" dev "$swp1" vlan 3 master + + local expect=$((vlan_filtering ? 10 : 0)) + do_end_to_end_test "$mMAC" \ + "$standard: Lookup of non-local MAC on VLAN 0" \ + "$h2" "$expect" + defer_scope_pop +} + +change_mac() +{ + local dev=$1; shift + local mac=$1; shift + local cur_mac + + cur_mac=$(mac_get "$dev") + + log_info "Change $dev MAC $cur_mac -> $mac" + adf_ip_link_set_addr "$dev" "$mac" + defer log_info "Change $dev MAC back" +} + +do_test_no_sharing() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + adf_bridge_create vlan_filtering "$vlan_filtering" + setup_wait + + do_tests 0 "$standard, no FDB sharing" + + change_mac "$swp1" "$pMAC" + change_mac br "$bMAC" + + do_tests 0 "$standard, no FDB sharing after MAC change" + + in_defer_scope check_fdb_local_vlan_0_support || return + + log_info "Set fdb_local_vlan_0=1" + ip link set dev br type bridge fdb_local_vlan_0 1 + + do_tests 1 "$standard, fdb sharing after toggle" +} + +do_test_sharing() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + in_defer_scope check_fdb_local_vlan_0_support || return + + adf_bridge_create vlan_filtering "$vlan_filtering" fdb_local_vlan_0 1 + setup_wait + + do_tests 1 "$standard, FDB sharing" + + nonexistent_fdb_test "$vlan_filtering" + misleading_fdb_test "$vlan_filtering" + + change_mac "$swp1" "$pMAC" + change_mac br "$bMAC" + + do_tests 1 "$standard, FDB sharing after MAC change" + + log_info "Set fdb_local_vlan_0=0" + ip link set dev br type bridge fdb_local_vlan_0 0 + + do_tests 0 "$standard, No FDB sharing after toggle" +} + +test_d_no_sharing() +{ + do_test_no_sharing 0 +} + +test_d_sharing() +{ + do_test_sharing 0 +} + +test_q_no_sharing() +{ + do_test_no_sharing 1 +} + +test_q_sharing() +{ + do_test_sharing 1 +} + +adf_addr_set_bridge_create() +{ + adf_ip_link_add br up type bridge vlan_filtering 0 + adf_ip_link_set_addr br "$(mac_get br)" + adf_bridge_configure +} + +test_addr_set() +{ + adf_addr_set_bridge_create + setup_wait + + do_end_to_end_test "$(mac_get br)" "NET_ADDR_SET: end to end, br MAC" +} + +trap cleanup EXIT + +setup_prepare +tests_run diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 18fd69d8d937..ce64518aaa11 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -1,24 +1,23 @@ +CONFIG_BPF_SYSCALL=y CONFIG_BRIDGE=m -CONFIG_VLAN_8021Q=m -CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE_IGMP_SNOOPING=y -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_NET_VRF=m -CONFIG_BPF_SYSCALL=y +CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y CONFIG_IPV6=y CONFIG_IPV6_GRE=m CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y CONFIG_MACVLAN=m +CONFIG_NAMESPACES=y CONFIG_NET_ACT_CT=m +CONFIG_NET_ACT_GACT=m CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m CONFIG_NET_ACT_PEDIT=m @@ -27,29 +26,30 @@ CONFIG_NET_ACT_SAMPLE=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_VLAN=m +CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m -CONFIG_NET_CLS_BASIC=m CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_META=m +CONFIG_NETFILTER=y CONFIG_NET_IPGRE=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPIP=m +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_NS=y CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_ACT_GACT=m CONFIG_NET_SCH_PRIO=m CONFIG_NET_SCH_RED=m CONFIG_NET_SCH_TBF=m CONFIG_NET_TC_SKB_EXT=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y -CONFIG_NETFILTER=y +CONFIG_NET_VRF=m CONFIG_NF_CONNTRACK=m CONFIG_NF_FLOW_TABLE=m CONFIG_NF_TABLES=m CONFIG_VETH=m -CONFIG_NAMESPACES=y -CONFIG_NET_NS=y +CONFIG_VLAN_8021Q=m CONFIG_VXLAN=m CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh index 7d531f7091e6..5dbfab0e23e3 100755 --- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -226,7 +226,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:4::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh index dda11a4a9450..b4f17a5bbc61 100755 --- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -321,7 +321,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh index 49fa94b53a1c..25036e38043c 100755 --- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh +++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh @@ -95,7 +95,7 @@ ipv6_in_too_big_err() # Send too big packets ip vrf exec $vrf_name \ - $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -s 1300 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) test "$((t1 - t0))" -ne 0 @@ -131,7 +131,7 @@ ipv6_in_addr_err() # Disable forwarding temporary while sending the packet sysctl -qw net.ipv6.conf.all.forwarding=0 ip vrf exec $vrf_name \ - $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null sysctl -qw net.ipv6.conf.all.forwarding=1 local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) @@ -150,7 +150,7 @@ ipv6_in_discard() # Add a policy to discard ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block ip vrf exec $vrf_name \ - $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null ip xfrm policy del dst 2001:1:2::2/128 dir fwd local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh index e28b4a079e52..b24acfa52a3a 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -323,7 +323,7 @@ send_flowlabel() # Generate 16384 echo requests, each with a random flow label. ip vrf exec v$h1 sh -c \ "for _ in {1..16384}; do \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \ done" } diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 890b3374dacd..a9034f0bb58b 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -571,30 +571,6 @@ wait_for_dev() fi } -cmd_jq() -{ - local cmd=$1 - local jq_exp=$2 - local jq_opts=$3 - local ret - local output - - output="$($cmd)" - # it the command fails, return error right away - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - output=$(echo $output | jq -r $jq_opts "$jq_exp") - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - echo $output - # return success only in case of non-empty output - [ ! -z "$output" ] -} - pre_cleanup() { if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then @@ -623,6 +599,12 @@ vrf_cleanup() ip -4 rule del pref 32765 } +adf_vrf_prepare() +{ + vrf_prepare + defer vrf_cleanup +} + __last_tb_id=0 declare -A __TB_IDS @@ -735,6 +717,12 @@ simple_if_fini() vrf_destroy $vrf_name } +adf_simple_if_init() +{ + simple_if_init "$@" + defer simple_if_fini "$@" +} + tunnel_create() { local name=$1; shift @@ -1035,6 +1023,12 @@ forwarding_restore() sysctl_restore net.ipv4.conf.all.forwarding } +adf_forwarding_enable() +{ + forwarding_enable + defer forwarding_restore +} + declare -A MTU_ORIG mtu_set() { @@ -1291,8 +1285,8 @@ ping_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING $args -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT $dip &> /dev/null } ping_test() @@ -1322,8 +1316,8 @@ ping6_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING6 $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING6 $args -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT $dip &> /dev/null } ping6_test() diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index a20d22d1df36..8d4ae6c952a1 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -238,7 +238,7 @@ test_lag_slave() ip neigh flush dev br1 setup_wait_dev $up_dev setup_wait_dev $host_dev - $ARPING -I br1 192.0.2.130 -qfc 1 + $ARPING -I br1 -qfc 1 192.0.2.130 sleep 2 mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10" diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index 1b902cc579f6..a21c771908b3 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -196,7 +196,7 @@ test_span_gre_forbidden_egress() bridge vlan add dev $swp3 vid 555 # Re-prime FDB - $ARPING -I br1.555 192.0.2.130 -fqc 1 + $ARPING -I br1.555 -fqc 1 192.0.2.130 sleep 1 quick_test_span_gre_dir $tundev @@ -290,7 +290,7 @@ test_span_gre_fdb_roaming() bridge fdb del dev $swp2 $h3mac vlan 555 master 2>/dev/null # Re-prime FDB - $ARPING -I br1.555 192.0.2.130 -fqc 1 + $ARPING -I br1.555 -fqc 1 192.0.2.130 sleep 1 quick_test_span_gre_dir $tundev diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh index 8f9922c695b0..0453210271dc 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh @@ -165,8 +165,7 @@ h1_create() { local i; - simple_if_init $h1 - defer simple_if_fini $h1 + adf_simple_if_init $h1 mtu_set $h1 9900 defer mtu_restore $h1 @@ -182,8 +181,7 @@ h2_create() { local i - simple_if_init $h2 - defer simple_if_fini $h2 + adf_simple_if_init $h2 mtu_set $h2 9900 defer mtu_restore $h2 @@ -251,8 +249,7 @@ setup_prepare() put=$swp2 hut=$h2 - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh index af166662b78a..f2a3d9254642 100755 --- a/tools/testing/selftests/net/forwarding/sch_red.sh +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -52,8 +52,7 @@ PKTSZ=1400 h1_create() { - simple_if_init $h1 192.0.2.1/28 - defer simple_if_fini $h1 192.0.2.1/28 + adf_simple_if_init $h1 192.0.2.1/28 mtu_set $h1 10000 defer mtu_restore $h1 @@ -65,8 +64,7 @@ h1_create() h2_create() { - simple_if_init $h2 192.0.2.2/28 - defer simple_if_fini $h2 192.0.2.2/28 + adf_simple_if_init $h2 192.0.2.2/28 mtu_set $h2 10000 defer mtu_restore $h2 @@ -74,8 +72,7 @@ h2_create() h3_create() { - simple_if_init $h3 192.0.2.3/28 - defer simple_if_fini $h3 192.0.2.3/28 + adf_simple_if_init $h3 192.0.2.3/28 mtu_set $h3 10000 defer mtu_restore $h3 @@ -125,8 +122,7 @@ setup_prepare() h3_mac=$(mac_get $h3) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh index ec309a5086bc..070c17faa9e4 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -59,8 +59,7 @@ host_create() local dev=$1; shift local host=$1; shift - simple_if_init $dev - defer simple_if_fini $dev + adf_simple_if_init $dev mtu_set $dev 10000 defer mtu_restore $dev @@ -149,8 +148,7 @@ setup_prepare() h2_mac=$(mac_get $h2) - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare h1_create h2_create diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh index 462db0b603e7..6a570d256e07 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -119,16 +119,15 @@ source lib.sh h1_create() { - simple_if_init "$h1" - defer simple_if_fini "$h1" + adf_simple_if_init "$h1" - ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 - ip_link_set_up "$h1.10" - ip_addr_add "$h1.10" 192.0.2.1/28 + adf_ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 + adf_ip_link_set_up "$h1.10" + adf_ip_addr_add "$h1.10" 192.0.2.1/28 - ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 - ip_link_set_up "$h1.20" - ip_addr_add "$h1.20" 2001:db8:1::1/64 + adf_ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 + adf_ip_link_set_up "$h1.20" + adf_ip_addr_add "$h1.20" 2001:db8:1::1/64 } install_capture() @@ -152,51 +151,51 @@ install_capture() h2_create() { # $h2 - ip_link_set_up "$h2" + adf_ip_link_set_up "$h2" # H2 vrf_create "v$h2" defer vrf_destroy "v$h2" - ip_link_set_up "v$h2" + adf_ip_link_set_up "v$h2" # br2 - ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 - ip_link_set_master br2 "v$h2" - ip_link_set_up br2 + adf_ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_set_master br2 "v$h2" + adf_ip_link_set_up br2 # $h2 - ip_link_set_master "$h2" br2 + adf_ip_link_set_master "$h2" br2 install_capture "$h2" # v1$h2 - ip_link_set_up "v1$h2" - ip_link_set_master "v1$h2" br2 + adf_ip_link_set_up "v1$h2" + adf_ip_link_set_master "v1$h2" br2 } h3_create() { # $h3 - ip_link_set_up "$h3" + adf_ip_link_set_up "$h3" # H3 vrf_create "v$h3" defer vrf_destroy "v$h3" - ip_link_set_up "v$h3" + adf_ip_link_set_up "v$h3" # br3 - ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 - ip_link_set_master br3 "v$h3" - ip_link_set_up br3 + adf_ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_set_master br3 "v$h3" + adf_ip_link_set_up br3 # $h3 - ip_link_set_master "$h3" br3 + adf_ip_link_set_master "$h3" br3 install_capture "$h3" # v1$h3 - ip_link_set_up "v1$h3" - ip_link_set_master "v1$h3" br3 + adf_ip_link_set_up "v1$h3" + adf_ip_link_set_master "v1$h3" br3 } switch_create() @@ -205,35 +204,35 @@ switch_create() # br1 swp1_mac=$(mac_get "$swp1") - ip_link_add br1 type bridge vlan_filtering 1 \ + adf_ip_link_add br1 type bridge vlan_filtering 1 \ vlan_default_pvid 0 mcast_snooping 0 - ip_link_set_addr br1 "$swp1_mac" - ip_link_set_up br1 + adf_ip_link_set_addr br1 "$swp1_mac" + adf_ip_link_set_up br1 # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test. - ip_link_add "X$IPMR" up type dummy + adf_ip_link_add "X$IPMR" up type dummy # IPMR - ip_link_add "$IPMR" up type dummy - ip_addr_add "$IPMR" 192.0.2.100/28 - ip_addr_add "$IPMR" 2001:db8:4::1/64 + adf_ip_link_add "$IPMR" up type dummy + adf_ip_addr_add "$IPMR" 192.0.2.100/28 + adf_ip_addr_add "$IPMR" 2001:db8:4::1/64 # $swp1 - ip_link_set_up "$swp1" - ip_link_set_master "$swp1" br1 - bridge_vlan_add vid 10 dev "$swp1" - bridge_vlan_add vid 20 dev "$swp1" + adf_ip_link_set_up "$swp1" + adf_ip_link_set_master "$swp1" br1 + adf_bridge_vlan_add vid 10 dev "$swp1" + adf_bridge_vlan_add vid 20 dev "$swp1" # $swp2 - ip_link_set_up "$swp2" - ip_addr_add "$swp2" 192.0.2.33/28 - ip_addr_add "$swp2" 2001:db8:2::1/64 + adf_ip_link_set_up "$swp2" + adf_ip_addr_add "$swp2" 192.0.2.33/28 + adf_ip_addr_add "$swp2" 2001:db8:2::1/64 # $swp3 - ip_link_set_up "$swp3" - ip_addr_add "$swp3" 192.0.2.65/28 - ip_addr_add "$swp3" 2001:db8:3::1/64 + adf_ip_link_set_up "$swp3" + adf_ip_addr_add "$swp3" 192.0.2.65/28 + adf_ip_addr_add "$swp3" 2001:db8:3::1/64 } vx_create() @@ -241,11 +240,11 @@ vx_create() local name=$1; shift local vid=$1; shift - ip_link_add "$name" up type vxlan dstport "$VXPORT" \ + adf_ip_link_add "$name" up type vxlan dstport "$VXPORT" \ nolearning noudpcsum tos inherit ttl 16 \ "$@" - ip_link_set_master "$name" br1 - bridge_vlan_add vid "$vid" dev "$name" pvid untagged + adf_ip_link_set_master "$name" br1 + adf_bridge_vlan_add vid "$vid" dev "$name" pvid untagged } export -f vx_create @@ -290,39 +289,38 @@ ns_init_common() local ipv6_host=$1; shift # v2$h2 / v2$h3 - ip_link_set_up "$if_in" - ip_addr_add "$if_in" "$ipv4_in" - ip_addr_add "$if_in" "$ipv6_in" + adf_ip_link_set_up "$if_in" + adf_ip_addr_add "$if_in" "$ipv4_in" + adf_ip_addr_add "$if_in" "$ipv6_in" # br1 - ip_link_add br1 type bridge vlan_filtering 1 \ + adf_ip_link_add br1 type bridge vlan_filtering 1 \ vlan_default_pvid 0 mcast_snooping 0 - ip_link_set_up br1 + adf_ip_link_set_up br1 # vx10, vx20 vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in" vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in" # w1 - ip_link_add w1 type veth peer name w2 - ip_link_set_master w1 br1 - ip_link_set_up w1 - bridge_vlan_add vid 10 dev w1 - bridge_vlan_add vid 20 dev w1 + adf_ip_link_add w1 type veth peer name w2 + adf_ip_link_set_master w1 br1 + adf_ip_link_set_up w1 + adf_bridge_vlan_add vid 10 dev w1 + adf_bridge_vlan_add vid 20 dev w1 # w2 - simple_if_init w2 - defer simple_if_fini w2 + adf_simple_if_init w2 # w2.10 - ip_link_add w2.10 master vw2 link w2 type vlan id 10 - ip_link_set_up w2.10 - ip_addr_add w2.10 "$ipv4_host" + adf_ip_link_add w2.10 master vw2 link w2 type vlan id 10 + adf_ip_link_set_up w2.10 + adf_ip_addr_add w2.10 "$ipv4_host" # w2.20 - ip_link_add w2.20 master vw2 link w2 type vlan id 20 - ip_link_set_up w2.20 - ip_addr_add w2.20 "$ipv6_host" + adf_ip_link_add w2.20 master vw2 link w2 type vlan id 20 + adf_ip_link_set_up w2.20 + adf_ip_addr_add w2.20 "$ipv6_host" } export -f ns_init_common @@ -371,14 +369,11 @@ setup_prepare() swp3=${NETIFS[p5]} h3=${NETIFS[p6]} - vrf_prepare - defer vrf_cleanup + adf_vrf_prepare + adf_forwarding_enable - forwarding_enable - defer forwarding_restore - - ip_link_add "v1$h2" type veth peer name "v2$h2" - ip_link_add "v1$h3" type veth peer name "v2$h3" + adf_ip_link_add "v1$h2" type veth peer name "v2$h2" + adf_ip_link_add "v1$h3" type veth peer name "v2$h3" h1_create h2_create @@ -720,7 +715,7 @@ ipv4_mcroute_fdb_oif0_sep() { adf_install_sg_sep - ip_addr_add lo 192.0.2.120/28 + adf_ip_addr_add lo 192.0.2.120/28 vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" @@ -731,7 +726,7 @@ ipv4_mcroute_fdb_oif0_sep_rx() { adf_install_sg_sep_rx lo - ip_addr_add lo 192.0.2.120/28 + adf_ip_addr_add lo 192.0.2.120/28 vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" @@ -742,7 +737,7 @@ ipv4_mcroute_fdb_sep_rx() { adf_install_sg_sep_rx lo - ip_addr_add lo 192.0.2.120/28 + adf_ip_addr_add lo 192.0.2.120/28 vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute bridge fdb del dev vx10 00:00:00:00:00:00 bridge fdb add \ @@ -754,7 +749,7 @@ ipv6_mcroute_fdb_sep_rx() { adf_install_sg_sep_rx "X$IPMR" - ip_addr_add "X$IPMR" 2001:db8:5::1/64 + adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64 vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute bridge -6 fdb del dev vx20 00:00:00:00:00:00 bridge -6 fdb add dev vx20 00:00:00:00:00:00 \ diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh index 46c31794b91b..709845123727 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh @@ -47,8 +47,7 @@ source lib.sh h1_create() { - simple_if_init $h1 192.0.2.1/28 - defer simple_if_fini $h1 192.0.2.1/28 + adf_simple_if_init $h1 192.0.2.1/28 tc qdisc add dev $h1 clsact defer tc qdisc del dev $h1 clsact @@ -60,24 +59,23 @@ h1_create() switch_create() { - ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 # Make sure the bridge uses the MAC address of the local port and not # that of the VxLAN's device. - ip_link_set_addr br1 $(mac_get $swp1) - ip_link_set_up br1 + adf_ip_link_set_addr br1 $(mac_get $swp1) + adf_ip_link_set_up br1 - ip_link_set_up $rp1 - ip_addr_add $rp1 192.0.2.17/28 - ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 + adf_ip_link_set_up $rp1 + adf_ip_addr_add $rp1 192.0.2.17/28 + adf_ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 - ip_link_set_master $swp1 br1 - ip_link_set_up $swp1 + adf_ip_link_set_master $swp1 br1 + adf_ip_link_set_up $swp1 } vrp2_create() { - simple_if_init $rp2 192.0.2.18/28 - defer simple_if_fini $rp2 192.0.2.18/28 + adf_simple_if_init $rp2 192.0.2.18/28 } setup_prepare() @@ -88,11 +86,8 @@ setup_prepare() rp1=${NETIFS[p3]} rp2=${NETIFS[p4]} - vrf_prepare - defer vrf_cleanup - - forwarding_enable - defer forwarding_restore + adf_vrf_prepare + adf_forwarding_enable h1_create switch_create @@ -200,10 +195,10 @@ vxlan_ping_do() vxlan_device_add() { - ip_link_add vx1 up type vxlan id 1000 \ + adf_ip_link_add vx1 up type vxlan id 1000 \ local 192.0.2.17 dstport "$VXPORT" \ nolearning noudpcsum tos inherit ttl 100 "$@" - ip_link_set_master vx1 br1 + adf_ip_link_set_master vx1 br1 } vxlan_all_reserved_bits() diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index d5824eadea10..2b1d9f2b3e9e 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -93,6 +93,7 @@ static bool tx_socket = true; static int tcp_offset = -1; static int total_hdr_len = -1; static int ethhdr_proto = -1; +static bool ipip; static const int num_flush_id_cases = 6; static void vlog(const char *fmt, ...) @@ -114,7 +115,9 @@ static void setup_sock_filter(int fd) int ipproto_off, opt_ipproto_off; int next_off; - if (proto == PF_INET) + if (ipip) + next_off = sizeof(struct iphdr) + offsetof(struct iphdr, protocol); + else if (proto == PF_INET) next_off = offsetof(struct iphdr, protocol); else next_off = offsetof(struct ipv6hdr, nexthdr); @@ -244,7 +247,7 @@ static void fill_datalinklayer(void *buf) eth->h_proto = ethhdr_proto; } -static void fill_networklayer(void *buf, int payload_len) +static void fill_networklayer(void *buf, int payload_len, int protocol) { struct ipv6hdr *ip6h = buf; struct iphdr *iph = buf; @@ -254,7 +257,7 @@ static void fill_networklayer(void *buf, int payload_len) ip6h->version = 6; ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); - ip6h->nexthdr = IPPROTO_TCP; + ip6h->nexthdr = protocol; ip6h->hop_limit = 8; if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1) error(1, errno, "inet_pton source ip6"); @@ -266,7 +269,7 @@ static void fill_networklayer(void *buf, int payload_len) iph->version = 4; iph->ihl = 5; iph->ttl = 8; - iph->protocol = IPPROTO_TCP; + iph->protocol = protocol; iph->tot_len = htons(sizeof(struct tcphdr) + payload_len + sizeof(struct iphdr)); iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ @@ -313,9 +316,19 @@ static void create_packet(void *buf, int seq_offset, int ack_offset, { memset(buf, 0, total_hdr_len); memset(buf + total_hdr_len, 'a', payload_len); + fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, payload_len, fin); - fill_networklayer(buf + ETH_HLEN, payload_len); + + if (ipip) { + fill_networklayer(buf + ETH_HLEN, payload_len + sizeof(struct iphdr), + IPPROTO_IPIP); + fill_networklayer(buf + ETH_HLEN + sizeof(struct iphdr), + payload_len, IPPROTO_TCP); + } else { + fill_networklayer(buf + ETH_HLEN, payload_len, IPPROTO_TCP); + } + fill_datalinklayer(buf); } @@ -416,6 +429,13 @@ static void recompute_packet(char *buf, char *no_ext, int extlen) iph->tot_len = htons(ntohs(iph->tot_len) + extlen); iph->check = 0; iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + + if (ipip) { + iph += 1; + iph->tot_len = htons(ntohs(iph->tot_len) + extlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } } else { ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); } @@ -670,7 +690,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(9); break; - case 3: /* DF=0, Fixed - should not coalesce */ + case 3: /* DF=0, Fixed - should coalesce */ iph1->frag_off &= ~htons(IP_DF); iph1->id = htons(8); @@ -777,7 +797,7 @@ static void send_fragment4(int fd, struct sockaddr_ll *daddr) */ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); - fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN); + fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP); fill_datalinklayer(buf); iph->frag_off = htons(0x6000); // DF = 1, MF = 1 @@ -1071,7 +1091,7 @@ static void gro_sender(void) * and min ipv6hdr size. Like MAX_HDR_SIZE, * MAX_PAYLOAD is defined with the larger header of the two. */ - int offset = proto == PF_INET ? 20 : 0; + int offset = (proto == PF_INET && !ipip) ? 20 : 0; int remainder = (MAX_PAYLOAD + offset) % MSS; send_large(txfd, &daddr, remainder); @@ -1188,10 +1208,9 @@ static void gro_receiver(void) correct_payload[0] = PAYLOAD_LEN * 2; check_recv_pkts(rxfd, correct_payload, 1); - printf("DF=0, Fixed - should not coalesce: "); - correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); + printf("DF=0, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); correct_payload[0] = PAYLOAD_LEN * 2; @@ -1222,7 +1241,7 @@ static void gro_receiver(void) check_recv_pkts(rxfd, correct_payload, 2); } } else if (strcmp(testname, "large") == 0) { - int offset = proto == PF_INET ? 20 : 0; + int offset = (proto == PF_INET && !ipip) ? 20 : 0; int remainder = (MAX_PAYLOAD + offset) % MSS; correct_payload[0] = (MAX_PAYLOAD + offset); @@ -1251,6 +1270,7 @@ static void parse_args(int argc, char **argv) { "iface", required_argument, NULL, 'i' }, { "ipv4", no_argument, NULL, '4' }, { "ipv6", no_argument, NULL, '6' }, + { "ipip", no_argument, NULL, 'e' }, { "rx", no_argument, NULL, 'r' }, { "saddr", required_argument, NULL, 's' }, { "smac", required_argument, NULL, 'S' }, @@ -1260,7 +1280,7 @@ static void parse_args(int argc, char **argv) }; int c; - while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "46d:D:ei:rs:S:t:v", opts, NULL)) != -1) { switch (c) { case '4': proto = PF_INET; @@ -1270,6 +1290,11 @@ static void parse_args(int argc, char **argv) proto = PF_INET6; ethhdr_proto = htons(ETH_P_IPV6); break; + case 'e': + ipip = true; + proto = PF_INET; + ethhdr_proto = htons(ETH_P_IP); + break; case 'd': addr4_dst = addr6_dst = optarg; break; @@ -1305,7 +1330,10 @@ int main(int argc, char **argv) { parse_args(argc, argv); - if (proto == PF_INET) { + if (ipip) { + tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2; + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (proto == PF_INET) { tcp_offset = ETH_HLEN + sizeof(struct iphdr); total_hdr_len = tcp_offset + sizeof(struct tcphdr); } else if (proto == PF_INET6) { diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh index 9e3f186bc2a1..4c5144c6f652 100755 --- a/tools/testing/selftests/net/gro.sh +++ b/tools/testing/selftests/net/gro.sh @@ -4,7 +4,7 @@ readonly SERVER_MAC="aa:00:00:00:00:02" readonly CLIENT_MAC="aa:00:00:00:00:01" readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") -readonly PROTOS=("ipv4" "ipv6") +readonly PROTOS=("ipv4" "ipv6" "ipip") dev="" test="all" proto="ipv4" diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile index 884cd2cc0681..4b6afc0fe9f8 100644 --- a/tools/testing/selftests/net/hsr/Makefile +++ b/tools/testing/selftests/net/hsr/Makefile @@ -2,7 +2,11 @@ top_srcdir = ../../../../.. -TEST_PROGS := hsr_ping.sh hsr_redbox.sh +TEST_PROGS := \ + hsr_ping.sh \ + hsr_redbox.sh \ +# end of TEST_PROGS + TEST_FILES += hsr_common.sh include ../../lib.mk diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config index 555a868743f0..205cc4d3d64b 100644 --- a/tools/testing/selftests/net/hsr/config +++ b/tools/testing/selftests/net/hsr/config @@ -1,6 +1,6 @@ +CONFIG_BRIDGE=y +CONFIG_HSR=y CONFIG_IPV6=y CONFIG_NET_SCH_NETEM=m -CONFIG_HSR=y CONFIG_VETH=y -CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=m diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 000000000000..267ef62b5c72 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Brett A C Sheffield <bacs@librecast.net> + * + * Kernel selftest for the IPv6 fragmentation regression which affected stable + * kernels: + * + * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com + * + * Commit: a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable + * without some prerequisite commits. + * + * This caused a regression when sending IPv6 UDP packets by preventing + * fragmentation and instead returning -1 (EMSGSIZE). + * + * This selftest demonstrates the issue by sending an IPv6 UDP packet to + * localhost (::1) on the loopback interface from the autoconfigured link-local + * address. + * + * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1 + * (EMSGSIZE) when the regression is present. + * + * The regression was not present in the mainline kernel, but add this test to + * catch similar breakage in future. + */ + +#define _GNU_SOURCE + +#include <error.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> +#include "../kselftest.h" + +#define MTU 1500 +#define LARGER_THAN_MTU 8192 + +static void setup(void) +{ + struct ifreq ifr = { + .ifr_name = "lo" + }; + int ctl; + + /* we need to set MTU, so do this in a namespace to play nicely */ + if (unshare(CLONE_NEWNET) == -1) + error(KSFT_FAIL, errno, "unshare"); + + ctl = socket(AF_LOCAL, SOCK_STREAM, 0); + if (ctl == -1) + error(KSFT_FAIL, errno, "socket"); + + /* ensure MTU is smaller than what we plan to send */ + ifr.ifr_mtu = MTU; + if (ioctl(ctl, SIOCSIFMTU, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: set MTU"); + + /* bring up interface */ + if (ioctl(ctl, SIOCGIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl SIOCGIFFLAGS"); + ifr.ifr_flags = ifr.ifr_flags | IFF_UP; + if (ioctl(ctl, SIOCSIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: bring interface up"); + + if (close(ctl) == -1) + error(KSFT_FAIL, errno, "close"); +} + +int main(void) +{ + struct in6_addr addr = { + .s6_addr[15] = 0x01, /* ::1 */ + }; + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = addr, + .sin6_port = htons(9) /* port 9/udp (DISCARD) */ + }; + static char buf[LARGER_THAN_MTU] = {0}; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = (struct sockaddr *)&sa, + .msg_namelen = sizeof(sa), + }; + ssize_t rc; + int s; + + printf("Testing IPv6 fragmentation\n"); + setup(); + s = socket(AF_INET6, SOCK_DGRAM, 0); +send_again: + rc = sendmsg(s, &msg, 0); + if (rc == -1) { + /* if interface wasn't ready, try again */ + if (errno == EADDRNOTAVAIL) { + usleep(1000); + goto send_again; + } + error(KSFT_FAIL, errno, "sendmsg"); + } else if (rc != LARGER_THAN_MTU) { + error(KSFT_FAIL, errno, "sendmsg returned %zi, expected %i", + rc, LARGER_THAN_MTU); + } + printf("[PASS] sendmsg() returned %zi\n", rc); + if (close(s) == -1) + error(KSFT_FAIL, errno, "close"); + return KSFT_PASS; +} diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index c7add0dc4c60..feba4ef69a54 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -543,31 +543,31 @@ require_command() fi } -ip_link_add() +adf_ip_link_add() { local name=$1; shift - ip link add name "$name" "$@" - defer ip link del dev "$name" + ip link add name "$name" "$@" && \ + defer ip link del dev "$name" } -ip_link_set_master() +adf_ip_link_set_master() { local member=$1; shift local master=$1; shift - ip link set dev "$member" master "$master" - defer ip link set dev "$member" nomaster + ip link set dev "$member" master "$master" && \ + defer ip link set dev "$member" nomaster } -ip_link_set_addr() +adf_ip_link_set_addr() { local name=$1; shift local addr=$1; shift local old_addr=$(mac_get "$name") - ip link set dev "$name" address "$addr" - defer ip link set dev "$name" address "$old_addr" + ip link set dev "$name" address "$addr" && \ + defer ip link set dev "$name" address "$old_addr" } ip_link_has_flag() @@ -585,44 +585,44 @@ ip_link_is_up() ip_link_has_flag "$1" UP } -ip_link_set_up() +adf_ip_link_set_up() { local name=$1; shift if ! ip_link_is_up "$name"; then - ip link set dev "$name" up - defer ip link set dev "$name" down + ip link set dev "$name" up && \ + defer ip link set dev "$name" down fi } -ip_link_set_down() +adf_ip_link_set_down() { local name=$1; shift if ip_link_is_up "$name"; then - ip link set dev "$name" down - defer ip link set dev "$name" up + ip link set dev "$name" down && \ + defer ip link set dev "$name" up fi } -ip_addr_add() +adf_ip_addr_add() { local name=$1; shift - ip addr add dev "$name" "$@" - defer ip addr del dev "$name" "$@" + ip addr add dev "$name" "$@" && \ + defer ip addr del dev "$name" "$@" } -ip_route_add() +adf_ip_route_add() { - ip route add "$@" - defer ip route del "$@" + ip route add "$@" && \ + defer ip route del "$@" } -bridge_vlan_add() +adf_bridge_vlan_add() { - bridge vlan add "$@" - defer bridge vlan del "$@" + bridge vlan add "$@" && \ + defer bridge vlan del "$@" } wait_local_port_listen() @@ -645,3 +645,27 @@ wait_local_port_listen() sleep 0.1 done } + +cmd_jq() +{ + local cmd=$1 + local jq_exp=$2 + local jq_opts=$3 + local ret + local output + + output="$($cmd)" + # it the command fails, return error right away + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + output=$(echo $output | jq -r $jq_opts "$jq_exp") + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output + # return success only in case of non-empty output + [ ! -z "$output" ] +} diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index 88c4bc461459..ce795bc0a1af 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -5,12 +5,16 @@ CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../../ -TEST_FILES := ../../../../../Documentation/netlink/specs -TEST_FILES += ../../../../net/ynl +TEST_FILES := \ + ../../../../net/ynl \ + ../../../../../Documentation/netlink/specs \ +# end of TEST_FILES -TEST_GEN_FILES += csum -TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) -TEST_GEN_FILES += xdp_helper +TEST_GEN_FILES := \ + $(patsubst %.c,%.o,$(wildcard *.bpf.c)) \ + csum \ + xdp_helper \ +# end of TEST_GEN_FILES TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 02be28dcc089..997b85cc216a 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -6,4 +6,4 @@ from .netns import NetNS, NetNSEnter from .nsim import * from .utils import * from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily -from .ynl import NetshaperFamily, DevlinkFamily +from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 8e35ed12ed9e..83b1574f7719 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -72,6 +72,11 @@ def ksft_true(a, comment=""): _fail("Check failed", a, "does not eval to True", comment) +def ksft_not_none(a, comment=""): + if a is None: + _fail("Check failed", a, "is None", comment) + + def ksft_in(a, b, comment=""): if a not in b: _fail("Check failed", a, "not in", b, comment) @@ -92,6 +97,11 @@ def ksft_ge(a, b, comment=""): _fail("Check failed", a, "<", b, comment) +def ksft_gt(a, b, comment=""): + if a <= b: + _fail("Check failed", a, "<=", b, comment) + + def ksft_lt(a, b, comment=""): if a >= b: _fail("Check failed", a, ">=", b, comment) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index f395c90fb0f1..cb40ecef9456 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -1,9 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -import errno import json as _json import os -import random import re import select import socket @@ -21,17 +19,19 @@ def fd_read_timeout(fd, timeout): rlist, _, _ = select.select([fd], [], [], timeout) if rlist: return os.read(fd, 1024) - else: - raise TimeoutError("Timeout waiting for fd read") + raise TimeoutError("Timeout waiting for fd read") class cmd: """ Execute a command on local or remote host. + @shell defaults to false, and class will try to split @comm into a list + if it's a string with spaces. + Use bkg() instead to run a command in the background. """ - def __init__(self, comm, shell=True, fail=True, ns=None, background=False, + def __init__(self, comm, shell=None, fail=True, ns=None, background=False, host=None, timeout=5, ksft_wait=None): if ns: comm = f'ip netns exec {ns} ' + comm @@ -45,6 +45,10 @@ class cmd: if host: self.proc = host.cmd(comm) else: + # If user doesn't explicitly request shell try to avoid it. + if shell is None and isinstance(comm, str) and ' ' in comm: + comm = comm.split() + # ksft_wait lets us wait for the background process to fully start, # we pass an FD to the child process, and wait for it to write back. # Similarly term_fd tells child it's time to exit. @@ -111,12 +115,13 @@ class bkg(cmd): with bkg("my_binary", ksft_wait=5): """ - def __init__(self, comm, shell=True, fail=None, ns=None, host=None, + def __init__(self, comm, shell=None, fail=None, ns=None, host=None, exit_wait=False, ksft_wait=None): super().__init__(comm, background=True, shell=shell, fail=fail, ns=ns, host=host, ksft_wait=ksft_wait) self.terminate = not exit_wait and not ksft_wait + self._exit_wait = exit_wait self.check_fail = fail if shell and self.terminate: @@ -127,7 +132,9 @@ class bkg(cmd): return self def __exit__(self, ex_type, ex_value, ex_tb): - return self.process(terminate=self.terminate, fail=self.check_fail) + # Force termination on exception + terminate = self.terminate or (self._exit_wait and ex_type) + return self.process(terminate=terminate, fail=self.check_fail) global_defer_queue = [] @@ -135,8 +142,6 @@ global_defer_queue = [] class defer: def __init__(self, func, *args, **kwargs): - global global_defer_queue - if not callable(func): raise Exception("defer created with un-callable object, did you call the function instead of passing its name?") @@ -224,11 +229,11 @@ def bpftrace(expr, json=None, ns=None, host=None, timeout=None): return cmd_obj -def rand_port(type=socket.SOCK_STREAM): +def rand_port(stype=socket.SOCK_STREAM): """ Get a random unprivileged port. """ - with socket.socket(socket.AF_INET6, type) as s: + with socket.socket(socket.AF_INET6, stype) as s: s.bind(("", 0)) return s.getsockname()[1] @@ -249,3 +254,21 @@ def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadlin if time.monotonic() > end: raise Exception("Waiting for port listen timed out") time.sleep(sleep) + + +def wait_file(fname, test_fn, sleep=0.005, deadline=5, encoding='utf-8'): + """ + Wait for file contents on the local system to satisfy a condition. + test_fn() should take one argument (file contents) and return whether + condition is met. + """ + end = time.monotonic() + deadline + + with open(fname, "r", encoding=encoding) as fp: + while True: + if test_fn(fp.read()): + break + fp.seek(0) + if time.monotonic() > end: + raise TimeoutError("Wait for file contents failed", fname) + time.sleep(sleep) diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 2b3a61ea3bfa..32c223e93b2c 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -61,3 +61,8 @@ class DevlinkFamily(YnlFamily): def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(), schema='', recv_size=recv_size) + +class PSPFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('psp.yaml')).as_posix(), + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/lib/sh/defer.sh b/tools/testing/selftests/net/lib/sh/defer.sh index 082f5d38321b..47ab78c4d465 100644 --- a/tools/testing/selftests/net/lib/sh/defer.sh +++ b/tools/testing/selftests/net/lib/sh/defer.sh @@ -1,6 +1,10 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Whether to pause and allow debugging when an executed deferred command has a +# non-zero exit code. +: "${DEFER_PAUSE_ON_FAIL:=no}" + # map[(scope_id,track,cleanup_id) -> cleanup_command] # track={d=default | p=priority} declare -A __DEFER__JOBS @@ -38,8 +42,20 @@ __defer__run() local track=$1; shift local defer_ix=$1; shift local defer_key=$(__defer__defer_key $track $defer_ix) + local ret + + eval ${__DEFER__JOBS[$defer_key]} + ret=$? + + if [[ "$DEFER_PAUSE_ON_FAIL" == yes && "$ret" -ne 0 ]]; then + echo "Deferred command (track $track index $defer_ix):" + echo " ${__DEFER__JOBS[$defer_key]}" + echo "... ended with an exit status of $ret" + echo "Hit enter to continue, 'q' to quit" + read a + [[ "$a" == q ]] && exit 1 + fi - ${__DEFER__JOBS[$defer_key]} unset __DEFER__JOBS[$defer_key] } @@ -49,7 +65,7 @@ __defer__schedule() local ndefers=$(__defer__ndefers $track) local ndefers_key=$(__defer__ndefer_key $track) local defer_key=$(__defer__defer_key $track $ndefers) - local defer="$@" + local defer="${@@Q}" __DEFER__JOBS[$defer_key]="$defer" __DEFER__NJOBS[$ndefers_key]=$((ndefers + 1)) diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c index 521ba38f2ddd..c368fc045f4b 100644 --- a/tools/testing/selftests/net/lib/xdp_native.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -14,6 +14,8 @@ #define MAX_PAYLOAD_LEN 5000 #define MAX_HDR_LEN 64 +extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym __weak; + enum { XDP_MODE = 0, XDP_PORT = 1, @@ -68,30 +70,57 @@ static void record_stats(struct xdp_md *ctx, __u32 stat_type) static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; struct udphdr *udph = NULL; - struct ethhdr *eth = data; + void *data, *data_end; + struct ethhdr *eth; + int err; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = eth = (void *)(long)ctx->data; if (data + sizeof(*eth) > data_end) return NULL; if (eth->h_proto == bpf_htons(ETH_P_IP)) { - struct iphdr *iph = data + sizeof(*eth); + struct iphdr *iph; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) + + sizeof(*udph)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + iph = data + sizeof(*eth); if (iph + 1 > (struct iphdr *)data_end || iph->protocol != IPPROTO_UDP) return NULL; - udph = (void *)eth + sizeof(*iph) + sizeof(*eth); - } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { - struct ipv6hdr *ipv6h = data + sizeof(*eth); + udph = data + sizeof(*iph) + sizeof(*eth); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) + + sizeof(*udph)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + ipv6h = data + sizeof(*eth); if (ipv6h + 1 > (struct ipv6hdr *)data_end || ipv6h->nexthdr != IPPROTO_UDP) return NULL; - udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + udph = data + sizeof(*ipv6h) + sizeof(*eth); } else { return NULL; } @@ -145,17 +174,34 @@ static void swap_machdr(void *data) static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; struct udphdr *udph = NULL; - struct ethhdr *eth = data; + void *data, *data_end; + struct ethhdr *eth; + int err; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = eth = (void *)(long)ctx->data; if (data + sizeof(*eth) > data_end) return XDP_PASS; if (eth->h_proto == bpf_htons(ETH_P_IP)) { - struct iphdr *iph = data + sizeof(*eth); - __be32 tmp_ip = iph->saddr; + struct iphdr *iph; + __be32 tmp_ip; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) + + sizeof(*udph)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + iph = data + sizeof(*eth); if (iph + 1 > (struct iphdr *)data_end || iph->protocol != IPPROTO_UDP) @@ -169,8 +215,10 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) return XDP_PASS; record_stats(ctx, STATS_RX); + eth = data; swap_machdr((void *)eth); + tmp_ip = iph->saddr; iph->saddr = iph->daddr; iph->daddr = tmp_ip; @@ -178,9 +226,19 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) return XDP_TX; - } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { - struct ipv6hdr *ipv6h = data + sizeof(*eth); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { struct in6_addr tmp_ipv6; + struct ipv6hdr *ipv6h; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) + + sizeof(*udph)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + ipv6h = data + sizeof(*eth); if (ipv6h + 1 > (struct ipv6hdr *)data_end || ipv6h->nexthdr != IPPROTO_UDP) @@ -194,6 +252,7 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) return XDP_PASS; record_stats(ctx, STATS_RX); + eth = data; swap_machdr((void *)eth); __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6)); @@ -361,7 +420,6 @@ static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) { - void *data = (void *)(long)ctx->data; struct udphdr *udph = NULL; __s32 *adjust_offset, *val; __u32 key, hdr_len; @@ -373,7 +431,8 @@ static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) if (!udph) return XDP_PASS; - hdr_len = (void *)udph - data + sizeof(struct udphdr); + hdr_len = (void *)udph - (void *)(long)ctx->data + + sizeof(struct udphdr); key = XDP_ADJST_OFFSET; adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key); if (!adjust_offset) @@ -513,8 +572,6 @@ static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; struct udphdr *udph_ptr = NULL; __u32 key, size, hdr_len; __s32 *val; @@ -525,7 +582,8 @@ static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) if (!udph_ptr) return XDP_PASS; - hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr); + hdr_len = (void *)udph_ptr - (void *)(long)ctx->data + + sizeof(struct udphdr); key = XDP_ADJST_OFFSET; val = bpf_map_lookup_elem(&map_xdp_setup, &key); diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 4c7e51336ab2..15d144a25d82 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -4,13 +4,31 @@ top_srcdir = ../../../../.. CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) -TEST_PROGS := mptcp_connect.sh mptcp_connect_mmap.sh mptcp_connect_sendfile.sh \ - mptcp_connect_checksum.sh pm_netlink.sh mptcp_join.sh diag.sh \ - simult_flows.sh mptcp_sockopt.sh userspace_pm.sh - -TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag - -TEST_FILES := mptcp_lib.sh settings +TEST_PROGS := \ + diag.sh \ + mptcp_connect.sh \ + mptcp_connect_checksum.sh \ + mptcp_connect_mmap.sh \ + mptcp_connect_sendfile.sh \ + mptcp_join.sh \ + mptcp_sockopt.sh \ + pm_netlink.sh \ + simult_flows.sh \ + userspace_pm.sh \ +# end of TEST_PROGS + +TEST_GEN_FILES := \ + mptcp_connect \ + mptcp_diag \ + mptcp_inq \ + mptcp_sockopt \ + pm_nl_ctl \ +# end of TEST_GEN_FILES + +TEST_FILES := \ + mptcp_lib.sh \ + settings \ +# end of TEST_FILES TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 968d440c03fe..59051ee2a986 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,36 +1,36 @@ +CONFIG_INET_DIAG=m +CONFIG_INET_MPTCP_DIAG=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_KALLSYMS=y CONFIG_MPTCP=y -CONFIG_IPV6=y CONFIG_MPTCP_IPV6=y -CONFIG_INET_DIAG=m -CONFIG_INET_MPTCP_DIAG=m -CONFIG_VETH=y -CONFIG_NET_SCH_NETEM=m -CONFIG_SYN_COOKIES=y +CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_FW=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m -CONFIG_NF_TABLES=m -CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_MATCH_BPF=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_STATISTIC=m CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NF_TABLES=m CONFIG_NF_TABLES_INET=y -CONFIG_NFT_TPROXY=m +CONFIG_NFT_COMPAT=m CONFIG_NFT_SOCKET=m -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IP6_NF_FILTER=m -CONFIG_NET_ACT_CSUM=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_SCH_INGRESS=m +CONFIG_NFT_TPROXY=m +CONFIG_SYN_COOKIES=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index f3bcaa48df8f..8e8f6441ad8b 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -502,6 +502,7 @@ static int server(int unixfd) process_one_client(r, unixfd); + close(fd); return 0; } @@ -580,8 +581,12 @@ int main(int argc, char *argv[]) die_perror("pipe"); s = xfork(); - if (s == 0) - return server(unixfds[1]); + if (s == 0) { + close(unixfds[0]); + ret = server(unixfds[1]); + close(unixfds[1]); + return ret; + } close(unixfds[1]); diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 7fd555b123b9..c90d8e8b95cb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -74,6 +74,17 @@ unset join_create_err unset join_bind_err unset join_connect_err +unset fb_ns1 +unset fb_ns2 +unset fb_infinite_map_tx +unset fb_dss_corruption +unset fb_simult_conn +unset fb_mpc_passive +unset fb_mpc_active +unset fb_mpc_data +unset fb_md5_sig +unset fb_dss + # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || # (ip6 && (ip6[74] & 0xf0) == 0x30)'" CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, @@ -347,6 +358,7 @@ reset_with_add_addr_timeout() tables="${ip6tables}" fi + # set a maximum, to avoid too long timeout with exponential backoff ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \ @@ -1399,6 +1411,115 @@ chk_join_tx_nr() print_results "join Tx" ${rc} } +chk_fallback_nr() +{ + local infinite_map_tx=${fb_infinite_map_tx:-0} + local dss_corruption=${fb_dss_corruption:-0} + local simult_conn=${fb_simult_conn:-0} + local mpc_passive=${fb_mpc_passive:-0} + local mpc_active=${fb_mpc_active:-0} + local mpc_data=${fb_mpc_data:-0} + local md5_sig=${fb_md5_sig:-0} + local dss=${fb_dss:-0} + local rc=${KSFT_PASS} + local ns=$1 + local count + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$infinite_map_tx" ]; then + rc=${KSFT_FAIL} + print_check "$ns infinite map tx fallback" + fail_test "got $count infinite map tx fallback[s] in $ns expected $infinite_map_tx" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$dss_corruption" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss corruption fallback" + fail_test "got $count dss corruption fallback[s] in $ns expected $dss_corruption" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$simult_conn" ]; then + rc=${KSFT_FAIL} + print_check "$ns simult conn fallback" + fail_test "got $count simult conn fallback[s] in $ns expected $simult_conn" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_passive" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc passive fallback" + fail_test "got $count mpc passive fallback[s] in $ns expected $mpc_passive" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_active" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc active fallback" + fail_test "got $count mpc active fallback[s] in $ns expected $mpc_active" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_data" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc data fallback" + fail_test "got $count mpc data fallback[s] in $ns expected $mpc_data" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$md5_sig" ]; then + rc=${KSFT_FAIL} + print_check "$ns MD5 Sig fallback" + fail_test "got $count MD5 Sig fallback[s] in $ns expected $md5_sig" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$dss" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss fallback" + fail_test "got $count dss fallback[s] in $ns expected $dss" + fi + + return $rc +} + +chk_fallback_nr_all() +{ + local netns=("ns1" "ns2") + local fb_ns=("fb_ns1" "fb_ns2") + local rc=${KSFT_PASS} + + for i in 0 1; do + if [ -n "${!fb_ns[i]}" ]; then + eval "${!fb_ns[i]}" \ + chk_fallback_nr ${netns[i]} || rc=${?} + else + chk_fallback_nr ${netns[i]} || rc=${?} + fi + done + + if [ "${rc}" != "${KSFT_PASS}" ]; then + print_results "fallback" ${rc} + fi +} + chk_join_nr() { local syn_nr=$1 @@ -1484,6 +1605,8 @@ chk_join_nr() join_syn_tx="${join_syn_tx:-${syn_nr}}" \ chk_join_tx_nr + chk_fallback_nr_all + if $validate_checksum; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr @@ -1547,7 +1670,6 @@ chk_add_nr() local tx="" local rx="" local count - local timeout if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 @@ -1556,15 +1678,13 @@ chk_add_nr() rx=" server" fi - timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) - print_check "add addr rx${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip - # if the test configured a short timeout tolerate greater then expected - # add addrs options, due to retransmissions - elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions + elif [ "$count" != "$add_nr" ] && + { [ "$add_nr" -eq 0 ] || [ "$count" -lt "$add_nr" ]; }; then fail_test "got $count ADD_ADDR[s] expected $add_nr" else print_ok @@ -1652,18 +1772,15 @@ chk_add_tx_nr() { local add_tx_nr=$1 local echo_tx_nr=$2 - local timeout local count - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) - print_check "add addr tx" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip - # if the test configured a short timeout tolerate greater then expected - # add addrs options, due to retransmissions - elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions + elif [ "$count" != "$add_tx_nr" ] && + { [ "$add_tx_nr" -eq 0 ] || [ "$count" -lt "$add_tx_nr" ]; }; then fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr" else print_ok @@ -2151,7 +2268,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 3 3 fi @@ -2163,7 +2281,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 join_syn_tx=3 \ chk_join_nr 1 1 1 chk_add_nr 3 3 @@ -2201,6 +2320,74 @@ signal_address_tests() fi } +laminar_endp_tests() +{ + # no laminar endpoints: routing rules are used + if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # laminar endpoints: this endpoint is used + if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # laminar endpoints: these endpoints are used + if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns2 dead:beef:3::2 flags laminar + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + pm_nl_add_endpoint $ns2 10.0.4.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 2 2 + fi + + # laminar endpoints: only one endpoint is used + if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 2 2 + fi + + # laminar endpoints: subflow and laminar flags + if reset_with_tcp_filter "sublow + laminar endpoints" ns1 10.0.2.2 REJECT && + mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 2 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,laminar + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi +} + link_failure_tests() { # accept and use add_addr with additional subflows and link loss @@ -3187,6 +3374,17 @@ deny_join_id0_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 fi + + # default limits, server deny join id 0 + signal + if reset_with_allow_join_id0 "default limits, server deny join id 0" 0 1; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + fi } fullmesh_tests() @@ -3337,6 +3535,7 @@ fail_tests() join_csum_ns1=+1 join_csum_ns2=+0 \ join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \ join_corrupted_pkts="$(pedit_action_pkts)" \ + fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \ chk_join_nr 0 0 0 chk_fail_nr 1 -1 invert fi @@ -3978,6 +4177,7 @@ all_tests_sorted=( f@subflows_tests e@subflows_error_tests s@signal_address_tests + L@laminar_endp_tests l@link_failure_tests t@add_addr_timeout_tests r@remove_tests diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 112c07c4c37a..286164f7246e 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -726,6 +726,7 @@ static int server(int pipefd) process_one_client(r, pipefd); + close(fd); return 0; } @@ -851,8 +852,12 @@ int main(int argc, char *argv[]) die_perror("pipe"); s = xfork(); - if (s == 0) - return server(pipefds[1]); + if (s == 0) { + close(pipefds[0]); + ret = server(pipefds[1]); + close(pipefds[1]); + return ret; + } close(pipefds[1]); diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 93fea3442216..65b374232ff5 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -2,6 +2,7 @@ #include <errno.h> #include <error.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -113,6 +114,8 @@ static int capture_events(int fd, int event_group) error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group"); do { + bool server_side = false; + FD_ZERO(&rfds); FD_SET(fd, &rfds); res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) + @@ -187,18 +190,22 @@ static int capture_events(int fd, int event_group) else if (attrs->rta_type == MPTCP_ATTR_ERROR) fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs)); else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE) - fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs)); + server_side = !!*(__u8 *)RTA_DATA(attrs); else if (attrs->rta_type == MPTCP_ATTR_FLAGS) { __u16 flags = *(__u16 *)RTA_DATA(attrs); /* only print when present, easier */ if (flags & MPTCP_PM_EV_FLAG_DENY_JOIN_ID0) fprintf(stderr, ",deny_join_id0:1"); + if (flags & MPTCP_PM_EV_FLAG_SERVER_SIDE) + server_side = true; } attrs = RTA_NEXT(attrs, msg_len); } } + if (server_side) + fprintf(stderr, ",server_side:1"); fprintf(stderr, "\n"); } while (1); @@ -823,6 +830,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; else if (!strcmp(tok, "signal")) flags |= MPTCP_PM_ADDR_FLAG_SIGNAL; + else if (!strcmp(tok, "laminar")) + flags |= MPTCP_PM_ADDR_FLAG_LAMINAR; else if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; else if (!strcmp(tok, "fullmesh")) @@ -1011,6 +1020,13 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_LAMINAR) { + printf("laminar"); + flags &= ~MPTCP_PM_ADDR_FLAG_LAMINAR; + if (flags) + printf(","); + } + if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) { printf("backup"); flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 3d45991f24ed..87323942cb8a 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -241,7 +241,7 @@ make_connection() print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1" if [ "${client_token}" != "" ] && [ "${server_token}" != "" ] && - [ "${client_serverside}" = 0 ] && [ "${server_serverside}" = 1 ] && + [ "${client_serverside:-0}" = 0 ] && [ "${server_serverside:-0}" = 1 ] && [ "${client_nojoin:-0}" = 0 ] && [ "${server_nojoin:-0}" = 1 ] then test_pass diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index a98ed892f55f..ee2d1a5254f8 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -6,46 +6,52 @@ HOSTPKG_CONFIG := pkg-config MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_PROGS := br_netfilter.sh bridge_brouter.sh -TEST_PROGS += br_netfilter_queue.sh -TEST_PROGS += conntrack_dump_flush.sh -TEST_PROGS += conntrack_icmp_related.sh -TEST_PROGS += conntrack_ipip_mtu.sh -TEST_PROGS += conntrack_tcp_unreplied.sh -TEST_PROGS += conntrack_resize.sh -TEST_PROGS += conntrack_sctp_collision.sh -TEST_PROGS += conntrack_vrf.sh -TEST_PROGS += conntrack_clash.sh -TEST_PROGS += conntrack_reverse_clash.sh -TEST_PROGS += ipvs.sh -TEST_PROGS += nf_conntrack_packetdrill.sh -TEST_PROGS += nf_nat_edemux.sh -TEST_PROGS += nft_audit.sh -TEST_PROGS += nft_concat_range.sh -TEST_PROGS += nft_conntrack_helper.sh -TEST_PROGS += nft_fib.sh -TEST_PROGS += nft_flowtable.sh -TEST_PROGS += nft_interface_stress.sh -TEST_PROGS += nft_meta.sh -TEST_PROGS += nft_nat.sh -TEST_PROGS += nft_nat_zones.sh -TEST_PROGS += nft_queue.sh -TEST_PROGS += nft_synproxy.sh -TEST_PROGS += nft_tproxy_tcp.sh -TEST_PROGS += nft_tproxy_udp.sh -TEST_PROGS += nft_zones_many.sh -TEST_PROGS += rpath.sh -TEST_PROGS += vxlan_mtu_frag.sh -TEST_PROGS += xt_string.sh +TEST_PROGS := \ + br_netfilter.sh \ + br_netfilter_queue.sh \ + bridge_brouter.sh \ + conntrack_clash.sh \ + conntrack_dump_flush.sh \ + conntrack_icmp_related.sh \ + conntrack_ipip_mtu.sh \ + conntrack_resize.sh \ + conntrack_reverse_clash.sh \ + conntrack_sctp_collision.sh \ + conntrack_tcp_unreplied.sh \ + conntrack_vrf.sh \ + ipvs.sh \ + nf_conntrack_packetdrill.sh \ + nf_nat_edemux.sh \ + nft_audit.sh \ + nft_concat_range.sh \ + nft_conntrack_helper.sh \ + nft_fib.sh \ + nft_flowtable.sh \ + nft_interface_stress.sh \ + nft_meta.sh \ + nft_nat.sh \ + nft_nat_zones.sh \ + nft_queue.sh \ + nft_synproxy.sh \ + nft_tproxy_tcp.sh \ + nft_tproxy_udp.sh \ + nft_zones_many.sh \ + rpath.sh \ + vxlan_mtu_frag.sh \ + xt_string.sh \ +# end of TEST_PROGS TEST_PROGS_EXTENDED = nft_concat_range_perf.sh -TEST_GEN_FILES = audit_logread -TEST_GEN_FILES += connect_close nf_queue -TEST_GEN_FILES += conntrack_dump_flush -TEST_GEN_FILES += conntrack_reverse_clash -TEST_GEN_FILES += sctp_collision -TEST_GEN_FILES += udpclash +TEST_GEN_FILES = \ + audit_logread \ + connect_close \ + conntrack_dump_flush \ + conntrack_reverse_clash \ + nf_queue \ + sctp_collision \ + udpclash \ +# end of TEST_GEN_FILES include ../../lib.mk @@ -56,9 +62,12 @@ $(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS) $(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS) $(OUTPUT)/udpclash: LDLIBS += -lpthread -TEST_FILES := lib.sh -TEST_FILES += packetdrill +TEST_FILES := \ + lib.sh \ + packetdrill \ +# end of TEST_FILES TEST_INCLUDES := \ + $(wildcard ../lib/sh/*.sh) \ ../lib.sh \ - $(wildcard ../lib/sh/*.sh) +# end of TEST_INCLUDES diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 79d5b33966ba..12ce61fa15a8 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -1,76 +1,80 @@ CONFIG_AUDIT=y CONFIG_BPF_SYSCALL=y CONFIG_BRIDGE=m -CONFIG_NETFILTER_XTABLES_LEGACY=y -CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_EBT_BROUTE=m CONFIG_BRIDGE_EBT_IP=m CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_T_FILTER=m CONFIG_BRIDGE_NETFILTER=m CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_CGROUP_BPF=y +CONFIG_CRYPTO_SHA1=m CONFIG_DUMMY=m +CONFIG_INET_DIAG=m CONFIG_INET_ESP=m -CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_INET_SCTP_DIAG=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES_LEGACY=m CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES_LEGACY=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_IPTABLES_LEGACY=m +CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP6_NF_FILTER=m CONFIG_IP_NF_RAW=m -CONFIG_IP6_NF_RAW=m CONFIG_IP_SCTP=m +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IP_VS=m CONFIG_IP_VS_PROTO_TCP=y CONFIG_IP_VS_RR=m -CONFIG_IPV6=y -CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_MACVLAN=m CONFIG_NAMESPACES=y CONFIG_NET_CLS_U32=m -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_NET_NS=y -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_IPIP=m -CONFIG_NET_VRF=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NETFILTER_SYNPROXY=m CONFIG_NETFILTER_XTABLES=m -CONFIG_NETFILTER_XT_NAT=m +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_STATE=m CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NET_IPIP=m +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_NS=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_VRF=y CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_LOG_IPV4=m CONFIG_NF_LOG_IPV6=m CONFIG_NF_NAT=m -CONFIG_NF_NAT_REDIRECT=y CONFIG_NF_NAT_MASQUERADE=y +CONFIG_NF_NAT_REDIRECT=y CONFIG_NF_TABLES=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_IPV4=y CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_NETDEV=y -CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_COMPAT=m CONFIG_NFT_CT=m @@ -89,12 +93,9 @@ CONFIG_NFT_QUOTA=m CONFIG_NFT_REDIR=m CONFIG_NFT_SYNPROXY=m CONFIG_NFT_TPROXY=m +CONFIG_TUN=m CONFIG_VETH=m CONFIG_VLAN_8021Q=m CONFIG_VXLAN=m -CONFIG_XFRM_USER=m CONFIG_XFRM_STATISTICS=y -CONFIG_NET_PKTGEN=m -CONFIG_TUN=m -CONFIG_INET_DIAG=m -CONFIG_INET_SCTP_DIAG=m +CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh index 1014551dd769..6731fe1eaf2e 100755 --- a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh +++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh @@ -17,9 +17,31 @@ cleanup() checktool "socat -h" "run test without socat" checktool "iptables --version" "run test without iptables" +checktool "conntrack --version" "run test without conntrack" trap cleanup EXIT +connect_done() +{ + local ns="$1" + local port="$2" + + ip netns exec "$ns" ss -nt -o state established "dport = :$port" | grep -q "$port" +} + +check_ctstate() +{ + local ns="$1" + local dp="$2" + + if ! ip netns exec "$ns" conntrack --get -s 192.168.1.2 -d 192.168.1.1 -p tcp \ + --sport 10000 --dport "$dp" --state ESTABLISHED > /dev/null 2>&1;then + echo "FAIL: Did not find expected state for dport $2" + ip netns exec "$ns" bash -c 'conntrack -L; conntrack -S; ss -nt' + ret=1 + fi +} + setup_ns ns1 ns2 # Connect the namespaces using a veth pair @@ -44,15 +66,18 @@ socatpid=$! ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000" # add a virtual IP using DNAT -ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 +ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 || exit 1 # ... and route it to the other namespace ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1 -# add a persistent connection from the other namespace -ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & +# listener should be up by now, wait if it isn't yet. +wait_local_port_listen "$ns1" 5201 tcp -sleep 1 +# add a persistent connection from the other namespace +sleep 10 | ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & +cpid0=$! +busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" "5201" # ip daddr:dport will be rewritten to 192.168.1.1 5201 # NAT must reallocate source port 10000 because @@ -71,26 +96,25 @@ fi ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201 ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201 -sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null & +sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null & +cpid1=$! -# if connect succeeds, client closes instantly due to EOF on stdin. -# if connect hangs, it will time out after 5s. -echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null & +sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null & cpid2=$! -time_then=$(date +%s) -wait $cpid2 -rv=$? -time_now=$(date +%s) +busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5202 +busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5203 -# Check how much time has elapsed, expectation is for -# 'cpid2' to connect and then exit (and no connect delay). -delta=$((time_now - time_then)) +check_ctstate "$ns1" 5202 +check_ctstate "$ns1" 5203 -if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then +kill $socatpid $cpid0 $cpid1 $cpid2 +socatpid=0 + +if [ $ret -eq 0 ]; then echo "PASS: could connect to service via redirected ports" else - echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)" + echo "FAIL: socat cannot connect to service via redirect" ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 20e76b395c85..ad97c6227f35 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -29,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -408,6 +408,18 @@ perf_duration 0 " +TYPE_doublecreate=" +display cannot create same element twice +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1900,6 +1912,48 @@ test_bug_avx2_mismatch() fi } +test_bug_doublecreate() +{ + local elements="1.2.3.4 . 1.2.4.1, 1.2.4.1 . 1.2.3.4" + local ret=1 + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + add "{ $elements }" || return 1 + # expected to work: 'add' on existing should be no-op. + add "{ $elements }" || return 1 + + # 'create' should return an error. + if nft create element inet filter test "{ $elements }" 2>/dev/null; then + err "Could create an existing element" + return 1 + fi +nft -f - <<EOF 2>/dev/null +flush set inet filter test +create element inet filter test { $elements } +create element inet filter test { $elements } +EOF + ret=$? + if [ $ret -eq 0 ]; then + err "Could create element twice in one transaction" + err "$(nft -a list ruleset)" + return 1 + fi + +nft -f - <<EOF 2>/dev/null +flush set inet filter test +create element inet filter test { $elements } +EOF + ret=$? + if [ $ret -ne 0 ]; then + err "Could not flush and re-create element in one transaction" + return 1 + fi + + return 0 +} + test_reported_issues() { eval test_bug_"${subtest}" } diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh index 9929a9ffef65..04544905c216 100755 --- a/tools/testing/selftests/net/netfilter/nft_fib.sh +++ b/tools/testing/selftests/net/netfilter/nft_fib.sh @@ -256,12 +256,12 @@ test_ping_unreachable() { local daddr4=$1 local daddr6=$2 - if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr4" > /dev/null; then + if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr4" > /dev/null; then echo "FAIL: ${ns1} could reach $daddr4" 1>&2 return 1 fi - if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr6" > /dev/null; then + if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr6" > /dev/null; then echo "FAIL: ${ns1} could reach $daddr6" 1>&2 return 1 fi @@ -437,14 +437,17 @@ check_type() local addr="$3" local type="$4" local count="$5" + local lret=0 [ -z "$count" ] && count=1 if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then - echo "FAIL: did not find $iifname . $addr . $type in $setname" + echo "FAIL: did not find $iifname . $addr . $type in $setname with $count packets" ip netns exec "$nsrouter" nft list set inet t "$setname" ret=1 - return 1 + # do not fail right away, delete entry if it exists so later test that + # checks for unwanted keys don't get confused by this *expected* key. + lret=1 fi # delete the entry, this allows to check if anything unexpected appeared @@ -456,7 +459,7 @@ check_type() return 1 fi - return 0 + return $lret } check_local() diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh index a954754b99b3..b3ec2d0a3f56 100755 --- a/tools/testing/selftests/net/netfilter/nft_nat.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat.sh @@ -569,7 +569,7 @@ test_redirect6() ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then - echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6" + echo "ERROR: cannot ping $ns1 from $ns2 via ipv6" lret=1 fi @@ -859,7 +859,7 @@ EOF # from router:service bypass connection tracking. test_port_shadow_notrack "$family" - # test nat based mitigation: fowarded packets coming from service port + # test nat based mitigation: forwarded packets coming from service port # are masqueraded with random highport. test_port_shadow_pat "$family" diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c index 07423f256f96..7618ebe528a4 100644 --- a/tools/testing/selftests/net/netlink-dumps.c +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -31,9 +31,18 @@ struct ext_ack { const char *str; }; -/* 0: no done, 1: done found, 2: extack found, -1: error */ -static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) +enum get_ea_ret { + ERROR = -1, + NO_CTRL = 0, + FOUND_DONE, + FOUND_ERR, + FOUND_EXTACK, +}; + +static enum get_ea_ret +nl_get_extack(char *buf, size_t n, struct ext_ack *ea) { + enum get_ea_ret ret = NO_CTRL; const struct nlmsghdr *nlh; const struct nlattr *attr; ssize_t rem; @@ -41,15 +50,19 @@ static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) for (rem = n; rem > 0; NLMSG_NEXT(nlh, rem)) { nlh = (struct nlmsghdr *)&buf[n - rem]; if (!NLMSG_OK(nlh, rem)) - return -1; + return ERROR; - if (nlh->nlmsg_type != NLMSG_DONE) + if (nlh->nlmsg_type == NLMSG_ERROR) + ret = FOUND_ERR; + else if (nlh->nlmsg_type == NLMSG_DONE) + ret = FOUND_DONE; + else continue; ea->err = -*(int *)NLMSG_DATA(nlh); if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) - return 1; + return ret; ynl_attr_for_each(attr, nlh, sizeof(int)) { switch (ynl_attr_type(attr)) { @@ -68,10 +81,10 @@ static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) } } - return 2; + return FOUND_EXTACK; } - return 0; + return ret; } static const struct { @@ -99,9 +112,9 @@ static const struct { TEST(dump_extack) { int netlink_sock; + int i, cnt, ret; char buf[8192]; int one = 1; - int i, cnt; ssize_t n; netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); @@ -118,7 +131,7 @@ TEST(dump_extack) ASSERT_EQ(n, 0); /* Dump so many times we fill up the buffer */ - cnt = 64; + cnt = 80; for (i = 0; i < cnt; i++) { n = send(netlink_sock, &dump_neigh_bad, sizeof(dump_neigh_bad), 0); @@ -140,10 +153,20 @@ TEST(dump_extack) } ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); - EXPECT_EQ(nl_get_extack(buf, n, &ea), 2); + ret = nl_get_extack(buf, n, &ea); + /* Once we fill the buffer we'll see one ENOBUFS followed + * by a number of EBUSYs. Then the last recv() will finally + * trigger and complete the dump. + */ + if (ret == FOUND_ERR && (ea.err == ENOBUFS || ea.err == EBUSY)) + continue; + EXPECT_EQ(ret, FOUND_EXTACK); + EXPECT_EQ(ea.err, EINVAL); EXPECT_EQ(ea.attr_offs, sizeof(struct nlmsghdr) + sizeof(struct ndmsg)); } + /* Make sure last message was a full DONE+extack */ + EXPECT_EQ(ret, FOUND_EXTACK); } static const struct { diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 8a0396bfaf99..b521e0dea506 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket): elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE: up.execute(msg) else: - print("Unkonwn cmd: %d" % msg["cmd"]) + print("Unknown cmd: %d" % msg["cmd"]) except NetlinkError as ne: raise ne diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile index e0926d76b4c8..dbe0388c8512 100644 --- a/tools/testing/selftests/net/ovpn/Makefile +++ b/tools/testing/selftests/net/ovpn/Makefile @@ -19,13 +19,15 @@ LDLIBS += $(VAR_LDLIBS) TEST_FILES = common.sh -TEST_PROGS = test.sh \ - test-large-mtu.sh \ +TEST_PROGS := \ test-chachapoly.sh \ - test-tcp.sh \ - test-float.sh \ + test-close-socket-tcp.sh \ test-close-socket.sh \ - test-close-socket-tcp.sh + test-float.sh \ + test-large-mtu.sh \ + test-tcp.sh \ + test.sh \ +# end of TEST_PROGS TEST_GEN_FILES := ovpn-cli diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config index 71946ba9fa17..42699740936d 100644 --- a/tools/testing/selftests/net/ovpn/config +++ b/tools/testing/selftests/net/ovpn/config @@ -1,10 +1,10 @@ -CONFIG_NET=y -CONFIG_INET=y -CONFIG_STREAM_PARSER=y -CONFIG_NET_UDP_TUNNEL=y -CONFIG_DST_CACHE=y CONFIG_CRYPTO=y CONFIG_CRYPTO_AES=y -CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=y +CONFIG_CRYPTO_GCM=y +CONFIG_DST_CACHE=y +CONFIG_INET=y +CONFIG_NET=y +CONFIG_NET_UDP_TUNNEL=y CONFIG_OVPN=m +CONFIG_STREAM_PARSER=y diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c index 9201f2905f2c..0a5226196a2e 100644 --- a/tools/testing/selftests/net/ovpn/ovpn-cli.c +++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c @@ -32,9 +32,10 @@ #include <sys/socket.h> +#include "../../kselftest.h" + /* defines to make checkpatch happy */ #define strscpy strncpy -#define __always_unused __attribute__((__unused__)) /* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we * have to explicitly do it to prevent the kernel from failing upon @@ -1586,6 +1587,7 @@ static int ovpn_listen_mcast(void) sock = nl_socket_alloc(); if (!sock) { fprintf(stderr, "cannot allocate netlink socket\n"); + ret = -ENOMEM; goto err_free; } @@ -2105,6 +2107,7 @@ static int ovpn_run_cmd(struct ovpn_ctx *ovpn) ret = ovpn_listen_mcast(); break; case CMD_INVALID: + ret = -EINVAL; break; } diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile index 31cfb666ba8b..ff54641493e9 100644 --- a/tools/testing/selftests/net/packetdrill/Makefile +++ b/tools/testing/selftests/net/packetdrill/Makefile @@ -1,9 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_INCLUDES := ksft_runner.sh \ - defaults.sh \ - set_sysctls.py \ - ../../kselftest/ktap_helpers.sh +TEST_INCLUDES := \ + defaults.sh \ + ksft_runner.sh \ + set_sysctls.py \ + ../../kselftest/ktap_helpers.sh \ +# end of TEST_INCLUDES TEST_PROGS := $(wildcard *.pkt) diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config index 0237ed98f3c0..c4a19a785521 100644 --- a/tools/testing/selftests/net/packetdrill/config +++ b/tools/testing/selftests/net/packetdrill/config @@ -1,6 +1,6 @@ -CONFIG_IPV6=y -CONFIG_HZ_1000=y CONFIG_HZ=1000 +CONFIG_HZ_1000=y +CONFIG_IPV6=y CONFIG_NET_NS=y CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_FQ=y diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh index 1095a7b22f44..37edd3dc3b07 100755 --- a/tools/testing/selftests/net/packetdrill/defaults.sh +++ b/tools/testing/selftests/net/packetdrill/defaults.sh @@ -51,7 +51,8 @@ sysctl -q net.ipv4.tcp_pacing_ss_ratio=200 sysctl -q net.ipv4.tcp_pacing_ca_ratio=120 sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 -sysctl -q net.ipv4.tcp_fastopen=0x70403 +sysctl -q net.ipv4.tcp_fastopen=0x3 +# Use TFO_COOKIE in ksft_runner.sh for this key. sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 sysctl -q net.ipv4.tcp_syncookies=1 diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index a7e790af38ff..b34e5cf0112e 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -3,21 +3,26 @@ source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" -readonly ipv4_args=('--ip_version=ipv4 ' - '--local_ip=192.168.0.1 ' - '--gateway_ip=192.168.0.1 ' - '--netmask_ip=255.255.0.0 ' - '--remote_ip=192.0.2.1 ' - '-D CMSG_LEVEL_IP=SOL_IP ' - '-D CMSG_TYPE_RECVERR=IP_RECVERR ') - -readonly ipv6_args=('--ip_version=ipv6 ' - '--mtu=1520 ' - '--local_ip=fd3d:0a0b:17d6::1 ' - '--gateway_ip=fd3d:0a0b:17d6:8888::1 ' - '--remote_ip=fd3d:fa7b:d17d::1 ' - '-D CMSG_LEVEL_IP=SOL_IPV6 ' - '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ') +declare -A ip_args=( + [ipv4]="--ip_version=ipv4 + --local_ip=192.168.0.1 + --gateway_ip=192.168.0.1 + --netmask_ip=255.255.0.0 + --remote_ip=192.0.2.1 + -D TFO_COOKIE=3021b9d889017eeb + -D TFO_COOKIE_ZERO=b7c12350a90dc8f5 + -D CMSG_LEVEL_IP=SOL_IP + -D CMSG_TYPE_RECVERR=IP_RECVERR" + [ipv6]="--ip_version=ipv6 + --mtu=1520 + --local_ip=fd3d:0a0b:17d6::1 + --gateway_ip=fd3d:0a0b:17d6:8888::1 + --remote_ip=fd3d:fa7b:d17d::1 + -D TFO_COOKIE=c1d1e9742a47a9bc + -D TFO_COOKIE_ZERO=82af1a8f9a205c34 + -D CMSG_LEVEL_IP=SOL_IPV6 + -D CMSG_TYPE_RECVERR=IPV6_RECVERR" +) if [ $# -ne 1 ]; then ktap_exit_fail_msg "usage: $0 <script>" @@ -38,12 +43,20 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then failfunc=ktap_test_xfail fi +ip_versions=$(grep -E '^--ip_version=' $script | cut -d '=' -f 2) +if [[ -z $ip_versions ]]; then + ip_versions="ipv4 ipv6" +elif [[ ! "$ip_versions" =~ ^ipv[46]$ ]]; then + ktap_exit_fail_msg "Too many or unsupported --ip_version: $ip_versions" + exit "$KSFT_FAIL" +fi + ktap_print_header -ktap_set_plan 2 +ktap_set_plan $(echo $ip_versions | wc -w) -unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \ - && ktap_test_pass "ipv4" || $failfunc "ipv4" -unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \ - && ktap_test_pass "ipv6" || $failfunc "ipv6" +for ip_version in $ip_versions; do + unshare -n packetdrill ${ip_args[$ip_version]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass $ip_version || $failfunc $ip_version +done ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt new file mode 100644 index 000000000000..eef01d5f1118 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +0 < . 1:1001(1000) ack 1 win 32792 + +0 > . 1:1(0) ack 1001 + +0 read(4, ..., 1000) = 1000 + +// resend the payload + a FIN + +0 < F. 1:1001(1000) ack 1 win 32792 +// Why do we have a delay and no dsack ? + +0~+.04 > . 1:1(0) ack 1002 + + +0 close(4) = 0 + +// According to RFC 2525, section 2.17 +// we should _not_ send an RST here, because there was no data to consume. + +0 > F. 1:1(0) ack 1002 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt new file mode 100644 index 000000000000..32aff9bc4052 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO_SERVER_COOKIE_NOT_REQD flag on receiving +// SYN with data but without Fast Open cookie option. + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x202` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Since TFO_SERVER_COOKIE_NOT_REQD, a TFO socket will be created with +// the data accepted. + +0 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 read(4, ..., 1024) = 1000 + +// Data After SYN will be accepted too. + +0 < . 1001:2001(1000) ack 1 win 5840 + +0 > . 1:1(0) ack 2001 + +// Should change the implementation later to set the SYN flag as well. + +0 read(4, ..., 1024) = 1000 + +0 write(4, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 2001 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt new file mode 100644 index 000000000000..649997a58099 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO_SERVER_WO_SOCKOPT1 without setsockopt(TCP_FASTOPEN) + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x402` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt new file mode 100644 index 000000000000..4a00e0d994f2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Server w/o TCP_FASTOPEN socket option + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,FO TFO_COOKIE> + +// Data is ignored since TCP_FASTOPEN is not set on the listener + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable) + +// The above should block until ack comes in below. + +0 < . 1:31(30) ack 1 win 5840 + +0 accept(3, ..., ...) = 4 + + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 read(4, ..., 512) = 30 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt new file mode 100644 index 000000000000..345ed26ff7f8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test that TFO-enabled server would not respond SYN-ACK with any TFO option +// when receiving a pure SYN-data. It should respond a pure SYN-ack. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 + +// Test ECN-setup SYN with ECN disabled because this has happened in reality + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 + +// Test ECN-setup SYN w/ ECN enabled + +0 `sysctl -q net.ipv4.tcp_ecn=2` + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > SE. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt new file mode 100644 index 000000000000..98e6f84497cd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO server with SYN that has TFO cookie and data. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 + +0 write(4, ..., 100) = 100 + +0 > P. 1:101(100) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt new file mode 100644 index 000000000000..95b1047ffdd5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test zero-payload packet w/ valid TFO cookie - a TFO socket will +// still be created and accepted but read() will not return until a +// later pkt with 10 byte. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +// A TFO socket is created and is writable. + +0 write(4, ..., 100) = 100 + +0 > P. 1:101(100) ack 1 + +0...0.300 read(4, ..., 512) = 10 + +.3 < P. 1:11(10) ack 1 win 5840 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt new file mode 100644 index 000000000000..f75efd51ed0c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// A reproducer case for a TFO SYNACK RTO undo bug in: +// 794200d66273 ("tcp: undo cwnd on Fast Open spurious SYNACK retransmit") +// This sequence that tickles this bug is: +// - Fast Open server receives TFO SYN with data, sends SYNACK +// - (client receives SYNACK and sends ACK, but ACK is lost) +// - server app sends some data packets +// - (N of the first data packets are lost) +// - server receives client ACK that has a TS ECR matching first SYNACK, +// and also SACKs suggesting the first N data packets were lost +// - server performs undo of SYNACK RTO, then immediately enters recovery +// - buggy behavior in 794200d66273 then performed an undo that caused +// the connection to be in a bad state, in CA_Open with retrans_out != 0 + +// Check that outbound TS Val ticks are as we would expect with 1000 usec per +// timestamp tick: +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:1000(1000) win 65535 <mss 1012,sackOK,TS val 1000 ecr 0,wscale 7,nop,nop,nop,FO TFO_COOKIE> + +0 > S. 0:0(0) ack 1001 <mss 1460,sackOK,TS val 2000 ecr 1000,nop,wscale 8> + +0 accept(3, ..., ...) = 4 + +// Application writes more data + +.010 write(4, ..., 10000) = 10000 + +0 > P. 1:5001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000> + +0 > P. 5001:10001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000> + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1001:1001(0) ack 1 win 257 <TS val 1010 ecr 2000,sack 2001:5001> + +0 > P. 1:2001(2000) ack 1001 <nop,nop,TS val 2010 ecr 1010> + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }% + + +0 < . 1001:1001(0) ack 1 win 257 <TS val 1011 ecr 2000,sack 2001:6001> + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt new file mode 100644 index 000000000000..c3cb0e8bdcf8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test the Experimental Option +// +// SYN w/ FOEXP w/o cookie must generates SYN+ACK w/ FOEXP +// w/ a valid cookie, and the cookie must be the same one +// with one generated by IANA FO + +`./defaults.sh` + +// Request a TFO cookie by Experimental Option +// This must generate the same TFO_COOKIE + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,FOEXP TFO_COOKIE> + + +0 close(3) = 0 + +// Test if FOEXP with a valid cookie creates a TFO socket + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP TFO_COOKIE> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt new file mode 100644 index 000000000000..dc09f8d9a381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a FIN pkt with the ACK bit to a TFO socket. +// The socket will go to TCP_CLOSE_WAIT state and data can be +// read until the socket is closed, at which time a FIN will be sent. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// FIN is acked and the socket goes to TCP_CLOSE_WAIT state +// in tcp_fin() called from tcp_data_queue(). + +0 < F. 11:11(0) ack 1 win 32792 + +0 > . 1:1(0) ack 12 + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE_WAIT, tcpi_state }% + + +0 read(4, ..., 512) = 10 + +0 close(4) = 0 + +0 > F. 1:1(0) ack 12 + * > F. 1:1(0) ack 12 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt new file mode 100644 index 000000000000..d5543672e2bd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send an ICMP host_unreachable pkt to a pending SYN_RECV req. +// +// If it's a TFO req, the ICMP error will cause it to switch +// to TCP_CLOSE state but remains in the acceptor queue. + +--ip_version=ipv4 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// Out-of-window icmp is ignored but accounted. + +0 `nstat > /dev/null` + +0 < icmp unreachable [5000:6000(1000)] + +0 `nstat | grep TcpExtOutOfWindowIcmps > /dev/null` + +// Valid ICMP unreach. + +0 < icmp unreachable host_unreachable [0:10(10)] + +// Unlike the non-TFO case, the req is still there to be accepted. + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +// tcp_done_with_error() in tcp_v4_err() sets sk->sk_state +// to TCP_CLOSE + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// The 1st read will succeed and return the data in SYN + +0 read(4, ..., 512) = 10 + +// The 2nd read will fail. + +0 read(4, ..., 512) = -1 EHOSTUNREACH (No route to host) + +// But is no longer writable because it's in TCP_CLOSE state. + +0 write(4, ..., 100) = -1 EPIPE (Broken Pipe) + +// inbound pkt will trigger RST because the socket has been moved +// off the TCP hash tables. + +0 < . 1:1(0) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt new file mode 100644 index 000000000000..040d5547ed80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket after it has been accepted. +// +// First read() will return all the data and this is consistent +// with the non-TFO case. Second read will return -1 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// 1st read will return the data from SYN. +// tcp_reset() sets sk->sk_err to ECONNRESET for SYN_RECV. + +0 < R. 11:11(0) win 32792 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// This one w/o ACK bit will cause the same effect. +// +0 < R 11:11(0) win 32792 +// See Step 2 in tcp_validate_incoming(). + +// found_ok_skb in tcp_recvmsg_locked() + +0 read(4, ..., 512) = 10 + +// !copied && sk->sk_err -> sock_error(sk) + +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt new file mode 100644 index 000000000000..7f9de6c66cbd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket before it is accepted. +// +// The socket won't go away and after it's accepted the data +// in the SYN pkt can still be read. But that's about all that +// the acceptor can do with the socket. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// 1st read will return the data from SYN. + +0 < R. 11:11(0) win 257 + +// This one w/o ACK bit will cause the same effect. +// +0 < R 11:11(0) win 257 + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 read(4, ..., 512) = 10 + +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt new file mode 100644 index 000000000000..548a87701b5d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket after it is accepted. +// +// The socket will change to TCP_CLOSE state with pending data so +// write() will fail. Pending data can be still be read and close() +// won't trigger RST if data is not read +// +// 565b7b2d2e63 ("tcp: do not send reset to already closed sockets") +// https://lore.kernel.org/netdev/4C1A2502.1030502@openvz.org/ + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop, FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// tcp_done() sets sk->sk_state to TCP_CLOSE and clears tp->fastopen_rsk + +0 < R. 11:11(0) win 32792 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt new file mode 100644 index 000000000000..20090bf77655 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a fully established socket with pending data before +// it is accepted. +// +// The socket with pending data won't go away and can still be accepted +// with data read. But it will be in TCP_CLOSE state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Invalid cookie, so accept() fails. + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO aaaaaaaaaaaaaaaa,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK, FO TFO_COOKIE,nop,nop> + + +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable) + +// Complete 3WHS and send data and RST + +0 < . 1:1(0) ack 1 win 32792 + +0 < . 1:11(10) ack 1 win 32792 + +0 < R. 11:11(0) win 32792 + +// A valid reset won't make the fully-established socket go away. +// It's just that the acceptor will get a dead, unusable socket +// in TCP_CLOSE state. + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer) + +0 read(4, ..., 512) = 10 + +0 read(4, ..., 512) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt new file mode 100644 index 000000000000..9f52d7de3436 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test the server cookie is generated by aes64 encoding of remote and local +// IP addresses with a master key specified via sockopt TCP_FASTOPEN_KEY +// +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen_key=00000000-00000000-00000000-00000000` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +// Set a key of a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 (big endian). +// This would produce a cookie of TFO_COOKIE like many other +// tests (which the same key but set via sysctl). + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, + "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Request a valid cookie TFO_COOKIE + +0 < S 1428932:1428942(10) win 10000 <mss 1012,nop,nop,FO,sackOK,TS val 1 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 1428933 <mss 1460,sackOK,TS val 10000 ecr 1,nop,wscale 8,FO TFO_COOKIE,nop,nop> + +0 < . 1:1(0) ack 1 win 257 <nop,nop,TS val 2 ecr 10000> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 10001 ecr 2> + +0 < F. 1:1(0) ack 2 win 257 <nop,nop,TS val 3 ecr 10001> + +0 > . 2:2(0) ack 2 <nop,nop,TS val 10002 ecr 3> + + +0 close(3) = 0 + +// Restart the listener + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Test setting the key in the listen state, and produces an identical cookie + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, + "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0 + + +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 6815001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 < . 1001:1001(0) ack 1 win 257 <nop,nop,TS val 12 ecr 10000> + +0 read(4, ..., 8192) = 1000 + + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1001 <nop,nop,TS val 10101 ecr 12> + +0 < F. 1001:1001(0) ack 2 win 257 <nop,nop,TS val 112 ecr 10101> + +0 > . 2:2(0) ack 1002 <nop,nop,TS val 10102 ecr 112> + + +0 close(3) = 0 + +// Restart the listener + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Test invalid key length (must be 16 bytes) + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 0) = -1 (Invalid Argument) + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 3) = -1 (Invalid Argument) + +// Previous cookie won't be accepted b/c this listener uses the global key (0-0-0-0) + +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 6814001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8,FO TFO_COOKIE_ZERO,nop,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt new file mode 100644 index 000000000000..e82e06da44c9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Close a listener socket with pending TFO child. +// This will trigger RST pkt to go out. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// RST pkt is generated for each not-yet-accepted TFO child. +// inet_csk_listen_stop() -> inet_child_forget() -> tcp_disconnect() +// -> tcp_need_reset() is true for SYN_RECV + +0 close(3) = 0 + +0 > R. 1:1(0) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt index 26794e7ddfd5..2a148bb14cbf 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt @@ -1,26 +1,30 @@ // SPDX-License-Identifier: GPL-2.0 `./defaults.sh - ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x602 /proc/sys/net/ipv4/tcp_timestamps=0` + ./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 - +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK> + +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK,nop,nop,FO TFO_COOKIE> +0 > S. 0:0(0) ack 11 win 65535 <mss 1460,nop,nop,sackOK> // sk->sk_state is TCP_SYN_RECV - +.1 accept(3, ..., ...) = 4 + +0 accept(3, ..., ...) = 4 + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% // tcp_disconnect() sets sk->sk_state to TCP_CLOSE +0 connect(4, AF_UNSPEC, ...) = 0 +0 > R. 1:1(0) ack 11 win 65535 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% // connect() sets sk->sk_state to TCP_SYN_SENT +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation is now in progress) +0 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 %{ assert tcpi_state == TCP_SYN_SENT, tcpi_state }% // tp->fastopen_rsk must be NULL +1 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt new file mode 100644 index 000000000000..09fb63f78a0e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Close a TFO socket with unread data. +// This will trigger a RST pkt. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// data_was_unread == true in __tcp_close() + +0 close(4) = 0 + +0 > R. 1:1(0) ack 11 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 88e914c4eef9..a3323c21f001 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1089,10 +1089,11 @@ cleanup() { cleanup_all_ns - ip link del veth_A-C 2>/dev/null - ip link del veth_A-R1 2>/dev/null - cleanup_del_ovs_internal - cleanup_del_ovs_vswitchd + [ -e "/sys/class/net/veth_A-C" ] && ip link del veth_A-C + [ -e "/sys/class/net/veth_A-R1" ] && ip link del veth_A-R1 + [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_internal + [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_vswitchd + rm -f "$tmpoutfile" } diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h index 6e4fef560873..067265b0a554 100644 --- a/tools/testing/selftests/net/psock_lib.h +++ b/tools/testing/selftests/net/psock_lib.h @@ -22,10 +22,6 @@ #define PORT_BASE 8000 -#ifndef __maybe_unused -# define __maybe_unused __attribute__ ((__unused__)) -#endif - static __maybe_unused void pair_udp_setfilter(int fd) { /* the filter below checks for all of the following conditions that diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 221270cee3ea..2938045c5cf9 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -22,6 +22,7 @@ * - TPACKET_V3: RX_RING */ +#undef NDEBUG #include <stdio.h> #include <stdlib.h> #include <sys/types.h> @@ -33,7 +34,6 @@ #include <ctype.h> #include <fcntl.h> #include <unistd.h> -#include <bits/wordsize.h> #include <net/ethernet.h> #include <netinet/ip.h> #include <arpa/inet.h> @@ -785,7 +785,7 @@ static int test_kernel_bit_width(void) static int test_user_bit_width(void) { - return __WORDSIZE; + return sizeof(long) * 8; } static const char *tpacket_str[] = { diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile index 612a7219990e..762845cc973c 100644 --- a/tools/testing/selftests/net/rds/Makefile +++ b/tools/testing/selftests/net/rds/Makefile @@ -5,8 +5,14 @@ all: TEST_PROGS := run.sh -TEST_FILES := include.sh test.py +TEST_FILES := \ + include.sh \ + test.py \ +# end of TEST_FILES -EXTRA_CLEAN := /tmp/rds_logs include.sh +EXTRA_CLEAN := \ + include.sh \ + /tmp/rds_logs \ +# end of EXTRA_CLEAN include ../../lib.mk diff --git a/tools/testing/selftests/net/route_hint.sh b/tools/testing/selftests/net/route_hint.sh new file mode 100755 index 000000000000..2db01ece0cc1 --- /dev/null +++ b/tools/testing/selftests/net/route_hint.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test ensures directed broadcast routes use dst hint mechanism + +source lib.sh + +CLIENT_IP4="192.168.0.1" +SERVER_IP4="192.168.0.2" +BROADCAST_ADDRESS="192.168.0.255" + +setup() { + setup_ns CLIENT_NS SERVER_NS + + ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}/24" dev link0 + + ip -net "${SERVER_NS}" link set link1 up + ip -net "${SERVER_NS}" addr add "${SERVER_IP4}/24" dev link1 + + ip netns exec "${CLIENT_NS}" ethtool -K link0 tcp-segmentation-offload off + ip netns exec "${SERVER_NS}" sh -c "echo 500000000 > /sys/class/net/link1/gro_flush_timeout" + ip netns exec "${SERVER_NS}" sh -c "echo 1 > /sys/class/net/link1/napi_defer_hard_irqs" + ip netns exec "${SERVER_NS}" ethtool -K link1 generic-receive-offload on +} + +cleanup() { + ip -net "${SERVER_NS}" link del link1 + cleanup_ns "${CLIENT_NS}" "${SERVER_NS}" +} + +directed_bcast_hint_test() +{ + local rc=0 + + echo "Testing for directed broadcast route hint" + + orig_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd') + ip netns exec "${CLIENT_NS}" mausezahn link0 -a own -b bcast -A "${CLIENT_IP4}" \ + -B "${BROADCAST_ADDRESS}" -c1 -t tcp "sp=1-100,dp=1234,s=1,a=0" -p 5 -q + sleep 1 + new_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd') + + res=$(echo "${new_in_brd} - ${orig_in_brd}" | bc) + + if [ "${res}" -lt 100 ]; then + echo "[ OK ]" + rc="${ksft_pass}" + else + echo "[FAIL] expected in_brd to be under 100, got ${res}" + rc="${ksft_fail}" + fi + + return "${rc}" +} + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit "${ksft_skip}" +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit "${ksft_skip}" +fi + +if [ ! -x "$(command -v bc)" ]; then + echo "SKIP: Could not run test without bc tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup + +directed_bcast_hint_test +exit $? diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh index 4287a8529890..b200019b3c80 100755 --- a/tools/testing/selftests/net/rps_default_mask.sh +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -54,16 +54,16 @@ cleanup echo 1 > /proc/sys/net/core/rps_default_mask setup -chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask doesn't affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK echo 3 > /proc/sys/net/core/rps_default_mask -chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0 +chk_rps "changing rps_default_mask doesn't affect existing netns" $NETNS lo 0 ip link add name $VETH type veth peer netns $NETNS name $VETH ip link set dev $VETH up ip -n $NETNS link set dev $VETH up -chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3 -chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0 +chk_rps "changing rps_default_mask affects newly created devices" "" $VETH 3 +chk_rps "changing rps_default_mask doesn't affect newly child netns[II]" $NETNS $VETH 0 ip link del dev $VETH ip netns del $NETNS @@ -72,8 +72,8 @@ chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0 ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1 ip link add name $VETH type veth peer netns $NETNS name $VETH -chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask in child ns doesn't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1 -chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0 +chk_rps "changing rps_default_mask in child ns doesn't affect existing devices" $NETNS lo 0 exit $ret diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index d6c00efeb664..dbf77513f617 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -313,6 +313,8 @@ kci_test_addrlft() slowwait 5 check_addr_not_exist "$devdummy" "10.23.11." if [ $? -eq 1 ]; then + # troubleshoot the reason for our failure + run_cmd ip addr show dev "$devdummy" check_err 1 end_test "FAIL: preferred_lft addresses remaining" return @@ -323,6 +325,11 @@ kci_test_addrlft() kci_test_promote_secondaries() { + run_cmd ifconfig "$devdummy" + if [ $ret -ne 0 ]; then + end_test "SKIP: ifconfig not installed" + return $ksft_skip + fi promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries) sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1 @@ -519,7 +526,7 @@ kci_test_encap_fou() run_cmd_fail ip -netns "$testns" fou del port 9999 run_cmd ip -netns "$testns" fou del port 7777 if [ $ret -ne 0 ]; then - end_test "FAIL: fou"s + end_test "FAIL: fou" return 1 fi @@ -1201,6 +1208,12 @@ do_test_address_proto() local ret=0 local err + run_cmd_grep 'proto' ip address help + if [ $? -ne 0 ];then + end_test "SKIP: addr proto ${what}: iproute2 too old" + return $ksft_skip + fi + ip address add dev "$devdummy" "$addr3" check_err $? proto=$(address_get_proto "$addr3") diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c index db1aeb8c5d1e..be1080003c61 100644 --- a/tools/testing/selftests/net/socket.c +++ b/tools/testing/selftests/net/socket.c @@ -39,6 +39,7 @@ static int run_tests(void) { char err_string1[ERR_STRING_SZ]; char err_string2[ERR_STRING_SZ]; + const char *msg1, *msg2; int i, err; err = 0; @@ -56,13 +57,13 @@ static int run_tests(void) errno == -s->expect) continue; - strerror_r(-s->expect, err_string1, ERR_STRING_SZ); - strerror_r(errno, err_string2, ERR_STRING_SZ); + msg1 = strerror_r(-s->expect, err_string1, ERR_STRING_SZ); + msg2 = strerror_r(errno, err_string2, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "err (%s) got (%s)\n", s->domain, s->type, s->protocol, - err_string1, err_string2); + msg1, msg2); err = -1; break; @@ -70,12 +71,12 @@ static int run_tests(void) close(fd); if (s->expect < 0) { - strerror_r(errno, err_string1, ERR_STRING_SZ); + msg1 = strerror_r(errno, err_string1, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "success got err (%s)\n", s->domain, s->type, s->protocol, - err_string1); + msg1); err = -1; break; diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config index 3605e38711cb..971cb6fa2d63 100644 --- a/tools/testing/selftests/net/tcp_ao/config +++ b/tools/testing/selftests/net/tcp_ao/config @@ -1,8 +1,8 @@ CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_RMD160=y CONFIG_CRYPTO_SHA1=y -CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NET_VRF=y CONFIG_TCP_AO=y diff --git a/tools/testing/selftests/net/tcp_port_share.c b/tools/testing/selftests/net/tcp_port_share.c new file mode 100644 index 000000000000..4c39d599dfce --- /dev/null +++ b/tools/testing/selftests/net/tcp_port_share.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2025 Cloudflare, Inc. + +/* Tests for TCP port sharing (bind bucket reuse). */ + +#include <arpa/inet.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#include <sched.h> +#include <stdlib.h> + +#include "../kselftest_harness.h" + +#define DST_PORT 30000 +#define SRC_PORT 40000 + +struct sockaddr_inet { + union { + struct sockaddr_storage ss; + struct sockaddr_in6 v6; + struct sockaddr_in v4; + struct sockaddr sa; + }; + socklen_t len; + char str[INET6_ADDRSTRLEN + __builtin_strlen("[]:65535") + 1]; +}; + +const int one = 1; + +static int disconnect(int fd) +{ + return connect(fd, &(struct sockaddr){ AF_UNSPEC }, sizeof(struct sockaddr)); +} + +static int getsockname_port(int fd) +{ + struct sockaddr_inet addr = {}; + int err; + + addr.len = sizeof(addr); + err = getsockname(fd, &addr.sa, &addr.len); + if (err) + return -1; + + switch (addr.sa.sa_family) { + case AF_INET: + return ntohs(addr.v4.sin_port); + case AF_INET6: + return ntohs(addr.v6.sin6_port); + default: + errno = EAFNOSUPPORT; + return -1; + } +} + +static void make_inet_addr(int af, const char *ip, __u16 port, + struct sockaddr_inet *addr) +{ + const char *fmt = ""; + + memset(addr, 0, sizeof(*addr)); + + switch (af) { + case AF_INET: + addr->len = sizeof(addr->v4); + addr->v4.sin_family = af; + addr->v4.sin_port = htons(port); + inet_pton(af, ip, &addr->v4.sin_addr); + fmt = "%s:%hu"; + break; + case AF_INET6: + addr->len = sizeof(addr->v6); + addr->v6.sin6_family = af; + addr->v6.sin6_port = htons(port); + inet_pton(af, ip, &addr->v6.sin6_addr); + fmt = "[%s]:%hu"; + break; + } + + snprintf(addr->str, sizeof(addr->str), fmt, ip, port); +} + +FIXTURE(tcp_port_share) {}; + +FIXTURE_VARIANT(tcp_port_share) { + int domain; + /* IP to listen on and connect to */ + const char *dst_ip; + /* Primary IP to connect from */ + const char *src1_ip; + /* Secondary IP to connect from */ + const char *src2_ip; + /* IP to bind to in order to block the source port */ + const char *bind_ip; +}; + +FIXTURE_VARIANT_ADD(tcp_port_share, ipv4) { + .domain = AF_INET, + .dst_ip = "127.0.0.1", + .src1_ip = "127.1.1.1", + .src2_ip = "127.2.2.2", + .bind_ip = "127.3.3.3", +}; + +FIXTURE_VARIANT_ADD(tcp_port_share, ipv6) { + .domain = AF_INET6, + .dst_ip = "::1", + .src1_ip = "2001:db8::1", + .src2_ip = "2001:db8::2", + .bind_ip = "2001:db8::3", +}; + +FIXTURE_SETUP(tcp_port_share) +{ + int sc; + + ASSERT_EQ(unshare(CLONE_NEWNET), 0); + ASSERT_EQ(system("ip link set dev lo up"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::1/32 nodad"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::2/32 nodad"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::3/32 nodad"), 0); + + sc = open("/proc/sys/net/ipv4/ip_local_port_range", O_WRONLY); + ASSERT_GE(sc, 0); + ASSERT_GT(dprintf(sc, "%hu %hu\n", SRC_PORT, SRC_PORT), 0); + ASSERT_EQ(close(sc), 0); +} + +FIXTURE_TEARDOWN(tcp_port_share) {} + +/* Verify that an ephemeral port becomes available again after the socket + * bound to it and blocking it from reuse is closed. + */ +TEST_F(tcp_port_share, can_reuse_port_after_bind_and_close) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + int c1, c2, ln, pb; + + /* Listen on <dst_ip>:<DST_PORT> */ + ln = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(ln, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + ASSERT_EQ(listen(ln, 2), 0); + + /* Connect from <src1_ip>:<SRC_PORT> */ + c1 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c1, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src1_ip, 0, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + ASSERT_EQ(getsockname_port(c1), SRC_PORT); + + /* Bind to <bind_ip>:<SRC_PORT>. Block the port from reuse. */ + pb = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(pb, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + /* Try to connect from <src2_ip>:<SRC_PORT>. Expect failure. */ + c2 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c2, 0) TH_LOG("socket"); + ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src2_ip, 0, &addr); + ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str); + ASSERT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m"); + + /* Unbind from <bind_ip>:<SRC_PORT>. Unblock the port for reuse. */ + ASSERT_EQ(close(pb), 0); + + /* Connect again from <src2_ip>:<SRC_PORT> */ + EXPECT_EQ(connect(c2, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + EXPECT_EQ(getsockname_port(c2), SRC_PORT); + + ASSERT_EQ(close(c2), 0); + ASSERT_EQ(close(c1), 0); + ASSERT_EQ(close(ln), 0); +} + +/* Verify that a socket auto-bound during connect() blocks port reuse after + * disconnect (connect(AF_UNSPEC)) followed by an explicit port bind(). + */ +TEST_F(tcp_port_share, port_block_after_disconnect) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + int c1, c2, ln, pb; + + /* Listen on <dst_ip>:<DST_PORT> */ + ln = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(ln, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + ASSERT_EQ(listen(ln, 2), 0); + + /* Connect from <src1_ip>:<SRC_PORT> */ + c1 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c1, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src1_ip, 0, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + ASSERT_EQ(getsockname_port(c1), SRC_PORT); + + /* Disconnect the socket and bind it to <bind_ip>:<SRC_PORT> to block the port */ + ASSERT_EQ(disconnect(c1), 0) TH_LOG("disconnect: %m"); + ASSERT_EQ(setsockopt(c1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + /* Trigger port-addr bucket state update with another bind() and close() */ + pb = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(pb, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + ASSERT_EQ(close(pb), 0); + + /* Connect from <src2_ip>:<SRC_PORT>. Expect failure. */ + c2 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c2, 0) TH_LOG("socket: %m"); + ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src2_ip, 0, &addr); + ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + EXPECT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str); + EXPECT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m"); + + ASSERT_EQ(close(c2), 0); + ASSERT_EQ(close(c1), 0); + ASSERT_EQ(close(ln), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh index 1b3f89e2b86e..2a7224fe74f2 100755 --- a/tools/testing/selftests/net/test_bridge_backup_port.sh +++ b/tools/testing/selftests/net/test_bridge_backup_port.sh @@ -315,6 +315,29 @@ backup_port() tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" + # Check that packets are forwarded out of vx0 when swp1 is + # administratively down and out of swp1 when it is administratively up + # again. + run_cmd "ip -n $sw1 link set dev swp1 down" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 administratively down" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "Forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 up" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding + log_test $? 0 "swp1 administratively up" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "No forwarding out of vx0" + # Remove vx0 as the backup port of swp1 and check that packets are no # longer forwarded out of vx0 when swp1 does not have a carrier. run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port" @@ -322,9 +345,9 @@ backup_port() log_test $? 1 "vx0 not configured as backup port of swp1" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "Forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier off" @@ -332,9 +355,9 @@ backup_port() log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "No forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" } diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh index 062f957950af..8b414d0edada 100755 --- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh +++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh @@ -21,7 +21,7 @@ test_set_remote() { RET=0 - ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent check_remotes "fdb append" @@ -74,12 +74,12 @@ test_change_mc_remote() { check_command netstat || return - ip_link_add v1 up type veth peer name v2 - ip_link_set_up v2 + adf_ip_link_add v1 up type veth peer name v2 + adf_ip_link_set_up v2 RET=0 - ip_link_add vx up type vxlan dstport 4789 \ + adf_ip_link_add vx up type vxlan dstport 4789 \ local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000 check_membership "group=224.1.1.1 fail=0" \ diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh index 80bf11fdc046..a4550511830a 100755 --- a/tools/testing/selftests/net/tfo_passive.sh +++ b/tools/testing/selftests/net/tfo_passive.sh @@ -95,7 +95,7 @@ wait res=$(cat $out_file) rm $out_file -if [ $res -eq 0 ]; then +if [ "$res" = "0" ]; then echo "got invalid NAPI ID from passive TFO socket" cleanup_ns exit 1 diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index dd093f9df6f1..e788b84551ca 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -439,6 +439,8 @@ TEST_F(tls, sendfile) EXPECT_GE(filefd, 0); fstat(filefd, &st); EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); + + close(filefd); } TEST_F(tls, send_then_sendfile) @@ -460,6 +462,9 @@ TEST_F(tls, send_then_sendfile) EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size); + + free(buf); + close(filefd); } static void chunked_sendfile(struct __test_metadata *_metadata, diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index 282f14760940..dbb34c7e09ce 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -10,28 +10,6 @@ PAUSE_ON_FAIL=no ################################################################################ # -log_test() -{ - local rc=$1 - local expected=$2 - local msg="$3" - - if [ ${rc} -eq ${expected} ]; then - printf "TEST: %-60s [ OK ]\n" "${msg}" - nsuccess=$((nsuccess+1)) - else - ret=1 - nfail=$((nfail+1)) - printf "TEST: %-60s [FAIL]\n" "${msg}" - if [ "${PAUSE_ON_FAIL}" = "yes" ]; then - echo - echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 - fi - fi -} - run_cmd() { local ns @@ -203,34 +181,137 @@ setup_traceroute6() run_traceroute6() { - if [ ! -x "$(command -v traceroute6)" ]; then - echo "SKIP: Could not run IPV6 test without traceroute6" - return - fi - setup_traceroute6 + RET=0 + # traceroute6 host-2 from host-1 (expects 2000:102::2) run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2" - log_test $? 0 "IPV6 traceroute" + check_err $? "traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute" cleanup_traceroute6 } ################################################################################ +# traceroute6 with VRF test +# +# Verify that in this scenario +# +# ------------------------ N2 +# | | +# ------ ------ N3 ---- +# | R1 | | R2 |------|H2| +# ------ ------ ---- +# | | +# ------------------------ N1 +# | +# ---- +# |H1| +# ---- +# +# Where H1's default route goes through R1 and R1's default route goes through +# R2 over N2, traceroute6 from H1 to H2 reports R2's address on N2 and not N1. +# The interfaces connecting R2 to the different subnets are membmer in a VRF +# and the intention is to check that traceroute6 does not report the VRF's +# address. +# +# Addresses are assigned as follows: +# +# N1: 2000:101::/64 +# N2: 2000:102::/64 +# N3: 2000:103::/64 +# +# R1's host part of address: 1 +# R2's host part of address: 2 +# H1's host part of address: 3 +# H2's host part of address: 4 +# +# For example: +# the IPv6 address of R1's interface on N2 is 2000:102::1/64 + +cleanup_traceroute6_vrf() +{ + cleanup_all_ns +} + +setup_traceroute6_vrf() +{ + # Start clean + cleanup_traceroute6_vrf + + setup_ns h1 h2 r1 r2 + create_ns "$h1" + create_ns "$h2" + create_ns "$r1" + create_ns "$r2" + + ip -n "$r2" link add name vrf100 up type vrf table 100 + ip -n "$r2" addr add 2001:db8:100::1/64 dev vrf100 + + # Setup N3 + connect_ns "$r2" eth3 - 2000:103::2/64 "$h2" eth3 - 2000:103::4/64 + + ip -n "$r2" link set dev eth3 master vrf100 + + ip -n "$h2" route add default via 2000:103::2 + + # Setup N2 + connect_ns "$r1" eth2 - 2000:102::1/64 "$r2" eth2 - 2000:102::2/64 + + ip -n "$r1" route add default via 2000:102::2 + + ip -n "$r2" link set dev eth2 master vrf100 + + # Setup N1. host-1 and router-2 connect to a bridge in router-1. + ip -n "$r1" link add name br100 up type bridge + ip -n "$r1" addr add 2000:101::1/64 dev br100 + + connect_ns "$h1" eth0 - 2000:101::3/64 "$r1" eth0 - - + + ip -n "$h1" route add default via 2000:101::1 + + ip -n "$r1" link set dev eth0 master br100 + + connect_ns "$r2" eth1 - 2000:101::2/64 "$r1" eth1 - - + + ip -n "$r2" link set dev eth1 master vrf100 + + ip -n "$r1" link set dev eth1 master br100 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2000:103::4 >/dev/null 2>&1 +} + +run_traceroute6_vrf() +{ + setup_traceroute6_vrf + + RET=0 + + # traceroute6 host-2 from host-1 (expects 2000:102::2) + run_cmd "$h1" "traceroute6 2000:103::4 | grep 2000:102::2" + check_err $? "traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute with VRF" + + cleanup_traceroute6_vrf +} + +################################################################################ # traceroute test # -# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. # -# 1.0.3.1/24 +# 1.0.3.3/24 1.0.3.1/24 # ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- # |H1|--------------------------|R1|--------------------------|H2| # ---- N1 ---- N2 ---- # -# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and -# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary -# address on N1. -# +# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and 1.0.3.1/24 and +# 1.0.1.1/24 are R1's primary addresses on N1. The kernel is expected to prefer +# a source address that is on the same subnet as the destination IP of the ICMP +# error message. cleanup_traceroute() { @@ -250,6 +331,7 @@ setup_traceroute() connect_ns $h1 eth0 1.0.1.3/24 - \ $router eth1 1.0.3.1/24 - + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 ip netns exec $h1 ip route add default via 1.0.1.1 ip netns exec $router ip addr add 1.0.1.1/24 dev eth1 @@ -268,35 +350,107 @@ setup_traceroute() run_traceroute() { - if [ ! -x "$(command -v traceroute)" ]; then - echo "SKIP: Could not run IPV4 test without traceroute" - return - fi - setup_traceroute - # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. - run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" - log_test $? 0 "IPV4 traceroute" + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep -q 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep -q 1.0.3.1" + check_err $? "traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute" cleanup_traceroute } ################################################################################ +# traceroute with VRF test +# +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. The +# intention is to check that the kernel does not choose an IP assigned to the +# VRF device, but rather an address from the VRF port (eth1) that received the +# packet that generates the ICMP error message. +# +# 1.0.4.1/24 (vrf100) +# 1.0.3.3/24 1.0.3.1/24 +# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- + +cleanup_traceroute_vrf() +{ + cleanup_all_ns +} + +setup_traceroute_vrf() +{ + # Start clean + cleanup_traceroute_vrf + + setup_ns h1 h2 router + create_ns "$h1" + create_ns "$h2" + create_ns "$router" + + ip -n "$router" link add name vrf100 up type vrf table 100 + ip -n "$router" addr add 1.0.4.1/24 dev vrf100 + + connect_ns "$h1" eth0 1.0.1.3/24 - \ + "$router" eth1 1.0.1.1/24 - + + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 + ip -n "$h1" route add default via 1.0.1.1 + + ip -n "$router" link set dev eth1 master vrf100 + ip -n "$router" addr add 1.0.3.1/24 dev eth1 + ip netns exec "$router" sysctl -qw \ + net.ipv4.icmp_errors_use_inbound_ifaddr=1 + + connect_ns "$h2" eth0 1.0.2.4/24 - \ + "$router" eth2 1.0.2.1/24 - + + ip -n "$h2" route add default via 1.0.2.1 + + ip -n "$router" link set dev eth2 master vrf100 + + # Prime the network + ip netns exec "$h1" ping -c5 1.0.2.4 >/dev/null 2>&1 +} + +run_traceroute_vrf() +{ + setup_traceroute_vrf + + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep 1.0.3.1" + check_err $? "traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute with VRF" + + cleanup_traceroute_vrf +} + +################################################################################ # Run tests run_tests() { run_traceroute6 + run_traceroute6_vrf run_traceroute + run_traceroute_vrf } ################################################################################ # main -declare -i nfail=0 -declare -i nsuccess=0 - while getopts :pv o do case $o in @@ -306,7 +460,9 @@ do esac done +require_command traceroute6 +require_command traceroute + run_tests -printf "\nTests passed: %3d\n" ${nsuccess} -printf "Tests failed: %3d\n" ${nfail} +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh index e7cb8c678bde..db481af9b6b3 100755 --- a/tools/testing/selftests/net/vlan_bridge_binding.sh +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -18,29 +18,29 @@ setup_prepare() { local port - ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add br up type bridge vlan_filtering 1 for port in d1 d2 d3; do - ip_link_add $port type veth peer name r$port - ip_link_set_up $port - ip_link_set_up r$port - ip_link_set_master $port br + adf_ip_link_add $port type veth peer name r$port + adf_ip_link_set_up $port + adf_ip_link_set_up r$port + adf_ip_link_set_master $port br done - bridge_vlan_add vid 11 dev br self - bridge_vlan_add vid 11 dev d1 master + adf_bridge_vlan_add vid 11 dev br self + adf_bridge_vlan_add vid 11 dev d1 master - bridge_vlan_add vid 12 dev br self - bridge_vlan_add vid 12 dev d2 master + adf_bridge_vlan_add vid 12 dev br self + adf_bridge_vlan_add vid 12 dev d2 master - bridge_vlan_add vid 13 dev br self - bridge_vlan_add vid 13 dev d1 master - bridge_vlan_add vid 13 dev d2 master + adf_bridge_vlan_add vid 13 dev br self + adf_bridge_vlan_add vid 13 dev d1 master + adf_bridge_vlan_add vid 13 dev d2 master - bridge_vlan_add vid 14 dev br self - bridge_vlan_add vid 14 dev d1 master - bridge_vlan_add vid 14 dev d2 master - bridge_vlan_add vid 14 dev d3 master + adf_bridge_vlan_add vid 14 dev br self + adf_bridge_vlan_add vid 14 dev d1 master + adf_bridge_vlan_add vid 14 dev d2 master + adf_bridge_vlan_add vid 14 dev d3 master } operstate_is() @@ -74,7 +74,7 @@ add_one_vlan() local link=$1; shift local id=$1; shift - ip_link_add $link.$id link $link type vlan id $id "$@" + adf_ip_link_add $link.$id link $link type vlan id $id "$@" } add_vlans() @@ -98,7 +98,7 @@ down_netdevs() local dev for dev in "$@"; do - ip_link_set_down $dev + adf_ip_link_set_down $dev done } @@ -207,13 +207,13 @@ test_binding_toggle_off() do_test_binding_off : "on->off" } -dfr_set_binding_on() +adf_set_binding_on() { set_vlans type vlan bridge_binding on defer set_vlans type vlan bridge_binding off } -dfr_set_binding_off() +adf_set_binding_off() { set_vlans type vlan bridge_binding off defer set_vlans type vlan bridge_binding on @@ -223,14 +223,14 @@ test_binding_toggle_on_when_lower_down() { add_vlans bridge_binding off set_vlans up - do_test_binding_on dfr_set_binding_on "off->on when lower down" + do_test_binding_on adf_set_binding_on "off->on when lower down" } test_binding_toggle_off_when_lower_down() { add_vlans bridge_binding on set_vlans up - do_test_binding_off dfr_set_binding_off "on->off when lower down" + do_test_binding_off adf_set_binding_off "on->off when lower down" } test_binding_toggle_on_when_upper_down() diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index e907c2751956..793a2fc33d9f 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -5,10 +5,11 @@ # Inputs: # # YNL_GENS: families we need in the selftests -# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet) +# YNL_GEN_PROGS: TEST_GEN_PROGS which need YNL # YNL_GEN_FILES: TEST_GEN_FILES which need YNL -YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) +YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) \ + $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_PROGS)) YNL_SPECS := \ $(patsubst %,$(top_srcdir)/Documentation/netlink/specs/%.yaml,$(YNL_GENS)) diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index 0fb759ba992e..330e000baeb1 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -262,19 +262,22 @@ REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++ if (f || !p || !done) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ printf("\nSee all results in %s\n", ARGV[1]); }' +# Execute the toplevel kernel Makefile +KBUILD_MAKE = $(MAKE) -C $(srctree) ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) LLVM= + help: @echo "Supported targets under selftests/nolibc:" @echo " all call the \"run\" target below" @echo " help this help" @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" - @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" + @echo " nolibc-test build the executable (uses \$$CC or \$$CROSS_COMPILE)" @echo " libc-test build an executable using the compiler's default libc instead" @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)" @echo " initramfs.cpio prepare the initramfs archive with nolibc-test" @echo " initramfs prepare the initramfs tree with nolibc-test" @echo " defconfig create a fresh new default config (uses \$$XARCH)" - @echo " kernel (re)build the kernel (uses \$$XARCH)" - @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)" + @echo " kernel (re)build the kernel (uses \$$XARCH, \$$CROSS_COMPILE)" + @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH, \$$CROSS_COMPILE)" @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)" @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)" @echo " clean clean the sysroot, initramfs, build and output files" @@ -340,17 +343,17 @@ initramfs: nolibc-test $(Q)cp nolibc-test initramfs/init defconfig: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG) + $(Q)$(KBUILD_MAKE) $(DEFCONFIG) $(Q)if [ -n "$(EXTRACONFIG)" ]; then \ $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \ - $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \ + $(KBUILD_MAKE) olddefconfig < /dev/null; \ fi kernel: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null + $(Q)$(KBUILD_MAKE) $(IMAGE_NAME) < /dev/null kernel-standalone: initramfs - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null + $(Q)$(KBUILD_MAKE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null # run the tests after building the kernel run: kernel initramfs.cpio diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index a297ee0d6d07..29de21595fc9 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -196,8 +196,8 @@ int expect_zr(int expr, int llen) } -#define EXPECT_NZ(cond, expr, val) \ - do { if (!(cond)) result(llen, SKIPPED); else ret += expect_nz(expr, llen; } while (0) +#define EXPECT_NZ(cond, expr) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_nz(expr, llen); } while (0) static __attribute__((unused)) int expect_nz(int expr, int llen) @@ -686,7 +686,6 @@ int expect_strtox(int llen, void *func, const char *input, int base, intmax_t ex #define CASE_TEST(name) \ case __LINE__: llen += printf("%d %s", test, #name); -/* constructors validate that they are executed in definition order */ __attribute__((constructor)) static void constructor1(void) { @@ -1334,6 +1333,7 @@ int run_syscall(int min, int max) CASE_TEST(chroot_root); EXPECT_SYSZR(euid0, chroot("/")); break; CASE_TEST(chroot_blah); EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break; CASE_TEST(chroot_exe); EXPECT_SYSER(1, chroot(argv0), -1, ENOTDIR); break; + CASE_TEST(clock_nanosleep); ts.tv_nsec = -1; EXPECT_EQ(1, EINVAL, clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL)); break; CASE_TEST(close_m1); EXPECT_SYSER(1, close(-1), -1, EBADF); break; CASE_TEST(close_dup); EXPECT_SYSZR(1, close(dup(0))); break; CASE_TEST(dup_0); tmp = dup(0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; diff --git a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c index da0db0e7c969..cd9075444c32 100644 --- a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c +++ b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c @@ -121,6 +121,8 @@ TEST_F(pci_ep_basic, MSI_TEST) for (i = 1; i <= 32; i++) { pci_ep_ioctl(PCITEST_MSI, i); + if (ret == -EINVAL) + SKIP(return, "MSI%d is disabled", i); EXPECT_FALSE(ret) TH_LOG("Test failed for MSI%d", i); } } @@ -137,6 +139,8 @@ TEST_F(pci_ep_basic, MSIX_TEST) for (i = 1; i <= 2048; i++) { pci_ep_ioctl(PCITEST_MSIX, i); + if (ret == -EINVAL) + SKIP(return, "MSI-X%d is disabled", i); EXPECT_FALSE(ret) TH_LOG("Test failed for MSI-X%d", i); } } diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c index e03fe1b9bba2..b3a72f0ac522 100644 --- a/tools/testing/selftests/perf_events/watermark_signal.c +++ b/tools/testing/selftests/perf_events/watermark_signal.c @@ -17,8 +17,6 @@ #include "../kselftest_harness.h" -#define __maybe_unused __attribute__((__unused__)) - static int sigio_count; static void handle_sigio(int signum __maybe_unused, diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config index 6133524710f7..cf7cc0ce0248 100644 --- a/tools/testing/selftests/pidfd/config +++ b/tools/testing/selftests/pidfd/config @@ -4,6 +4,5 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_TIME_NS=y -CONFIG_GENERIC_VDSO_TIME_NS=y CONFIG_CGROUPS=y CONFIG_CHECKPOINT_RESTORE=y diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h index 4efa6314bd96..864f0c9f1afc 100644 --- a/tools/testing/selftests/powerpc/include/instructions.h +++ b/tools/testing/selftests/powerpc/include/instructions.h @@ -67,7 +67,7 @@ static inline int paste_last(void *i) #define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1) /* This defines the prefixed load/store instructions */ -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ # define stringify_in_c(...) __VA_ARGS__ #else # define __stringify_in_c(...) #__VA_ARGS__ diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 19bb333e2485..9c9735570abf 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -7,6 +7,7 @@ /proc-loadavg-001 /proc-maps-race /proc-multiple-procfs +/proc-net-dev-lseek /proc-empty-vm /proc-pid-vm /proc-self-map-files-001 @@ -18,6 +19,7 @@ /proc-tid0 /proc-uptime-001 /proc-uptime-002 +/proc-pidns /read /self /setns-dcache diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 50aba102201a..a7de2bb6d8be 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -10,6 +10,7 @@ TEST_GEN_PROGS += fd-003-kthread TEST_GEN_PROGS += proc-2-is-kthread TEST_GEN_PROGS += proc-loadavg-001 TEST_GEN_PROGS += proc-maps-race +TEST_GEN_PROGS += proc-net-dev-lseek TEST_GEN_PROGS += proc-empty-vm TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 @@ -28,5 +29,6 @@ TEST_GEN_PROGS += setns-sysvipc TEST_GEN_PROGS += thread-self TEST_GEN_PROGS += proc-multiple-procfs TEST_GEN_PROGS += proc-fsconfig-hidepid +TEST_GEN_PROGS += proc-pidns include ../lib.mk diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c index 94bba4553130..a546475db550 100644 --- a/tools/testing/selftests/proc/proc-maps-race.c +++ b/tools/testing/selftests/proc/proc-maps-race.c @@ -32,6 +32,8 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <linux/fs.h> +#include <sys/ioctl.h> #include <sys/mman.h> #include <sys/stat.h> #include <sys/types.h> @@ -317,6 +319,25 @@ static bool capture_mod_pattern(FIXTURE_DATA(proc_maps_race) *self, strcmp(restored_first_line->text, self->first_line.text) == 0; } +static bool query_addr_at(int maps_fd, void *addr, + unsigned long *vma_start, unsigned long *vma_end) +{ + struct procmap_query q; + + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + /* Find the VMA at the split address */ + q.query_addr = (unsigned long long)addr; + q.query_flags = 0; + if (ioctl(maps_fd, PROCMAP_QUERY, &q)) + return false; + + *vma_start = q.vma_start; + *vma_end = q.vma_end; + + return true; +} + static inline bool split_vma(FIXTURE_DATA(proc_maps_race) *self) { return mmap(self->mod_info->addr, self->page_size, self->mod_info->prot | PROT_EXEC, @@ -559,6 +580,8 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split) do { bool last_line_changed; bool first_line_changed; + unsigned long vma_start; + unsigned long vma_end; ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); @@ -595,6 +618,19 @@ TEST_F(proc_maps_race, test_maps_tearing_from_split) first_line_changed = strcmp(new_first_line.text, self->first_line.text) != 0; ASSERT_EQ(last_line_changed, first_line_changed); + /* Check if PROCMAP_QUERY ioclt() finds the right VMA */ + ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size, + &vma_start, &vma_end)); + /* + * The vma at the split address can be either the same as + * original one (if read before the split) or the same as the + * first line in the second page (if read after the split). + */ + ASSERT_TRUE((vma_start == self->last_line.start_addr && + vma_end == self->last_line.end_addr) || + (vma_start == split_first_line.start_addr && + vma_end == split_first_line.end_addr)); + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); @@ -636,6 +672,9 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize) clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); start_test_loop(&start_ts, self->verbose); do { + unsigned long vma_start; + unsigned long vma_end; + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); /* Check if we read vmas after shrinking it */ @@ -662,6 +701,16 @@ TEST_F(proc_maps_race, test_maps_tearing_from_resize) "Expand result invalid", self)); } + /* Check if PROCMAP_QUERY ioclt() finds the right VMA */ + ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr, &vma_start, &vma_end)); + /* + * The vma should stay at the same address and have either the + * original size of 3 pages or 1 page if read after shrinking. + */ + ASSERT_TRUE(vma_start == self->last_line.start_addr && + (vma_end - vma_start == self->page_size * 3 || + vma_end - vma_start == self->page_size)); + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); @@ -703,6 +752,9 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap) clock_gettime(CLOCK_MONOTONIC_COARSE, &start_ts); start_test_loop(&start_ts, self->verbose); do { + unsigned long vma_start; + unsigned long vma_end; + ASSERT_TRUE(read_boundary_lines(self, &new_last_line, &new_first_line)); /* Check if we read vmas after remapping it */ @@ -729,6 +781,19 @@ TEST_F(proc_maps_race, test_maps_tearing_from_remap) "Remap restore result invalid", self)); } + /* Check if PROCMAP_QUERY ioclt() finds the right VMA */ + ASSERT_TRUE(query_addr_at(self->maps_fd, mod_info->addr + self->page_size, + &vma_start, &vma_end)); + /* + * The vma should either stay at the same address and have the + * original size of 3 pages or we should find the remapped vma + * at the remap destination address with size of 1 page. + */ + ASSERT_TRUE((vma_start == self->last_line.start_addr && + vma_end - vma_start == self->page_size * 3) || + (vma_start == self->last_line.start_addr + self->page_size && + vma_end - vma_start == self->page_size)); + clock_gettime(CLOCK_MONOTONIC_COARSE, &end_ts); end_test_iteration(&end_ts, self->verbose); } while (end_ts.tv_sec - start_ts.tv_sec < self->duration_sec); diff --git a/tools/testing/selftests/proc/proc-net-dev-lseek.c b/tools/testing/selftests/proc/proc-net-dev-lseek.c new file mode 100644 index 000000000000..742a3e804451 --- /dev/null +++ b/tools/testing/selftests/proc/proc-net-dev-lseek.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2025 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#undef _GNU_SOURCE +#define _GNU_SOURCE +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <sched.h> +/* + * Test that lseek("/proc/net/dev/", 0, SEEK_SET) + * a) works, + * b) does what you think it does. + */ +int main(void) +{ + /* /proc/net/dev output is deterministic in fresh netns only. */ + if (unshare(CLONE_NEWNET) == -1) { + if (errno == ENOSYS || errno == EPERM) { + return 4; + } + return 1; + } + + const int fd = open("/proc/net/dev", O_RDONLY); + assert(fd >= 0); + + char buf1[4096]; + const ssize_t rv1 = read(fd, buf1, sizeof(buf1)); + /* + * Not "<=", this file can't be empty: + * there is header, "lo" interface with some zeroes. + */ + assert(0 < rv1); + assert(rv1 <= sizeof(buf1)); + + /* Believe it or not, this line broke one day. */ + assert(lseek(fd, 0, SEEK_SET) == 0); + + char buf2[4096]; + const ssize_t rv2 = read(fd, buf2, sizeof(buf2)); + /* Not "<=", see above. */ + assert(0 < rv2); + assert(rv2 <= sizeof(buf2)); + + /* Test that lseek rewinds to the beginning of the file. */ + assert(rv1 == rv2); + assert(memcmp(buf1, buf2, rv1) == 0); + + /* Contents of the file is not validated: this test is about lseek(). */ + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index d04685771952..978cbcb3eb11 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -47,6 +47,10 @@ #include <sys/resource.h> #include <linux/fs.h> +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + #include "../kselftest.h" static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) @@ -218,12 +222,12 @@ static int make_exe(const uint8_t *payload, size_t len) * 2: vsyscall VMA is r-xp vsyscall=emulate */ static volatile int g_vsyscall; -static const char *str_vsyscall; +static const char *str_vsyscall __maybe_unused; -static const char str_vsyscall_0[] = ""; -static const char str_vsyscall_1[] = +static const char str_vsyscall_0[] __maybe_unused = ""; +static const char str_vsyscall_1[] __maybe_unused = "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; -static const char str_vsyscall_2[] = +static const char str_vsyscall_2[] __maybe_unused = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; #ifdef __x86_64__ diff --git a/tools/testing/selftests/proc/proc-pidns.c b/tools/testing/selftests/proc/proc-pidns.c new file mode 100644 index 000000000000..52500597f951 --- /dev/null +++ b/tools/testing/selftests/proc/proc-pidns.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2025 SUSE LLC. + */ + +#include <assert.h> +#include <errno.h> +#include <sched.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/prctl.h> + +#include "../kselftest_harness.h" + +#define ASSERT_ERRNO(expected, _t, seen) \ + __EXPECT(expected, #expected, \ + ({__typeof__(seen) _tmp_seen = (seen); \ + _tmp_seen >= 0 ? _tmp_seen : -errno; }), #seen, _t, 1) + +#define ASSERT_ERRNO_EQ(expected, seen) \ + ASSERT_ERRNO(expected, ==, seen) + +#define ASSERT_SUCCESS(seen) \ + ASSERT_ERRNO(0, <=, seen) + +static int touch(char *path) +{ + int fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC, 0644); + if (fd < 0) + return -1; + return close(fd); +} + +FIXTURE(ns) +{ + int host_mntns, host_pidns; + int dummy_pidns; +}; + +FIXTURE_SETUP(ns) +{ + /* Stash the old mntns. */ + self->host_mntns = open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->host_mntns); + + /* Create a new mount namespace and make it private. */ + ASSERT_SUCCESS(unshare(CLONE_NEWNS)); + ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)); + + /* + * Create a proper tmpfs that we can use and will disappear once we + * leave this mntns. + */ + ASSERT_SUCCESS(mount("tmpfs", "/tmp", "tmpfs", 0, NULL)); + + /* + * Create a pidns we can use for later tests. We need to fork off a + * child so that we get a usable nsfd that we can bind-mount and open. + */ + ASSERT_SUCCESS(mkdir("/tmp/dummy", 0755)); + ASSERT_SUCCESS(touch("/tmp/dummy/pidns")); + ASSERT_SUCCESS(mkdir("/tmp/dummy/proc", 0755)); + + self->host_pidns = open("/proc/self/ns/pid", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->host_pidns); + ASSERT_SUCCESS(unshare(CLONE_NEWPID)); + + pid_t pid = fork(); + ASSERT_SUCCESS(pid); + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGKILL); + ASSERT_SUCCESS(mount("/proc/self/ns/pid", "/tmp/dummy/pidns", NULL, MS_BIND, NULL)); + ASSERT_SUCCESS(mount("proc", "/tmp/dummy/proc", "proc", 0, NULL)); + exit(0); + } + + int wstatus; + ASSERT_EQ(waitpid(pid, &wstatus, 0), pid); + ASSERT_TRUE(WIFEXITED(wstatus)); + ASSERT_EQ(WEXITSTATUS(wstatus), 0); + + ASSERT_SUCCESS(setns(self->host_pidns, CLONE_NEWPID)); + + self->dummy_pidns = open("/tmp/dummy/pidns", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->dummy_pidns); +} + +FIXTURE_TEARDOWN(ns) +{ + ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS)); + ASSERT_SUCCESS(close(self->host_mntns)); + + ASSERT_SUCCESS(close(self->host_pidns)); + ASSERT_SUCCESS(close(self->dummy_pidns)); +} + +TEST_F(ns, pidns_mount_string_path) +{ + ASSERT_SUCCESS(mkdir("/tmp/proc-host", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc-host", "proc", 0, "pidns=/proc/self/ns/pid")); + ASSERT_SUCCESS(access("/tmp/proc-host/self/", X_OK)); + + ASSERT_SUCCESS(mkdir("/tmp/proc-dummy", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc-dummy", "proc", 0, "pidns=/tmp/dummy/pidns")); + ASSERT_ERRNO_EQ(-ENOENT, access("/tmp/proc-dummy/1/", X_OK)); + ASSERT_ERRNO_EQ(-ENOENT, access("/tmp/proc-dummy/self/", X_OK)); +} + +TEST_F(ns, pidns_fsconfig_string_path) +{ + int fsfd = fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy/pidns", 0)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_fsconfig_fd) +{ + int fsfd = fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy_pidns)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_reconfigure_remount) +{ + ASSERT_SUCCESS(mkdir("/tmp/proc", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc", "proc", 0, "")); + + ASSERT_SUCCESS(access("/tmp/proc/1/", X_OK)); + ASSERT_SUCCESS(access("/tmp/proc/self/", X_OK)); + + ASSERT_ERRNO_EQ(-EBUSY, mount(NULL, "/tmp/proc", NULL, MS_REMOUNT, "pidns=/tmp/dummy/pidns")); + + ASSERT_SUCCESS(access("/tmp/proc/1/", X_OK)); + ASSERT_SUCCESS(access("/tmp/proc/self/", X_OK)); +} + +TEST_F(ns, pidns_reconfigure_fsconfig_string_path) +{ + int fsfd = fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_ERRNO_EQ(-EBUSY, fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy/pidns", 0)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); /* noop */ + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_reconfigure_fsconfig_fd) +{ + int fsfd = fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_ERRNO_EQ(-EBUSY, fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy_pidns)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); /* noop */ + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh index fd1ffaa5a135..3c1e5d3f8805 100755 --- a/tools/testing/selftests/rcutorture/bin/jitter.sh +++ b/tools/testing/selftests/rcutorture/bin/jitter.sh @@ -39,6 +39,22 @@ do fi done +# Uses global variables startsecs, startns, endsecs, endns, and limit. +# Exit code is success for time not yet elapsed and failure otherwise. +function timecheck { + local done=`awk -v limit=$limit \ + -v startsecs=$startsecs \ + -v startns=$startns \ + -v endsecs=$endsecs \ + -v endns=$endns < /dev/null ' + BEGIN { + delta = (endsecs - startsecs) * 1000 * 1000; + delta += int((endns - startns) / 1000); + print delta >= limit; + }'` + return $done +} + while : do # Check for done. @@ -85,15 +101,20 @@ do n=$(($n+1)) sleep .$sleeptime - # Spin a random duration + # Spin a random duration, but with rather coarse granularity. limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN { srand(n + me + systime()); printf("%06d", int(rand() * spinmax)); }' < /dev/null` n=$(($n+1)) - for i in {1..$limit} + startsecs=`date +%s` + startns=`date +%N` + endsecs=$startns + endns=$endns + while timecheck do - echo > /dev/null + endsecs=`date +%s` + endns=`date +%N` done done diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 611bc03a8dc7..a33ba109ef0b 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -94,6 +94,7 @@ usage () { echo " --do-kvfree / --do-no-kvfree / --no-kvfree" echo " --do-locktorture / --do-no-locktorture / --no-locktorture" echo " --do-none" + echo " --do-normal / --do-no-normal / --no-normal" echo " --do-rcuscale / --do-no-rcuscale / --no-rcuscale" echo " --do-rcutasksflavors / --do-no-rcutasksflavors / --no-rcutasksflavors" echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture" diff --git a/tools/testing/selftests/riscv/README b/tools/testing/selftests/riscv/README new file mode 100644 index 000000000000..443da395da68 --- /dev/null +++ b/tools/testing/selftests/riscv/README @@ -0,0 +1,24 @@ +KSelfTest RISC-V +================ + +- These tests are riscv specific and so not built or run but just skipped + completely when env-variable ARCH is found to be different than 'riscv'. + +- Holding true the above, RISC-V KSFT tests can be run within the + KSelfTest framework using standard Linux top-level-makefile targets: + + $ make TARGETS=riscv kselftest-clean + $ make TARGETS=riscv kselftest + + or + + $ make -C tools/testing/selftests TARGETS=riscv \ + INSTALL_PATH=<your-installation-path> install + + or, alternatively, only specific riscv/ subtargets can be picked: + + $ make -C tools/testing/selftests TARGETS=riscv RISCV_SUBTARGETS="mm vector" \ + INSTALL_PATH=<your-installation-path> install + + Further details on building and running KSFT can be found in: + Documentation/dev-tools/kselftest.rst diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h index 67d544aaa9a3..06c840e81c8b 100644 --- a/tools/testing/selftests/rseq/rseq-riscv.h +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -8,6 +8,7 @@ * exception when executed in all modes. */ #include <endian.h> +#include <asm/fence.h> #if defined(__BYTE_ORDER) ? (__BYTE_ORDER == __LITTLE_ENDIAN) : defined(__LITTLE_ENDIAN) #define RSEQ_SIG 0xf1401073 /* csrr mhartid, x0 */ @@ -24,8 +25,6 @@ #define REG_L __REG_SEL("ld ", "lw ") #define REG_S __REG_SEL("sd ", "sw ") -#define RISCV_FENCE(p, s) \ - __asm__ __volatile__ ("fence " #p "," #s : : : "memory") #define rseq_smp_mb() RISCV_FENCE(rw, rw) #define rseq_smp_rmb() RISCV_FENCE(r, r) #define rseq_smp_wmb() RISCV_FENCE(w, w) diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 663a9cef1952..dcac5cbe7933 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -40,9 +40,9 @@ * Define weak versions to play nice with binaries that are statically linked * against a libc that doesn't support registering its own rseq. */ -__weak ptrdiff_t __rseq_offset; -__weak unsigned int __rseq_size; -__weak unsigned int __rseq_flags; +extern __weak ptrdiff_t __rseq_offset; +extern __weak unsigned int __rseq_size; +extern __weak unsigned int __rseq_flags; static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; static const unsigned int *libc_rseq_size_p = &__rseq_size; @@ -209,7 +209,7 @@ void rseq_init(void) * libc not having registered a restartable sequence. Try to find the * symbols if that's the case. */ - if (!*libc_rseq_size_p) { + if (!libc_rseq_size_p || !*libc_rseq_size_p) { libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 61acbd45ffaa..874f17763536 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -24,6 +24,7 @@ #include <linux/filter.h> #include <sys/prctl.h> #include <sys/ptrace.h> +#include <sys/time.h> #include <sys/user.h> #include <linux/prctl.h> #include <linux/ptrace.h> @@ -73,6 +74,14 @@ #define noinline __attribute__((noinline)) #endif +#ifndef __nocf_check +#define __nocf_check __attribute__((nocf_check)) +#endif + +#ifndef __naked +#define __naked __attribute__((__naked__)) +#endif + #ifndef PR_SET_NO_NEW_PRIVS #define PR_SET_NO_NEW_PRIVS 38 #define PR_GET_NO_NEW_PRIVS 39 @@ -3547,6 +3556,10 @@ static void signal_handler(int signal) perror("write from signal"); } +static void signal_handler_nop(int signal) +{ +} + TEST(user_notification_signal) { pid_t pid; @@ -4819,6 +4832,132 @@ TEST(user_notification_wait_killable_fatal) EXPECT_EQ(SIGTERM, WTERMSIG(status)); } +/* Ensure signals after the reply do not interrupt */ +TEST(user_notification_wait_killable_after_reply) +{ + int i, max_iter = 100000; + int listener, status; + int pipe_fds[2]; + pid_t pid; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) + { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + listener = user_notif_syscall( + __NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER | + SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); + ASSERT_GE(listener, 0); + + /* + * Used to count invocations. One token is transferred from the child + * to the parent per syscall invocation, the parent tries to take + * one token per successful RECV. If the syscall is restarted after + * RECV the parent will try to get two tokens while the child only + * provided one. + */ + ASSERT_EQ(pipe(pipe_fds), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct sigaction new_action = { + .sa_handler = signal_handler_nop, + .sa_flags = SA_RESTART, + }; + struct itimerval timer = { + .it_value = { .tv_usec = 1000 }, + .it_interval = { .tv_usec = 1000 }, + }; + char c = 'a'; + + close(pipe_fds[0]); + + /* Setup the sigaction with SA_RESTART */ + if (sigaction(SIGALRM, &new_action, NULL)) { + perror("sigaction"); + exit(1); + } + + /* + * Kill with SIGALRM repeatedly, to try to hit the race when + * handling the syscall. + */ + if (setitimer(ITIMER_REAL, &timer, NULL) < 0) + perror("setitimer"); + + for (i = 0; i < max_iter; ++i) { + int fd; + + /* Send one token per iteration to catch repeats. */ + if (write(pipe_fds[1], &c, sizeof(c)) != 1) { + perror("write"); + exit(1); + } + + fd = syscall(__NR_dup, 0); + if (fd < 0) { + perror("dup"); + exit(1); + } + close(fd); + } + + exit(0); + } + + close(pipe_fds[1]); + + for (i = 0; i < max_iter; ++i) { + struct seccomp_notif req = {}; + struct seccomp_notif_addfd addfd = {}; + struct pollfd pfd = { + .fd = pipe_fds[0], + .events = POLLIN, + }; + char c; + + /* + * Try to receive one token. If it failed, one child syscall + * was restarted after RECV and needed to be handled twice. + */ + ASSERT_EQ(poll(&pfd, 1, 1000), 1) + kill(pid, SIGKILL); + + ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1) + kill(pid, SIGKILL); + + /* + * Get the notification, reply to it as fast as possible to test + * whether the child wrongly skips going into the non-preemptible + * (TASK_KILLABLE) state. + */ + do + ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); + while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */ + ASSERT_EQ(ret, 0) + kill(pid, SIGKILL); + + addfd.id = req.id; + addfd.flags = SECCOMP_ADDFD_FLAG_SEND; + addfd.srcfd = 0; + ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0) + kill(pid, SIGKILL); + } + + /* + * Wait for the process to exit, and make sure the process terminated + * with a zero exit code.. + */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + struct tsync_vs_thread_leader_args { pthread_t leader; }; @@ -4896,7 +5035,36 @@ TEST(tsync_vs_dead_thread_leader) EXPECT_EQ(0, status); } -noinline int probed(void) +#ifdef __x86_64__ + +/* + * We need naked probed_uprobe function. Using __nocf_check + * check to skip possible endbr64 instruction and ignoring + * -Wattributes, otherwise the compilation might fail. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wattributes" + +__naked __nocf_check noinline int probed_uprobe(void) +{ + /* + * Optimized uprobe is possible only on top of nop5 instruction. + */ + asm volatile (" \n" + ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n" + "ret \n" + ); +} +#pragma GCC diagnostic pop + +#else +noinline int probed_uprobe(void) +{ + return 1; +} +#endif + +noinline int probed_uretprobe(void) { return 1; } @@ -4949,35 +5117,46 @@ static ssize_t get_uprobe_offset(const void *addr) return found ? (uintptr_t)addr - start + base : -1; } -FIXTURE(URETPROBE) { +FIXTURE(UPROBE) { int fd; }; -FIXTURE_VARIANT(URETPROBE) { +FIXTURE_VARIANT(UPROBE) { /* - * All of the URETPROBE behaviors can be tested with either - * uretprobe attached or not + * All of the U(RET)PROBE behaviors can be tested with either + * u(ret)probe attached or not */ bool attach; + /* + * Test both uprobe and uretprobe. + */ + bool uretprobe; +}; + +FIXTURE_VARIANT_ADD(UPROBE, not_attached) { + .attach = false, + .uretprobe = false, }; -FIXTURE_VARIANT_ADD(URETPROBE, attached) { +FIXTURE_VARIANT_ADD(UPROBE, uprobe_attached) { .attach = true, + .uretprobe = false, }; -FIXTURE_VARIANT_ADD(URETPROBE, not_attached) { - .attach = false, +FIXTURE_VARIANT_ADD(UPROBE, uretprobe_attached) { + .attach = true, + .uretprobe = true, }; -FIXTURE_SETUP(URETPROBE) +FIXTURE_SETUP(UPROBE) { const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; ssize_t offset; int type, bit; -#ifndef __NR_uretprobe - SKIP(return, "__NR_uretprobe syscall not defined"); +#if !defined(__NR_uprobe) || !defined(__NR_uretprobe) + SKIP(return, "__NR_uprobe ot __NR_uretprobe syscalls not defined"); #endif if (!variant->attach) @@ -4987,12 +5166,17 @@ FIXTURE_SETUP(URETPROBE) type = determine_uprobe_perf_type(); ASSERT_GE(type, 0); - bit = determine_uprobe_retprobe_bit(); - ASSERT_GE(bit, 0); - offset = get_uprobe_offset(probed); + + if (variant->uretprobe) { + bit = determine_uprobe_retprobe_bit(); + ASSERT_GE(bit, 0); + } + + offset = get_uprobe_offset(variant->uretprobe ? probed_uretprobe : probed_uprobe); ASSERT_GE(offset, 0); - attr.config |= 1 << bit; + if (variant->uretprobe) + attr.config |= 1 << bit; attr.size = attr_sz; attr.type = type; attr.config1 = ptr_to_u64("/proc/self/exe"); @@ -5003,7 +5187,7 @@ FIXTURE_SETUP(URETPROBE) PERF_FLAG_FD_CLOEXEC); } -FIXTURE_TEARDOWN(URETPROBE) +FIXTURE_TEARDOWN(UPROBE) { /* we could call close(self->fd), but we'd need extra filter for * that and since we are calling _exit right away.. @@ -5017,11 +5201,17 @@ static int run_probed_with_filter(struct sock_fprog *prog) return -1; } - probed(); + /* + * Uprobe is optimized after first hit, so let's hit twice. + */ + probed_uprobe(); + probed_uprobe(); + + probed_uretprobe(); return 0; } -TEST_F(URETPROBE, uretprobe_default_allow) +TEST_F(UPROBE, uprobe_default_allow) { struct sock_filter filter[] = { BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), @@ -5034,7 +5224,7 @@ TEST_F(URETPROBE, uretprobe_default_allow) ASSERT_EQ(0, run_probed_with_filter(&prog)); } -TEST_F(URETPROBE, uretprobe_default_block) +TEST_F(UPROBE, uprobe_default_block) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, @@ -5051,11 +5241,14 @@ TEST_F(URETPROBE, uretprobe_default_block) ASSERT_EQ(0, run_probed_with_filter(&prog)); } -TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall) +TEST_F(UPROBE, uprobe_block_syscall) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), +#ifdef __NR_uprobe + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 1, 2), +#endif #ifdef __NR_uretprobe BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1), #endif @@ -5070,11 +5263,14 @@ TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall) ASSERT_EQ(0, run_probed_with_filter(&prog)); } -TEST_F(URETPROBE, uretprobe_default_block_with_uretprobe_syscall) +TEST_F(UPROBE, uprobe_default_block_with_syscall) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), +#ifdef __NR_uprobe + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 3, 0), +#endif #ifdef __NR_uretprobe BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0), #endif diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 5596f4df0e9f..b2cc6ea74450 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -879,7 +879,7 @@ "cmdUnderTest": "$TC actions add action police pkts_rate 1000 pkts_burst 200 index 1", "expExitCode": "0", "verifyCmd": "$TC actions ls action police", - "matchPattern": "action order [0-9]*: police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200", + "matchPattern": "action order [0-9]*: police 0x1 rate 0bit burst 0b mtu (4Gb|4096Mb) pkts_rate 1000 pkts_burst 200", "matchCount": "1", "teardown": [ "$TC actions flush action police" diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c index ba58589a1145..ca2bd03154e4 100644 --- a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -144,6 +144,8 @@ int main(int argc, char **argv) ret = sscanf(index_str, "%d", &index); if (ret < 0) break; + + index &= 0x0f; if (index > WORKLOAD_TYPE_MAX_INDEX) printf("Invalid workload type index\n"); else diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 5d7f4ecfb816..770269efe42a 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -20,6 +20,7 @@ TEST_PROGS += test_generic_09.sh TEST_PROGS += test_generic_10.sh TEST_PROGS += test_generic_11.sh TEST_PROGS += test_generic_12.sh +TEST_PROGS += test_generic_13.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index b71faba86c3b..6b8123c12a7a 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1384,21 +1384,23 @@ static int cmd_dev_list(struct dev_ctx *ctx) static int cmd_dev_get_features(void) { #define const_ilog2(x) (63 - __builtin_clzll(x)) +#define FEAT_NAME(f) [const_ilog2(f)] = #f static const char *feat_map[] = { - [const_ilog2(UBLK_F_SUPPORT_ZERO_COPY)] = "ZERO_COPY", - [const_ilog2(UBLK_F_URING_CMD_COMP_IN_TASK)] = "COMP_IN_TASK", - [const_ilog2(UBLK_F_NEED_GET_DATA)] = "GET_DATA", - [const_ilog2(UBLK_F_USER_RECOVERY)] = "USER_RECOVERY", - [const_ilog2(UBLK_F_USER_RECOVERY_REISSUE)] = "RECOVERY_REISSUE", - [const_ilog2(UBLK_F_UNPRIVILEGED_DEV)] = "UNPRIVILEGED_DEV", - [const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE", - [const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY", - [const_ilog2(UBLK_F_ZONED)] = "ZONED", - [const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO", - [const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE", - [const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG", - [const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE", - [const_ilog2(UBLK_F_PER_IO_DAEMON)] = "PER_IO_DAEMON", + FEAT_NAME(UBLK_F_SUPPORT_ZERO_COPY), + FEAT_NAME(UBLK_F_URING_CMD_COMP_IN_TASK), + FEAT_NAME(UBLK_F_NEED_GET_DATA), + FEAT_NAME(UBLK_F_USER_RECOVERY), + FEAT_NAME(UBLK_F_USER_RECOVERY_REISSUE), + FEAT_NAME(UBLK_F_UNPRIVILEGED_DEV), + FEAT_NAME(UBLK_F_CMD_IOCTL_ENCODE), + FEAT_NAME(UBLK_F_USER_COPY), + FEAT_NAME(UBLK_F_ZONED), + FEAT_NAME(UBLK_F_USER_RECOVERY_FAIL_IO), + FEAT_NAME(UBLK_F_UPDATE_SIZE), + FEAT_NAME(UBLK_F_AUTO_BUF_REG), + FEAT_NAME(UBLK_F_QUIESCE), + FEAT_NAME(UBLK_F_PER_IO_DAEMON), + FEAT_NAME(UBLK_F_BUF_REG_OFF_DAEMON), }; struct ublk_dev *dev; __u64 features = 0; @@ -1425,7 +1427,7 @@ static int cmd_dev_get_features(void) feat = feat_map[i]; else feat = "unknown"; - printf("\t%-20s: 0x%llx\n", feat, 1ULL << i); + printf("0x%-16llx: %s\n", 1ULL << i, feat); } } diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh index 9227a208ba53..21a31cd5491a 100755 --- a/tools/testing/selftests/ublk/test_generic_01.sh +++ b/tools/testing/selftests/ublk/test_generic_01.sh @@ -10,6 +10,10 @@ if ! _have_program bpftrace; then exit "$UBLK_SKIP_CODE" fi +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "sequential io order" dev_id=$(_add_ublk_dev -t null) diff --git a/tools/testing/selftests/ublk/test_generic_02.sh b/tools/testing/selftests/ublk/test_generic_02.sh index 3e80121e3bf5..12920768b1a0 100755 --- a/tools/testing/selftests/ublk/test_generic_02.sh +++ b/tools/testing/selftests/ublk/test_generic_02.sh @@ -10,6 +10,10 @@ if ! _have_program bpftrace; then exit "$UBLK_SKIP_CODE" fi +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "sequential io order for MQ" dev_id=$(_add_ublk_dev -t null -q 2) diff --git a/tools/testing/selftests/ublk/test_generic_12.sh b/tools/testing/selftests/ublk/test_generic_12.sh index 7abbb00d251d..b4046201b4d9 100755 --- a/tools/testing/selftests/ublk/test_generic_12.sh +++ b/tools/testing/selftests/ublk/test_generic_12.sh @@ -10,6 +10,10 @@ if ! _have_program bpftrace; then exit "$UBLK_SKIP_CODE" fi +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "do imbalanced load, it should be balanced over I/O threads" NTHREADS=6 diff --git a/tools/testing/selftests/ublk/test_generic_13.sh b/tools/testing/selftests/ublk/test_generic_13.sh new file mode 100755 index 000000000000..b7aa90b1cb74 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_13.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_13" +ERR_CODE=0 + +_prep_test "null" "check that feature list is complete" + +if ${UBLK_PROG} features | grep -q unknown; then + echo "# unknown feature detected!" + echo "# did you add a feature and forget to update feat_map in kublk?" + echo "# this failure is expected if running an older test suite against" + echo "# a newer kernel with new features added" + ERR_CODE=255 +fi + +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_null_01.sh b/tools/testing/selftests/ublk/test_null_01.sh index a34203f72668..c2cb8f7a09fe 100755 --- a/tools/testing/selftests/ublk/test_null_01.sh +++ b/tools/testing/selftests/ublk/test_null_01.sh @@ -6,6 +6,10 @@ TID="null_01" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "basic IO test" dev_id=$(_add_ublk_dev -t null) diff --git a/tools/testing/selftests/ublk/test_null_02.sh b/tools/testing/selftests/ublk/test_null_02.sh index 5633ca876655..8accd35beb55 100755 --- a/tools/testing/selftests/ublk/test_null_02.sh +++ b/tools/testing/selftests/ublk/test_null_02.sh @@ -6,6 +6,10 @@ TID="null_02" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "null" "basic IO test with zero copy" dev_id=$(_add_ublk_dev -t null -z) diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh index 566cfd90d192..274295061042 100755 --- a/tools/testing/selftests/ublk/test_stress_05.sh +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -5,6 +5,10 @@ TID="stress_05" ERR_CODE=0 +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + run_io_and_remove() { local size=$1 diff --git a/tools/testing/selftests/ublk/utils.h b/tools/testing/selftests/ublk/utils.h index 36545d1567f1..a852e0b7153e 100644 --- a/tools/testing/selftests/ublk/utils.h +++ b/tools/testing/selftests/ublk/utils.h @@ -2,8 +2,6 @@ #ifndef KUBLK_UTILS_H #define KUBLK_UTILS_H -#define __maybe_unused __attribute__((unused)) - #ifndef min #define min(a, b) ((a) < (b) ? (a) : (b)) #endif diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore index 30d5c8f0e5c7..ba322a353aff 100644 --- a/tools/testing/selftests/vDSO/.gitignore +++ b/tools/testing/selftests/vDSO/.gitignore @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only vdso_test vdso_test_abi -vdso_test_clock_getres vdso_test_correctness vdso_test_gettimeofday vdso_test_getcpu diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 918a2caa070e..e361aca22a74 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -4,7 +4,6 @@ include ../../../scripts/Makefile.arch TEST_GEN_PROGS := vdso_test_gettimeofday TEST_GEN_PROGS += vdso_test_getcpu TEST_GEN_PROGS += vdso_test_abi -TEST_GEN_PROGS += vdso_test_clock_getres ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += vdso_standalone_test_x86 endif @@ -29,7 +28,6 @@ CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tab $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c -$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers $(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS) diff --git a/tools/testing/selftests/vDSO/vdso_call.h b/tools/testing/selftests/vDSO/vdso_call.h index bb237d771051..e7205584cbdc 100644 --- a/tools/testing/selftests/vDSO/vdso_call.h +++ b/tools/testing/selftests/vDSO/vdso_call.h @@ -44,7 +44,6 @@ register long _r6 asm ("r6"); \ register long _r7 asm ("r7"); \ register long _r8 asm ("r8"); \ - register long _rval asm ("r3"); \ \ LOADARGS_##nr(fn, args); \ \ @@ -54,13 +53,13 @@ " bns+ 1f\n" \ " neg 3, 3\n" \ "1:" \ - : "+r" (_r0), "=r" (_r3), "+r" (_r4), "+r" (_r5), \ + : "+r" (_r0), "+r" (_r3), "+r" (_r4), "+r" (_r5), \ "+r" (_r6), "+r" (_r7), "+r" (_r8) \ - : "r" (_rval) \ + : \ : "r9", "r10", "r11", "r12", "cr0", "cr1", "cr5", \ "cr6", "cr7", "xer", "lr", "ctr", "memory" \ ); \ - _rval; \ + _r3; \ }) #else diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index a54424e2336f..238d609a457a 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -26,24 +26,31 @@ static const char *version; static const char **name; +/* The same as struct __kernel_timespec */ +struct vdso_timespec64 { + uint64_t tv_sec; + uint64_t tv_nsec; +}; + typedef long (*vdso_gettimeofday_t)(struct timeval *tv, struct timezone *tz); typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts); +typedef long (*vdso_clock_gettime64_t)(clockid_t clk_id, struct vdso_timespec64 *ts); typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts); typedef time_t (*vdso_time_t)(time_t *t); -const char *vdso_clock_name[12] = { - "CLOCK_REALTIME", - "CLOCK_MONOTONIC", - "CLOCK_PROCESS_CPUTIME_ID", - "CLOCK_THREAD_CPUTIME_ID", - "CLOCK_MONOTONIC_RAW", - "CLOCK_REALTIME_COARSE", - "CLOCK_MONOTONIC_COARSE", - "CLOCK_BOOTTIME", - "CLOCK_REALTIME_ALARM", - "CLOCK_BOOTTIME_ALARM", - "CLOCK_SGI_CYCLE", - "CLOCK_TAI", +static const char * const vdso_clock_name[] = { + [CLOCK_REALTIME] = "CLOCK_REALTIME", + [CLOCK_MONOTONIC] = "CLOCK_MONOTONIC", + [CLOCK_PROCESS_CPUTIME_ID] = "CLOCK_PROCESS_CPUTIME_ID", + [CLOCK_THREAD_CPUTIME_ID] = "CLOCK_THREAD_CPUTIME_ID", + [CLOCK_MONOTONIC_RAW] = "CLOCK_MONOTONIC_RAW", + [CLOCK_REALTIME_COARSE] = "CLOCK_REALTIME_COARSE", + [CLOCK_MONOTONIC_COARSE] = "CLOCK_MONOTONIC_COARSE", + [CLOCK_BOOTTIME] = "CLOCK_BOOTTIME", + [CLOCK_REALTIME_ALARM] = "CLOCK_REALTIME_ALARM", + [CLOCK_BOOTTIME_ALARM] = "CLOCK_BOOTTIME_ALARM", + [10 /* CLOCK_SGI_CYCLE */] = "CLOCK_SGI_CYCLE", + [CLOCK_TAI] = "CLOCK_TAI", }; static void vdso_test_gettimeofday(void) @@ -70,6 +77,33 @@ static void vdso_test_gettimeofday(void) } } +static void vdso_test_clock_gettime64(clockid_t clk_id) +{ + /* Find clock_gettime64. */ + vdso_clock_gettime64_t vdso_clock_gettime64 = + (vdso_clock_gettime64_t)vdso_sym(version, name[5]); + + if (!vdso_clock_gettime64) { + ksft_print_msg("Couldn't find %s\n", name[5]); + ksft_test_result_skip("%s %s\n", name[5], + vdso_clock_name[clk_id]); + return; + } + + struct vdso_timespec64 ts; + long ret = VDSO_CALL(vdso_clock_gettime64, 2, clk_id, &ts); + + if (ret == 0) { + ksft_print_msg("The time is %lld.%06lld\n", + (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_test_result_pass("%s %s\n", name[5], + vdso_clock_name[clk_id]); + } else { + ksft_test_result_fail("%s %s\n", name[5], + vdso_clock_name[clk_id]); + } +} + static void vdso_test_clock_gettime(clockid_t clk_id) { /* Find clock_gettime. */ @@ -171,23 +205,23 @@ static inline void vdso_test_clock(clockid_t clock_id) ksft_print_msg("clock_id: %s\n", vdso_clock_name[clock_id]); vdso_test_clock_gettime(clock_id); + vdso_test_clock_gettime64(clock_id); vdso_test_clock_getres(clock_id); } -#define VDSO_TEST_PLAN 16 +#define VDSO_TEST_PLAN 29 int main(int argc, char **argv) { unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); ksft_print_header(); - ksft_set_plan(VDSO_TEST_PLAN); - if (!sysinfo_ehdr) { - ksft_print_msg("AT_SYSINFO_EHDR is not present!\n"); - return KSFT_SKIP; - } + if (!sysinfo_ehdr) + ksft_exit_skip("AT_SYSINFO_EHDR is not present!\n"); + + ksft_set_plan(VDSO_TEST_PLAN); version = versions[VDSO_VERSION]; name = (const char **)&names[VDSO_NAMES]; @@ -198,40 +232,17 @@ int main(int argc, char **argv) vdso_test_gettimeofday(); -#if _POSIX_TIMERS > 0 - -#ifdef CLOCK_REALTIME vdso_test_clock(CLOCK_REALTIME); -#endif - -#ifdef CLOCK_BOOTTIME vdso_test_clock(CLOCK_BOOTTIME); -#endif - -#ifdef CLOCK_TAI vdso_test_clock(CLOCK_TAI); -#endif - -#ifdef CLOCK_REALTIME_COARSE vdso_test_clock(CLOCK_REALTIME_COARSE); -#endif - -#ifdef CLOCK_MONOTONIC vdso_test_clock(CLOCK_MONOTONIC); -#endif - -#ifdef CLOCK_MONOTONIC_RAW vdso_test_clock(CLOCK_MONOTONIC_RAW); -#endif - -#ifdef CLOCK_MONOTONIC_COARSE vdso_test_clock(CLOCK_MONOTONIC_COARSE); -#endif - -#endif + vdso_test_clock(CLOCK_PROCESS_CPUTIME_ID); + vdso_test_clock(CLOCK_THREAD_CPUTIME_ID); vdso_test_time(); - ksft_print_cnts(); - return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; + ksft_finished(); } diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c deleted file mode 100644 index b5d5f59f725a..000000000000 --- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * vdso_clock_getres.c: Sample code to test clock_getres. - * Copyright (c) 2019 Arm Ltd. - * - * Compile with: - * gcc -std=gnu99 vdso_clock_getres.c - * - * Tested on ARM, ARM64, MIPS32, x86 (32-bit and 64-bit), - * Power (32-bit and 64-bit), S390x (32-bit and 64-bit). - * Might work on other architectures. - */ - -#define _GNU_SOURCE -#include <elf.h> -#include <fcntl.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <time.h> -#include <sys/auxv.h> -#include <sys/mman.h> -#include <sys/time.h> -#include <unistd.h> -#include <sys/syscall.h> - -#include "../kselftest.h" - -static long syscall_clock_getres(clockid_t _clkid, struct timespec *_ts) -{ - long ret; - - ret = syscall(SYS_clock_getres, _clkid, _ts); - - return ret; -} - -const char *vdso_clock_name[12] = { - "CLOCK_REALTIME", - "CLOCK_MONOTONIC", - "CLOCK_PROCESS_CPUTIME_ID", - "CLOCK_THREAD_CPUTIME_ID", - "CLOCK_MONOTONIC_RAW", - "CLOCK_REALTIME_COARSE", - "CLOCK_MONOTONIC_COARSE", - "CLOCK_BOOTTIME", - "CLOCK_REALTIME_ALARM", - "CLOCK_BOOTTIME_ALARM", - "CLOCK_SGI_CYCLE", - "CLOCK_TAI", -}; - -/* - * This function calls clock_getres in vdso and by system call - * with different values for clock_id. - * - * Example of output: - * - * clock_id: CLOCK_REALTIME [PASS] - * clock_id: CLOCK_BOOTTIME [PASS] - * clock_id: CLOCK_TAI [PASS] - * clock_id: CLOCK_REALTIME_COARSE [PASS] - * clock_id: CLOCK_MONOTONIC [PASS] - * clock_id: CLOCK_MONOTONIC_RAW [PASS] - * clock_id: CLOCK_MONOTONIC_COARSE [PASS] - */ -static inline int vdso_test_clock(unsigned int clock_id) -{ - struct timespec x, y; - - printf("clock_id: %s", vdso_clock_name[clock_id]); - clock_getres(clock_id, &x); - syscall_clock_getres(clock_id, &y); - - if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec)) { - printf(" [FAIL]\n"); - return KSFT_FAIL; - } - - printf(" [PASS]\n"); - return KSFT_PASS; -} - -int main(int argc, char **argv) -{ - int ret = 0; - -#if _POSIX_TIMERS > 0 - -#ifdef CLOCK_REALTIME - ret += vdso_test_clock(CLOCK_REALTIME); -#endif - -#ifdef CLOCK_BOOTTIME - ret += vdso_test_clock(CLOCK_BOOTTIME); -#endif - -#ifdef CLOCK_TAI - ret += vdso_test_clock(CLOCK_TAI); -#endif - -#ifdef CLOCK_REALTIME_COARSE - ret += vdso_test_clock(CLOCK_REALTIME_COARSE); -#endif - -#ifdef CLOCK_MONOTONIC - ret += vdso_test_clock(CLOCK_MONOTONIC); -#endif - -#ifdef CLOCK_MONOTONIC_RAW - ret += vdso_test_clock(CLOCK_MONOTONIC_RAW); -#endif - -#ifdef CLOCK_MONOTONIC_COARSE - ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE); -#endif - -#endif - if (ret > 0) - return KSFT_FAIL; - - return KSFT_PASS; -} diff --git a/tools/testing/selftests/vfio/.gitignore b/tools/testing/selftests/vfio/.gitignore new file mode 100644 index 000000000000..7fadc19d3bca --- /dev/null +++ b/tools/testing/selftests/vfio/.gitignore @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +* +!/**/ +!*.c +!*.h +!*.S +!*.sh +!*.mk +!.gitignore +!Makefile diff --git a/tools/testing/selftests/vfio/Makefile b/tools/testing/selftests/vfio/Makefile new file mode 100644 index 000000000000..324ba0175a33 --- /dev/null +++ b/tools/testing/selftests/vfio/Makefile @@ -0,0 +1,21 @@ +CFLAGS = $(KHDR_INCLUDES) +TEST_GEN_PROGS += vfio_dma_mapping_test +TEST_GEN_PROGS += vfio_iommufd_setup_test +TEST_GEN_PROGS += vfio_pci_device_test +TEST_GEN_PROGS += vfio_pci_driver_test +TEST_PROGS_EXTENDED := run.sh +include ../lib.mk +include lib/libvfio.mk + +CFLAGS += -I$(top_srcdir)/tools/include +CFLAGS += -MD +CFLAGS += $(EXTRA_CFLAGS) + +$(TEST_GEN_PROGS): %: %.o $(LIBVFIO_O) + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< $(LIBVFIO_O) $(LDLIBS) -o $@ + +TEST_GEN_PROGS_O = $(patsubst %, %.o, $(TEST_GEN_PROGS)) +TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_PROGS_O) $(LIBVFIO_O)) +-include $(TEST_DEP_FILES) + +EXTRA_CLEAN += $(TEST_GEN_PROGS_O) $(TEST_DEP_FILES) diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c new file mode 100644 index 000000000000..0ca2cbc2a316 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c @@ -0,0 +1,416 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stdint.h> +#include <unistd.h> + +#include <linux/bits.h> +#include <linux/errno.h> +#include <linux/idxd.h> +#include <linux/io.h> +#include <linux/pci_ids.h> +#include <linux/sizes.h> + +#include <vfio_util.h> + +#include "registers.h" + +/* Vectors 1+ are available for work queue completion interrupts. */ +#define MSIX_VECTOR 1 + +struct dsa_state { + /* Descriptors for copy and batch operations. */ + struct dsa_hw_desc batch[32]; + struct dsa_hw_desc copy[1024]; + + /* Completion records for copy and batch operations. */ + struct dsa_completion_record copy_completion; + struct dsa_completion_record batch_completion; + + /* Cached device registers (and derived data) for easy access */ + union gen_cap_reg gen_cap; + union wq_cap_reg wq_cap; + union group_cap_reg group_cap; + union engine_cap_reg engine_cap; + union offsets_reg table_offsets; + void *wqcfg_table; + void *grpcfg_table; + u64 max_batches; + u64 max_copies_per_batch; + + /* The number of ongoing memcpy operations. */ + u64 memcpy_count; + + /* Buffers used by dsa_send_msi() to generate an interrupt */ + u64 send_msi_src; + u64 send_msi_dst; +}; + +static inline struct dsa_state *to_dsa_state(struct vfio_pci_device *device) +{ + return device->driver.region.vaddr; +} + +static bool dsa_int_handle_request_required(struct vfio_pci_device *device) +{ + void *bar0 = device->bars[0].vaddr; + union gen_cap_reg gen_cap; + u32 cmd_cap; + + gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET); + if (!gen_cap.cmd_cap) + return false; + + cmd_cap = readl(bar0 + IDXD_CMDCAP_OFFSET); + return (cmd_cap >> IDXD_CMD_REQUEST_INT_HANDLE) & 1; +} + +static int dsa_probe(struct vfio_pci_device *device) +{ + if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_DSA_SPR0)) + return -EINVAL; + + if (dsa_int_handle_request_required(device)) { + printf("Device requires requesting interrupt handles\n"); + return -EINVAL; + } + + return 0; +} + +static void dsa_check_sw_err(struct vfio_pci_device *device) +{ + void *reg = device->bars[0].vaddr + IDXD_SWERR_OFFSET; + union sw_err_reg err = {}; + int i; + + for (i = 0; i < ARRAY_SIZE(err.bits); i++) { + err.bits[i] = readq(reg + offsetof(union sw_err_reg, bits[i])); + + /* No errors */ + if (i == 0 && !err.valid) + return; + } + + fprintf(stderr, "SWERR: 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n", + err.bits[0], err.bits[1], err.bits[2], err.bits[3]); + + fprintf(stderr, " valid: 0x%x\n", err.valid); + fprintf(stderr, " overflow: 0x%x\n", err.overflow); + fprintf(stderr, " desc_valid: 0x%x\n", err.desc_valid); + fprintf(stderr, " wq_idx_valid: 0x%x\n", err.wq_idx_valid); + fprintf(stderr, " batch: 0x%x\n", err.batch); + fprintf(stderr, " fault_rw: 0x%x\n", err.fault_rw); + fprintf(stderr, " priv: 0x%x\n", err.priv); + fprintf(stderr, " error: 0x%x\n", err.error); + fprintf(stderr, " wq_idx: 0x%x\n", err.wq_idx); + fprintf(stderr, " operation: 0x%x\n", err.operation); + fprintf(stderr, " pasid: 0x%x\n", err.pasid); + fprintf(stderr, " batch_idx: 0x%x\n", err.batch_idx); + fprintf(stderr, " invalid_flags: 0x%x\n", err.invalid_flags); + fprintf(stderr, " fault_addr: 0x%lx\n", err.fault_addr); + + VFIO_FAIL("Software Error Detected!\n"); +} + +static void dsa_command(struct vfio_pci_device *device, u32 cmd) +{ + union idxd_command_reg cmd_reg = { .cmd = cmd }; + u32 sleep_ms = 1, attempts = 5000 / sleep_ms; + void *bar0 = device->bars[0].vaddr; + u32 status; + u8 err; + + writel(cmd_reg.bits, bar0 + IDXD_CMD_OFFSET); + + for (;;) { + dsa_check_sw_err(device); + + status = readl(bar0 + IDXD_CMDSTS_OFFSET); + if (!(status & IDXD_CMDSTS_ACTIVE)) + break; + + VFIO_ASSERT_GT(--attempts, 0); + usleep(sleep_ms * 1000); + } + + err = status & IDXD_CMDSTS_ERR_MASK; + VFIO_ASSERT_EQ(err, 0, "Error issuing command 0x%x: 0x%x\n", cmd, err); +} + +static void dsa_wq_init(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + union wq_cap_reg wq_cap = dsa->wq_cap; + union wqcfg wqcfg; + u64 wqcfg_size; + int i; + + VFIO_ASSERT_GT((u32)wq_cap.num_wqs, 0); + + wqcfg = (union wqcfg) { + .wq_size = wq_cap.total_wq_size, + .mode = 1, + .priority = 1, + /* + * Disable Address Translation Service (if enabled) so that VFIO + * selftests using this driver can generate I/O page faults. + */ + .wq_ats_disable = wq_cap.wq_ats_support, + .max_xfer_shift = dsa->gen_cap.max_xfer_shift, + .max_batch_shift = dsa->gen_cap.max_batch_shift, + .op_config[0] = BIT(DSA_OPCODE_MEMMOVE) | BIT(DSA_OPCODE_BATCH), + }; + + wqcfg_size = 1UL << (wq_cap.wqcfg_size + IDXD_WQCFG_MIN); + + for (i = 0; i < wqcfg_size / sizeof(wqcfg.bits[0]); i++) + writel(wqcfg.bits[i], dsa->wqcfg_table + offsetof(union wqcfg, bits[i])); +} + +static void dsa_group_init(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + union group_cap_reg group_cap = dsa->group_cap; + union engine_cap_reg engine_cap = dsa->engine_cap; + + VFIO_ASSERT_GT((u32)group_cap.num_groups, 0); + VFIO_ASSERT_GT((u32)engine_cap.num_engines, 0); + + /* Assign work queue 0 and engine 0 to group 0 */ + writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, wqs[0])); + writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, engines)); +} + +static void dsa_register_cache_init(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + void *bar0 = device->bars[0].vaddr; + + dsa->gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET); + dsa->wq_cap.bits = readq(bar0 + IDXD_WQCAP_OFFSET); + dsa->group_cap.bits = readq(bar0 + IDXD_GRPCAP_OFFSET); + dsa->engine_cap.bits = readq(bar0 + IDXD_ENGCAP_OFFSET); + + dsa->table_offsets.bits[0] = readq(bar0 + IDXD_TABLE_OFFSET); + dsa->table_offsets.bits[1] = readq(bar0 + IDXD_TABLE_OFFSET + 8); + + dsa->wqcfg_table = bar0 + dsa->table_offsets.wqcfg * IDXD_TABLE_MULT; + dsa->grpcfg_table = bar0 + dsa->table_offsets.grpcfg * IDXD_TABLE_MULT; + + dsa->max_batches = 1U << (dsa->wq_cap.total_wq_size + IDXD_WQCFG_MIN); + dsa->max_batches = min(dsa->max_batches, ARRAY_SIZE(dsa->batch)); + + dsa->max_copies_per_batch = 1UL << dsa->gen_cap.max_batch_shift; + dsa->max_copies_per_batch = min(dsa->max_copies_per_batch, ARRAY_SIZE(dsa->copy)); +} + +static void dsa_init(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + + VFIO_ASSERT_GE(device->driver.region.size, sizeof(*dsa)); + + vfio_pci_config_writew(device, PCI_COMMAND, + PCI_COMMAND_MEMORY | + PCI_COMMAND_MASTER | + PCI_COMMAND_INTX_DISABLE); + + dsa_command(device, IDXD_CMD_RESET_DEVICE); + + dsa_register_cache_init(device); + dsa_wq_init(device); + dsa_group_init(device); + + dsa_command(device, IDXD_CMD_ENABLE_DEVICE); + dsa_command(device, IDXD_CMD_ENABLE_WQ); + + vfio_pci_msix_enable(device, MSIX_VECTOR, 1); + + device->driver.max_memcpy_count = + dsa->max_batches * dsa->max_copies_per_batch; + device->driver.max_memcpy_size = 1UL << dsa->gen_cap.max_xfer_shift; + device->driver.msi = MSIX_VECTOR; +} + +static void dsa_remove(struct vfio_pci_device *device) +{ + dsa_command(device, IDXD_CMD_RESET_DEVICE); + vfio_pci_msix_disable(device); +} + +static int dsa_completion_wait(struct vfio_pci_device *device, + struct dsa_completion_record *completion) +{ + u8 status; + + for (;;) { + dsa_check_sw_err(device); + + status = READ_ONCE(completion->status); + if (status) + break; + + usleep(1000); + } + + if (status == DSA_COMP_SUCCESS) + return 0; + + printf("Error detected during memcpy operation: 0x%x\n", status); + return -1; +} + +static void dsa_copy_desc_init(struct vfio_pci_device *device, + struct dsa_hw_desc *desc, + iova_t src, iova_t dst, u64 size, + bool interrupt) +{ + struct dsa_state *dsa = to_dsa_state(device); + u16 flags; + + flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR; + + if (interrupt) + flags |= IDXD_OP_FLAG_RCI; + + *desc = (struct dsa_hw_desc) { + .opcode = DSA_OPCODE_MEMMOVE, + .flags = flags, + .priv = 1, + .src_addr = src, + .dst_addr = dst, + .xfer_size = size, + .completion_addr = to_iova(device, &dsa->copy_completion), + .int_handle = interrupt ? MSIX_VECTOR : 0, + }; +} + +static void dsa_batch_desc_init(struct vfio_pci_device *device, + struct dsa_hw_desc *desc, + u64 count) +{ + struct dsa_state *dsa = to_dsa_state(device); + + *desc = (struct dsa_hw_desc) { + .opcode = DSA_OPCODE_BATCH, + .flags = IDXD_OP_FLAG_CRAV, + .priv = 1, + .completion_addr = to_iova(device, &dsa->batch_completion), + .desc_list_addr = to_iova(device, &dsa->copy[0]), + .desc_count = count, + }; +} + +static void dsa_desc_write(struct vfio_pci_device *device, struct dsa_hw_desc *desc) +{ + /* Write the contents (not address) of the 64-byte descriptor to the device. */ + iosubmit_cmds512(device->bars[2].vaddr, desc, 1); +} + +static void dsa_memcpy_one(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, bool interrupt) +{ + struct dsa_state *dsa = to_dsa_state(device); + + memset(&dsa->copy_completion, 0, sizeof(dsa->copy_completion)); + + dsa_copy_desc_init(device, &dsa->copy[0], src, dst, size, interrupt); + dsa_desc_write(device, &dsa->copy[0]); +} + +static void dsa_memcpy_batch(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, u64 count) +{ + struct dsa_state *dsa = to_dsa_state(device); + int i; + + memset(&dsa->batch_completion, 0, sizeof(dsa->batch_completion)); + + for (i = 0; i < ARRAY_SIZE(dsa->copy); i++) { + struct dsa_hw_desc *copy_desc = &dsa->copy[i]; + + dsa_copy_desc_init(device, copy_desc, src, dst, size, false); + + /* Don't request completions for individual copies. */ + copy_desc->flags &= ~IDXD_OP_FLAG_RCR; + } + + for (i = 0; i < ARRAY_SIZE(dsa->batch) && count; i++) { + struct dsa_hw_desc *batch_desc = &dsa->batch[i]; + int nr_copies; + + nr_copies = min(count, dsa->max_copies_per_batch); + count -= nr_copies; + + /* + * Batches must have at least 2 copies, so handle the case where + * there is exactly 1 copy left by doing one less copy in this + * batch and then 2 in the next. + */ + if (count == 1) { + nr_copies--; + count++; + } + + dsa_batch_desc_init(device, batch_desc, nr_copies); + + /* Request a completion for the last batch. */ + if (!count) + batch_desc->flags |= IDXD_OP_FLAG_RCR; + + dsa_desc_write(device, batch_desc); + } + + VFIO_ASSERT_EQ(count, 0, "Failed to start %lu copies.\n", count); +} + +static void dsa_memcpy_start(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, u64 count) +{ + struct dsa_state *dsa = to_dsa_state(device); + + /* DSA devices require at least 2 copies per batch. */ + if (count == 1) + dsa_memcpy_one(device, src, dst, size, false); + else + dsa_memcpy_batch(device, src, dst, size, count); + + dsa->memcpy_count = count; +} + +static int dsa_memcpy_wait(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + int r; + + if (dsa->memcpy_count == 1) + r = dsa_completion_wait(device, &dsa->copy_completion); + else + r = dsa_completion_wait(device, &dsa->batch_completion); + + dsa->memcpy_count = 0; + + return r; +} + +static void dsa_send_msi(struct vfio_pci_device *device) +{ + struct dsa_state *dsa = to_dsa_state(device); + + dsa_memcpy_one(device, + to_iova(device, &dsa->send_msi_src), + to_iova(device, &dsa->send_msi_dst), + sizeof(dsa->send_msi_src), true); + + VFIO_ASSERT_EQ(dsa_completion_wait(device, &dsa->copy_completion), 0); +} + +const struct vfio_pci_driver_ops dsa_ops = { + .name = "dsa", + .probe = dsa_probe, + .init = dsa_init, + .remove = dsa_remove, + .memcpy_start = dsa_memcpy_start, + .memcpy_wait = dsa_memcpy_wait, + .send_msi = dsa_send_msi, +}; diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h new file mode 120000 index 000000000000..bde657c3c2af --- /dev/null +++ b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h @@ -0,0 +1 @@ +../../../../../../../drivers/dma/idxd/registers.h
\ No newline at end of file diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h new file mode 120000 index 000000000000..8ab52ddd4458 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h @@ -0,0 +1 @@ +../../../../../../../drivers/dma/ioat/hw.h
\ No newline at end of file diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c new file mode 100644 index 000000000000..c3b91d9b1f59 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stdint.h> +#include <unistd.h> + +#include <linux/errno.h> +#include <linux/io.h> +#include <linux/pci_ids.h> +#include <linux/sizes.h> + +#include <vfio_util.h> + +#include "hw.h" +#include "registers.h" + +#define IOAT_DMACOUNT_MAX UINT16_MAX + +struct ioat_state { + /* Single descriptor used to issue DMA memcpy operations */ + struct ioat_dma_descriptor desc; + + /* Copy buffers used by ioat_send_msi() to generate an interrupt. */ + u64 send_msi_src; + u64 send_msi_dst; +}; + +static inline struct ioat_state *to_ioat_state(struct vfio_pci_device *device) +{ + return device->driver.region.vaddr; +} + +static inline void *ioat_channel_registers(struct vfio_pci_device *device) +{ + return device->bars[0].vaddr + IOAT_CHANNEL_MMIO_SIZE; +} + +static int ioat_probe(struct vfio_pci_device *device) +{ + u8 version; + int r; + + if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_IOAT_SKX)) + return -EINVAL; + + VFIO_ASSERT_NOT_NULL(device->bars[0].vaddr); + + version = readb(device->bars[0].vaddr + IOAT_VER_OFFSET); + switch (version) { + case IOAT_VER_3_2: + case IOAT_VER_3_3: + r = 0; + break; + default: + printf("ioat: Unsupported version: 0x%x\n", version); + r = -EINVAL; + } + return r; +} + +static u64 ioat_channel_status(void *bar) +{ + return readq(bar + IOAT_CHANSTS_OFFSET) & IOAT_CHANSTS_STATUS; +} + +static void ioat_clear_errors(struct vfio_pci_device *device) +{ + void *registers = ioat_channel_registers(device); + u32 errors; + + errors = vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET); + vfio_pci_config_writel(device, IOAT_PCI_CHANERR_INT_OFFSET, errors); + + errors = vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET); + vfio_pci_config_writel(device, IOAT_PCI_CHANERR_INT_OFFSET, errors); + + errors = readl(registers + IOAT_CHANERR_OFFSET); + writel(errors, registers + IOAT_CHANERR_OFFSET); +} + +static void ioat_reset(struct vfio_pci_device *device) +{ + void *registers = ioat_channel_registers(device); + u32 sleep_ms = 1, attempts = 5000 / sleep_ms; + u8 chancmd; + + ioat_clear_errors(device); + + writeb(IOAT_CHANCMD_RESET, registers + IOAT2_CHANCMD_OFFSET); + + for (;;) { + chancmd = readb(registers + IOAT2_CHANCMD_OFFSET); + if (!(chancmd & IOAT_CHANCMD_RESET)) + break; + + VFIO_ASSERT_GT(--attempts, 0); + usleep(sleep_ms * 1000); + } + + VFIO_ASSERT_EQ(ioat_channel_status(registers), IOAT_CHANSTS_HALTED); +} + +static void ioat_init(struct vfio_pci_device *device) +{ + struct ioat_state *ioat = to_ioat_state(device); + u8 intrctrl; + + VFIO_ASSERT_GE(device->driver.region.size, sizeof(*ioat)); + + vfio_pci_config_writew(device, PCI_COMMAND, + PCI_COMMAND_MEMORY | + PCI_COMMAND_MASTER | + PCI_COMMAND_INTX_DISABLE); + + ioat_reset(device); + + /* Enable the use of MXI-x interrupts for channel interrupts. */ + intrctrl = IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + writeb(intrctrl, device->bars[0].vaddr + IOAT_INTRCTRL_OFFSET); + + vfio_pci_msix_enable(device, 0, device->msix_info.count); + + device->driver.msi = 0; + device->driver.max_memcpy_size = + 1UL << readb(device->bars[0].vaddr + IOAT_XFERCAP_OFFSET); + device->driver.max_memcpy_count = IOAT_DMACOUNT_MAX; +} + +static void ioat_remove(struct vfio_pci_device *device) +{ + ioat_reset(device); + vfio_pci_msix_disable(device); +} + +static void ioat_handle_error(struct vfio_pci_device *device) +{ + void *registers = ioat_channel_registers(device); + + printf("Error detected during memcpy operation!\n" + " CHANERR: 0x%x\n" + " CHANERR_INT: 0x%x\n" + " DMAUNCERRSTS: 0x%x\n", + readl(registers + IOAT_CHANERR_OFFSET), + vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET), + vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET)); + + ioat_reset(device); +} + +static int ioat_memcpy_wait(struct vfio_pci_device *device) +{ + void *registers = ioat_channel_registers(device); + u64 status; + int r = 0; + + /* Wait until all operations complete. */ + for (;;) { + status = ioat_channel_status(registers); + if (status == IOAT_CHANSTS_DONE) + break; + + if (status == IOAT_CHANSTS_HALTED) { + ioat_handle_error(device); + return -1; + } + } + + /* Put the channel into the SUSPENDED state. */ + writeb(IOAT_CHANCMD_SUSPEND, registers + IOAT2_CHANCMD_OFFSET); + for (;;) { + status = ioat_channel_status(registers); + if (status == IOAT_CHANSTS_SUSPENDED) + break; + } + + return r; +} + +static void __ioat_memcpy_start(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, + u16 count, bool interrupt) +{ + void *registers = ioat_channel_registers(device); + struct ioat_state *ioat = to_ioat_state(device); + u64 desc_iova; + u16 chanctrl; + + desc_iova = to_iova(device, &ioat->desc); + ioat->desc = (struct ioat_dma_descriptor) { + .ctl_f.op = IOAT_OP_COPY, + .ctl_f.int_en = interrupt, + .src_addr = src, + .dst_addr = dst, + .size = size, + .next = desc_iova, + }; + + /* Tell the device the address of the descriptor. */ + writeq(desc_iova, registers + IOAT2_CHAINADDR_OFFSET); + + /* (Re)Enable the channel interrupt and abort on any errors */ + chanctrl = IOAT_CHANCTRL_INT_REARM | IOAT_CHANCTRL_ANY_ERR_ABORT_EN; + writew(chanctrl, registers + IOAT_CHANCTRL_OFFSET); + + /* Kick off @count DMA copy operation(s). */ + writew(count, registers + IOAT_CHAN_DMACOUNT_OFFSET); +} + +static void ioat_memcpy_start(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, + u64 count) +{ + __ioat_memcpy_start(device, src, dst, size, count, false); +} + +static void ioat_send_msi(struct vfio_pci_device *device) +{ + struct ioat_state *ioat = to_ioat_state(device); + + __ioat_memcpy_start(device, + to_iova(device, &ioat->send_msi_src), + to_iova(device, &ioat->send_msi_dst), + sizeof(ioat->send_msi_src), 1, true); + + VFIO_ASSERT_EQ(ioat_memcpy_wait(device), 0); +} + +const struct vfio_pci_driver_ops ioat_ops = { + .name = "ioat", + .probe = ioat_probe, + .init = ioat_init, + .remove = ioat_remove, + .memcpy_start = ioat_memcpy_start, + .memcpy_wait = ioat_memcpy_wait, + .send_msi = ioat_send_msi, +}; diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h new file mode 120000 index 000000000000..0b809cfd8fe6 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h @@ -0,0 +1 @@ +../../../../../../../drivers/dma/ioat/registers.h
\ No newline at end of file diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h new file mode 100644 index 000000000000..ed31606e01b7 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h @@ -0,0 +1,295 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H +#define SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H + +#include <fcntl.h> +#include <string.h> +#include <linux/vfio.h> +#include <linux/list.h> +#include <linux/pci_regs.h> + +#include "../../../kselftest.h" + +#define VFIO_LOG_AND_EXIT(...) do { \ + fprintf(stderr, " " __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + exit(KSFT_FAIL); \ +} while (0) + +#define VFIO_ASSERT_OP(_lhs, _rhs, _op, ...) do { \ + typeof(_lhs) __lhs = (_lhs); \ + typeof(_rhs) __rhs = (_rhs); \ + \ + if (__lhs _op __rhs) \ + break; \ + \ + fprintf(stderr, "%s:%u: Assertion Failure\n\n", __FILE__, __LINE__); \ + fprintf(stderr, " Expression: " #_lhs " " #_op " " #_rhs "\n"); \ + fprintf(stderr, " Observed: %#lx %s %#lx\n", \ + (u64)__lhs, #_op, (u64)__rhs); \ + fprintf(stderr, " [errno: %d - %s]\n", errno, strerror(errno)); \ + VFIO_LOG_AND_EXIT(__VA_ARGS__); \ +} while (0) + +#define VFIO_ASSERT_EQ(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, ==, ##__VA_ARGS__) +#define VFIO_ASSERT_NE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, !=, ##__VA_ARGS__) +#define VFIO_ASSERT_LT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <, ##__VA_ARGS__) +#define VFIO_ASSERT_LE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <=, ##__VA_ARGS__) +#define VFIO_ASSERT_GT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >, ##__VA_ARGS__) +#define VFIO_ASSERT_GE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >=, ##__VA_ARGS__) +#define VFIO_ASSERT_TRUE(_a, ...) VFIO_ASSERT_NE(false, (_a), ##__VA_ARGS__) +#define VFIO_ASSERT_FALSE(_a, ...) VFIO_ASSERT_EQ(false, (_a), ##__VA_ARGS__) +#define VFIO_ASSERT_NULL(_a, ...) VFIO_ASSERT_EQ(NULL, _a, ##__VA_ARGS__) +#define VFIO_ASSERT_NOT_NULL(_a, ...) VFIO_ASSERT_NE(NULL, _a, ##__VA_ARGS__) + +#define VFIO_FAIL(_fmt, ...) do { \ + fprintf(stderr, "%s:%u: FAIL\n\n", __FILE__, __LINE__); \ + VFIO_LOG_AND_EXIT(_fmt, ##__VA_ARGS__); \ +} while (0) + +struct vfio_iommu_mode { + const char *name; + const char *container_path; + unsigned long iommu_type; +}; + +/* + * Generator for VFIO selftests fixture variants that replicate across all + * possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE() + * which should then use FIXTURE_VARIANT_ADD() to create the variant. + */ +#define FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(...) \ +FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1_iommu, ##__VA_ARGS__); \ +FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1v2_iommu, ##__VA_ARGS__); \ +FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1, ##__VA_ARGS__); \ +FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1v2, ##__VA_ARGS__); \ +FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd, ##__VA_ARGS__) + +struct vfio_pci_bar { + struct vfio_region_info info; + void *vaddr; +}; + +typedef u64 iova_t; + +#define INVALID_IOVA UINT64_MAX + +struct vfio_dma_region { + struct list_head link; + void *vaddr; + iova_t iova; + u64 size; +}; + +struct vfio_pci_device; + +struct vfio_pci_driver_ops { + const char *name; + + /** + * @probe() - Check if the driver supports the given device. + * + * Return: 0 on success, non-0 on failure. + */ + int (*probe)(struct vfio_pci_device *device); + + /** + * @init() - Initialize the driver for @device. + * + * Must be called after device->driver.region has been initialized. + */ + void (*init)(struct vfio_pci_device *device); + + /** + * remove() - Deinitialize the driver for @device. + */ + void (*remove)(struct vfio_pci_device *device); + + /** + * memcpy_start() - Kick off @count repeated memcpy operations from + * [@src, @src + @size) to [@dst, @dst + @size). + * + * Guarantees: + * - The device will attempt DMA reads on [src, src + size). + * - The device will attempt DMA writes on [dst, dst + size). + * - The device will not generate any interrupts. + * + * memcpy_start() returns immediately, it does not wait for the + * copies to complete. + */ + void (*memcpy_start)(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, u64 count); + + /** + * memcpy_wait() - Wait until the memcpy operations started by + * memcpy_start() have finished. + * + * Guarantees: + * - All in-flight DMAs initiated by memcpy_start() are fully complete + * before memcpy_wait() returns. + * + * Returns non-0 if the driver detects that an error occurred during the + * memcpy, 0 otherwise. + */ + int (*memcpy_wait)(struct vfio_pci_device *device); + + /** + * send_msi() - Make the device send the MSI device->driver.msi. + * + * Guarantees: + * - The device will send the MSI once. + */ + void (*send_msi)(struct vfio_pci_device *device); +}; + +struct vfio_pci_driver { + const struct vfio_pci_driver_ops *ops; + bool initialized; + bool memcpy_in_progress; + + /* Region to be used by the driver (e.g. for in-memory descriptors) */ + struct vfio_dma_region region; + + /* The maximum size that can be passed to memcpy_start(). */ + u64 max_memcpy_size; + + /* The maximum count that can be passed to memcpy_start(). */ + u64 max_memcpy_count; + + /* The MSI vector the device will signal in ops->send_msi(). */ + int msi; +}; + +struct vfio_pci_device { + int fd; + + const struct vfio_iommu_mode *iommu_mode; + int group_fd; + int container_fd; + + int iommufd; + u32 ioas_id; + + struct vfio_device_info info; + struct vfio_region_info config_space; + struct vfio_pci_bar bars[PCI_STD_NUM_BARS]; + + struct vfio_irq_info msi_info; + struct vfio_irq_info msix_info; + + struct list_head dma_regions; + + /* eventfds for MSI and MSI-x interrupts */ + int msi_eventfds[PCI_MSIX_FLAGS_QSIZE + 1]; + + struct vfio_pci_driver driver; +}; + +/* + * Return the BDF string of the device that the test should use. + * + * If a BDF string is provided by the user on the command line (as the last + * element of argv[]), then this function will return that and decrement argc + * by 1. + * + * Otherwise this function will attempt to use the environment variable + * $VFIO_SELFTESTS_BDF. + * + * If BDF cannot be determined then the test will exit with KSFT_SKIP. + */ +const char *vfio_selftests_get_bdf(int *argc, char *argv[]); +const char *vfio_pci_get_cdev_path(const char *bdf); + +extern const char *default_iommu_mode; + +struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode); +void vfio_pci_device_cleanup(struct vfio_pci_device *device); +void vfio_pci_device_reset(struct vfio_pci_device *device); + +void vfio_pci_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region); +void vfio_pci_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region); + +void vfio_pci_config_access(struct vfio_pci_device *device, bool write, + size_t config, size_t size, void *data); + +#define vfio_pci_config_read(_device, _offset, _type) ({ \ + _type __data; \ + vfio_pci_config_access((_device), false, _offset, sizeof(__data), &__data); \ + __data; \ +}) + +#define vfio_pci_config_readb(_d, _o) vfio_pci_config_read(_d, _o, u8) +#define vfio_pci_config_readw(_d, _o) vfio_pci_config_read(_d, _o, u16) +#define vfio_pci_config_readl(_d, _o) vfio_pci_config_read(_d, _o, u32) + +#define vfio_pci_config_write(_device, _offset, _value, _type) do { \ + _type __data = (_value); \ + vfio_pci_config_access((_device), true, _offset, sizeof(_type), &__data); \ +} while (0) + +#define vfio_pci_config_writeb(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u8) +#define vfio_pci_config_writew(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u16) +#define vfio_pci_config_writel(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u32) + +void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, + u32 vector, int count); +void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index); +void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector); + +static inline void fcntl_set_nonblock(int fd) +{ + int r; + + r = fcntl(fd, F_GETFL, 0); + VFIO_ASSERT_NE(r, -1, "F_GETFL failed for fd %d\n", fd); + + r = fcntl(fd, F_SETFL, r | O_NONBLOCK); + VFIO_ASSERT_NE(r, -1, "F_SETFL O_NONBLOCK failed for fd %d\n", fd); +} + +static inline void vfio_pci_msi_enable(struct vfio_pci_device *device, + u32 vector, int count) +{ + vfio_pci_irq_enable(device, VFIO_PCI_MSI_IRQ_INDEX, vector, count); +} + +static inline void vfio_pci_msi_disable(struct vfio_pci_device *device) +{ + vfio_pci_irq_disable(device, VFIO_PCI_MSI_IRQ_INDEX); +} + +static inline void vfio_pci_msix_enable(struct vfio_pci_device *device, + u32 vector, int count) +{ + vfio_pci_irq_enable(device, VFIO_PCI_MSIX_IRQ_INDEX, vector, count); +} + +static inline void vfio_pci_msix_disable(struct vfio_pci_device *device) +{ + vfio_pci_irq_disable(device, VFIO_PCI_MSIX_IRQ_INDEX); +} + +iova_t __to_iova(struct vfio_pci_device *device, void *vaddr); +iova_t to_iova(struct vfio_pci_device *device, void *vaddr); + +static inline bool vfio_pci_device_match(struct vfio_pci_device *device, + u16 vendor_id, u16 device_id) +{ + return (vendor_id == vfio_pci_config_readw(device, PCI_VENDOR_ID)) && + (device_id == vfio_pci_config_readw(device, PCI_DEVICE_ID)); +} + +void vfio_pci_driver_probe(struct vfio_pci_device *device); +void vfio_pci_driver_init(struct vfio_pci_device *device); +void vfio_pci_driver_remove(struct vfio_pci_device *device); +int vfio_pci_driver_memcpy(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size); +void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, + u64 count); +int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device); +void vfio_pci_driver_send_msi(struct vfio_pci_device *device); + +#endif /* SELFTESTS_VFIO_LIB_INCLUDE_VFIO_UTIL_H */ diff --git a/tools/testing/selftests/vfio/lib/libvfio.mk b/tools/testing/selftests/vfio/lib/libvfio.mk new file mode 100644 index 000000000000..5d11c3a89a28 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/libvfio.mk @@ -0,0 +1,24 @@ +include $(top_srcdir)/scripts/subarch.include +ARCH ?= $(SUBARCH) + +VFIO_DIR := $(selfdir)/vfio + +LIBVFIO_C := lib/vfio_pci_device.c +LIBVFIO_C += lib/vfio_pci_driver.c + +ifeq ($(ARCH:x86_64=x86),x86) +LIBVFIO_C += lib/drivers/ioat/ioat.c +LIBVFIO_C += lib/drivers/dsa/dsa.c +endif + +LIBVFIO_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBVFIO_C)) + +LIBVFIO_O_DIRS := $(shell dirname $(LIBVFIO_O) | uniq) +$(shell mkdir -p $(LIBVFIO_O_DIRS)) + +CFLAGS += -I$(VFIO_DIR)/lib/include + +$(LIBVFIO_O): $(OUTPUT)/%.o : $(VFIO_DIR)/%.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +EXTRA_CLEAN += $(LIBVFIO_O) diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c new file mode 100644 index 000000000000..0921b2451ba5 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c @@ -0,0 +1,594 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <dirent.h> +#include <fcntl.h> +#include <libgen.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/eventfd.h> +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include <uapi/linux/types.h> +#include <linux/limits.h> +#include <linux/mman.h> +#include <linux/types.h> +#include <linux/vfio.h> +#include <linux/iommufd.h> + +#include "../../../kselftest.h" +#include <vfio_util.h> + +#define PCI_SYSFS_PATH "/sys/bus/pci/devices" + +#define ioctl_assert(_fd, _op, _arg) do { \ + void *__arg = (_arg); \ + int __ret = ioctl((_fd), (_op), (__arg)); \ + VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \ +} while (0) + +iova_t __to_iova(struct vfio_pci_device *device, void *vaddr) +{ + struct vfio_dma_region *region; + + list_for_each_entry(region, &device->dma_regions, link) { + if (vaddr < region->vaddr) + continue; + + if (vaddr >= region->vaddr + region->size) + continue; + + return region->iova + (vaddr - region->vaddr); + } + + return INVALID_IOVA; +} + +iova_t to_iova(struct vfio_pci_device *device, void *vaddr) +{ + iova_t iova; + + iova = __to_iova(device, vaddr); + VFIO_ASSERT_NE(iova, INVALID_IOVA, "%p is not mapped into device.\n", vaddr); + + return iova; +} + +static void vfio_pci_irq_set(struct vfio_pci_device *device, + u32 index, u32 vector, u32 count, int *fds) +{ + u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count] = {}; + struct vfio_irq_set *irq = (void *)&buf; + int *irq_fds = (void *)&irq->data; + + irq->argsz = sizeof(buf); + irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER; + irq->index = index; + irq->start = vector; + irq->count = count; + + if (count) { + irq->flags |= VFIO_IRQ_SET_DATA_EVENTFD; + memcpy(irq_fds, fds, sizeof(int) * count); + } else { + irq->flags |= VFIO_IRQ_SET_DATA_NONE; + } + + ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq); +} + +void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector) +{ + struct vfio_irq_set irq = { + .argsz = sizeof(irq), + .flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE, + .index = index, + .start = vector, + .count = 1, + }; + + ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq); +} + +static void check_supported_irq_index(u32 index) +{ + /* VFIO selftests only supports MSI and MSI-x for now. */ + VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX || + index == VFIO_PCI_MSIX_IRQ_INDEX, + "Unsupported IRQ index: %u\n", index); +} + +void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector, + int count) +{ + int i; + + check_supported_irq_index(index); + + for (i = vector; i < vector + count; i++) { + VFIO_ASSERT_LT(device->msi_eventfds[i], 0); + device->msi_eventfds[i] = eventfd(0, 0); + VFIO_ASSERT_GE(device->msi_eventfds[i], 0); + } + + vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector); +} + +void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index) +{ + int i; + + check_supported_irq_index(index); + + for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) { + if (device->msi_eventfds[i] < 0) + continue; + + VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0); + device->msi_eventfds[i] = -1; + } + + vfio_pci_irq_set(device, index, 0, 0, NULL); +} + +static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index, + struct vfio_irq_info *irq_info) +{ + irq_info->argsz = sizeof(*irq_info); + irq_info->index = index; + + ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info); +} + +static void vfio_iommu_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + struct vfio_iommu_type1_dma_map args = { + .argsz = sizeof(args), + .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE, + .vaddr = (u64)region->vaddr, + .iova = region->iova, + .size = region->size, + }; + + ioctl_assert(device->container_fd, VFIO_IOMMU_MAP_DMA, &args); +} + +static void iommufd_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + struct iommu_ioas_map args = { + .size = sizeof(args), + .flags = IOMMU_IOAS_MAP_READABLE | + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_FIXED_IOVA, + .user_va = (u64)region->vaddr, + .iova = region->iova, + .length = region->size, + .ioas_id = device->ioas_id, + }; + + ioctl_assert(device->iommufd, IOMMU_IOAS_MAP, &args); +} + +void vfio_pci_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + if (device->iommufd) + iommufd_dma_map(device, region); + else + vfio_iommu_dma_map(device, region); + + list_add(®ion->link, &device->dma_regions); +} + +static void vfio_iommu_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + struct vfio_iommu_type1_dma_unmap args = { + .argsz = sizeof(args), + .iova = region->iova, + .size = region->size, + }; + + ioctl_assert(device->container_fd, VFIO_IOMMU_UNMAP_DMA, &args); +} + +static void iommufd_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + struct iommu_ioas_unmap args = { + .size = sizeof(args), + .iova = region->iova, + .length = region->size, + .ioas_id = device->ioas_id, + }; + + ioctl_assert(device->iommufd, IOMMU_IOAS_UNMAP, &args); +} + +void vfio_pci_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + if (device->iommufd) + iommufd_dma_unmap(device, region); + else + vfio_iommu_dma_unmap(device, region); + + list_del(®ion->link); +} + +static void vfio_pci_region_get(struct vfio_pci_device *device, int index, + struct vfio_region_info *info) +{ + memset(info, 0, sizeof(*info)); + + info->argsz = sizeof(*info); + info->index = index; + + ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info); +} + +static void vfio_pci_bar_map(struct vfio_pci_device *device, int index) +{ + struct vfio_pci_bar *bar = &device->bars[index]; + int prot = 0; + + VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS); + VFIO_ASSERT_NULL(bar->vaddr); + VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP); + + if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ) + prot |= PROT_READ; + if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE) + prot |= PROT_WRITE; + + bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED, + device->fd, bar->info.offset); + VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED); +} + +static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index) +{ + struct vfio_pci_bar *bar = &device->bars[index]; + + VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS); + VFIO_ASSERT_NOT_NULL(bar->vaddr); + + VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0); + bar->vaddr = NULL; +} + +static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device) +{ + int i; + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if (device->bars[i].vaddr) + vfio_pci_bar_unmap(device, i); + } +} + +void vfio_pci_config_access(struct vfio_pci_device *device, bool write, + size_t config, size_t size, void *data) +{ + struct vfio_region_info *config_space = &device->config_space; + int ret; + + if (write) + ret = pwrite(device->fd, data, size, config_space->offset + config); + else + ret = pread(device->fd, data, size, config_space->offset + config); + + VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n", + write ? "write to" : "read from", config); +} + +void vfio_pci_device_reset(struct vfio_pci_device *device) +{ + ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL); +} + +static unsigned int vfio_pci_get_group_from_dev(const char *bdf) +{ + char dev_iommu_group_path[PATH_MAX] = {0}; + char sysfs_path[PATH_MAX] = {0}; + unsigned int group; + int ret; + + snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf); + + ret = readlink(sysfs_path, dev_iommu_group_path, sizeof(dev_iommu_group_path)); + VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf); + + ret = sscanf(basename(dev_iommu_group_path), "%u", &group); + VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf); + + return group; +} + +static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf) +{ + struct vfio_group_status group_status = { + .argsz = sizeof(group_status), + }; + char group_path[32]; + int group; + + group = vfio_pci_get_group_from_dev(bdf); + snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group); + + device->group_fd = open(group_path, O_RDWR); + VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path); + + ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status); + VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE); + + ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->container_fd); +} + +static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf) +{ + unsigned long iommu_type = device->iommu_mode->iommu_type; + const char *path = device->iommu_mode->container_path; + int version; + int ret; + + device->container_fd = open(path, O_RDWR); + VFIO_ASSERT_GE(device->container_fd, 0, "open(%s) failed\n", path); + + version = ioctl(device->container_fd, VFIO_GET_API_VERSION); + VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version); + + vfio_pci_group_setup(device, bdf); + + ret = ioctl(device->container_fd, VFIO_CHECK_EXTENSION, iommu_type); + VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type); + + ioctl_assert(device->container_fd, VFIO_SET_IOMMU, (void *)iommu_type); + + device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf); + VFIO_ASSERT_GE(device->fd, 0); +} + +static void vfio_pci_device_setup(struct vfio_pci_device *device) +{ + int i; + + device->info.argsz = sizeof(device->info); + ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info); + + vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space); + + /* Sanity check VFIO does not advertise mmap for config space */ + VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP), + "PCI config space should not support mmap()\n"); + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + struct vfio_pci_bar *bar = device->bars + i; + + vfio_pci_region_get(device, i, &bar->info); + if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP) + vfio_pci_bar_map(device, i); + } + + vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info); + vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info); + + for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) + device->msi_eventfds[i] = -1; +} + +const char *vfio_pci_get_cdev_path(const char *bdf) +{ + char dir_path[PATH_MAX]; + struct dirent *entry; + char *cdev_path; + DIR *dir; + + cdev_path = calloc(PATH_MAX, 1); + VFIO_ASSERT_NOT_NULL(cdev_path); + + snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf); + + dir = opendir(dir_path); + VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path); + + while ((entry = readdir(dir)) != NULL) { + /* Find the file that starts with "vfio" */ + if (strncmp("vfio", entry->d_name, 4)) + continue; + + snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name); + break; + } + + VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n"); + VFIO_ASSERT_EQ(closedir(dir), 0); + + return cdev_path; +} + +/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */ +static const struct vfio_iommu_mode iommu_modes[] = { + { + .name = "vfio_type1_iommu", + .container_path = "/dev/vfio/vfio", + .iommu_type = VFIO_TYPE1_IOMMU, + }, + { + .name = "vfio_type1v2_iommu", + .container_path = "/dev/vfio/vfio", + .iommu_type = VFIO_TYPE1v2_IOMMU, + }, + { + .name = "iommufd_compat_type1", + .container_path = "/dev/iommu", + .iommu_type = VFIO_TYPE1_IOMMU, + }, + { + .name = "iommufd_compat_type1v2", + .container_path = "/dev/iommu", + .iommu_type = VFIO_TYPE1v2_IOMMU, + }, + { + .name = "iommufd", + }, +}; + +const char *default_iommu_mode = "iommufd"; + +static const struct vfio_iommu_mode *lookup_iommu_mode(const char *iommu_mode) +{ + int i; + + if (!iommu_mode) + iommu_mode = default_iommu_mode; + + for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) { + if (strcmp(iommu_mode, iommu_modes[i].name)) + continue; + + return &iommu_modes[i]; + } + + VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode); +} + +static void vfio_device_bind_iommufd(int device_fd, int iommufd) +{ + struct vfio_device_bind_iommufd args = { + .argsz = sizeof(args), + .iommufd = iommufd, + }; + + ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args); +} + +static u32 iommufd_ioas_alloc(int iommufd) +{ + struct iommu_ioas_alloc args = { + .size = sizeof(args), + }; + + ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args); + return args.out_ioas_id; +} + +static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id) +{ + struct vfio_device_attach_iommufd_pt args = { + .argsz = sizeof(args), + .pt_id = pt_id, + }; + + ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args); +} + +static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf) +{ + const char *cdev_path = vfio_pci_get_cdev_path(bdf); + + device->fd = open(cdev_path, O_RDWR); + VFIO_ASSERT_GE(device->fd, 0); + free((void *)cdev_path); + + /* + * Require device->iommufd to be >0 so that a simple non-0 check can be + * used to check if iommufd is enabled. In practice open() will never + * return 0 unless stdin is closed. + */ + device->iommufd = open("/dev/iommu", O_RDWR); + VFIO_ASSERT_GT(device->iommufd, 0); + + vfio_device_bind_iommufd(device->fd, device->iommufd); + device->ioas_id = iommufd_ioas_alloc(device->iommufd); + vfio_device_attach_iommufd_pt(device->fd, device->ioas_id); +} + +struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_mode) +{ + struct vfio_pci_device *device; + + device = calloc(1, sizeof(*device)); + VFIO_ASSERT_NOT_NULL(device); + + INIT_LIST_HEAD(&device->dma_regions); + + device->iommu_mode = lookup_iommu_mode(iommu_mode); + + if (device->iommu_mode->container_path) + vfio_pci_container_setup(device, bdf); + else + vfio_pci_iommufd_setup(device, bdf); + + vfio_pci_device_setup(device); + vfio_pci_driver_probe(device); + + return device; +} + +void vfio_pci_device_cleanup(struct vfio_pci_device *device) +{ + int i; + + if (device->driver.initialized) + vfio_pci_driver_remove(device); + + vfio_pci_bar_unmap_all(device); + + VFIO_ASSERT_EQ(close(device->fd), 0); + + for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) { + if (device->msi_eventfds[i] < 0) + continue; + + VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0); + } + + if (device->iommufd) { + VFIO_ASSERT_EQ(close(device->iommufd), 0); + } else { + VFIO_ASSERT_EQ(close(device->group_fd), 0); + VFIO_ASSERT_EQ(close(device->container_fd), 0); + } + + free(device); +} + +static bool is_bdf(const char *str) +{ + unsigned int s, b, d, f; + int length, count; + + count = sscanf(str, "%4x:%2x:%2x.%2x%n", &s, &b, &d, &f, &length); + return count == 4 && length == strlen(str); +} + +const char *vfio_selftests_get_bdf(int *argc, char *argv[]) +{ + char *bdf; + + if (*argc > 1 && is_bdf(argv[*argc - 1])) + return argv[--(*argc)]; + + bdf = getenv("VFIO_SELFTESTS_BDF"); + if (bdf) { + VFIO_ASSERT_TRUE(is_bdf(bdf), "Invalid BDF: %s\n", bdf); + return bdf; + } + + fprintf(stderr, "Unable to determine which device to use, skipping test.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "To pass the device address via environment variable:\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " export VFIO_SELFTESTS_BDF=segment:bus:device.function\n"); + fprintf(stderr, " %s [options]\n", argv[0]); + fprintf(stderr, "\n"); + fprintf(stderr, "To pass the device address via argv:\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " %s [options] segment:bus:device.function\n", argv[0]); + fprintf(stderr, "\n"); + exit(KSFT_SKIP); +} diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c new file mode 100644 index 000000000000..e5e8723ecb41 --- /dev/null +++ b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stdio.h> + +#include "../../../kselftest.h" +#include <vfio_util.h> + +#ifdef __x86_64__ +extern struct vfio_pci_driver_ops dsa_ops; +extern struct vfio_pci_driver_ops ioat_ops; +#endif + +static struct vfio_pci_driver_ops *driver_ops[] = { +#ifdef __x86_64__ + &dsa_ops, + &ioat_ops, +#endif +}; + +void vfio_pci_driver_probe(struct vfio_pci_device *device) +{ + struct vfio_pci_driver_ops *ops; + int i; + + VFIO_ASSERT_NULL(device->driver.ops); + + for (i = 0; i < ARRAY_SIZE(driver_ops); i++) { + ops = driver_ops[i]; + + if (ops->probe(device)) + continue; + + printf("Driver found: %s\n", ops->name); + device->driver.ops = ops; + } +} + +static void vfio_check_driver_op(struct vfio_pci_driver *driver, void *op, + const char *op_name) +{ + VFIO_ASSERT_NOT_NULL(driver->ops); + VFIO_ASSERT_NOT_NULL(op, "Driver has no %s()\n", op_name); + VFIO_ASSERT_EQ(driver->initialized, op != driver->ops->init); + VFIO_ASSERT_EQ(driver->memcpy_in_progress, op == driver->ops->memcpy_wait); +} + +#define VFIO_CHECK_DRIVER_OP(_driver, _op) do { \ + struct vfio_pci_driver *__driver = (_driver); \ + vfio_check_driver_op(__driver, __driver->ops->_op, #_op); \ +} while (0) + +void vfio_pci_driver_init(struct vfio_pci_device *device) +{ + struct vfio_pci_driver *driver = &device->driver; + + VFIO_ASSERT_NOT_NULL(driver->region.vaddr); + VFIO_CHECK_DRIVER_OP(driver, init); + + driver->ops->init(device); + + driver->initialized = true; + + printf("%s: region: vaddr %p, iova 0x%lx, size 0x%lx\n", + driver->ops->name, + driver->region.vaddr, + driver->region.iova, + driver->region.size); + + printf("%s: max_memcpy_size 0x%lx, max_memcpy_count 0x%lx\n", + driver->ops->name, + driver->max_memcpy_size, + driver->max_memcpy_count); +} + +void vfio_pci_driver_remove(struct vfio_pci_device *device) +{ + struct vfio_pci_driver *driver = &device->driver; + + VFIO_CHECK_DRIVER_OP(driver, remove); + + driver->ops->remove(device); + driver->initialized = false; +} + +void vfio_pci_driver_send_msi(struct vfio_pci_device *device) +{ + struct vfio_pci_driver *driver = &device->driver; + + VFIO_CHECK_DRIVER_OP(driver, send_msi); + + driver->ops->send_msi(device); +} + +void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size, + u64 count) +{ + struct vfio_pci_driver *driver = &device->driver; + + VFIO_ASSERT_LE(size, driver->max_memcpy_size); + VFIO_ASSERT_LE(count, driver->max_memcpy_count); + VFIO_CHECK_DRIVER_OP(driver, memcpy_start); + + driver->ops->memcpy_start(device, src, dst, size, count); + driver->memcpy_in_progress = true; +} + +int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device) +{ + struct vfio_pci_driver *driver = &device->driver; + int r; + + VFIO_CHECK_DRIVER_OP(driver, memcpy_wait); + + r = driver->ops->memcpy_wait(device); + driver->memcpy_in_progress = false; + + return r; +} + +int vfio_pci_driver_memcpy(struct vfio_pci_device *device, + iova_t src, iova_t dst, u64 size) +{ + vfio_pci_driver_memcpy_start(device, src, dst, size, 1); + + return vfio_pci_driver_memcpy_wait(device); +} diff --git a/tools/testing/selftests/vfio/run.sh b/tools/testing/selftests/vfio/run.sh new file mode 100755 index 000000000000..0476b6d7adc3 --- /dev/null +++ b/tools/testing/selftests/vfio/run.sh @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +# Global variables initialized in main() and then used during cleanup() when +# the script exits. +declare DEVICE_BDF +declare NEW_DRIVER +declare OLD_DRIVER +declare OLD_NUMVFS +declare DRIVER_OVERRIDE + +function write_to() { + # Unfortunately set -x does not show redirects so use echo to manually + # tell the user what commands are being run. + echo "+ echo \"${2}\" > ${1}" + echo "${2}" > ${1} +} + +function bind() { + write_to /sys/bus/pci/drivers/${2}/bind ${1} +} + +function unbind() { + write_to /sys/bus/pci/drivers/${2}/unbind ${1} +} + +function set_sriov_numvfs() { + write_to /sys/bus/pci/devices/${1}/sriov_numvfs ${2} +} + +function set_driver_override() { + write_to /sys/bus/pci/devices/${1}/driver_override ${2} +} + +function clear_driver_override() { + set_driver_override ${1} "" +} + +function cleanup() { + if [ "${NEW_DRIVER}" ]; then unbind ${DEVICE_BDF} ${NEW_DRIVER} ; fi + if [ "${DRIVER_OVERRIDE}" ]; then clear_driver_override ${DEVICE_BDF} ; fi + if [ "${OLD_DRIVER}" ]; then bind ${DEVICE_BDF} ${OLD_DRIVER} ; fi + if [ "${OLD_NUMVFS}" ]; then set_sriov_numvfs ${DEVICE_BDF} ${OLD_NUMVFS} ; fi +} + +function usage() { + echo "usage: $0 [-d segment:bus:device.function] [-s] [-h] [cmd ...]" >&2 + echo >&2 + echo " -d: The BDF of the device to use for the test (required)" >&2 + echo " -h: Show this help message" >&2 + echo " -s: Drop into a shell rather than running a command" >&2 + echo >&2 + echo " cmd: The command to run and arguments to pass to it." >&2 + echo " Required when not using -s. The SBDF will be " >&2 + echo " appended to the argument list." >&2 + exit 1 +} + +function main() { + local shell + + while getopts "d:hs" opt; do + case $opt in + d) DEVICE_BDF="$OPTARG" ;; + s) shell=true ;; + *) usage ;; + esac + done + + # Shift past all optional arguments. + shift $((OPTIND - 1)) + + # Check that the user passed in the command to run. + [ ! "${shell}" ] && [ $# = 0 ] && usage + + # Check that the user passed in a BDF. + [ "${DEVICE_BDF}" ] || usage + + trap cleanup EXIT + set -e + + test -d /sys/bus/pci/devices/${DEVICE_BDF} + + if [ -f /sys/bus/pci/devices/${DEVICE_BDF}/sriov_numvfs ]; then + OLD_NUMVFS=$(cat /sys/bus/pci/devices/${DEVICE_BDF}/sriov_numvfs) + set_sriov_numvfs ${DEVICE_BDF} 0 + fi + + if [ -L /sys/bus/pci/devices/${DEVICE_BDF}/driver ]; then + OLD_DRIVER=$(basename $(readlink -m /sys/bus/pci/devices/${DEVICE_BDF}/driver)) + unbind ${DEVICE_BDF} ${OLD_DRIVER} + fi + + set_driver_override ${DEVICE_BDF} vfio-pci + DRIVER_OVERRIDE=true + + bind ${DEVICE_BDF} vfio-pci + NEW_DRIVER=vfio-pci + + echo + if [ "${shell}" ]; then + echo "Dropping into ${SHELL} with VFIO_SELFTESTS_BDF=${DEVICE_BDF}" + VFIO_SELFTESTS_BDF=${DEVICE_BDF} ${SHELL} + else + "$@" ${DEVICE_BDF} + fi + echo +} + +main "$@" diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c new file mode 100644 index 000000000000..ab19c54a774d --- /dev/null +++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <stdio.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <linux/limits.h> +#include <linux/mman.h> +#include <linux/sizes.h> +#include <linux/vfio.h> + +#include <vfio_util.h> + +#include "../kselftest_harness.h" + +static const char *device_bdf; + +struct iommu_mapping { + u64 pgd; + u64 p4d; + u64 pud; + u64 pmd; + u64 pte; +}; + +static void parse_next_value(char **line, u64 *value) +{ + char *token; + + token = strtok_r(*line, " \t|\n", line); + if (!token) + return; + + /* Caller verifies `value`. No need to check return value. */ + sscanf(token, "0x%lx", value); +} + +static int intel_iommu_mapping_get(const char *bdf, u64 iova, + struct iommu_mapping *mapping) +{ + char iommu_mapping_path[PATH_MAX], line[PATH_MAX]; + u64 line_iova = -1; + int ret = -ENOENT; + FILE *file; + char *rest; + + snprintf(iommu_mapping_path, sizeof(iommu_mapping_path), + "/sys/kernel/debug/iommu/intel/%s/domain_translation_struct", + bdf); + + printf("Searching for IOVA 0x%lx in %s\n", iova, iommu_mapping_path); + + file = fopen(iommu_mapping_path, "r"); + VFIO_ASSERT_NOT_NULL(file, "fopen(%s) failed", iommu_mapping_path); + + while (fgets(line, sizeof(line), file)) { + rest = line; + + parse_next_value(&rest, &line_iova); + if (line_iova != (iova / getpagesize())) + continue; + + /* + * Ensure each struct field is initialized in case of empty + * page table values. + */ + memset(mapping, 0, sizeof(*mapping)); + parse_next_value(&rest, &mapping->pgd); + parse_next_value(&rest, &mapping->p4d); + parse_next_value(&rest, &mapping->pud); + parse_next_value(&rest, &mapping->pmd); + parse_next_value(&rest, &mapping->pte); + + ret = 0; + break; + } + + fclose(file); + + if (ret) + printf("IOVA not found\n"); + + return ret; +} + +static int iommu_mapping_get(const char *bdf, u64 iova, + struct iommu_mapping *mapping) +{ + if (!access("/sys/kernel/debug/iommu/intel", F_OK)) + return intel_iommu_mapping_get(bdf, iova, mapping); + + return -EOPNOTSUPP; +} + +FIXTURE(vfio_dma_mapping_test) { + struct vfio_pci_device *device; +}; + +FIXTURE_VARIANT(vfio_dma_mapping_test) { + const char *iommu_mode; + u64 size; + int mmap_flags; +}; + +#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode, _name, _size, _mmap_flags) \ +FIXTURE_VARIANT_ADD(vfio_dma_mapping_test, _iommu_mode ## _ ## _name) { \ + .iommu_mode = #_iommu_mode, \ + .size = (_size), \ + .mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE | (_mmap_flags), \ +} + +FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous, 0, 0); +FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_2mb, SZ_2M, MAP_HUGETLB | MAP_HUGE_2MB); +FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | MAP_HUGE_1GB); + +FIXTURE_SETUP(vfio_dma_mapping_test) +{ + self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); +} + +FIXTURE_TEARDOWN(vfio_dma_mapping_test) +{ + vfio_pci_device_cleanup(self->device); +} + +TEST_F(vfio_dma_mapping_test, dma_map_unmap) +{ + const u64 size = variant->size ?: getpagesize(); + const int flags = variant->mmap_flags; + struct vfio_dma_region region; + struct iommu_mapping mapping; + u64 mapping_size = size; + int rc; + + region.vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + + /* Skip the test if there aren't enough HugeTLB pages available. */ + if (flags & MAP_HUGETLB && region.vaddr == MAP_FAILED) + SKIP(return, "mmap() failed: %s (%d)\n", strerror(errno), errno); + else + ASSERT_NE(region.vaddr, MAP_FAILED); + + region.iova = (u64)region.vaddr; + region.size = size; + + vfio_pci_dma_map(self->device, ®ion); + printf("Mapped HVA %p (size 0x%lx) at IOVA 0x%lx\n", region.vaddr, size, region.iova); + + ASSERT_EQ(region.iova, to_iova(self->device, region.vaddr)); + + rc = iommu_mapping_get(device_bdf, region.iova, &mapping); + if (rc == -EOPNOTSUPP) + goto unmap; + + /* + * IOMMUFD compatibility-mode does not support huge mappings when + * using VFIO_TYPE1_IOMMU. + */ + if (!strcmp(variant->iommu_mode, "iommufd_compat_type1")) + mapping_size = SZ_4K; + + ASSERT_EQ(0, rc); + printf("Found IOMMU mappings for IOVA 0x%lx:\n", region.iova); + printf("PGD: 0x%016lx\n", mapping.pgd); + printf("P4D: 0x%016lx\n", mapping.p4d); + printf("PUD: 0x%016lx\n", mapping.pud); + printf("PMD: 0x%016lx\n", mapping.pmd); + printf("PTE: 0x%016lx\n", mapping.pte); + + switch (mapping_size) { + case SZ_4K: + ASSERT_NE(0, mapping.pte); + break; + case SZ_2M: + ASSERT_EQ(0, mapping.pte); + ASSERT_NE(0, mapping.pmd); + break; + case SZ_1G: + ASSERT_EQ(0, mapping.pte); + ASSERT_EQ(0, mapping.pmd); + ASSERT_NE(0, mapping.pud); + break; + default: + VFIO_FAIL("Unrecognized size: 0x%lx\n", mapping_size); + } + +unmap: + vfio_pci_dma_unmap(self->device, ®ion); + printf("Unmapped IOVA 0x%lx\n", region.iova); + ASSERT_EQ(INVALID_IOVA, __to_iova(self->device, region.vaddr)); + ASSERT_NE(0, iommu_mapping_get(device_bdf, region.iova, &mapping)); + + ASSERT_TRUE(!munmap(region.vaddr, size)); +} + +int main(int argc, char *argv[]) +{ + device_bdf = vfio_selftests_get_bdf(&argc, argv); + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c new file mode 100644 index 000000000000..3655106b912d --- /dev/null +++ b/tools/testing/selftests/vfio/vfio_iommufd_setup_test.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <uapi/linux/types.h> +#include <linux/limits.h> +#include <linux/sizes.h> +#include <linux/vfio.h> +#include <linux/iommufd.h> + +#include <stdint.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include <vfio_util.h> +#include "../kselftest_harness.h" + +static const char iommu_dev_path[] = "/dev/iommu"; +static const char *cdev_path; + +static int vfio_device_bind_iommufd_ioctl(int cdev_fd, int iommufd) +{ + struct vfio_device_bind_iommufd bind_args = { + .argsz = sizeof(bind_args), + .iommufd = iommufd, + }; + + return ioctl(cdev_fd, VFIO_DEVICE_BIND_IOMMUFD, &bind_args); +} + +static int vfio_device_get_info_ioctl(int cdev_fd) +{ + struct vfio_device_info info_args = { .argsz = sizeof(info_args) }; + + return ioctl(cdev_fd, VFIO_DEVICE_GET_INFO, &info_args); +} + +static int vfio_device_ioas_alloc_ioctl(int iommufd, struct iommu_ioas_alloc *alloc_args) +{ + *alloc_args = (struct iommu_ioas_alloc){ + .size = sizeof(struct iommu_ioas_alloc), + }; + + return ioctl(iommufd, IOMMU_IOAS_ALLOC, alloc_args); +} + +static int vfio_device_attach_iommufd_pt_ioctl(int cdev_fd, u32 pt_id) +{ + struct vfio_device_attach_iommufd_pt attach_args = { + .argsz = sizeof(attach_args), + .pt_id = pt_id, + }; + + return ioctl(cdev_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_args); +} + +static int vfio_device_detach_iommufd_pt_ioctl(int cdev_fd) +{ + struct vfio_device_detach_iommufd_pt detach_args = { + .argsz = sizeof(detach_args), + }; + + return ioctl(cdev_fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_args); +} + +FIXTURE(vfio_cdev) { + int cdev_fd; + int iommufd; +}; + +FIXTURE_SETUP(vfio_cdev) +{ + ASSERT_LE(0, (self->cdev_fd = open(cdev_path, O_RDWR, 0))); + ASSERT_LE(0, (self->iommufd = open(iommu_dev_path, O_RDWR, 0))); +} + +FIXTURE_TEARDOWN(vfio_cdev) +{ + ASSERT_EQ(0, close(self->cdev_fd)); + ASSERT_EQ(0, close(self->iommufd)); +} + +TEST_F(vfio_cdev, bind) +{ + ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd)); + ASSERT_EQ(0, vfio_device_get_info_ioctl(self->cdev_fd)); +} + +TEST_F(vfio_cdev, get_info_without_bind_fails) +{ + ASSERT_NE(0, vfio_device_get_info_ioctl(self->cdev_fd)); +} + +TEST_F(vfio_cdev, bind_bad_iommufd_fails) +{ + ASSERT_NE(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, -2)); +} + +TEST_F(vfio_cdev, repeated_bind_fails) +{ + ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd)); + ASSERT_NE(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd)); +} + +TEST_F(vfio_cdev, attach_detatch_pt) +{ + struct iommu_ioas_alloc alloc_args; + + ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd)); + ASSERT_EQ(0, vfio_device_ioas_alloc_ioctl(self->iommufd, &alloc_args)); + ASSERT_EQ(0, vfio_device_attach_iommufd_pt_ioctl(self->cdev_fd, alloc_args.out_ioas_id)); + ASSERT_EQ(0, vfio_device_detach_iommufd_pt_ioctl(self->cdev_fd)); +} + +TEST_F(vfio_cdev, attach_invalid_pt_fails) +{ + ASSERT_EQ(0, vfio_device_bind_iommufd_ioctl(self->cdev_fd, self->iommufd)); + ASSERT_NE(0, vfio_device_attach_iommufd_pt_ioctl(self->cdev_fd, UINT32_MAX)); +} + +int main(int argc, char *argv[]) +{ + const char *device_bdf = vfio_selftests_get_bdf(&argc, argv); + + cdev_path = vfio_pci_get_cdev_path(device_bdf); + printf("Using cdev device %s\n", cdev_path); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/vfio/vfio_pci_device_test.c b/tools/testing/selftests/vfio/vfio_pci_device_test.c new file mode 100644 index 000000000000..7a270698e4d2 --- /dev/null +++ b/tools/testing/selftests/vfio/vfio_pci_device_test.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <fcntl.h> +#include <stdlib.h> + +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include <linux/limits.h> +#include <linux/pci_regs.h> +#include <linux/sizes.h> +#include <linux/vfio.h> + +#include <vfio_util.h> + +#include "../kselftest_harness.h" + +static const char *device_bdf; + +/* + * Limit the number of MSIs enabled/disabled by the test regardless of the + * number of MSIs the device itself supports, e.g. to avoid hitting IRTE limits. + */ +#define MAX_TEST_MSI 16U + +FIXTURE(vfio_pci_device_test) { + struct vfio_pci_device *device; +}; + +FIXTURE_SETUP(vfio_pci_device_test) +{ + self->device = vfio_pci_device_init(device_bdf, default_iommu_mode); +} + +FIXTURE_TEARDOWN(vfio_pci_device_test) +{ + vfio_pci_device_cleanup(self->device); +} + +#define read_pci_id_from_sysfs(_file) ({ \ + char __sysfs_path[PATH_MAX]; \ + char __buf[32]; \ + int __fd; \ + \ + snprintf(__sysfs_path, PATH_MAX, "/sys/bus/pci/devices/%s/%s", device_bdf, _file); \ + ASSERT_GT((__fd = open(__sysfs_path, O_RDONLY)), 0); \ + ASSERT_GT(read(__fd, __buf, ARRAY_SIZE(__buf)), 0); \ + ASSERT_EQ(0, close(__fd)); \ + (u16)strtoul(__buf, NULL, 0); \ +}) + +TEST_F(vfio_pci_device_test, config_space_read_write) +{ + u16 vendor, device; + u16 command; + + /* Check that Vendor and Device match what the kernel reports. */ + vendor = read_pci_id_from_sysfs("vendor"); + device = read_pci_id_from_sysfs("device"); + ASSERT_TRUE(vfio_pci_device_match(self->device, vendor, device)); + + printf("Vendor: %04x, Device: %04x\n", vendor, device); + + command = vfio_pci_config_readw(self->device, PCI_COMMAND); + ASSERT_FALSE(command & PCI_COMMAND_MASTER); + + vfio_pci_config_writew(self->device, PCI_COMMAND, command | PCI_COMMAND_MASTER); + command = vfio_pci_config_readw(self->device, PCI_COMMAND); + ASSERT_TRUE(command & PCI_COMMAND_MASTER); + printf("Enabled Bus Mastering (command: %04x)\n", command); + + vfio_pci_config_writew(self->device, PCI_COMMAND, command & ~PCI_COMMAND_MASTER); + command = vfio_pci_config_readw(self->device, PCI_COMMAND); + ASSERT_FALSE(command & PCI_COMMAND_MASTER); + printf("Disabled Bus Mastering (command: %04x)\n", command); +} + +TEST_F(vfio_pci_device_test, validate_bars) +{ + struct vfio_pci_bar *bar; + int i; + + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + bar = &self->device->bars[i]; + + if (!(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)) { + printf("BAR %d does not support mmap()\n", i); + ASSERT_EQ(NULL, bar->vaddr); + continue; + } + + /* + * BARs that support mmap() should be automatically mapped by + * vfio_pci_device_init(). + */ + ASSERT_NE(NULL, bar->vaddr); + ASSERT_NE(0, bar->info.size); + printf("BAR %d mapped at %p (size 0x%llx)\n", i, bar->vaddr, bar->info.size); + } +} + +FIXTURE(vfio_pci_irq_test) { + struct vfio_pci_device *device; +}; + +FIXTURE_VARIANT(vfio_pci_irq_test) { + int irq_index; +}; + +FIXTURE_VARIANT_ADD(vfio_pci_irq_test, msi) { + .irq_index = VFIO_PCI_MSI_IRQ_INDEX, +}; + +FIXTURE_VARIANT_ADD(vfio_pci_irq_test, msix) { + .irq_index = VFIO_PCI_MSIX_IRQ_INDEX, +}; + +FIXTURE_SETUP(vfio_pci_irq_test) +{ + self->device = vfio_pci_device_init(device_bdf, default_iommu_mode); +} + +FIXTURE_TEARDOWN(vfio_pci_irq_test) +{ + vfio_pci_device_cleanup(self->device); +} + +TEST_F(vfio_pci_irq_test, enable_trigger_disable) +{ + bool msix = variant->irq_index == VFIO_PCI_MSIX_IRQ_INDEX; + int msi_eventfd; + u32 count; + u64 value; + int i; + + if (msix) + count = self->device->msix_info.count; + else + count = self->device->msi_info.count; + + count = min(count, MAX_TEST_MSI); + + if (!count) + SKIP(return, "MSI%s: not supported\n", msix ? "-x" : ""); + + vfio_pci_irq_enable(self->device, variant->irq_index, 0, count); + printf("MSI%s: enabled %d interrupts\n", msix ? "-x" : "", count); + + for (i = 0; i < count; i++) { + msi_eventfd = self->device->msi_eventfds[i]; + + fcntl_set_nonblock(msi_eventfd); + ASSERT_EQ(-1, read(msi_eventfd, &value, 8)); + ASSERT_EQ(EAGAIN, errno); + + vfio_pci_irq_trigger(self->device, variant->irq_index, i); + + ASSERT_EQ(8, read(msi_eventfd, &value, 8)); + ASSERT_EQ(1, value); + } + + vfio_pci_irq_disable(self->device, variant->irq_index); +} + +TEST_F(vfio_pci_device_test, reset) +{ + if (!(self->device->info.flags & VFIO_DEVICE_FLAGS_RESET)) + SKIP(return, "Device does not support reset\n"); + + vfio_pci_device_reset(self->device); +} + +int main(int argc, char *argv[]) +{ + device_bdf = vfio_selftests_get_bdf(&argc, argv); + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c new file mode 100644 index 000000000000..2dbd70b7db62 --- /dev/null +++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include <linux/sizes.h> +#include <linux/vfio.h> + +#include <vfio_util.h> + +#include "../kselftest_harness.h" + +static const char *device_bdf; + +#define ASSERT_NO_MSI(_eventfd) do { \ + u64 __value; \ + \ + ASSERT_EQ(-1, read(_eventfd, &__value, 8)); \ + ASSERT_EQ(EAGAIN, errno); \ +} while (0) + +static void region_setup(struct vfio_pci_device *device, + struct vfio_dma_region *region, u64 size) +{ + const int flags = MAP_SHARED | MAP_ANONYMOUS; + const int prot = PROT_READ | PROT_WRITE; + void *vaddr; + + vaddr = mmap(NULL, size, prot, flags, -1, 0); + VFIO_ASSERT_NE(vaddr, MAP_FAILED); + + region->vaddr = vaddr; + region->iova = (u64)vaddr; + region->size = size; + + vfio_pci_dma_map(device, region); +} + +static void region_teardown(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + vfio_pci_dma_unmap(device, region); + VFIO_ASSERT_EQ(munmap(region->vaddr, region->size), 0); +} + +FIXTURE(vfio_pci_driver_test) { + struct vfio_pci_device *device; + struct vfio_dma_region memcpy_region; + void *vaddr; + int msi_fd; + + u64 size; + void *src; + void *dst; + iova_t src_iova; + iova_t dst_iova; + iova_t unmapped_iova; +}; + +FIXTURE_VARIANT(vfio_pci_driver_test) { + const char *iommu_mode; +}; + +#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \ +FIXTURE_VARIANT_ADD(vfio_pci_driver_test, _iommu_mode) { \ + .iommu_mode = #_iommu_mode, \ +} + +FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(); + +FIXTURE_SETUP(vfio_pci_driver_test) +{ + struct vfio_pci_driver *driver; + + self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + + driver = &self->device->driver; + + region_setup(self->device, &self->memcpy_region, SZ_1G); + region_setup(self->device, &driver->region, SZ_2M); + + /* Any IOVA that doesn't overlap memcpy_region and driver->region. */ + self->unmapped_iova = 8UL * SZ_1G; + + vfio_pci_driver_init(self->device); + self->msi_fd = self->device->msi_eventfds[driver->msi]; + + /* + * Use the maximum size supported by the device for memcpy operations, + * slimmed down to fit into the memcpy region (divided by 2 so src and + * dst regions do not overlap). + */ + self->size = self->device->driver.max_memcpy_size; + self->size = min(self->size, self->memcpy_region.size / 2); + + self->src = self->memcpy_region.vaddr; + self->dst = self->src + self->size; + + self->src_iova = to_iova(self->device, self->src); + self->dst_iova = to_iova(self->device, self->dst); +} + +FIXTURE_TEARDOWN(vfio_pci_driver_test) +{ + struct vfio_pci_driver *driver = &self->device->driver; + + vfio_pci_driver_remove(self->device); + + region_teardown(self->device, &self->memcpy_region); + region_teardown(self->device, &driver->region); + + vfio_pci_device_cleanup(self->device); +} + +TEST_F(vfio_pci_driver_test, init_remove) +{ + int i; + + for (i = 0; i < 10; i++) { + vfio_pci_driver_remove(self->device); + vfio_pci_driver_init(self->device); + } +} + +TEST_F(vfio_pci_driver_test, memcpy_success) +{ + fcntl_set_nonblock(self->msi_fd); + + memset(self->src, 'x', self->size); + memset(self->dst, 'y', self->size); + + ASSERT_EQ(0, vfio_pci_driver_memcpy(self->device, + self->src_iova, + self->dst_iova, + self->size)); + + ASSERT_EQ(0, memcmp(self->src, self->dst, self->size)); + ASSERT_NO_MSI(self->msi_fd); +} + +TEST_F(vfio_pci_driver_test, memcpy_from_unmapped_iova) +{ + fcntl_set_nonblock(self->msi_fd); + + /* + * Ignore the return value since not all devices will detect and report + * accesses to unmapped IOVAs as errors. + */ + vfio_pci_driver_memcpy(self->device, self->unmapped_iova, + self->dst_iova, self->size); + + ASSERT_NO_MSI(self->msi_fd); +} + +TEST_F(vfio_pci_driver_test, memcpy_to_unmapped_iova) +{ + fcntl_set_nonblock(self->msi_fd); + + /* + * Ignore the return value since not all devices will detect and report + * accesses to unmapped IOVAs as errors. + */ + vfio_pci_driver_memcpy(self->device, self->src_iova, + self->unmapped_iova, self->size); + + ASSERT_NO_MSI(self->msi_fd); +} + +TEST_F(vfio_pci_driver_test, send_msi) +{ + u64 value; + + vfio_pci_driver_send_msi(self->device); + ASSERT_EQ(8, read(self->msi_fd, &value, 8)); + ASSERT_EQ(1, value); +} + +TEST_F(vfio_pci_driver_test, mix_and_match) +{ + u64 value; + int i; + + for (i = 0; i < 10; i++) { + memset(self->src, 'x', self->size); + memset(self->dst, 'y', self->size); + + ASSERT_EQ(0, vfio_pci_driver_memcpy(self->device, + self->src_iova, + self->dst_iova, + self->size)); + + ASSERT_EQ(0, memcmp(self->src, self->dst, self->size)); + + vfio_pci_driver_memcpy(self->device, + self->unmapped_iova, + self->dst_iova, + self->size); + + vfio_pci_driver_send_msi(self->device); + ASSERT_EQ(8, read(self->msi_fd, &value, 8)); + ASSERT_EQ(1, value); + } +} + +TEST_F_TIMEOUT(vfio_pci_driver_test, memcpy_storm, 60) +{ + struct vfio_pci_driver *driver = &self->device->driver; + u64 total_size; + u64 count; + + fcntl_set_nonblock(self->msi_fd); + + /* + * Perform up to 250GiB worth of DMA reads and writes across several + * memcpy operations. Some devices can support even more but the test + * will take too long. + */ + total_size = 250UL * SZ_1G; + count = min(total_size / self->size, driver->max_memcpy_count); + + printf("Kicking off %lu memcpys of size 0x%lx\n", count, self->size); + vfio_pci_driver_memcpy_start(self->device, + self->src_iova, + self->dst_iova, + self->size, count); + + ASSERT_EQ(0, vfio_pci_driver_memcpy_wait(self->device)); + ASSERT_NO_MSI(self->msi_fd); +} + +int main(int argc, char *argv[]) +{ + struct vfio_pci_device *device; + + device_bdf = vfio_selftests_get_bdf(&argc, argv); + + device = vfio_pci_device_init(device_bdf, default_iommu_mode); + if (!device->driver.ops) { + fprintf(stderr, "No driver found for device %s\n", device_bdf); + return KSFT_SKIP; + } + vfio_pci_device_cleanup(device); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index a1f506ba5578..4f09c5db0c7f 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -332,6 +332,12 @@ int main(int argc, char *argv[]) if (oneshot) goto end; + /* Check if WDIOF_KEEPALIVEPING is supported */ + if (!(info.options & WDIOF_KEEPALIVEPING)) { + printf("WDIOC_KEEPALIVE not supported by this device\n"); + goto end; + } + printf("Watchdog Ticking Away!\n"); /* diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 0a5381717e9f..936b18be07cf 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -20,9 +20,10 @@ CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y -CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=y +CONFIG_IP_NF_TARGET_REJECT=y +CONFIG_IP6_NF_TARGET_REJECT=y +CONFIG_IP_NF_IPTABLES_LEGACY=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_MANGLE=y @@ -48,7 +49,6 @@ CONFIG_JUMP_LABEL=y CONFIG_FUTEX=y CONFIG_SHMEM=y CONFIG_SLUB=y -CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_SMP=y CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README index 110b34834a6f..82921c75681c 100644 --- a/tools/testing/selftests/zram/README +++ b/tools/testing/selftests/zram/README @@ -14,7 +14,6 @@ Statistics for individual zram devices are exported through sysfs nodes at Kconfig required: CONFIG_ZRAM=y CONFIG_CRYPTO_LZ4=y -CONFIG_ZPOOL=y CONFIG_ZSMALLOC=y ZRAM Testcases |