diff options
Diffstat (limited to 'tools/testing')
-rw-r--r-- | tools/testing/selftests/Makefile | 1 | ||||
-rw-r--r-- | tools/testing/selftests/dma/dma_map_benchmark.c | 21 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/Makefile | 1 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/hardware_disable_test.c | 165 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/lib/kvm_util.c | 6 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86_64/processor.c | 3 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c | 159 | ||||
-rw-r--r-- | tools/testing/selftests/mount_setattr/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/mount_setattr/Makefile | 7 | ||||
-rw-r--r-- | tools/testing/selftests/mount_setattr/config | 1 | ||||
-rw-r--r-- | tools/testing/selftests/mount_setattr/mount_setattr_test.c | 1424 | ||||
-rwxr-xr-x | tools/testing/selftests/wireguard/netns.sh | 15 |
13 files changed, 1795 insertions, 10 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 41f0a0adbb80..6c575cf34a71 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -33,6 +33,7 @@ TARGETS += memfd TARGETS += memory-hotplug TARGETS += mincore TARGETS += mount +TARGETS += mount_setattr TARGETS += mqueue TARGETS += nci TARGETS += net diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index 537d65968c48..fb23ce9617ea 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -12,9 +12,12 @@ #include <sys/mman.h> #include <linux/types.h> +#define NSEC_PER_MSEC 1000000L + #define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) #define DMA_MAP_MAX_THREADS 1024 #define DMA_MAP_MAX_SECONDS 300 +#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) #define DMA_MAP_BIDIRECTIONAL 0 #define DMA_MAP_TO_DEVICE 1 @@ -36,7 +39,8 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u8 expansion[84]; /* For future use */ + __u32 dma_trans_ns; /* time for DMA transmission in ns */ + __u8 expansion[80]; /* For future use */ }; int main(int argc, char **argv) @@ -46,12 +50,12 @@ int main(int argc, char **argv) /* default single thread, run 20 seconds on NUMA_NO_NODE */ int threads = 1, seconds = 20, node = -1; /* default dma mask 32bit, bidirectional DMA */ - int bits = 32, dir = DMA_MAP_BIDIRECTIONAL; + int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL; int cmd = DMA_MAP_BENCHMARK; char *p; - while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) { + while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) { switch (opt) { case 't': threads = atoi(optarg); @@ -68,6 +72,9 @@ int main(int argc, char **argv) case 'd': dir = atoi(optarg); break; + case 'x': + xdelay = atoi(optarg); + break; default: return -1; } @@ -85,6 +92,12 @@ int main(int argc, char **argv) exit(1); } + if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) { + fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n", + DMA_MAP_MAX_TRANS_DELAY); + exit(1); + } + /* suppose the mininum DMA zone is 1MB in the world */ if (bits < 20 || bits > 64) { fprintf(stderr, "invalid dma mask bit, must be in 20-64\n"); @@ -109,6 +122,8 @@ int main(int argc, char **argv) map.node = node; map.dma_bits = bits; map.dma_dir = dir; + map.dma_trans_ns = xdelay; + if (ioctl(fd, cmd, &map)) { perror("ioctl"); exit(1); diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 3a84394829ea..32b87cc77c8e 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -33,6 +33,7 @@ /demand_paging_test /dirty_log_test /dirty_log_perf_test +/hardware_disable_test /kvm_create_max_vcpus /memslot_modification_stress_test /set_memory_region_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 8c8eda429576..a6d61f451f88 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -67,6 +67,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test +TEST_GEN_PROGS_x86_64 += hardware_disable_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test TEST_GEN_PROGS_x86_64 += set_memory_region_test diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c new file mode 100644 index 000000000000..2f2eeb8a1d86 --- /dev/null +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This test is intended to reproduce a crash that happens when + * kvm_arch_hardware_disable is called and it attempts to unregister the user + * return notifiers. + */ + +#define _GNU_SOURCE + +#include <fcntl.h> +#include <pthread.h> +#include <semaphore.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/wait.h> + +#include <test_util.h> + +#include "kvm_util.h" + +#define VCPU_NUM 4 +#define SLEEPING_THREAD_NUM (1 << 4) +#define FORK_NUM (1ULL << 9) +#define DELAY_US_MAX 2000 +#define GUEST_CODE_PIO_PORT 4 + +sem_t *sem; + +/* Arguments for the pthreads */ +struct payload { + struct kvm_vm *vm; + uint32_t index; +}; + +static void guest_code(void) +{ + for (;;) + ; /* Some busy work */ + printf("Should not be reached.\n"); +} + +static void *run_vcpu(void *arg) +{ + struct payload *payload = (struct payload *)arg; + struct kvm_run *state = vcpu_state(payload->vm, payload->index); + + vcpu_run(payload->vm, payload->index); + + TEST_ASSERT(false, "%s: exited with reason %d: %s\n", + __func__, state->exit_reason, + exit_reason_str(state->exit_reason)); + pthread_exit(NULL); +} + +static void *sleeping_thread(void *arg) +{ + int fd; + + while (true) { + fd = open("/dev/null", O_RDWR); + close(fd); + } + TEST_ASSERT(false, "%s: exited\n", __func__); + pthread_exit(NULL); +} + +static inline void check_create_thread(pthread_t *thread, pthread_attr_t *attr, + void *(*f)(void *), void *arg) +{ + int r; + + r = pthread_create(thread, attr, f, arg); + TEST_ASSERT(r == 0, "%s: failed to create thread", __func__); +} + +static inline void check_set_affinity(pthread_t thread, cpu_set_t *cpu_set) +{ + int r; + + r = pthread_setaffinity_np(thread, sizeof(cpu_set_t), cpu_set); + TEST_ASSERT(r == 0, "%s: failed set affinity", __func__); +} + +static inline void check_join(pthread_t thread, void **retval) +{ + int r; + + r = pthread_join(thread, retval); + TEST_ASSERT(r == 0, "%s: failed to join thread", __func__); +} + +static void run_test(uint32_t run) +{ + struct kvm_vm *vm; + cpu_set_t cpu_set; + pthread_t threads[VCPU_NUM]; + pthread_t throw_away; + struct payload payloads[VCPU_NUM]; + void *b; + uint32_t i, j; + + CPU_ZERO(&cpu_set); + for (i = 0; i < VCPU_NUM; i++) + CPU_SET(i, &cpu_set); + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + vm_create_irqchip(vm); + + fprintf(stderr, "%s: [%d] start vcpus\n", __func__, run); + for (i = 0; i < VCPU_NUM; ++i) { + vm_vcpu_add_default(vm, i, guest_code); + payloads[i].vm = vm; + payloads[i].index = i; + + check_create_thread(&threads[i], NULL, run_vcpu, + (void *)&payloads[i]); + check_set_affinity(threads[i], &cpu_set); + + for (j = 0; j < SLEEPING_THREAD_NUM; ++j) { + check_create_thread(&throw_away, NULL, sleeping_thread, + (void *)NULL); + check_set_affinity(throw_away, &cpu_set); + } + } + fprintf(stderr, "%s: [%d] all threads launched\n", __func__, run); + sem_post(sem); + for (i = 0; i < VCPU_NUM; ++i) + check_join(threads[i], &b); + /* Should not be reached */ + TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run); +} + +int main(int argc, char **argv) +{ + uint32_t i; + int s, r; + pid_t pid; + + sem = sem_open("vm_sem", O_CREAT | O_EXCL, 0644, 0); + sem_unlink("vm_sem"); + + for (i = 0; i < FORK_NUM; ++i) { + pid = fork(); + TEST_ASSERT(pid >= 0, "%s: unable to fork", __func__); + if (pid == 0) + run_test(i); /* This function always exits */ + + fprintf(stderr, "%s: [%d] waiting semaphore\n", __func__, i); + sem_wait(sem); + r = (rand() % DELAY_US_MAX) + 1; + fprintf(stderr, "%s: [%d] waiting %dus\n", __func__, i, r); + usleep(r); + r = waitpid(pid, &s, WNOHANG); + TEST_ASSERT(r != pid, + "%s: [%d] child exited unexpectedly status: [%d]", + __func__, i, s); + fprintf(stderr, "%s: [%d] killing child\n", __func__, i); + kill(pid, SIGKILL); + } + + sem_destroy(sem); + exit(0); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index d787cb802b4a..e5fbf16f725b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -21,6 +21,8 @@ #define KVM_UTIL_PGS_PER_HUGEPG 512 #define KVM_UTIL_MIN_PFN 2 +static int vcpu_mmap_sz(void); + /* Aligns x up to the next multiple of size. Size must be a power of 2. */ static void *align(void *x, size_t size) { @@ -509,7 +511,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu) vcpu->dirty_gfns = NULL; } - ret = munmap(vcpu->state, sizeof(*vcpu->state)); + ret = munmap(vcpu->state, vcpu_mmap_sz()); TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i " "errno: %i", ret, errno); close(vcpu->fd); @@ -978,7 +980,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size " "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->state)); - vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state), + vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, " "vcpu id: %u errno: %i", vcpuid, errno); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index de0c76177d02..a8906e60a108 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -720,7 +720,8 @@ struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); struct kvm_cpuid2 *cpuid; - int rc, max_ent; + int max_ent; + int rc = -1; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 9246ea310587..804ff5ff022d 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -13,19 +13,27 @@ #include <stdint.h> #include <time.h> +#include <sched.h> +#include <sys/syscall.h> #define VCPU_ID 5 +#define SHINFO_REGION_GVA 0xc0000000ULL #define SHINFO_REGION_GPA 0xc0000000ULL #define SHINFO_REGION_SLOT 10 #define PAGE_SIZE 4096 #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) +#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) + +#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) static struct kvm_vm *vm; #define XEN_HYPERCALL_MSR 0x40000000 +#define MIN_STEAL_TIME 50000 + struct pvclock_vcpu_time_info { u32 version; u32 pad0; @@ -43,11 +51,67 @@ struct pvclock_wall_clock { u32 nsec; } __attribute__((__packed__)); +struct vcpu_runstate_info { + uint32_t state; + uint64_t state_entry_time; + uint64_t time[4]; +}; + +#define RUNSTATE_running 0 +#define RUNSTATE_runnable 1 +#define RUNSTATE_blocked 2 +#define RUNSTATE_offline 3 + static void guest_code(void) { + struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; + + /* Test having the host set runstates manually */ + GUEST_SYNC(RUNSTATE_runnable); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); + GUEST_ASSERT(rs->state == 0); + + GUEST_SYNC(RUNSTATE_blocked); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); + GUEST_ASSERT(rs->state == 0); + + GUEST_SYNC(RUNSTATE_offline); + GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); + GUEST_ASSERT(rs->state == 0); + + /* Test runstate time adjust */ + GUEST_SYNC(4); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); + GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); + + /* Test runstate time set */ + GUEST_SYNC(5); + GUEST_ASSERT(rs->state_entry_time >= 0x8000); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); + GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); + + /* sched_yield() should result in some 'runnable' time */ + GUEST_SYNC(6); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); + GUEST_DONE(); } +static long get_run_delay(void) +{ + char path[64]; + long val[2]; + FILE *fp; + + sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); + fp = fopen(path, "r"); + fscanf(fp, "%ld %ld ", &val[0], &val[1]); + fclose(fp); + + return val[1]; +} + static int cmp_timespec(struct timespec *a, struct timespec *b) { if (a->tv_sec > b->tv_sec) @@ -66,12 +130,14 @@ int main(int argc, char *argv[]) { struct timespec min_ts, max_ts, vm_ts; - if (!(kvm_check_cap(KVM_CAP_XEN_HVM) & - KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { + int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); + if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available"); exit(KSFT_SKIP); } + bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); + clock_gettime(CLOCK_REALTIME, &min_ts); vm = vm_create_default(VCPU_ID, 0, (void *) guest_code); @@ -80,6 +146,7 @@ int main(int argc, char *argv[]) /* Map a region for the shared_info page */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); + virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0); struct kvm_xen_hvm_config hvmc = { .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, @@ -111,6 +178,17 @@ int main(int argc, char *argv[]) }; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock); + if (do_runstate_tests) { + struct kvm_xen_vcpu_attr st = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, + .u.gpa = RUNSTATE_ADDR, + }; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); + } + + struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);; + rs->state = 0x5a; + for (;;) { volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); struct ucall uc; @@ -126,8 +204,56 @@ int main(int argc, char *argv[]) case UCALL_ABORT: TEST_FAIL("%s", (const char *)uc.args[0]); /* NOT REACHED */ - case UCALL_SYNC: + case UCALL_SYNC: { + struct kvm_xen_vcpu_attr rst; + long rundelay; + + /* If no runstate support, bail out early */ + if (!do_runstate_tests) + goto done; + + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); + + switch (uc.args[1]) { + case RUNSTATE_running...RUNSTATE_offline: + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; + rst.u.runstate.state = uc.args[1]; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + case 4: + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; + memset(&rst.u, 0, sizeof(rst.u)); + rst.u.runstate.state = (uint64_t)-1; + rst.u.runstate.time_blocked = + 0x5a - rs->time[RUNSTATE_blocked]; + rst.u.runstate.time_offline = + 0x6b6b - rs->time[RUNSTATE_offline]; + rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - + rst.u.runstate.time_offline; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + + case 5: + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; + memset(&rst.u, 0, sizeof(rst.u)); + rst.u.runstate.state = RUNSTATE_running; + rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; + rst.u.runstate.time_blocked = 0x6b6b; + rst.u.runstate.time_offline = 0x5a; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + case 6: + /* Yield until scheduler delay exceeds target */ + rundelay = get_run_delay() + MIN_STEAL_TIME; + do { + sched_yield(); + } while (get_run_delay() < rundelay); + break; + } break; + } case UCALL_DONE: goto done; default: @@ -162,6 +288,33 @@ int main(int argc, char *argv[]) TEST_ASSERT(ti2->version && !(ti2->version & 1), "Bad time_info version %x", ti->version); + if (do_runstate_tests) { + /* + * Fetch runstate and check sanity. Strictly speaking in the + * general case we might not expect the numbers to be identical + * but in this case we know we aren't running the vCPU any more. + */ + struct kvm_xen_vcpu_attr rst = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, + }; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst); + + TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); + TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, + "State entry time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, + "Running time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, + "Runnable time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, + "Blocked time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, + "Offline time mismatch"); + + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); + } kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/mount_setattr/.gitignore b/tools/testing/selftests/mount_setattr/.gitignore new file mode 100644 index 000000000000..5f74d8488472 --- /dev/null +++ b/tools/testing/selftests/mount_setattr/.gitignore @@ -0,0 +1 @@ +mount_setattr_test diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile new file mode 100644 index 000000000000..2250f7dcb81e --- /dev/null +++ b/tools/testing/selftests/mount_setattr/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for mount selftests. +CFLAGS = -g -I../../../../usr/include/ -Wall -O2 -pthread + +TEST_GEN_FILES += mount_setattr_test + +include ../lib.mk diff --git a/tools/testing/selftests/mount_setattr/config b/tools/testing/selftests/mount_setattr/config new file mode 100644 index 000000000000..416bd53ce982 --- /dev/null +++ b/tools/testing/selftests/mount_setattr/config @@ -0,0 +1 @@ +CONFIG_USER_NS=y diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c new file mode 100644 index 000000000000..4e94e566e040 --- /dev/null +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -0,0 +1,1424 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sched.h> +#include <stdio.h> +#include <errno.h> +#include <pthread.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <sys/wait.h> +#include <sys/vfs.h> +#include <sys/statvfs.h> +#include <sys/sysinfo.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <grp.h> +#include <stdbool.h> +#include <stdarg.h> + +#include "../kselftest_harness.h" + +#ifndef CLONE_NEWNS +#define CLONE_NEWNS 0x00020000 +#endif + +#ifndef CLONE_NEWUSER +#define CLONE_NEWUSER 0x10000000 +#endif + +#ifndef MS_REC +#define MS_REC 16384 +#endif + +#ifndef MS_RELATIME +#define MS_RELATIME (1 << 21) +#endif + +#ifndef MS_STRICTATIME +#define MS_STRICTATIME (1 << 24) +#endif + +#ifndef MOUNT_ATTR_RDONLY +#define MOUNT_ATTR_RDONLY 0x00000001 +#endif + +#ifndef MOUNT_ATTR_NOSUID +#define MOUNT_ATTR_NOSUID 0x00000002 +#endif + +#ifndef MOUNT_ATTR_NOEXEC +#define MOUNT_ATTR_NOEXEC 0x00000008 +#endif + +#ifndef MOUNT_ATTR_NODIRATIME +#define MOUNT_ATTR_NODIRATIME 0x00000080 +#endif + +#ifndef MOUNT_ATTR__ATIME +#define MOUNT_ATTR__ATIME 0x00000070 +#endif + +#ifndef MOUNT_ATTR_RELATIME +#define MOUNT_ATTR_RELATIME 0x00000000 +#endif + +#ifndef MOUNT_ATTR_NOATIME +#define MOUNT_ATTR_NOATIME 0x00000010 +#endif + +#ifndef MOUNT_ATTR_STRICTATIME +#define MOUNT_ATTR_STRICTATIME 0x00000020 +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 +#endif + +#ifndef MS_SHARED +#define MS_SHARED (1 << 20) +#endif + +#define DEFAULT_THREADS 4 +#define ptr_to_int(p) ((int)((intptr_t)(p))) +#define int_to_ptr(u) ((void *)((intptr_t)(u))) + +#ifndef __NR_mount_setattr + #if defined __alpha__ + #define __NR_mount_setattr 552 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_mount_setattr (442 + 4000) + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_mount_setattr (442 + 6000) + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_mount_setattr (442 + 5000) + #endif + #elif defined __ia64__ + #define __NR_mount_setattr (442 + 1024) + #else + #define __NR_mount_setattr 442 + #endif + +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; +#endif + +#ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #elif defined __ia64__ + #define __NR_open_tree (428 + 1024) + #else + #define __NR_open_tree 428 + #endif +#endif + +#ifndef MOUNT_ATTR_IDMAP +#define MOUNT_ATTR_IDMAP 0x00100000 +#endif + +static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags, + struct mount_attr *attr, size_t size) +{ + return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); +} + +#ifndef OPEN_TREE_CLONE +#define OPEN_TREE_CLONE 1 +#endif + +#ifndef OPEN_TREE_CLOEXEC +#define OPEN_TREE_CLOEXEC O_CLOEXEC +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +#endif + +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) +{ + return syscall(__NR_open_tree, dfd, filename, flags); +} + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static int write_file(const char *path, const void *buf, size_t count) +{ + int fd; + ssize_t ret; + + fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); + if (fd < 0) + return -1; + + ret = write_nointr(fd, buf, count); + close(fd); + if (ret < 0 || (size_t)ret != count) + return -1; + + return 0; +} + +static int create_and_enter_userns(void) +{ + uid_t uid; + gid_t gid; + char map[100]; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER)) + return -1; + + if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && + errno != ENOENT) + return -1; + + snprintf(map, sizeof(map), "0 %d 1", uid); + if (write_file("/proc/self/uid_map", map, strlen(map))) + return -1; + + + snprintf(map, sizeof(map), "0 %d 1", gid); + if (write_file("/proc/self/gid_map", map, strlen(map))) + return -1; + + if (setgid(0)) + return -1; + + if (setuid(0)) + return -1; + + return 0; +} + +static int prepare_unpriv_mountns(void) +{ + if (create_and_enter_userns()) + return -1; + + if (unshare(CLONE_NEWNS)) + return -1; + + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0)) + return -1; + + return 0; +} + +static int read_mnt_flags(const char *path) +{ + int ret; + struct statvfs stat; + unsigned int mnt_flags; + + ret = statvfs(path, &stat); + if (ret != 0) + return -EINVAL; + + if (stat.f_flag & + ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | ST_NOATIME | + ST_NODIRATIME | ST_RELATIME | ST_SYNCHRONOUS | ST_MANDLOCK)) + return -EINVAL; + + mnt_flags = 0; + if (stat.f_flag & ST_RDONLY) + mnt_flags |= MS_RDONLY; + if (stat.f_flag & ST_NOSUID) + mnt_flags |= MS_NOSUID; + if (stat.f_flag & ST_NODEV) + mnt_flags |= MS_NODEV; + if (stat.f_flag & ST_NOEXEC) + mnt_flags |= MS_NOEXEC; + if (stat.f_flag & ST_NOATIME) + mnt_flags |= MS_NOATIME; + if (stat.f_flag & ST_NODIRATIME) + mnt_flags |= MS_NODIRATIME; + if (stat.f_flag & ST_RELATIME) + mnt_flags |= MS_RELATIME; + if (stat.f_flag & ST_SYNCHRONOUS) + mnt_flags |= MS_SYNCHRONOUS; + if (stat.f_flag & ST_MANDLOCK) + mnt_flags |= ST_MANDLOCK; + + return mnt_flags; +} + +static char *get_field(char *src, int nfields) +{ + int i; + char *p = src; + + for (i = 0; i < nfields; i++) { + while (*p && *p != ' ' && *p != '\t') + p++; + + if (!*p) + break; + + p++; + } + + return p; +} + +static void null_endofword(char *word) +{ + while (*word && *word != ' ' && *word != '\t') + word++; + *word = '\0'; +} + +static bool is_shared_mount(const char *path) +{ + size_t len = 0; + char *line = NULL; + FILE *f = NULL; + + f = fopen("/proc/self/mountinfo", "re"); + if (!f) + return false; + + while (getline(&line, &len, f) != -1) { + char *opts, *target; + + target = get_field(line, 4); + if (!target) + continue; + + opts = get_field(target, 2); + if (!opts) + continue; + + null_endofword(target); + + if (strcmp(target, path) != 0) + continue; + + null_endofword(opts); + if (strstr(opts, "shared:")) + return true; + } + + free(line); + fclose(f); + + return false; +} + +static void *mount_setattr_thread(void *data) +{ + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID, + .attr_clr = 0, + .propagation = MS_SHARED, + }; + + if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr))) + pthread_exit(int_to_ptr(-1)); + + pthread_exit(int_to_ptr(0)); +} + +/* Attempt to de-conflict with the selftests tree. */ +#ifndef SKIP +#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) +#endif + +static bool mount_setattr_supported(void) +{ + int ret; + + ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0); + if (ret < 0 && errno == ENOSYS) + return false; + + return true; +} + +FIXTURE(mount_setattr) { +}; + +FIXTURE_SETUP(mount_setattr) +{ + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_EQ(prepare_unpriv_mountns(), 0); + + (void)umount2("/mnt", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); + + ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); + + ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); + + ASSERT_EQ(mkdir("/mnt/B", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/B", "ramfs", + MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0); + + ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts", + MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0); +} + +FIXTURE_TEARDOWN(mount_setattr) +{ + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + (void)umount2("/mnt/A", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); +} + +TEST_F(mount_setattr, invalid_attributes) +{ + struct mount_attr invalid_attr = { + .attr_set = (1U << 31), + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + invalid_attr.attr_set = 0; + invalid_attr.attr_clr = (1U << 31); + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + invalid_attr.attr_clr = 0; + invalid_attr.propagation = (1U << 31); + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + invalid_attr.attr_set = (1U << 31); + invalid_attr.attr_clr = (1U << 31); + invalid_attr.propagation = (1U << 31); + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); +} + +TEST_F(mount_setattr, extensibility) +{ + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + char *s = "dummy"; + struct mount_attr invalid_attr = {}; + struct mount_attr_large { + struct mount_attr attr1; + struct mount_attr attr2; + struct mount_attr attr3; + } large_attr = {}; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL, + sizeof(invalid_attr)), 0); + ASSERT_EQ(errno, EFAULT); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s, + sizeof(invalid_attr)), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr) / 2), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr) / 2), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, + (void *)&large_attr, sizeof(large_attr)), 0); + + large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, + (void *)&large_attr, sizeof(large_attr)), 0); + + large_attr.attr3.attr_set = 0; + large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, + (void *)&large_attr, sizeof(large_attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); +} + +TEST_F(mount_setattr, basic) +{ + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, + .attr_clr = MOUNT_ATTR__ATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + expected_flags |= MS_NOEXEC; + expected_flags &= ~MS_NOATIME; + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, old_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, old_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, old_flags); +} + +TEST_F(mount_setattr, basic_recursive) +{ + int fd; + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, + .attr_clr = MOUNT_ATTR__ATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + expected_flags |= MS_NOEXEC; + expected_flags &= ~MS_NOATIME; + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_clr = MOUNT_ATTR_RDONLY; + attr.propagation = MS_SHARED; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_RDONLY; + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), true); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); + + fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777); + ASSERT_GE(fd, 0); + + /* + * We're holding a fd open for writing so this needs to fail somewhere + * in the middle and the mount options need to be unchanged. + */ + attr.attr_set = MOUNT_ATTR_RDONLY; + ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), true); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); + + EXPECT_EQ(close(fd), 0); +} + +TEST_F(mount_setattr, mount_has_writers) +{ + int fd, dfd; + unsigned int old_flags = 0, new_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, + .attr_clr = MOUNT_ATTR__ATIME, + .propagation = MS_SHARED, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777); + ASSERT_GE(fd, 0); + + /* + * We're holding a fd open to a mount somwhere in the middle so this + * needs to fail somewhere in the middle. After this the mount options + * need to be unchanged. + */ + ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), false); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false); + + dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC); + ASSERT_GE(dfd, 0); + EXPECT_EQ(fsync(dfd), 0); + EXPECT_EQ(close(dfd), 0); + + EXPECT_EQ(fsync(fd), 0); + EXPECT_EQ(close(fd), 0); + + /* All writers are gone so this should succeed. */ + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); +} + +TEST_F(mount_setattr, mixed_mount_options) +{ + unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME, + .attr_set = MOUNT_ATTR_RELATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags1 = read_mnt_flags("/mnt/B"); + ASSERT_GT(old_flags1, 0); + + old_flags2 = read_mnt_flags("/mnt/B/BB"); + ASSERT_GT(old_flags2, 0); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags = old_flags2; + expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID); + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/B"); + ASSERT_EQ(new_flags, expected_flags); + + expected_flags = old_flags2; + expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID); + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/B/BB"); + ASSERT_EQ(new_flags, expected_flags); +} + +TEST_F(mount_setattr, time_changes) +{ + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = MOUNT_ATTR_STRICTATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME; + attr.attr_clr = MOUNT_ATTR__ATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = 0; + attr.attr_clr = MOUNT_ATTR_STRICTATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_clr = MOUNT_ATTR_NOATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME; + attr.attr_clr = MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_NOATIME; + expected_flags |= MS_NODIRATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_set &= ~MOUNT_ATTR_NOATIME; + attr.attr_set |= MOUNT_ATTR_RELATIME; + attr.attr_clr |= MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_NOATIME; + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_set &= ~MOUNT_ATTR_RELATIME; + attr.attr_set |= MOUNT_ATTR_STRICTATIME; + attr.attr_clr |= MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_set &= ~MOUNT_ATTR_STRICTATIME; + attr.attr_set |= MOUNT_ATTR_NOATIME; + attr.attr_clr |= MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags |= MS_NOATIME; + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_clr = MOUNT_ATTR_NODIRATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_NODIRATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); +} + +TEST_F(mount_setattr, multi_threaded) +{ + int i, j, nthreads, ret = 0; + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + pthread_attr_t pattr; + pthread_t threads[DEFAULT_THREADS]; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + /* Try to change mount options from multiple threads. */ + nthreads = get_nprocs_conf(); + if (nthreads > DEFAULT_THREADS) + nthreads = DEFAULT_THREADS; + + pthread_attr_init(&pattr); + for (i = 0; i < nthreads; i++) + ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0); + + for (j = 0; j < i; j++) { + void *retptr = NULL; + + EXPECT_EQ(pthread_join(threads[j], &retptr), 0); + + ret += ptr_to_int(retptr); + EXPECT_EQ(ret, 0); + } + pthread_attr_destroy(&pattr); + + ASSERT_EQ(ret, 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + expected_flags |= MS_NOSUID; + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), true); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); +} + +TEST_F(mount_setattr, wrong_user_namespace) +{ + int ret; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + EXPECT_EQ(create_and_enter_userns(), 0); + ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, EPERM); +} + +TEST_F(mount_setattr, wrong_mount_namespace) +{ + int fd, ret; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC); + ASSERT_GE(fd, 0); + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr)); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, EINVAL); +} + +FIXTURE(mount_setattr_idmapped) { +}; + +FIXTURE_SETUP(mount_setattr_idmapped) +{ + int img_fd = -EBADF; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0); + + (void)umount2("/mnt", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); + + ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B", 0777), 0); + ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0); + ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0); + + ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0); + ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0); + ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); + + ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); + + ASSERT_EQ(mkdir("/mnt/B", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/B", "ramfs", + MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0); + + ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts", + MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0); + + ASSERT_EQ(mkdir("/mnt/C", 0777), 0); + ASSERT_EQ(mkdir("/mnt/D", 0777), 0); + img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600); + ASSERT_GE(img_fd, 0); + ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0); + ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0); + ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0); + ASSERT_EQ(close(img_fd), 0); +} + +FIXTURE_TEARDOWN(mount_setattr_idmapped) +{ + (void)umount2("/mnt/A", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); +} + +/** + * Validate that negative fd values are rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_negative) +{ + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + .userns_fd = -EBADF, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { + TH_LOG("failure: created idmapped mount with negative fd"); + } +} + +/** + * Validate that excessively large fd values are rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_large) +{ + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + .userns_fd = INT64_MAX, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { + TH_LOG("failure: created idmapped mount with too large fd value"); + } +} + +/** + * Validate that closed fd values are rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_closed) +{ + int fd; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ASSERT_GE(close(fd), 0); + + attr.userns_fd = fd; + ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { + TH_LOG("failure: created idmapped mount with closed fd"); + } +} + +/** + * Validate that the initial user namespace is rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(errno, EPERM); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid, + unsigned long range) +{ + char map[100], procfile[256]; + + snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid); + snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range); + if (write_file(procfile, map, strlen(map))) + return -1; + + + snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid); + snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range); + if (write_file(procfile, map, strlen(map))) + return -1; + + return 0; +} + +#define __STACK_SIZE (8 * 1024 * 1024) +static pid_t do_clone(int (*fn)(void *), void *arg, int flags) +{ + void *stack; + + stack = malloc(__STACK_SIZE); + if (!stack) + return -ENOMEM; + +#ifdef __ia64__ + return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL); +#else + return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL); +#endif +} + +static int get_userns_fd_cb(void *data) +{ + return kill(getpid(), SIGSTOP); +} + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range) +{ + int ret; + pid_t pid; + char path[256]; + + pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER); + if (pid < 0) + return -errno; + + ret = map_ids(pid, nsid, hostid, range); + if (ret < 0) + return ret; + + snprintf(path, sizeof(path), "/proc/%d/ns/user", pid); + ret = open(path, O_RDONLY | O_CLOEXEC); + kill(pid, SIGKILL); + wait_for_pid(pid); + return ret; +} + +/** + * Validate that an attached mount in our mount namespace can be idmapped. + * (The kernel enforces that the mount's mount namespace and the caller's mount + * namespace match.) + */ +TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that idmapping a mount is rejected if the mount's mount namespace + * and our mount namespace don't match. + * (The kernel enforces that the mount's mount namespace and the caller's mount + * namespace match.) + */ +TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC); + ASSERT_GE(open_tree_fd, 0); + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, + sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that an attached mount in our mount namespace can be idmapped. + */ +TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + /* Changing mount properties on a detached mount. */ + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", + AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that a detached mount not in our mount namespace can be idmapped. + */ +TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + /* Changing mount properties on a detached mount. */ + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", + AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that currently changing the idmapping of an idmapped mount fails. + */ +TEST_F(mount_setattr_idmapped, change_idmapping) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", + AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + + /* Change idmapping on a detached mount that is already idmapped. */ + attr.userns_fd = get_userns_fd(0, 20000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +static bool expected_uid_gid(int dfd, const char *path, int flags, + uid_t expected_uid, gid_t expected_gid) +{ + int ret; + struct stat st; + + ret = fstatat(dfd, path, &st, flags); + if (ret < 0) + return false; + + return st.st_uid == expected_uid && st.st_gid == expected_gid; +} + +TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/A", + AT_RECURSIVE | + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); + + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 74c69b75f6f5..7ed7cd95e58f 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -39,7 +39,7 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } sleep() { read -t "$1" -N 1 || true; } -waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitiperf() { pretty "${1//*-}" "wait for iperf:${3:-5201} pid $2"; while [[ $(ss -N "$1" -tlpH "sport = ${3:-5201}") != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } @@ -141,6 +141,19 @@ tests() { n2 iperf3 -s -1 -B fd00::2 & waitiperf $netns2 $! n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 + + # TCP over IPv4, in parallel + for max in 4 5 50; do + local pids=( ) + for ((i=0; i < max; ++i)) do + n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & + pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i )) + done + for ((i=0; i < max; ++i)) do + n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & + done + wait "${pids[@]}" + done } [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" |