diff options
Diffstat (limited to 'samples')
52 files changed, 2821 insertions, 57 deletions
diff --git a/samples/Kconfig b/samples/Kconfig index b288d9991d27..ffef99950206 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -184,6 +184,17 @@ config SAMPLE_TIMER bool "Timer sample" depends on CC_CAN_LINK && HEADERS_INSTALL +config SAMPLE_TSM_MR + tristate "TSM measurement sample" + select TSM_MEASUREMENTS + select VIRT_DRIVERS + help + Build a sample module that emulates MRs (Measurement Registers) and + exposes them to user mode applications through the TSM sysfs + interface (/sys/class/misc/tsm_mr_sample/emulated_mr/). + + The module name will be tsm-mr-sample when built as a module. + config SAMPLE_UHID bool "UHID sample" depends on CC_CAN_LINK && HEADERS_INSTALL @@ -291,8 +302,29 @@ config SAMPLE_CGROUP help Build samples that demonstrate the usage of the cgroup API. +config SAMPLE_CHECK_EXEC + bool "Exec secure bits examples" + depends on CC_CAN_LINK && HEADERS_INSTALL + help + Build a tool to easily configure SECBIT_EXEC_RESTRICT_FILE and + SECBIT_EXEC_DENY_INTERACTIVE, and a simple script interpreter to + demonstrate how they should be used with execveat(2) + + AT_EXECVE_CHECK. + +config SAMPLE_HUNG_TASK + tristate "Hung task detector test code" + depends on DETECT_HUNG_TASK && DEBUG_FS + help + Build a module that provides debugfs files (e.g., mutex, semaphore, + etc.) under <debugfs>/hung_task. If a user reads one of these files, + it will sleep for a long time (256 seconds) while holding a lock. Thus, + if 2 or more processes read the same file concurrently, it will + be detected by the hung_task watchdog.
+ source "samples/rust/Kconfig" +source "samples/damon/Kconfig" + endif # SAMPLES config HAVE_SAMPLE_FTRACE_DIRECT diff --git a/samples/Makefile b/samples/Makefile index b85fa64390c5..07641e177bd8 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -3,6 +3,7 @@ subdir-$(CONFIG_SAMPLE_AUXDISPLAY) += auxdisplay subdir-$(CONFIG_SAMPLE_ANDROID_BINDERFS) += binderfs +subdir-$(CONFIG_SAMPLE_CHECK_EXEC) += check-exec subdir-$(CONFIG_SAMPLE_CGROUP) += cgroup obj-$(CONFIG_SAMPLE_CONFIGFS) += configfs/ obj-$(CONFIG_SAMPLE_CONNECTOR) += connector/ @@ -39,3 +40,8 @@ obj-$(CONFIG_SAMPLE_KMEMLEAK) += kmemleak/ obj-$(CONFIG_SAMPLE_CORESIGHT_SYSCFG) += coresight/ obj-$(CONFIG_SAMPLE_FPROBE) += fprobe/ obj-$(CONFIG_SAMPLES_RUST) += rust/ +obj-$(CONFIG_SAMPLE_DAMON_WSSE) += damon/ +obj-$(CONFIG_SAMPLE_DAMON_PRCL) += damon/ +obj-$(CONFIG_SAMPLE_DAMON_MTIER) += damon/ +obj-$(CONFIG_SAMPLE_HUNG_TASK) += hung_task/ +obj-$(CONFIG_SAMPLE_TSM_MR) += tsm-mr/ diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 96a05e70ace3..95a4fa1f1e44 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -123,7 +123,7 @@ always-y += ibumad_kern.o always-y += hbm_out_kern.o always-y += hbm_edt_kern.o -TPROGS_CFLAGS = $(TPROGS_USER_CFLAGS) +COMMON_CFLAGS = $(TPROGS_USER_CFLAGS) TPROGS_LDFLAGS = $(TPROGS_USER_LDFLAGS) ifeq ($(ARCH), arm) @@ -307,7 +307,7 @@ $(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check= VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \ $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \ - $(abspath ./vmlinux) + $(abspath $(objtree)/vmlinux) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) @@ -376,7 +376,7 @@ $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(BPF_EXTRA_CFLAGS) \ -I$(obj) -I$(srctree)/tools/testing/selftests/bpf/ \ - -I$(LIBBPF_INCLUDE) \ + -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ -D__KERNEL__ 
-D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(SRCARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c index b7997541f7ee..f93d9145ab8a 100644 --- a/samples/bpf/sockex2_kern.c +++ b/samples/bpf/sockex2_kern.c @@ -31,7 +31,6 @@ static inline int proto_ports_offset(__u64 proto) switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: - case IPPROTO_DCCP: case IPPROTO_ESP: case IPPROTO_SCTP: case IPPROTO_UDPLITE: diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c index af29a1bde4e4..3c36c25d9902 100644 --- a/samples/bpf/xdp2skb_meta_kern.c +++ b/samples/bpf/xdp2skb_meta_kern.c @@ -63,7 +63,6 @@ SEC("tc_mark") int _tc_mark(struct __sk_buff *ctx) { void *data = (void *)(unsigned long)ctx->data; - void *data_end = (void *)(unsigned long)ctx->data_end; void *data_meta = (void *)(unsigned long)ctx->data_meta; struct meta_info *meta = data_meta; diff --git a/samples/check-exec/.gitignore b/samples/check-exec/.gitignore new file mode 100644 index 000000000000..cd759a19dacd --- /dev/null +++ b/samples/check-exec/.gitignore @@ -0,0 +1,2 @@ +/inc +/set-exec diff --git a/samples/check-exec/Makefile b/samples/check-exec/Makefile new file mode 100644 index 000000000000..c4f08ad0f8e3 --- /dev/null +++ b/samples/check-exec/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: BSD-3-Clause + +userprogs-always-y := \ + inc \ + set-exec + +userccflags += -I usr/include + +.PHONY: all clean + +all: + $(MAKE) -C ../.. samples/check-exec/ + +clean: + $(MAKE) -C ../.. M=samples/check-exec/ clean diff --git a/samples/check-exec/inc.c b/samples/check-exec/inc.c new file mode 100644 index 000000000000..7f6ef06a2f06 --- /dev/null +++ b/samples/check-exec/inc.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* + * Very simple script interpreter that can evaluate two different commands (one + * per line): + * - "?" 
to initialize a counter from user's input; + * - "+" to increment the counter (which is set to 0 by default). + * + * See tools/testing/selftests/exec/check-exec-tests.sh and + * Documentation/userspace-api/check_exec.rst + * + * Copyright © 2024 Microsoft Corporation + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <linux/fcntl.h> +#include <linux/prctl.h> +#include <linux/securebits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +static int sys_execveat(int dirfd, const char *pathname, char *const argv[], + char *const envp[], int flags) +{ + return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); +} + +/* Returns 1 on error, 0 otherwise. */ +static int interpret_buffer(char *buffer, size_t buffer_size) +{ + char *line, *saveptr = NULL; + long long number = 0; + + /* Each command is the first character of a line. */ + saveptr = NULL; + line = strtok_r(buffer, "\n", &saveptr); + while (line) { + if (*line != '#' && strlen(line) != 1) { + fprintf(stderr, "# ERROR: Unknown string\n"); + return 1; + } + switch (*line) { + case '#': + /* Skips shebang and comments. */ + break; + case '+': + /* Increments and prints the number. */ + number++; + printf("%lld\n", number); + break; + case '?': + /* Reads integer from stdin. */ + fprintf(stderr, "> Enter new number: \n"); + if (scanf("%lld", &number) != 1) { + fprintf(stderr, + "# WARNING: Failed to read number from stdin\n"); + } + break; + default: + fprintf(stderr, "# ERROR: Unknown character '%c'\n", + *line); + return 1; + } + line = strtok_r(NULL, "\n", &saveptr); + } + return 0; +} + +/* Returns 1 on error, 0 otherwise. 
*/ +static int interpret_stream(FILE *script, char *const script_name, + char *const *const envp, const bool restrict_stream) +{ + int err; + char *const script_argv[] = { script_name, NULL }; + char buf[128] = {}; + size_t buf_size = sizeof(buf); + + /* + * We pass a valid argv and envp to the kernel to emulate a native + * script execution. We must use the script file descriptor instead of + * the script path name to avoid race conditions. + */ + err = sys_execveat(fileno(script), "", script_argv, envp, + AT_EMPTY_PATH | AT_EXECVE_CHECK); + if (err && restrict_stream) { + perror("ERROR: Script execution check"); + return 1; + } + + /* Reads script. */ + buf_size = fread(buf, 1, buf_size - 1, script); + return interpret_buffer(buf, buf_size); +} + +static void print_usage(const char *argv0) +{ + fprintf(stderr, "usage: %s <script.inc> | -i | -c <command>\n\n", + argv0); + fprintf(stderr, "Example:\n"); + fprintf(stderr, " ./set-exec -fi -- ./inc -i < script-exec.inc\n"); +} + +int main(const int argc, char *const argv[], char *const *const envp) +{ + int opt; + char *cmd = NULL; + char *script_name = NULL; + bool interpret_stdin = false; + FILE *script_file = NULL; + int secbits; + bool deny_interactive, restrict_file; + size_t arg_nb; + + secbits = prctl(PR_GET_SECUREBITS); + if (secbits == -1) { + /* + * This should never happen, except with a buggy seccomp + * filter. + */ + perror("ERROR: Failed to get securebits"); + return 1; + } + + deny_interactive = !!(secbits & SECBIT_EXEC_DENY_INTERACTIVE); + restrict_file = !!(secbits & SECBIT_EXEC_RESTRICT_FILE); + + while ((opt = getopt(argc, argv, "c:i")) != -1) { + switch (opt) { + case 'c': + if (cmd) { + fprintf(stderr, "ERROR: Command already set"); + return 1; + } + cmd = optarg; + break; + case 'i': + interpret_stdin = true; + break; + default: + print_usage(argv[0]); + return 1; + } + } + + /* Checks that only one argument is used, or read stdin. 
*/ + arg_nb = !!cmd + !!interpret_stdin; + if (arg_nb == 0 && argc == 2) { + script_name = argv[1]; + } else if (arg_nb != 1) { + print_usage(argv[0]); + return 1; + } + + if (cmd) { + /* + * Other kind of interactive interpretations should be denied + * as well (e.g. CLI arguments passing script snippets, + * environment variables interpreted as script). However, any + * way to pass script files should only be restricted according + * to restrict_file. + */ + if (deny_interactive) { + fprintf(stderr, + "ERROR: Interactive interpretation denied.\n"); + return 1; + } + + return interpret_buffer(cmd, strlen(cmd)); + } + + if (interpret_stdin && !script_name) { + script_file = stdin; + /* + * As for any execve(2) call, this path may be logged by the + * kernel. + */ + script_name = "/proc/self/fd/0"; + /* + * When stdin is used, it can point to a regular file or a + * pipe. Restrict stdin execution according to + * SECBIT_EXEC_DENY_INTERACTIVE but always allow executable + * files (which are not considered as interactive inputs). + */ + return interpret_stream(script_file, script_name, envp, + deny_interactive); + } else if (script_name && !interpret_stdin) { + /* + * In this sample, we don't pass any argument to scripts, but + * otherwise we would have to forge an argv with such + * arguments. + */ + script_file = fopen(script_name, "r"); + if (!script_file) { + perror("ERROR: Failed to open script"); + return 1; + } + /* + * Restricts file execution according to + * SECBIT_EXEC_RESTRICT_FILE. 
+ */ + return interpret_stream(script_file, script_name, envp, + restrict_file); + } + + print_usage(argv[0]); + return 1; +} diff --git a/samples/check-exec/run-script-ask.sh b/samples/check-exec/run-script-ask.sh new file mode 100755 index 000000000000..8ef0fdc37266 --- /dev/null +++ b/samples/check-exec/run-script-ask.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env sh +# SPDX-License-Identifier: BSD-3-Clause + +DIR="$(dirname -- "$0")" + +PATH="${PATH}:${DIR}" + +set -x +"${DIR}/script-ask.inc" diff --git a/samples/check-exec/script-ask.inc b/samples/check-exec/script-ask.inc new file mode 100755 index 000000000000..720a8e649225 --- /dev/null +++ b/samples/check-exec/script-ask.inc @@ -0,0 +1,5 @@ +#!/usr/bin/env inc +# SPDX-License-Identifier: BSD-3-Clause + +? ++ diff --git a/samples/check-exec/script-exec.inc b/samples/check-exec/script-exec.inc new file mode 100755 index 000000000000..3245cb9d8dd1 --- /dev/null +++ b/samples/check-exec/script-exec.inc @@ -0,0 +1,4 @@ +#!/usr/bin/env inc +# SPDX-License-Identifier: BSD-3-Clause + ++ diff --git a/samples/check-exec/script-noexec.inc b/samples/check-exec/script-noexec.inc new file mode 100644 index 000000000000..3245cb9d8dd1 --- /dev/null +++ b/samples/check-exec/script-noexec.inc @@ -0,0 +1,4 @@ +#!/usr/bin/env inc +# SPDX-License-Identifier: BSD-3-Clause + ++ diff --git a/samples/check-exec/set-exec.c b/samples/check-exec/set-exec.c new file mode 100644 index 000000000000..ba86a60a20dd --- /dev/null +++ b/samples/check-exec/set-exec.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* + * Simple tool to set SECBIT_EXEC_RESTRICT_FILE, SECBIT_EXEC_DENY_INTERACTIVE, + * before executing a command. 
+ * + * Copyright © 2024 Microsoft Corporation + */ + +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ +#include <errno.h> +#include <linux/prctl.h> +#include <linux/securebits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> +#include <unistd.h> + +static void print_usage(const char *argv0) +{ + fprintf(stderr, "usage: %s -f|-i -- <cmd> [args]...\n\n", argv0); + fprintf(stderr, "Execute a command with\n"); + fprintf(stderr, "- SECBIT_EXEC_RESTRICT_FILE set: -f\n"); + fprintf(stderr, "- SECBIT_EXEC_DENY_INTERACTIVE set: -i\n"); +} + +int main(const int argc, char *const argv[], char *const *const envp) +{ + const char *cmd_path; + char *const *cmd_argv; + int opt, secbits_cur, secbits_new; + bool has_policy = false; + + secbits_cur = prctl(PR_GET_SECUREBITS); + if (secbits_cur == -1) { + /* + * This should never happen, except with a buggy seccomp + * filter. + */ + perror("ERROR: Failed to get securebits"); + return 1; + } + + secbits_new = secbits_cur; + while ((opt = getopt(argc, argv, "fi")) != -1) { + switch (opt) { + case 'f': + secbits_new |= SECBIT_EXEC_RESTRICT_FILE | + SECBIT_EXEC_RESTRICT_FILE_LOCKED; + has_policy = true; + break; + case 'i': + secbits_new |= SECBIT_EXEC_DENY_INTERACTIVE | + SECBIT_EXEC_DENY_INTERACTIVE_LOCKED; + has_policy = true; + break; + default: + print_usage(argv[0]); + return 1; + } + } + + if (!argv[optind] || !has_policy) { + print_usage(argv[0]); + return 1; + } + + if (secbits_cur != secbits_new && + prctl(PR_SET_SECUREBITS, secbits_new)) { + perror("Failed to set secure bit(s)."); + fprintf(stderr, + "Hint: The running kernel may not support this feature.\n"); + return 1; + } + + cmd_path = argv[optind]; + cmd_argv = argv + optind; + fprintf(stderr, "Executing command...\n"); + execvpe(cmd_path, cmd_argv, envp); + fprintf(stderr, "Failed to execute \"%s\": %s\n", cmd_path, + strerror(errno)); + return 1; +} diff --git a/samples/connector/cn_test.c 
b/samples/connector/cn_test.c index 0958a171d048..73d50b4aebb6 100644 --- a/samples/connector/cn_test.c +++ b/samples/connector/cn_test.c @@ -172,7 +172,7 @@ static int cn_test_init(void) static void cn_test_fini(void) { - del_timer_sync(&cn_test_timer); + timer_delete_sync(&cn_test_timer); cn_del_callback(&cn_test_id); cn_test_id.val--; cn_del_callback(&cn_test_id); diff --git a/samples/damon/Kconfig b/samples/damon/Kconfig new file mode 100644 index 000000000000..cbf96fd8a8bf --- /dev/null +++ b/samples/damon/Kconfig @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: GPL-2.0 + +menu "DAMON Samples" + +config SAMPLE_DAMON_WSSE + bool "DAMON sample module for working set size estimation" + depends on DAMON && DAMON_VADDR + help + This builds DAMON sample module for working set size estimation. + + The module receives a pid, monitors access to the virtual address + space of the process, estimates working set size of the process, and + repeatedly prints the size on the kernel log. + + If unsure, say N. + +config SAMPLE_DAMON_PRCL + bool "DAMON sample module for access-aware proactive reclamation" + depends on DAMON && DAMON_VADDR + help + This builds DAMON sample module for access-aware proactive + reclamation. + + The module receives a pid, monitors access to the virtual address + space of the process, finds memory regions that are not accessed, and + proactively reclaims the regions. + + If unsure, say N. + +config SAMPLE_DAMON_MTIER + bool "DAMON sample module for memory tiering" + depends on DAMON && DAMON_PADDR + help + This builds DAMON sample module for memory tiering. + + The module assumes the system is constructed with two NUMA nodes, + which appear as local and remote nodes to all CPUs. For example, + node0 is for DDR5 DRAMs connected via DIMM, while node1 is for DDR4 + DRAMs connected via CXL. + + If unsure, say N.
+ +endmenu diff --git a/samples/damon/Makefile b/samples/damon/Makefile new file mode 100644 index 000000000000..72f68cbf422a --- /dev/null +++ b/samples/damon/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_SAMPLE_DAMON_WSSE) += wsse.o +obj-$(CONFIG_SAMPLE_DAMON_PRCL) += prcl.o +obj-$(CONFIG_SAMPLE_DAMON_MTIER) += mtier.o diff --git a/samples/damon/mtier.c b/samples/damon/mtier.c new file mode 100644 index 000000000000..36d2cd933f5a --- /dev/null +++ b/samples/damon/mtier.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * memory tiering: migrate cold pages in node 0 and hot pages in node 1 to node + * 1 and node 0, respectively. Adjust the hotness/coldness threshold aiming + * resulting 99.6 % node 0 utilization ratio. + */ + +#define pr_fmt(fmt) "damon_sample_mtier: " fmt + +#include <linux/damon.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +static unsigned long node0_start_addr __read_mostly; +module_param(node0_start_addr, ulong, 0600); + +static unsigned long node0_end_addr __read_mostly; +module_param(node0_end_addr, ulong, 0600); + +static unsigned long node1_start_addr __read_mostly; +module_param(node1_start_addr, ulong, 0600); + +static unsigned long node1_end_addr __read_mostly; +module_param(node1_end_addr, ulong, 0600); + +static int damon_sample_mtier_enable_store( + const char *val, const struct kernel_param *kp); + +static const struct kernel_param_ops enable_param_ops = { + .set = damon_sample_mtier_enable_store, + .get = param_get_bool, +}; + +static bool enable __read_mostly; +module_param_cb(enable, &enable_param_ops, &enable, 0600); +MODULE_PARM_DESC(enable, "Enable of disable DAMON_SAMPLE_MTIER"); + +static struct damon_ctx *ctxs[2]; + +static struct damon_ctx *damon_sample_mtier_build_ctx(bool promote) +{ + struct damon_ctx *ctx; + struct damon_attrs attrs; + struct damon_target *target; + struct damon_region *region; + struct damos *scheme; + struct 
damos_quota_goal *quota_goal; + struct damos_filter *filter; + + ctx = damon_new_ctx(); + if (!ctx) + return NULL; + attrs = (struct damon_attrs) { + .sample_interval = 5 * USEC_PER_MSEC, + .aggr_interval = 100 * USEC_PER_MSEC, + .ops_update_interval = 60 * USEC_PER_MSEC * MSEC_PER_SEC, + .min_nr_regions = 10, + .max_nr_regions = 1000, + }; + + /* + * auto-tune sampling and aggregation interval aiming 4% DAMON-observed + * accesses ratio, keeping sampling interval in [5ms, 10s] range. + */ + attrs.intervals_goal = (struct damon_intervals_goal) { + .access_bp = 400, .aggrs = 3, + .min_sample_us = 5000, .max_sample_us = 10000000, + }; + if (damon_set_attrs(ctx, &attrs)) + goto free_out; + if (damon_select_ops(ctx, DAMON_OPS_PADDR)) + goto free_out; + + target = damon_new_target(); + if (!target) + goto free_out; + damon_add_target(ctx, target); + region = damon_new_region( + promote ? node1_start_addr : node0_start_addr, + promote ? node1_end_addr : node0_end_addr); + if (!region) + goto free_out; + damon_add_region(region, target); + + scheme = damon_new_scheme( + /* access pattern */ + &(struct damos_access_pattern) { + .min_sz_region = PAGE_SIZE, + .max_sz_region = ULONG_MAX, + .min_nr_accesses = promote ? 1 : 0, + .max_nr_accesses = promote ? UINT_MAX : 0, + .min_age_region = 0, + .max_age_region = UINT_MAX}, + /* action */ + promote ? DAMOS_MIGRATE_HOT : DAMOS_MIGRATE_COLD, + 1000000, /* apply interval (1s) */ + &(struct damos_quota){ + /* 200 MiB per sec by most */ + .reset_interval = 1000, + .sz = 200 * 1024 * 1024, + /* ignore size of region when prioritizing */ + .weight_sz = 0, + .weight_nr_accesses = 100, + .weight_age = 100, + }, + &(struct damos_watermarks){}, + promote ? 0 : 1); /* migrate target node id */ + if (!scheme) + goto free_out; + damon_set_schemes(ctx, &scheme, 1); + quota_goal = damos_new_quota_goal( + promote ? DAMOS_QUOTA_NODE_MEM_USED_BP : + DAMOS_QUOTA_NODE_MEM_FREE_BP, + promote ? 
9970 : 50); + if (!quota_goal) + goto free_out; + quota_goal->nid = 0; + damos_add_quota_goal(&scheme->quota, quota_goal); + filter = damos_new_filter(DAMOS_FILTER_TYPE_YOUNG, true, promote); + if (!filter) + goto free_out; + damos_add_filter(scheme, filter); + return ctx; +free_out: + damon_destroy_ctx(ctx); + return NULL; +} + +static int damon_sample_mtier_start(void) +{ + struct damon_ctx *ctx; + + ctx = damon_sample_mtier_build_ctx(true); + if (!ctx) + return -ENOMEM; + ctxs[0] = ctx; + ctx = damon_sample_mtier_build_ctx(false); + if (!ctx) { + damon_destroy_ctx(ctxs[0]); + return -ENOMEM; + } + ctxs[1] = ctx; + return damon_start(ctxs, 2, true); +} + +static void damon_sample_mtier_stop(void) +{ + damon_stop(ctxs, 2); + damon_destroy_ctx(ctxs[0]); + damon_destroy_ctx(ctxs[1]); +} + +static int damon_sample_mtier_enable_store( + const char *val, const struct kernel_param *kp) +{ + bool enabled = enable; + int err; + + err = kstrtobool(val, &enable); + if (err) + return err; + + if (enable == enabled) + return 0; + + if (enable) + return damon_sample_mtier_start(); + damon_sample_mtier_stop(); + return 0; +} + +static int __init damon_sample_mtier_init(void) +{ + return 0; +} + +module_init(damon_sample_mtier_init); diff --git a/samples/damon/prcl.c b/samples/damon/prcl.c new file mode 100644 index 000000000000..056b1b21a0fe --- /dev/null +++ b/samples/damon/prcl.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * proactive reclamation: monitor access pattern of a given process, find + * regions that seems not accessed, and proactively page out the regions. 
+ */ + +#define pr_fmt(fmt) "damon_sample_prcl: " fmt + +#include <linux/damon.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +static int target_pid __read_mostly; +module_param(target_pid, int, 0600); + +static int damon_sample_prcl_enable_store( + const char *val, const struct kernel_param *kp); + +static const struct kernel_param_ops enable_param_ops = { + .set = damon_sample_prcl_enable_store, + .get = param_get_bool, +}; + +static bool enable __read_mostly; +module_param_cb(enable, &enable_param_ops, &enable, 0600); +MODULE_PARM_DESC(enable, "Enable of disable DAMON_SAMPLE_WSSE"); + +static struct damon_ctx *ctx; +static struct pid *target_pidp; + +static int damon_sample_prcl_after_aggregate(struct damon_ctx *c) +{ + struct damon_target *t; + + damon_for_each_target(t, c) { + struct damon_region *r; + unsigned long wss = 0; + + damon_for_each_region(r, t) { + if (r->nr_accesses > 0) + wss += r->ar.end - r->ar.start; + } + pr_info("wss: %lu\n", wss); + } + return 0; +} + +static int damon_sample_prcl_start(void) +{ + struct damon_target *target; + struct damos *scheme; + + pr_info("start\n"); + + ctx = damon_new_ctx(); + if (!ctx) + return -ENOMEM; + if (damon_select_ops(ctx, DAMON_OPS_VADDR)) { + damon_destroy_ctx(ctx); + return -EINVAL; + } + + target = damon_new_target(); + if (!target) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_add_target(ctx, target); + target_pidp = find_get_pid(target_pid); + if (!target_pidp) { + damon_destroy_ctx(ctx); + return -EINVAL; + } + target->pid = target_pidp; + + ctx->callback.after_aggregation = damon_sample_prcl_after_aggregate; + + scheme = damon_new_scheme( + &(struct damos_access_pattern) { + .min_sz_region = PAGE_SIZE, + .max_sz_region = ULONG_MAX, + .min_nr_accesses = 0, + .max_nr_accesses = 0, + .min_age_region = 50, + .max_age_region = UINT_MAX}, + DAMOS_PAGEOUT, + 0, + &(struct damos_quota){}, + &(struct damos_watermarks){}, + NUMA_NO_NODE); + if (!scheme) { + 
damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_set_schemes(ctx, &scheme, 1); + + return damon_start(&ctx, 1, true); +} + +static void damon_sample_prcl_stop(void) +{ + pr_info("stop\n"); + if (ctx) { + damon_stop(&ctx, 1); + damon_destroy_ctx(ctx); + } + if (target_pidp) + put_pid(target_pidp); +} + +static int damon_sample_prcl_enable_store( + const char *val, const struct kernel_param *kp) +{ + bool enabled = enable; + int err; + + err = kstrtobool(val, &enable); + if (err) + return err; + + if (enable == enabled) + return 0; + + if (enable) + return damon_sample_prcl_start(); + damon_sample_prcl_stop(); + return 0; +} + +static int __init damon_sample_prcl_init(void) +{ + return 0; +} + +module_init(damon_sample_prcl_init); diff --git a/samples/damon/wsse.c b/samples/damon/wsse.c new file mode 100644 index 000000000000..11be25803274 --- /dev/null +++ b/samples/damon/wsse.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * working set size estimation: monitor access pattern of given process and + * print estimated working set size (total size of regions that showing some + * access). 
+ */ + +#define pr_fmt(fmt) "damon_sample_wsse: " fmt + +#include <linux/damon.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +static int target_pid __read_mostly; +module_param(target_pid, int, 0600); + +static int damon_sample_wsse_enable_store( + const char *val, const struct kernel_param *kp); + +static const struct kernel_param_ops enable_param_ops = { + .set = damon_sample_wsse_enable_store, + .get = param_get_bool, +}; + +static bool enable __read_mostly; +module_param_cb(enable, &enable_param_ops, &enable, 0600); +MODULE_PARM_DESC(enable, "Enable or disable DAMON_SAMPLE_WSSE"); + +static struct damon_ctx *ctx; +static struct pid *target_pidp; + +static int damon_sample_wsse_after_aggregate(struct damon_ctx *c) +{ + struct damon_target *t; + + damon_for_each_target(t, c) { + struct damon_region *r; + unsigned long wss = 0; + + damon_for_each_region(r, t) { + if (r->nr_accesses > 0) + wss += r->ar.end - r->ar.start; + } + pr_info("wss: %lu\n", wss); + } + return 0; +} + +static int damon_sample_wsse_start(void) +{ + struct damon_target *target; + + pr_info("start\n"); + + ctx = damon_new_ctx(); + if (!ctx) + return -ENOMEM; + if (damon_select_ops(ctx, DAMON_OPS_VADDR)) { + damon_destroy_ctx(ctx); + return -EINVAL; + } + + target = damon_new_target(); + if (!target) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_add_target(ctx, target); + target_pidp = find_get_pid(target_pid); + if (!target_pidp) { + damon_destroy_ctx(ctx); + return -EINVAL; + } + target->pid = target_pidp; + + ctx->callback.after_aggregation = damon_sample_wsse_after_aggregate; + return damon_start(&ctx, 1, true); +} + +static void damon_sample_wsse_stop(void) +{ + pr_info("stop\n"); + if (ctx) { + damon_stop(&ctx, 1); + damon_destroy_ctx(ctx); + } + if (target_pidp) + put_pid(target_pidp); +} + +static int damon_sample_wsse_enable_store( + const char *val, const struct kernel_param *kp) +{ + bool enabled = enable; + int err; + + err = 
kstrtobool(val, &enable); + if (err) + return err; + + if (enable == enabled) + return 0; + + if (enable) + return damon_sample_wsse_start(); + damon_sample_wsse_stop(); + return 0; +} + +static int __init damon_sample_wsse_init(void) +{ + return 0; +} + +module_init(damon_sample_wsse_init); diff --git a/samples/fprobe/fprobe_example.c b/samples/fprobe/fprobe_example.c index 0a50b05add96..bfe98ce826f3 100644 --- a/samples/fprobe/fprobe_example.c +++ b/samples/fprobe/fprobe_example.c @@ -50,7 +50,7 @@ static void show_backtrace(void) static int sample_entry_handler(struct fprobe *fp, unsigned long ip, unsigned long ret_ip, - struct pt_regs *regs, void *data) + struct ftrace_regs *fregs, void *data) { if (use_trace) /* @@ -67,7 +67,7 @@ static int sample_entry_handler(struct fprobe *fp, unsigned long ip, } static void sample_exit_handler(struct fprobe *fp, unsigned long ip, - unsigned long ret_ip, struct pt_regs *regs, + unsigned long ret_ip, struct ftrace_regs *regs, void *data) { unsigned long rip = ret_ip; diff --git a/samples/ftrace/sample-trace-array.c b/samples/ftrace/sample-trace-array.c index d0ee9001c7b3..4147616102f9 100644 --- a/samples/ftrace/sample-trace-array.c +++ b/samples/ftrace/sample-trace-array.c @@ -82,7 +82,7 @@ static int simple_thread(void *arg) while (!kthread_should_stop()) simple_thread_func(count++); - del_timer(&mytimer); + timer_delete(&mytimer); cancel_work_sync(&trace_work); /* @@ -112,7 +112,7 @@ static int __init sample_trace_array_init(void) /* * If context specific per-cpu buffers havent already been allocated. 
*/ - trace_printk_init_buffers(); + trace_array_init_printk(tr); simple_tsk = kthread_run(simple_thread, NULL, "sample-instance"); if (IS_ERR(simple_tsk)) { diff --git a/samples/hid/Makefile b/samples/hid/Makefile index 8ea59e9631a3..db5a077c77fc 100644 --- a/samples/hid/Makefile +++ b/samples/hid/Makefile @@ -40,16 +40,17 @@ BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic endif endif -TPROGS_CFLAGS += -Wall -O2 -TPROGS_CFLAGS += -Wmissing-prototypes -TPROGS_CFLAGS += -Wstrict-prototypes +COMMON_CFLAGS += -Wall -O2 +COMMON_CFLAGS += -Wmissing-prototypes +COMMON_CFLAGS += -Wstrict-prototypes +TPROGS_CFLAGS += $(COMMON_CFLAGS) TPROGS_CFLAGS += -I$(objtree)/usr/include TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) TPROGS_CFLAGS += -I$(srctree)/tools/include ifdef SYSROOT -TPROGS_CFLAGS += --sysroot=$(SYSROOT) +COMMON_CFLAGS += --sysroot=$(SYSROOT) TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib endif @@ -112,7 +113,7 @@ clean: $(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) # Fix up variables inherited from Kbuild that tools/ build system won't like - $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ + $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \ LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(HID_SAMPLES_PATH)/../../ \ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ $@ install_headers @@ -163,7 +164,7 @@ $(obj)/hid_surface_dial.o: $(obj)/hid_surface_dial.skel.h VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \ $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \ - $(abspath ./vmlinux) + $(abspath $(objtree)/vmlinux) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) diff --git a/samples/hung_task/Makefile b/samples/hung_task/Makefile new file mode 100644 index 000000000000..86036f1a204d --- /dev/null +++ b/samples/hung_task/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only 
+obj-$(CONFIG_SAMPLE_HUNG_TASK) += hung_task_tests.o diff --git a/samples/hung_task/hung_task_tests.c b/samples/hung_task/hung_task_tests.c new file mode 100644 index 000000000000..a5c09bd3a47d --- /dev/null +++ b/samples/hung_task/hung_task_tests.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * hung_task_tests.c - Sample code for testing hung tasks with mutex, + * semaphore, etc. + * + * Usage: Load this module and read `<debugfs>/hung_task/mutex`, + * `<debugfs>/hung_task/semaphore`, etc., with 2 or more processes. + * + * This is for testing kernel hung_task error messages with various locking + * mechanisms (e.g., mutex, semaphore, etc.). Note that this may freeze + * your system or cause a panic. Use only for testing purposes. + */ + +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/semaphore.h> + +#define HUNG_TASK_DIR "hung_task" +#define HUNG_TASK_MUTEX_FILE "mutex" +#define HUNG_TASK_SEM_FILE "semaphore" +#define SLEEP_SECOND 256 + +static const char dummy_string[] = "This is a dummy string."; +static DEFINE_MUTEX(dummy_mutex); +static DEFINE_SEMAPHORE(dummy_sem, 1); +static struct dentry *hung_task_dir; + +/* Mutex-based read function */ +static ssize_t read_dummy_mutex(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Second task waits on mutex, entering uninterruptible sleep */ + guard(mutex)(&dummy_mutex); + + /* First task sleeps here, interruptible */ + msleep_interruptible(SLEEP_SECOND * 1000); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + +/* Semaphore-based read function */ +static ssize_t read_dummy_semaphore(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + /* Second task waits on semaphore, entering uninterruptible sleep */ + down(&dummy_sem); + + /* First task sleeps here, interruptible */ + 
msleep_interruptible(SLEEP_SECOND * 1000); + + up(&dummy_sem); + + return simple_read_from_buffer(user_buf, count, ppos, dummy_string, + sizeof(dummy_string)); +} + +/* File operations for mutex */ +static const struct file_operations hung_task_mutex_fops = { + .read = read_dummy_mutex, +}; + +/* File operations for semaphore */ +static const struct file_operations hung_task_sem_fops = { + .read = read_dummy_semaphore, +}; + +static int __init hung_task_tests_init(void) +{ + hung_task_dir = debugfs_create_dir(HUNG_TASK_DIR, NULL); + if (IS_ERR(hung_task_dir)) + return PTR_ERR(hung_task_dir); + + /* Create debugfs files for mutex and semaphore tests */ + debugfs_create_file(HUNG_TASK_MUTEX_FILE, 0400, hung_task_dir, NULL, + &hung_task_mutex_fops); + debugfs_create_file(HUNG_TASK_SEM_FILE, 0400, hung_task_dir, NULL, + &hung_task_sem_fops); + + return 0; +} + +static void __exit hung_task_tests_exit(void) +{ + debugfs_remove_recursive(hung_task_dir); +} + +module_init(hung_task_tests_init); +module_exit(hung_task_tests_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Masami Hiramatsu <mhiramat@kernel.org>"); +MODULE_AUTHOR("Zi Li <amaindex@outlook.com>"); +MODULE_DESCRIPTION("Simple sleep under lock files for testing hung task"); diff --git a/samples/kmemleak/kmemleak-test.c b/samples/kmemleak/kmemleak-test.c index 544c36d51d56..8609812a37eb 100644 --- a/samples/kmemleak/kmemleak-test.c +++ b/samples/kmemleak/kmemleak-test.c @@ -40,25 +40,25 @@ static int kmemleak_test_init(void) pr_info("Kmemleak testing\n"); /* make some orphan objects */ - pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL)); - pr_info("kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL)); - pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL)); - pr_info("kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL)); - pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL)); - pr_info("kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL)); - pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL)); - 
pr_info("kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL)); + pr_info("kmalloc(32) = 0x%px\n", kmalloc(32, GFP_KERNEL)); + pr_info("kmalloc(32) = 0x%px\n", kmalloc(32, GFP_KERNEL)); + pr_info("kmalloc(1024) = 0x%px\n", kmalloc(1024, GFP_KERNEL)); + pr_info("kmalloc(1024) = 0x%px\n", kmalloc(1024, GFP_KERNEL)); + pr_info("kmalloc(2048) = 0x%px\n", kmalloc(2048, GFP_KERNEL)); + pr_info("kmalloc(2048) = 0x%px\n", kmalloc(2048, GFP_KERNEL)); + pr_info("kmalloc(4096) = 0x%px\n", kmalloc(4096, GFP_KERNEL)); + pr_info("kmalloc(4096) = 0x%px\n", kmalloc(4096, GFP_KERNEL)); #ifndef CONFIG_MODULES - pr_info("kmem_cache_alloc(files_cachep) = %p\n", + pr_info("kmem_cache_alloc(files_cachep) = 0x%px\n", kmem_cache_alloc(files_cachep, GFP_KERNEL)); - pr_info("kmem_cache_alloc(files_cachep) = %p\n", + pr_info("kmem_cache_alloc(files_cachep) = 0x%px\n", kmem_cache_alloc(files_cachep, GFP_KERNEL)); #endif - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); - pr_info("vmalloc(64) = %p\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); + pr_info("vmalloc(64) = 0x%px\n", vmalloc(64)); /* * Add elements to a list. 
They should only appear as orphan @@ -66,7 +66,7 @@ static int kmemleak_test_init(void) */ for (i = 0; i < 10; i++) { elem = kzalloc(sizeof(*elem), GFP_KERNEL); - pr_info("kzalloc(sizeof(*elem)) = %p\n", elem); + pr_info("kzalloc(sizeof(*elem)) = 0x%px\n", elem); if (!elem) return -ENOMEM; INIT_LIST_HEAD(&elem->list); @@ -75,11 +75,11 @@ static int kmemleak_test_init(void) for_each_possible_cpu(i) { per_cpu(kmemleak_test_pointer, i) = kmalloc(129, GFP_KERNEL); - pr_info("kmalloc(129) = %p\n", + pr_info("kmalloc(129) = 0x%px\n", per_cpu(kmemleak_test_pointer, i)); } - pr_info("__alloc_percpu(64, 4) = %p\n", __alloc_percpu(64, 4)); + pr_info("__alloc_percpu(64, 4) = 0x%px\n", __alloc_percpu(64, 4)); return 0; } diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c index 57565dfd74a2..4e2854c6f9a3 100644 --- a/samples/landlock/sandboxer.c +++ b/samples/landlock/sandboxer.c @@ -58,6 +58,7 @@ static inline int landlock_restrict_self(const int ruleset_fd, #define ENV_TCP_BIND_NAME "LL_TCP_BIND" #define ENV_TCP_CONNECT_NAME "LL_TCP_CONNECT" #define ENV_SCOPED_NAME "LL_SCOPED" +#define ENV_FORCE_LOG_NAME "LL_FORCE_LOG" #define ENV_DELIMITER ":" static int str2num(const char *numstr, __u64 *num_dst) @@ -91,6 +92,9 @@ static int parse_path(char *env_path, const char ***const path_list) } } *path_list = malloc(num_paths * sizeof(**path_list)); + if (!*path_list) + return -1; + for (i = 0; i < num_paths; i++) (*path_list)[i] = strsep(&env_path, ENV_DELIMITER); @@ -127,6 +131,10 @@ static int populate_ruleset_fs(const char *const env_var, const int ruleset_fd, env_path_name = strdup(env_path_name); unsetenv(env_var); num_paths = parse_path(env_path_name, &path_list); + if (num_paths < 0) { + fprintf(stderr, "Failed to allocate memory\n"); + goto out_free_name; + } if (num_paths == 1 && path_list[0][0] == '\0') { /* * Allows to not use all possible restrictions (e.g. 
use @@ -288,7 +296,7 @@ out_unset: /* clang-format on */ -#define LANDLOCK_ABI_LAST 6 +#define LANDLOCK_ABI_LAST 7 #define XSTR(s) #s #define STR(s) XSTR(s) @@ -315,6 +323,9 @@ static const char help[] = " - \"a\" to restrict opening abstract unix sockets\n" " - \"s\" to restrict sending signals\n" "\n" + "A sandboxer should not log denied access requests to avoid spamming logs, " + "but to test audit we can set " ENV_FORCE_LOG_NAME "=1\n" + "\n" "Example:\n" ENV_FS_RO_NAME "=\"${PATH}:/lib:/usr:/proc:/etc:/dev/urandom\" " ENV_FS_RW_NAME "=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" " @@ -333,7 +344,7 @@ int main(const int argc, char *const argv[], char *const *const envp) const char *cmd_path; char *const *cmd_argv; int ruleset_fd, abi; - char *env_port_name; + char *env_port_name, *env_force_log; __u64 access_fs_ro = ACCESS_FS_ROUGHLY_READ, access_fs_rw = ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_WRITE; @@ -344,6 +355,8 @@ int main(const int argc, char *const argv[], char *const *const envp) .scoped = LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET | LANDLOCK_SCOPE_SIGNAL, }; + int supported_restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; + int set_restrict_flags = 0; if (argc < 2) { fprintf(stderr, help, argv[0]); @@ -415,6 +428,13 @@ int main(const int argc, char *const argv[], char *const *const envp) /* Removes LANDLOCK_SCOPE_* for ABI < 6 */ ruleset_attr.scoped &= ~(LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET | LANDLOCK_SCOPE_SIGNAL); + __attribute__((fallthrough)); + case 6: + /* Removes LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON for ABI < 7 */ + supported_restrict_flags &= + ~LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; + + /* Must be printed for any ABI < LANDLOCK_ABI_LAST. */ fprintf(stderr, "Hint: You should update the running kernel " "to leverage Landlock features " @@ -449,6 +469,24 @@ int main(const int argc, char *const argv[], char *const *const envp) if (check_ruleset_scope(ENV_SCOPED_NAME, &ruleset_attr)) return 1; + /* Enables optional logs. 
*/ + env_force_log = getenv(ENV_FORCE_LOG_NAME); + if (env_force_log) { + if (strcmp(env_force_log, "1") != 0) { + fprintf(stderr, "Unknown value for " ENV_FORCE_LOG_NAME + " (only \"1\" is handled)\n"); + return 1; + } + if (!(supported_restrict_flags & + LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON)) { + fprintf(stderr, + "Audit logs not supported by current kernel\n"); + return 1; + } + set_restrict_flags |= LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; + unsetenv(ENV_FORCE_LOG_NAME); + } + ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); if (ruleset_fd < 0) { @@ -476,7 +514,7 @@ int main(const int argc, char *const argv[], char *const *const envp) perror("Failed to restrict privileges"); goto err_close_ruleset; } - if (landlock_restrict_self(ruleset_fd, 0)) { + if (landlock_restrict_self(ruleset_fd, set_restrict_flags)) { perror("Failed to enforce ruleset"); goto err_close_ruleset; } diff --git a/samples/livepatch/livepatch-callbacks-busymod.c b/samples/livepatch/livepatch-callbacks-busymod.c index 378e2d40271a..fadc2a85cb35 100644 --- a/samples/livepatch/livepatch-callbacks-busymod.c +++ b/samples/livepatch/livepatch-callbacks-busymod.c @@ -44,8 +44,7 @@ static void busymod_work_func(struct work_struct *work) static int livepatch_callbacks_mod_init(void) { pr_info("%s\n", __func__); - schedule_delayed_work(&work, - msecs_to_jiffies(1000 * 0)); + schedule_delayed_work(&work, 0); return 0; } @@ -57,4 +56,5 @@ static void livepatch_callbacks_mod_exit(void) module_init(livepatch_callbacks_mod_init); module_exit(livepatch_callbacks_mod_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); MODULE_LICENSE("GPL"); diff --git a/samples/livepatch/livepatch-callbacks-demo.c b/samples/livepatch/livepatch-callbacks-demo.c index 11c3f4357812..9e69d9caed25 100644 --- a/samples/livepatch/livepatch-callbacks-demo.c +++ b/samples/livepatch/livepatch-callbacks-demo.c @@ -192,5 +192,6 @@ static void 
livepatch_callbacks_demo_exit(void) module_init(livepatch_callbacks_demo_init); module_exit(livepatch_callbacks_demo_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-callbacks-mod.c b/samples/livepatch/livepatch-callbacks-mod.c index 2a074f422a51..d1851b471ad9 100644 --- a/samples/livepatch/livepatch-callbacks-mod.c +++ b/samples/livepatch/livepatch-callbacks-mod.c @@ -38,4 +38,5 @@ static void livepatch_callbacks_mod_exit(void) module_init(livepatch_callbacks_mod_init); module_exit(livepatch_callbacks_mod_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); MODULE_LICENSE("GPL"); diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c index cd76d7ebe598..5263a2f31c48 100644 --- a/samples/livepatch/livepatch-sample.c +++ b/samples/livepatch/livepatch-sample.c @@ -66,5 +66,6 @@ static void livepatch_exit(void) module_init(livepatch_init); module_exit(livepatch_exit); +MODULE_DESCRIPTION("Kernel Live Patching Sample Module"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c index 6701641bf12d..cbf68ca40097 100644 --- a/samples/livepatch/livepatch-shadow-fix1.c +++ b/samples/livepatch/livepatch-shadow-fix1.c @@ -72,8 +72,7 @@ static struct dummy *livepatch_fix1_dummy_alloc(void) if (!d) return NULL; - d->jiffies_expire = jiffies + - msecs_to_jiffies(1000 * EXPIRE_PERIOD); + d->jiffies_expire = jiffies + secs_to_jiffies(EXPIRE_PERIOD); /* * Patch: save the extra memory location into a SV_LEAK shadow @@ -169,5 +168,6 @@ static void livepatch_shadow_fix1_exit(void) module_init(livepatch_shadow_fix1_init); module_exit(livepatch_shadow_fix1_exit); +MODULE_DESCRIPTION("Live patching demo for shadow variables"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git 
a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c index 361046a4f10c..b99122cb221f 100644 --- a/samples/livepatch/livepatch-shadow-fix2.c +++ b/samples/livepatch/livepatch-shadow-fix2.c @@ -128,5 +128,6 @@ static void livepatch_shadow_fix2_exit(void) module_init(livepatch_shadow_fix2_init); module_exit(livepatch_shadow_fix2_exit); +MODULE_DESCRIPTION("Live patching demo for shadow variables"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-shadow-mod.c b/samples/livepatch/livepatch-shadow-mod.c index 7e753b0d2fa6..5d83ad5a8118 100644 --- a/samples/livepatch/livepatch-shadow-mod.c +++ b/samples/livepatch/livepatch-shadow-mod.c @@ -101,8 +101,7 @@ static __used noinline struct dummy *dummy_alloc(void) if (!d) return NULL; - d->jiffies_expire = jiffies + - msecs_to_jiffies(1000 * EXPIRE_PERIOD); + d->jiffies_expire = jiffies + secs_to_jiffies(EXPIRE_PERIOD); /* Oops, forgot to save leak! */ leak = kzalloc(sizeof(*leak), GFP_KERNEL); @@ -152,8 +151,7 @@ static void alloc_work_func(struct work_struct *work) list_add(&d->list, &dummy_list); mutex_unlock(&dummy_list_mutex); - schedule_delayed_work(&alloc_dwork, - msecs_to_jiffies(1000 * ALLOC_PERIOD)); + schedule_delayed_work(&alloc_dwork, secs_to_jiffies(ALLOC_PERIOD)); } /* @@ -184,16 +182,13 @@ static void cleanup_work_func(struct work_struct *work) } mutex_unlock(&dummy_list_mutex); - schedule_delayed_work(&cleanup_dwork, - msecs_to_jiffies(1000 * CLEANUP_PERIOD)); + schedule_delayed_work(&cleanup_dwork, secs_to_jiffies(CLEANUP_PERIOD)); } static int livepatch_shadow_mod_init(void) { - schedule_delayed_work(&alloc_dwork, - msecs_to_jiffies(1000 * ALLOC_PERIOD)); - schedule_delayed_work(&cleanup_dwork, - msecs_to_jiffies(1000 * CLEANUP_PERIOD)); + schedule_delayed_work(&alloc_dwork, secs_to_jiffies(ALLOC_PERIOD)); + schedule_delayed_work(&cleanup_dwork, secs_to_jiffies(CLEANUP_PERIOD)); return 0; } diff --git a/samples/rust/Kconfig 
b/samples/rust/Kconfig index b0f74a81c8f9..7f7371a004ee 100644 --- a/samples/rust/Kconfig +++ b/samples/rust/Kconfig @@ -10,6 +10,17 @@ menuconfig SAMPLES_RUST if SAMPLES_RUST +config SAMPLE_RUST_CONFIGFS + tristate "Configfs sample" + depends on CONFIGFS_FS + help + This option builds the Rust configfs sample. + + To compile this as a module, choose M here: + the module will be called rust_configfs. + + If unsure, say N. + config SAMPLE_RUST_MINIMAL tristate "Minimal" help @@ -20,6 +31,16 @@ config SAMPLE_RUST_MINIMAL If unsure, say N. +config SAMPLE_RUST_MISC_DEVICE + tristate "Misc device" + help + This option builds the Rust misc device. + + To compile this as a module, choose M here: + the module will be called rust_misc_device. + + If unsure, say N. + config SAMPLE_RUST_PRINT tristate "Printing macros" help @@ -30,6 +51,60 @@ config SAMPLE_RUST_PRINT If unsure, say N. +config SAMPLE_RUST_DMA + tristate "DMA Test Driver" + depends on PCI + help + This option builds the Rust DMA Test driver sample. + + To compile this as a module, choose M here: + the module will be called rust_dma. + + If unsure, say N. + +config SAMPLE_RUST_DRIVER_PCI + tristate "PCI Driver" + depends on PCI + help + This option builds the Rust PCI driver sample. + + To compile this as a module, choose M here: + the module will be called driver_pci. + + If unsure, say N. + +config SAMPLE_RUST_DRIVER_PLATFORM + tristate "Platform Driver" + help + This option builds the Rust Platform driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_platform. + + If unsure, say N. + +config SAMPLE_RUST_DRIVER_FAUX + tristate "Faux Driver" + help + This option builds the Rust Faux driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_faux. + + If unsure, say N. 
+ +config SAMPLE_RUST_DRIVER_AUXILIARY + tristate "Auxiliary Driver" + depends on PCI + select AUXILIARY_BUS + help + This option builds the Rust auxiliary driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_auxiliary. + + If unsure, say N. + config SAMPLE_RUST_HOSTPROGS bool "Host programs" help diff --git a/samples/rust/Makefile b/samples/rust/Makefile index c1a5c1655395..bd2faad63b4f 100644 --- a/samples/rust/Makefile +++ b/samples/rust/Makefile @@ -2,7 +2,14 @@ ccflags-y += -I$(src) # needed for trace events obj-$(CONFIG_SAMPLE_RUST_MINIMAL) += rust_minimal.o +obj-$(CONFIG_SAMPLE_RUST_MISC_DEVICE) += rust_misc_device.o obj-$(CONFIG_SAMPLE_RUST_PRINT) += rust_print.o +obj-$(CONFIG_SAMPLE_RUST_DMA) += rust_dma.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_PCI) += rust_driver_pci.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_PLATFORM) += rust_driver_platform.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_FAUX) += rust_driver_faux.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_AUXILIARY) += rust_driver_auxiliary.o +obj-$(CONFIG_SAMPLE_RUST_CONFIGFS) += rust_configfs.o rust_print-y := rust_print_main.o rust_print_events.o diff --git a/samples/rust/rust_configfs.rs b/samples/rust/rust_configfs.rs new file mode 100644 index 000000000000..60ddbe62cda3 --- /dev/null +++ b/samples/rust/rust_configfs.rs @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust configfs sample. + +use kernel::alloc::flags; +use kernel::c_str; +use kernel::configfs; +use kernel::configfs_attrs; +use kernel::new_mutex; +use kernel::page::PAGE_SIZE; +use kernel::prelude::*; +use kernel::sync::Mutex; + +module! 
{ + type: RustConfigfs, + name: "rust_configfs", + author: "Rust for Linux Contributors", + description: "Rust configfs sample", + license: "GPL", +} + +#[pin_data] +struct RustConfigfs { + #[pin] + config: configfs::Subsystem<Configuration>, +} + +#[pin_data] +struct Configuration { + message: &'static CStr, + #[pin] + bar: Mutex<(KBox<[u8; PAGE_SIZE]>, usize)>, +} + +impl Configuration { + fn new() -> impl PinInit<Self, Error> { + try_pin_init!(Self { + message: c_str!("Hello World\n"), + bar <- new_mutex!((KBox::new([0; PAGE_SIZE], flags::GFP_KERNEL)?, 0)), + }) + } +} + +impl kernel::InPlaceModule for RustConfigfs { + fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> { + pr_info!("Rust configfs sample (init)\n"); + + // Define a subsystem with the data type `Configuration`, two + // attributes, `message` and `bar` and child group type `Child`. `mkdir` + // in the directory representing this subsystem will create directories + // backed by the `Child` type. + let item_type = configfs_attrs! { + container: configfs::Subsystem<Configuration>, + data: Configuration, + child: Child, + attributes: [ + message: 0, + bar: 1, + ], + }; + + try_pin_init!(Self { + config <- configfs::Subsystem::new( + c_str!("rust_configfs"), item_type, Configuration::new() + ), + }) + } +} + +#[vtable] +impl configfs::GroupOperations for Configuration { + type Child = Child; + + fn make_group(&self, name: &CStr) -> Result<impl PinInit<configfs::Group<Child>, Error>> { + // Define a group with data type `Child`, one attribute `baz` and child + // group type `GrandChild`. `mkdir` in the directory representing this + // group will create directories backed by the `GrandChild` type. + let tpe = configfs_attrs! 
{ + container: configfs::Group<Child>, + data: Child, + child: GrandChild, + attributes: [ + baz: 0, + ], + }; + + Ok(configfs::Group::new(name.try_into()?, tpe, Child::new())) + } +} + +#[vtable] +impl configfs::AttributeOperations<0> for Configuration { + type Data = Configuration; + + fn show(container: &Configuration, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + pr_info!("Show message\n"); + let data = container.message; + page[0..data.len()].copy_from_slice(data); + Ok(data.len()) + } +} + +#[vtable] +impl configfs::AttributeOperations<1> for Configuration { + type Data = Configuration; + + fn show(container: &Configuration, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + pr_info!("Show bar\n"); + let guard = container.bar.lock(); + let data = guard.0.as_slice(); + let len = guard.1; + page[0..len].copy_from_slice(&data[0..len]); + Ok(len) + } + + fn store(container: &Configuration, page: &[u8]) -> Result { + pr_info!("Store bar\n"); + let mut guard = container.bar.lock(); + guard.0[0..page.len()].copy_from_slice(page); + guard.1 = page.len(); + Ok(()) + } +} + +// `pin_data` cannot handle structs without braces. +#[pin_data] +struct Child {} + +impl Child { + fn new() -> impl PinInit<Self, Error> { + try_pin_init!(Self {}) + } +} + +#[vtable] +impl configfs::GroupOperations for Child { + type Child = GrandChild; + + fn make_group(&self, name: &CStr) -> Result<impl PinInit<configfs::Group<GrandChild>, Error>> { + // Define a group with data type `GrandChild`, one attribute `gc`. As no + // child type is specified, it will not be possible to create subgroups + // in this group, and `mkdir`in the directory representing this group + // will return an error. + let tpe = configfs_attrs! 
{ + container: configfs::Group<GrandChild>, + data: GrandChild, + attributes: [ + gc: 0, + ], + }; + + Ok(configfs::Group::new( + name.try_into()?, + tpe, + GrandChild::new(), + )) + } +} + +#[vtable] +impl configfs::AttributeOperations<0> for Child { + type Data = Child; + + fn show(_container: &Child, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + pr_info!("Show baz\n"); + let data = c"Hello Baz\n".to_bytes(); + page[0..data.len()].copy_from_slice(data); + Ok(data.len()) + } +} + +// `pin_data` cannot handle structs without braces. +#[pin_data] +struct GrandChild {} + +impl GrandChild { + fn new() -> impl PinInit<Self, Error> { + try_pin_init!(Self {}) + } +} + +#[vtable] +impl configfs::AttributeOperations<0> for GrandChild { + type Data = GrandChild; + + fn show(_container: &GrandChild, page: &mut [u8; PAGE_SIZE]) -> Result<usize> { + pr_info!("Show grand child\n"); + let data = c"Hello GC\n".to_bytes(); + page[0..data.len()].copy_from_slice(data); + Ok(data.len()) + } +} diff --git a/samples/rust/rust_dma.rs b/samples/rust/rust_dma.rs new file mode 100644 index 000000000000..874c2c964afa --- /dev/null +++ b/samples/rust/rust_dma.rs @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust DMA api test (based on QEMU's `pci-testdev`). +//! +//! To make this driver probe, QEMU must be run with `-device pci-testdev`. + +use kernel::{bindings, device::Core, dma::CoherentAllocation, pci, prelude::*, types::ARef}; + +struct DmaSampleDriver { + pdev: ARef<pci::Device>, + ca: CoherentAllocation<MyStruct>, +} + +const TEST_VALUES: [(u32, u32); 5] = [ + (0xa, 0xb), + (0xc, 0xd), + (0xe, 0xf), + (0xab, 0xba), + (0xcd, 0xef), +]; + +struct MyStruct { + h: u32, + b: u32, +} + +impl MyStruct { + fn new(h: u32, b: u32) -> Self { + Self { h, b } + } +} +// SAFETY: All bit patterns are acceptable values for `MyStruct`. +unsafe impl kernel::transmute::AsBytes for MyStruct {} +// SAFETY: Instances of `MyStruct` have no uninitialized portions. 
+unsafe impl kernel::transmute::FromBytes for MyStruct {} + +kernel::pci_device_table!( + PCI_TABLE, + MODULE_PCI_TABLE, + <DmaSampleDriver as pci::Driver>::IdInfo, + [( + pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, 0x5), + () + )] +); + +impl pci::Driver for DmaSampleDriver { + type IdInfo = (); + const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE; + + fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self>>> { + dev_info!(pdev.as_ref(), "Probe DMA test driver.\n"); + + let ca: CoherentAllocation<MyStruct> = + CoherentAllocation::alloc_coherent(pdev.as_ref(), TEST_VALUES.len(), GFP_KERNEL)?; + + || -> Result { + for (i, value) in TEST_VALUES.into_iter().enumerate() { + kernel::dma_write!(ca[i] = MyStruct::new(value.0, value.1)); + } + + Ok(()) + }()?; + + let drvdata = KBox::new( + Self { + pdev: pdev.into(), + ca, + }, + GFP_KERNEL, + )?; + + Ok(drvdata.into()) + } +} + +impl Drop for DmaSampleDriver { + fn drop(&mut self) { + dev_info!(self.pdev.as_ref(), "Unload DMA test driver.\n"); + + let _ = || -> Result { + for (i, value) in TEST_VALUES.into_iter().enumerate() { + assert_eq!(kernel::dma_read!(self.ca[i].h), value.0); + assert_eq!(kernel::dma_read!(self.ca[i].b), value.1); + } + Ok(()) + }(); + } +} + +kernel::module_pci_driver! { + type: DmaSampleDriver, + name: "rust_dma", + authors: ["Abdiel Janulgue"], + description: "Rust DMA test", + license: "GPL v2", +} diff --git a/samples/rust/rust_driver_auxiliary.rs b/samples/rust/rust_driver_auxiliary.rs new file mode 100644 index 000000000000..3e15e6d002bb --- /dev/null +++ b/samples/rust/rust_driver_auxiliary.rs @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust auxiliary driver sample (based on a PCI driver for QEMU's `pci-testdev`). +//! +//! To make this driver probe, QEMU must be run with `-device pci-testdev`. 
+ +use kernel::{ + auxiliary, bindings, c_str, device::Core, driver, error::Error, pci, prelude::*, str::CStr, + InPlaceModule, +}; + +use pin_init::PinInit; + +const MODULE_NAME: &CStr = <LocalModule as kernel::ModuleMetadata>::NAME; +const AUXILIARY_NAME: &CStr = c_str!("auxiliary"); + +struct AuxiliaryDriver; + +kernel::auxiliary_device_table!( + AUX_TABLE, + MODULE_AUX_TABLE, + <AuxiliaryDriver as auxiliary::Driver>::IdInfo, + [(auxiliary::DeviceId::new(MODULE_NAME, AUXILIARY_NAME), ())] +); + +impl auxiliary::Driver for AuxiliaryDriver { + type IdInfo = (); + + const ID_TABLE: auxiliary::IdTable<Self::IdInfo> = &AUX_TABLE; + + fn probe(adev: &auxiliary::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self>>> { + dev_info!( + adev.as_ref(), + "Probing auxiliary driver for auxiliary device with id={}\n", + adev.id() + ); + + ParentDriver::connect(adev)?; + + let this = KBox::new(Self, GFP_KERNEL)?; + + Ok(this.into()) + } +} + +struct ParentDriver { + _reg: [auxiliary::Registration; 2], +} + +kernel::pci_device_table!( + PCI_TABLE, + MODULE_PCI_TABLE, + <ParentDriver as pci::Driver>::IdInfo, + [( + pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, 0x5), + () + )] +); + +impl pci::Driver for ParentDriver { + type IdInfo = (); + + const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE; + + fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self>>> { + let this = KBox::new( + Self { + _reg: [ + auxiliary::Registration::new(pdev.as_ref(), AUXILIARY_NAME, 0, MODULE_NAME)?, + auxiliary::Registration::new(pdev.as_ref(), AUXILIARY_NAME, 1, MODULE_NAME)?, + ], + }, + GFP_KERNEL, + )?; + + Ok(this.into()) + } +} + +impl ParentDriver { + fn connect(adev: &auxiliary::Device) -> Result<()> { + let parent = adev.parent().ok_or(EINVAL)?; + let pdev: &pci::Device = parent.try_into()?; + + dev_info!( + adev.as_ref(), + "Connect auxiliary {} with parent: VendorID={:#x}, DeviceID={:#x}\n", + adev.id(), + pdev.vendor_id(), + pdev.device_id() + 
); + + Ok(()) + } +} + +#[pin_data] +struct SampleModule { + #[pin] + _pci_driver: driver::Registration<pci::Adapter<ParentDriver>>, + #[pin] + _aux_driver: driver::Registration<auxiliary::Adapter<AuxiliaryDriver>>, +} + +impl InPlaceModule for SampleModule { + fn init(module: &'static kernel::ThisModule) -> impl PinInit<Self, Error> { + try_pin_init!(Self { + _pci_driver <- driver::Registration::new(MODULE_NAME, module), + _aux_driver <- driver::Registration::new(MODULE_NAME, module), + }) + } +} + +module! { + type: SampleModule, + name: "rust_driver_auxiliary", + author: "Danilo Krummrich", + description: "Rust auxiliary driver", + license: "GPL v2", +} diff --git a/samples/rust/rust_driver_faux.rs b/samples/rust/rust_driver_faux.rs new file mode 100644 index 000000000000..ecc9fd378cbd --- /dev/null +++ b/samples/rust/rust_driver_faux.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +//! Rust faux device sample. + +use kernel::{c_str, faux, prelude::*, Module}; + +module! { + type: SampleModule, + name: "rust_faux_driver", + authors: ["Lyude Paul"], + description: "Rust faux device sample", + license: "GPL", +} + +struct SampleModule { + _reg: faux::Registration, +} + +impl Module for SampleModule { + fn init(_module: &'static ThisModule) -> Result<Self> { + pr_info!("Initialising Rust Faux Device Sample\n"); + + let reg = faux::Registration::new(c_str!("rust-faux-sample-device"), None)?; + + dev_info!(reg.as_ref(), "Hello from faux device!\n"); + + Ok(Self { _reg: reg }) + } +} diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs new file mode 100644 index 000000000000..15147e4401b2 --- /dev/null +++ b/samples/rust/rust_driver_pci.rs @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust PCI driver sample (based on QEMU's `pci-testdev`). +//! +//! To make this driver probe, QEMU must be run with `-device pci-testdev`. 
+ +use kernel::{bindings, c_str, device::Core, devres::Devres, pci, prelude::*, types::ARef}; + +struct Regs; + +impl Regs { + const TEST: usize = 0x0; + const OFFSET: usize = 0x4; + const DATA: usize = 0x8; + const COUNT: usize = 0xC; + const END: usize = 0x10; +} + +type Bar0 = pci::Bar<{ Regs::END }>; + +#[derive(Debug)] +struct TestIndex(u8); + +impl TestIndex { + const NO_EVENTFD: Self = Self(0); +} + +struct SampleDriver { + pdev: ARef<pci::Device>, + bar: Devres<Bar0>, +} + +kernel::pci_device_table!( + PCI_TABLE, + MODULE_PCI_TABLE, + <SampleDriver as pci::Driver>::IdInfo, + [( + pci::DeviceId::from_id(bindings::PCI_VENDOR_ID_REDHAT, 0x5), + TestIndex::NO_EVENTFD + )] +); + +impl SampleDriver { + fn testdev(index: &TestIndex, bar: &Bar0) -> Result<u32> { + // Select the test. + bar.write8(index.0, Regs::TEST); + + let offset = u32::from_le(bar.read32(Regs::OFFSET)) as usize; + let data = bar.read8(Regs::DATA); + + // Write `data` to `offset` to increase `count` by one. + // + // Note that we need `try_write8`, since `offset` can't be checked at compile-time. + bar.try_write8(data, offset)?; + + Ok(bar.read32(Regs::COUNT)) + } +} + +impl pci::Driver for SampleDriver { + type IdInfo = TestIndex; + + const ID_TABLE: pci::IdTable<Self::IdInfo> = &PCI_TABLE; + + fn probe(pdev: &pci::Device<Core>, info: &Self::IdInfo) -> Result<Pin<KBox<Self>>> { + dev_dbg!( + pdev.as_ref(), + "Probe Rust PCI driver sample (PCI ID: 0x{:x}, 0x{:x}).\n", + pdev.vendor_id(), + pdev.device_id() + ); + + pdev.enable_device_mem()?; + pdev.set_master(); + + let bar = pdev.iomap_region_sized::<{ Regs::END }>(0, c_str!("rust_driver_pci"))?; + + let drvdata = KBox::new( + Self { + pdev: pdev.into(), + bar, + }, + GFP_KERNEL, + )?; + + let bar = drvdata.bar.access(pdev.as_ref())?; + dev_info!( + pdev.as_ref(), + "pci-testdev data-match count: {}\n", + Self::testdev(info, bar)? 
+ ); + + Ok(drvdata.into()) + } +} + +impl Drop for SampleDriver { + fn drop(&mut self) { + dev_dbg!(self.pdev.as_ref(), "Remove Rust PCI driver sample.\n"); + } +} + +kernel::module_pci_driver! { + type: SampleDriver, + name: "rust_driver_pci", + authors: ["Danilo Krummrich"], + description: "Rust PCI driver", + license: "GPL v2", +} diff --git a/samples/rust/rust_driver_platform.rs b/samples/rust/rust_driver_platform.rs new file mode 100644 index 000000000000..8b42b3cfb363 --- /dev/null +++ b/samples/rust/rust_driver_platform.rs @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Rust Platform driver sample. + +use kernel::{c_str, device::Core, of, platform, prelude::*, types::ARef}; + +struct SampleDriver { + pdev: ARef<platform::Device>, +} + +struct Info(u32); + +kernel::of_device_table!( + OF_TABLE, + MODULE_OF_TABLE, + <SampleDriver as platform::Driver>::IdInfo, + [(of::DeviceId::new(c_str!("test,rust-device")), Info(42))] +); + +impl platform::Driver for SampleDriver { + type IdInfo = Info; + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); + + fn probe( + pdev: &platform::Device<Core>, + info: Option<&Self::IdInfo>, + ) -> Result<Pin<KBox<Self>>> { + dev_dbg!(pdev.as_ref(), "Probe Rust Platform driver sample.\n"); + + if let Some(info) = info { + dev_info!(pdev.as_ref(), "Probed with info: '{}'.\n", info.0); + } + + let drvdata = KBox::new(Self { pdev: pdev.into() }, GFP_KERNEL)?; + + Ok(drvdata.into()) + } +} + +impl Drop for SampleDriver { + fn drop(&mut self) { + dev_dbg!(self.pdev.as_ref(), "Remove Rust Platform driver sample.\n"); + } +} + +kernel::module_platform_driver! 
{ + type: SampleDriver, + name: "rust_driver_platform", + authors: ["Danilo Krummrich"], + description: "Rust Platform driver", + license: "GPL v2", +} diff --git a/samples/rust/rust_minimal.rs b/samples/rust/rust_minimal.rs index 4aaf117bf8e3..1fc7a1be6b6d 100644 --- a/samples/rust/rust_minimal.rs +++ b/samples/rust/rust_minimal.rs @@ -7,7 +7,7 @@ use kernel::prelude::*; module! { type: RustMinimal, name: "rust_minimal", - author: "Rust for Linux Contributors", + authors: ["Rust for Linux Contributors"], description: "Rust minimal sample", license: "GPL", } diff --git a/samples/rust/rust_misc_device.rs b/samples/rust/rust_misc_device.rs new file mode 100644 index 000000000000..c881fd6dbd08 --- /dev/null +++ b/samples/rust/rust_misc_device.rs @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Rust misc device sample. +//! +//! Below is an example userspace C program that exercises this sample's functionality. +//! +//! ```c +//! #include <stdio.h> +//! #include <stdlib.h> +//! #include <errno.h> +//! #include <fcntl.h> +//! #include <unistd.h> +//! #include <sys/ioctl.h> +//! +//! #define RUST_MISC_DEV_FAIL _IO('|', 0) +//! #define RUST_MISC_DEV_HELLO _IO('|', 0x80) +//! #define RUST_MISC_DEV_GET_VALUE _IOR('|', 0x81, int) +//! #define RUST_MISC_DEV_SET_VALUE _IOW('|', 0x82, int) +//! +//! int main() { +//! int value, new_value; +//! int fd, ret; +//! +//! // Open the device file +//! printf("Opening /dev/rust-misc-device for reading and writing\n"); +//! fd = open("/dev/rust-misc-device", O_RDWR); +//! if (fd < 0) { +//! perror("open"); +//! return errno; +//! } +//! +//! // Make call into driver to say "hello" +//! printf("Calling Hello\n"); +//! ret = ioctl(fd, RUST_MISC_DEV_HELLO, NULL); +//! if (ret < 0) { +//! perror("ioctl: Failed to call into Hello"); +//! close(fd); +//! return errno; +//! } +//! +//! // Get initial value +//! printf("Fetching initial value\n"); +//! 
ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &value); +//! if (ret < 0) { +//! perror("ioctl: Failed to fetch the initial value"); +//! close(fd); +//! return errno; +//! } +//! +//! value++; +//! +//! // Set value to something different +//! printf("Submitting new value (%d)\n", value); +//! ret = ioctl(fd, RUST_MISC_DEV_SET_VALUE, &value); +//! if (ret < 0) { +//! perror("ioctl: Failed to submit new value"); +//! close(fd); +//! return errno; +//! } +//! +//! // Ensure new value was applied +//! printf("Fetching new value\n"); +//! ret = ioctl(fd, RUST_MISC_DEV_GET_VALUE, &new_value); +//! if (ret < 0) { +//! perror("ioctl: Failed to fetch the new value"); +//! close(fd); +//! return errno; +//! } +//! +//! if (value != new_value) { +//! printf("Failed: Committed and retrieved values are different (%d - %d)\n", value, new_value); +//! close(fd); +//! return -1; +//! } +//! +//! // Call the unsuccessful ioctl +//! printf("Attempting to call in to an non-existent IOCTL\n"); +//! ret = ioctl(fd, RUST_MISC_DEV_FAIL, NULL); +//! if (ret < 0) { +//! perror("ioctl: Succeeded to fail - this was expected"); +//! } else { +//! printf("ioctl: Failed to fail\n"); +//! close(fd); +//! return -1; +//! } +//! +//! // Close the device file +//! printf("Closing /dev/rust-misc-device\n"); +//! close(fd); +//! +//! printf("Success\n"); +//! return 0; +//! } +//! ``` + +use core::pin::Pin; + +use kernel::{ + c_str, + device::Device, + fs::File, + ioctl::{_IO, _IOC_SIZE, _IOR, _IOW}, + miscdevice::{MiscDevice, MiscDeviceOptions, MiscDeviceRegistration}, + new_mutex, + prelude::*, + sync::Mutex, + types::ARef, + uaccess::{UserSlice, UserSliceReader, UserSliceWriter}, +}; + +const RUST_MISC_DEV_HELLO: u32 = _IO('|' as u32, 0x80); +const RUST_MISC_DEV_GET_VALUE: u32 = _IOR::<i32>('|' as u32, 0x81); +const RUST_MISC_DEV_SET_VALUE: u32 = _IOW::<i32>('|' as u32, 0x82); + +module! 
{ + type: RustMiscDeviceModule, + name: "rust_misc_device", + authors: ["Lee Jones"], + description: "Rust misc device sample", + license: "GPL", +} + +#[pin_data] +struct RustMiscDeviceModule { + #[pin] + _miscdev: MiscDeviceRegistration<RustMiscDevice>, +} + +impl kernel::InPlaceModule for RustMiscDeviceModule { + fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> { + pr_info!("Initialising Rust Misc Device Sample\n"); + + let options = MiscDeviceOptions { + name: c_str!("rust-misc-device"), + }; + + try_pin_init!(Self { + _miscdev <- MiscDeviceRegistration::register(options), + }) + } +} + +struct Inner { + value: i32, +} + +#[pin_data(PinnedDrop)] +struct RustMiscDevice { + #[pin] + inner: Mutex<Inner>, + dev: ARef<Device>, +} + +#[vtable] +impl MiscDevice for RustMiscDevice { + type Ptr = Pin<KBox<Self>>; + + fn open(_file: &File, misc: &MiscDeviceRegistration<Self>) -> Result<Pin<KBox<Self>>> { + let dev = ARef::from(misc.device()); + + dev_info!(dev, "Opening Rust Misc Device Sample\n"); + + KBox::try_pin_init( + try_pin_init! 
{ + RustMiscDevice { + inner <- new_mutex!( Inner{ value: 0_i32 } ), + dev: dev, + } + }, + GFP_KERNEL, + ) + } + + fn ioctl(me: Pin<&RustMiscDevice>, _file: &File, cmd: u32, arg: usize) -> Result<isize> { + dev_info!(me.dev, "IOCTLing Rust Misc Device Sample\n"); + + let size = _IOC_SIZE(cmd); + + match cmd { + RUST_MISC_DEV_GET_VALUE => me.get_value(UserSlice::new(arg, size).writer())?, + RUST_MISC_DEV_SET_VALUE => me.set_value(UserSlice::new(arg, size).reader())?, + RUST_MISC_DEV_HELLO => me.hello()?, + _ => { + dev_err!(me.dev, "-> IOCTL not recognised: {}\n", cmd); + return Err(ENOTTY); + } + }; + + Ok(0) + } +} + +#[pinned_drop] +impl PinnedDrop for RustMiscDevice { + fn drop(self: Pin<&mut Self>) { + dev_info!(self.dev, "Exiting the Rust Misc Device Sample\n"); + } +} + +impl RustMiscDevice { + fn set_value(&self, mut reader: UserSliceReader) -> Result<isize> { + let new_value = reader.read::<i32>()?; + let mut guard = self.inner.lock(); + + dev_info!( + self.dev, + "-> Copying data from userspace (value: {})\n", + new_value + ); + + guard.value = new_value; + Ok(0) + } + + fn get_value(&self, mut writer: UserSliceWriter) -> Result<isize> { + let guard = self.inner.lock(); + let value = guard.value; + + // Free-up the lock and use our locally cached instance from here + drop(guard); + + dev_info!( + self.dev, + "-> Copying data to userspace (value: {})\n", + &value + ); + + writer.write::<i32>(&value)?; + Ok(0) + } + + fn hello(&self) -> Result<isize> { + dev_info!(self.dev, "-> Hello from the Rust Misc Device\n"); + + Ok(0) + } +} diff --git a/samples/rust/rust_print_main.rs b/samples/rust/rust_print_main.rs index aed90a6feecf..8ea95e8c2f36 100644 --- a/samples/rust/rust_print_main.rs +++ b/samples/rust/rust_print_main.rs @@ -8,7 +8,7 @@ use kernel::prelude::*; module! 
{ type: RustPrint, name: "rust_print", - author: "Rust for Linux Contributors", + authors: ["Rust for Linux Contributors"], description: "Rust printing macros sample", license: "GPL", } @@ -34,6 +34,24 @@ fn arc_print() -> Result { // Uses `dbg` to print, will move `c` (for temporary debugging purposes). dbg!(c); + { + // `Arc` can be used to delegate dynamic dispatch and the following is an example. + // Both `i32` and `&str` implement `Display`. This enables us to express a unified + // behaviour, contract or protocol on both `i32` and `&str` into a single `Arc` of + // type `Arc<dyn Display>`. + + use core::fmt::Display; + fn arc_dyn_print(arc: &Arc<dyn Display>) { + pr_info!("Arc<dyn Display> says {arc}"); + } + + let a_i32_display: Arc<dyn Display> = Arc::new(42i32, GFP_KERNEL)?; + let a_str_display: Arc<dyn Display> = a.clone(); + + arc_dyn_print(&a_i32_display); + arc_dyn_print(&a_str_display); + } + // Pretty-prints the debug formatting with lower-case hexadecimal integers. pr_info!("{:#x?}", a); @@ -83,7 +101,7 @@ impl Drop for RustPrint { } mod trace { - use core::ffi::c_int; + use kernel::ffi::c_int; kernel::declare_trace! { /// # Safety diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 999f78d380ae..1a05fc153353 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -319,7 +319,8 @@ TRACE_EVENT(foo_bar, __assign_cpumask(cpum, cpumask_bits(mask)); ), - TP_printk("foo %s %d %s %s %s %s %s %s (%s) (%s) %s", __entry->foo, __entry->bar, + TP_printk("foo %s %d %s %s %s %s %s %s (%s) (%s) %s [%d] %*pbl", + __entry->foo, __entry->bar, /* * Notice here the use of some helper functions. 
This includes: @@ -370,7 +371,10 @@ TRACE_EVENT(foo_bar, __get_str(str), __get_str(lstr), __get_bitmask(cpus), __get_cpumask(cpum), - __get_str(vstr)) + __get_str(vstr), + __get_dynamic_array_len(cpus), + __get_dynamic_array_len(cpus), + __get_dynamic_array(cpus)) ); /* diff --git a/samples/tsm-mr/Makefile b/samples/tsm-mr/Makefile new file mode 100644 index 000000000000..587c3947b3a7 --- /dev/null +++ b/samples/tsm-mr/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_SAMPLE_TSM_MR) += tsm_mr_sample.o diff --git a/samples/tsm-mr/tsm_mr_sample.c b/samples/tsm-mr/tsm_mr_sample.c new file mode 100644 index 000000000000..a2c652148639 --- /dev/null +++ b/samples/tsm-mr/tsm_mr_sample.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024-2025 Intel Corporation. All rights reserved. */ + +#define pr_fmt(x) KBUILD_MODNAME ": " x + +#include <linux/module.h> +#include <linux/tsm-mr.h> +#include <linux/miscdevice.h> +#include <crypto/hash.h> + +static struct { + u8 static_mr[SHA384_DIGEST_SIZE]; + u8 config_mr[SHA512_DIGEST_SIZE]; + u8 rtmr0[SHA256_DIGEST_SIZE]; + u8 rtmr1[SHA384_DIGEST_SIZE]; + u8 report_digest[SHA512_DIGEST_SIZE]; +} sample_report = { + .static_mr = "static_mr", + .config_mr = "config_mr", + .rtmr0 = "rtmr0", + .rtmr1 = "rtmr1", +}; + +static int sample_report_refresh(const struct tsm_measurements *tm) +{ + struct crypto_shash *tfm; + int rc; + + tfm = crypto_alloc_shash(hash_algo_name[HASH_ALGO_SHA512], 0, 0); + if (IS_ERR(tfm)) { + pr_err("crypto_alloc_shash failed: %ld\n", PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + + rc = crypto_shash_tfm_digest(tfm, (u8 *)&sample_report, + offsetof(typeof(sample_report), + report_digest), + sample_report.report_digest); + crypto_free_shash(tfm); + if (rc) + pr_err("crypto_shash_tfm_digest failed: %d\n", rc); + return rc; +} + +static int sample_report_extend_mr(const struct tsm_measurements *tm, + const struct tsm_measurement_register *mr, + const u8 *data) +{ 
+ SHASH_DESC_ON_STACK(desc, 0); + int rc; + + desc->tfm = crypto_alloc_shash(hash_algo_name[mr->mr_hash], 0, 0); + if (IS_ERR(desc->tfm)) { + pr_err("crypto_alloc_shash failed: %ld\n", PTR_ERR(desc->tfm)); + return PTR_ERR(desc->tfm); + } + + rc = crypto_shash_init(desc); + if (!rc) + rc = crypto_shash_update(desc, mr->mr_value, mr->mr_size); + if (!rc) + rc = crypto_shash_finup(desc, data, mr->mr_size, mr->mr_value); + crypto_free_shash(desc->tfm); + if (rc) + pr_err("SHA calculation failed: %d\n", rc); + return rc; +} + +#define MR_(mr, hash) .mr_value = &sample_report.mr, TSM_MR_(mr, hash) +static const struct tsm_measurement_register sample_mrs[] = { + /* static MR, read-only */ + { MR_(static_mr, SHA384) }, + /* config MR, read-only */ + { MR_(config_mr, SHA512) | TSM_MR_F_NOHASH }, + /* RTMR, direct extension prohibited */ + { MR_(rtmr0, SHA256) | TSM_MR_F_LIVE }, + /* RTMR, direct extension allowed */ + { MR_(rtmr1, SHA384) | TSM_MR_F_RTMR }, + /* RTMR, crypto agile, aliased to rtmr0 and rtmr1, respectively */ + { .mr_value = &sample_report.rtmr0, + TSM_MR_(rtmr_crypto_agile, SHA256) | TSM_MR_F_RTMR }, + { .mr_value = &sample_report.rtmr1, + TSM_MR_(rtmr_crypto_agile, SHA384) | TSM_MR_F_RTMR }, + /* sha512 digest of the whole structure */ + { MR_(report_digest, SHA512) | TSM_MR_F_LIVE }, +}; +#undef MR_ + +static struct tsm_measurements sample_tm = { + .mrs = sample_mrs, + .nr_mrs = ARRAY_SIZE(sample_mrs), + .refresh = sample_report_refresh, + .write = sample_report_extend_mr, +}; + +static const struct attribute_group *sample_groups[] = { + NULL, + NULL, +}; + +static struct miscdevice sample_misc_dev = { + .name = KBUILD_MODNAME, + .minor = MISC_DYNAMIC_MINOR, + .groups = sample_groups, +}; + +static int __init tsm_mr_sample_init(void) +{ + int rc; + + sample_groups[0] = tsm_mr_create_attribute_group(&sample_tm); + if (IS_ERR(sample_groups[0])) + return PTR_ERR(sample_groups[0]); + + rc = misc_register(&sample_misc_dev); + if (rc) + 
tsm_mr_free_attribute_group(sample_groups[0]); + return rc; +} + +static void __exit tsm_mr_sample_exit(void) +{ + misc_deregister(&sample_misc_dev); + tsm_mr_free_attribute_group(sample_groups[0]); +} + +module_init(tsm_mr_sample_init); +module_exit(tsm_mr_sample_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Sample module using tsm-mr to expose emulated MRs"); diff --git a/samples/vfs/.gitignore b/samples/vfs/.gitignore index 79212d91285b..8708341bc082 100644 --- a/samples/vfs/.gitignore +++ b/samples/vfs/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only /test-fsmount +/test-list-all-mounts /test-statx +/mountinfo diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile index 6377a678134a..6554b73a75c8 100644 --- a/samples/vfs/Makefile +++ b/samples/vfs/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -userprogs-always-y += test-fsmount test-statx +userprogs-always-y += test-fsmount test-statx mountinfo test-list-all-mounts userccflags += -I usr/include diff --git a/samples/vfs/mountinfo.c b/samples/vfs/mountinfo.c new file mode 100644 index 000000000000..bc78275cac69 --- /dev/null +++ b/samples/vfs/mountinfo.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* + * Use pidfds, nsfds, listmount() and statmount() to mimic the + * contents of /proc/self/mountinfo. + */ +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <alloca.h> +#include <getopt.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> + +#include "samples-vfs.h" + +/* max mounts per listmount call */ +#define MAXMOUNTS 1024 + +/* size of struct statmount (including trailing string buffer) */ +#define STATMOUNT_BUFSIZE 4096 + +static bool ext_format; + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open -1 +#endif + +/* + * There are no bindings in glibc for listmount() and statmount() (yet), + * make our own here. 
+ */ +static int statmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 mask, + struct statmount *buf, size_t bufsize, + unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = mask, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_statmount, &req, buf, bufsize, flags); +} + +static ssize_t listmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 last_mnt_id, + __u64 list[], size_t num, unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = last_mnt_id, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_listmount, &req, list, num, flags); +} + +static void show_mnt_attrs(__u64 flags) +{ + printf("%s", flags & MOUNT_ATTR_RDONLY ? "ro" : "rw"); + + if (flags & MOUNT_ATTR_NOSUID) + printf(",nosuid"); + if (flags & MOUNT_ATTR_NODEV) + printf(",nodev"); + if (flags & MOUNT_ATTR_NOEXEC) + printf(",noexec"); + + switch (flags & MOUNT_ATTR__ATIME) { + case MOUNT_ATTR_RELATIME: + printf(",relatime"); + break; + case MOUNT_ATTR_NOATIME: + printf(",noatime"); + break; + case MOUNT_ATTR_STRICTATIME: + /* print nothing */ + break; + } + + if (flags & MOUNT_ATTR_NODIRATIME) + printf(",nodiratime"); + if (flags & MOUNT_ATTR_NOSYMFOLLOW) + printf(",nosymfollow"); + if (flags & MOUNT_ATTR_IDMAP) + printf(",idmapped"); +} + +static void show_propagation(struct statmount *sm) +{ + if (sm->mnt_propagation & MS_SHARED) + printf(" shared:%llu", sm->mnt_peer_group); + if (sm->mnt_propagation & MS_SLAVE) { + printf(" master:%llu", sm->mnt_master); + if (sm->propagate_from && sm->propagate_from != sm->mnt_master) + printf(" propagate_from:%llu", sm->propagate_from); + } + if (sm->mnt_propagation & MS_UNBINDABLE) + printf(" unbindable"); +} + +static void show_sb_flags(__u64 flags) +{ + printf("%s", flags & MS_RDONLY ? 
"ro" : "rw"); + if (flags & MS_SYNCHRONOUS) + printf(",sync"); + if (flags & MS_DIRSYNC) + printf(",dirsync"); + if (flags & MS_MANDLOCK) + printf(",mand"); + if (flags & MS_LAZYTIME) + printf(",lazytime"); +} + +static int dump_mountinfo(__u64 mnt_id, __u64 mnt_ns_id) +{ + int ret; + struct statmount *buf = alloca(STATMOUNT_BUFSIZE); + const __u64 mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC | + STATMOUNT_PROPAGATE_FROM | STATMOUNT_FS_TYPE | + STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | + STATMOUNT_MNT_OPTS | STATMOUNT_FS_SUBTYPE | + STATMOUNT_SB_SOURCE; + + ret = statmount(mnt_id, mnt_ns_id, mask, buf, STATMOUNT_BUFSIZE, 0); + if (ret < 0) { + perror("statmount"); + return 1; + } + + if (ext_format) + printf("0x%llx 0x%llx 0x%llx ", mnt_ns_id, mnt_id, buf->mnt_parent_id); + + printf("%u %u %u:%u %s %s ", buf->mnt_id_old, buf->mnt_parent_id_old, + buf->sb_dev_major, buf->sb_dev_minor, + &buf->str[buf->mnt_root], + &buf->str[buf->mnt_point]); + show_mnt_attrs(buf->mnt_attr); + show_propagation(buf); + + printf(" - %s", &buf->str[buf->fs_type]); + if (buf->mask & STATMOUNT_FS_SUBTYPE) + printf(".%s", &buf->str[buf->fs_subtype]); + if (buf->mask & STATMOUNT_SB_SOURCE) + printf(" %s ", &buf->str[buf->sb_source]); + else + printf(" :none "); + + show_sb_flags(buf->sb_flags); + if (buf->mask & STATMOUNT_MNT_OPTS) + printf(",%s", &buf->str[buf->mnt_opts]); + printf("\n"); + return 0; +} + +static int dump_mounts(__u64 mnt_ns_id) +{ + __u64 mntid[MAXMOUNTS]; + __u64 last_mnt_id = 0; + ssize_t count; + int i; + + /* + * Get a list of all mntids in mnt_ns_id. If it returns MAXMOUNTS + * mounts, then go again until we get everything. + */ + do { + count = listmount(LSMT_ROOT, mnt_ns_id, last_mnt_id, mntid, MAXMOUNTS, 0); + if (count < 0 || count > MAXMOUNTS) { + errno = count < 0 ? 
errno : count; + perror("listmount"); + return 1; + } + + /* Walk the returned mntids and print info about each */ + for (i = 0; i < count; ++i) { + int ret = dump_mountinfo(mntid[i], mnt_ns_id); + + if (ret != 0) + return ret; + } + /* Set up last_mnt_id to pick up where we left off */ + last_mnt_id = mntid[count - 1]; + } while (count == MAXMOUNTS); + return 0; +} + +static void usage(const char * const prog) +{ + printf("Usage:\n"); + printf("%s [-e] [-p pid] [-r] [-h]\n", prog); + printf(" -e: extended format\n"); + printf(" -h: print usage message\n"); + printf(" -p: get mount namespace from given pid\n"); + printf(" -r: recursively print all mounts in all child namespaces\n"); +} + +int main(int argc, char * const *argv) +{ + struct mnt_ns_info mni = { .size = MNT_NS_INFO_SIZE_VER0 }; + int pidfd, mntns, ret, opt; + pid_t pid = getpid(); + bool recursive = false; + + while ((opt = getopt(argc, argv, "ehp:r")) != -1) { + switch (opt) { + case 'e': + ext_format = true; + break; + case 'h': + usage(argv[0]); + return 0; + case 'p': + pid = atoi(optarg); + break; + case 'r': + recursive = true; + break; + } + } + + /* Get a pidfd for pid */ + pidfd = syscall(__NR_pidfd_open, pid, 0); + if (pidfd < 0) { + perror("pidfd_open"); + return 1; + } + + /* Get the mnt namespace for pidfd */ + mntns = ioctl(pidfd, PIDFD_GET_MNT_NAMESPACE, NULL); + if (mntns < 0) { + perror("PIDFD_GET_MNT_NAMESPACE"); + return 1; + } + close(pidfd); + + /* get info about mntns. 
In particular, the mnt_ns_id */ + ret = ioctl(mntns, NS_MNT_GET_INFO, &mni); + if (ret < 0) { + perror("NS_MNT_GET_INFO"); + return 1; + } + + do { + int ret; + + ret = dump_mounts(mni.mnt_ns_id); + if (ret) + return ret; + + if (!recursive) + break; + + /* get the next mntns (and overwrite the old mount ns info) */ + ret = ioctl(mntns, NS_MNT_GET_NEXT, &mni); + close(mntns); + mntns = ret; + } while (mntns >= 0); + + return 0; +} diff --git a/samples/vfs/samples-vfs.h b/samples/vfs/samples-vfs.h new file mode 100644 index 000000000000..498baf581b56 --- /dev/null +++ b/samples/vfs/samples-vfs.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SAMPLES_VFS_H +#define __SAMPLES_VFS_H + +#include <errno.h> +#include <linux/types.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> + +#define die_errno(format, ...) \ + do { \ + fprintf(stderr, "%m | %s: %d: %s: " format "\n", __FILE__, \ + __LINE__, __func__, ##__VA_ARGS__); \ + exit(EXIT_FAILURE); \ + } while (0) + +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... 
*/ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings */ + __u64 __spare2[44]; + char str[]; /* Variable size part containing strings */ +}; + +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; + __u64 mnt_ns_id; +}; + +#ifndef MNT_ID_REQ_SIZE_VER0 +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#endif + +#ifndef MNT_ID_REQ_SIZE_VER1 +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ +#endif + +/* Get the id for a mount namespace */ +#ifndef NS_GET_MNTNS_ID +#define NS_GET_MNTNS_ID _IO(0xb7, 0x5) +#endif + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#ifndef MNT_NS_INFO_SIZE_VER0 +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ +#endif + +#ifndef NS_MNT_GET_INFO +#define NS_MNT_GET_INFO _IOR(0xb7, 10, struct mnt_ns_info) +#endif + +#ifndef NS_MNT_GET_NEXT +#define NS_MNT_GET_NEXT _IOR(0xb7, 11, struct mnt_ns_info) +#endif + +#ifndef NS_MNT_GET_PREV +#define NS_MNT_GET_PREV _IOR(0xb7, 12, struct 
mnt_ns_info) +#endif + +#ifndef PIDFD_GET_MNT_NAMESPACE +#define PIDFD_GET_MNT_NAMESPACE _IO(0xFF, 3) +#endif + +#ifndef __NR_listmount +#define __NR_listmount 458 +#endif + +#ifndef __NR_statmount +#define __NR_statmount 457 +#endif + +#ifndef LSMT_ROOT +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#endif + +/* @mask bits for statmount(2) */ +#ifndef STATMOUNT_SB_BASIC +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#endif + +#ifndef STATMOUNT_MNT_BASIC +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#endif + +#ifndef STATMOUNT_PROPAGATE_FROM +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#endif + +#ifndef STATMOUNT_MNT_ROOT +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#endif + +#ifndef STATMOUNT_MNT_POINT +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#endif + +#ifndef STATMOUNT_FS_TYPE +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#endif + +#ifndef STATMOUNT_MNT_NS_ID +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#endif + +#ifndef STATMOUNT_MNT_OPTS +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#endif + +#ifndef STATMOUNT_FS_SUBTYPE +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#endif + +#ifndef STATMOUNT_SB_SOURCE +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#endif + +#ifndef STATMOUNT_OPT_ARRAY +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#endif + +#ifndef STATMOUNT_OPT_SEC_ARRAY +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#endif + +#ifndef STATX_MNT_ID_UNIQUE +#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ +#endif + +#ifndef STATMOUNT_MNT_UIDMAP +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#endif + +#ifndef STATMOUNT_MNT_GIDMAP +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... 
*/ +#endif + +#ifndef MOUNT_ATTR_RDONLY +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#endif + +#ifndef MOUNT_ATTR_NOSUID +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#endif + +#ifndef MOUNT_ATTR_NODEV +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#endif + +#ifndef MOUNT_ATTR_NOEXEC +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#endif + +#ifndef MOUNT_ATTR__ATIME +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#endif + +#ifndef MOUNT_ATTR_RELATIME +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#endif + +#ifndef MOUNT_ATTR_NOATIME +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#endif + +#ifndef MOUNT_ATTR_STRICTATIME +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#endif + +#ifndef MOUNT_ATTR_NODIRATIME +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#endif + +#ifndef MOUNT_ATTR_IDMAP +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. 
*/ +#endif + +#ifndef MOUNT_ATTR_NOSYMFOLLOW +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ +#endif + +#ifndef MS_RDONLY +#define MS_RDONLY 1 /* Mount read-only */ +#endif + +#ifndef MS_SYNCHRONOUS +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#endif + +#ifndef MS_MANDLOCK +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#endif + +#ifndef MS_DIRSYNC +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#endif + +#ifndef MS_UNBINDABLE +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#endif + +#ifndef MS_PRIVATE +#define MS_PRIVATE (1<<18) /* change to private */ +#endif + +#ifndef MS_SLAVE +#define MS_SLAVE (1<<19) /* change to slave */ +#endif + +#ifndef MS_SHARED +#define MS_SHARED (1<<20) /* change to shared */ +#endif + +#ifndef MS_LAZYTIME +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ +#endif + +#endif /* __SAMPLES_VFS_H */ diff --git a/samples/vfs/test-list-all-mounts.c b/samples/vfs/test-list-all-mounts.c new file mode 100644 index 000000000000..713c174626aa --- /dev/null +++ b/samples/vfs/test-list-all-mounts.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <errno.h> +#include <limits.h> +#include <linux/types.h> +#include <inttypes.h> +#include <stdio.h> + +#include "../../tools/testing/selftests/pidfd/pidfd.h" +#include "samples-vfs.h" + +static int __statmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 mask, + struct statmount *stmnt, size_t bufsize, + unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER1, + .mnt_id = mnt_id, + .param = mask, + .mnt_ns_id = mnt_ns_id, + }; + + return syscall(__NR_statmount, &req, stmnt, bufsize, flags); +} + +static struct statmount *sys_statmount(__u64 mnt_id, __u64 mnt_ns_id, + __u64 mask, unsigned int flags) +{ + size_t bufsize = 1 << 15; + struct statmount *stmnt = NULL, *tmp = NULL; 
+ int ret; + + for (;;) { + tmp = realloc(stmnt, bufsize); + if (!tmp) + goto out; + + stmnt = tmp; + ret = __statmount(mnt_id, mnt_ns_id, mask, stmnt, bufsize, flags); + if (!ret) + return stmnt; + + if (errno != EOVERFLOW) + goto out; + + bufsize <<= 1; + if (bufsize >= UINT_MAX / 2) + goto out; + } + +out: + free(stmnt); + return NULL; +} + +static ssize_t sys_listmount(__u64 mnt_id, __u64 last_mnt_id, __u64 mnt_ns_id, + __u64 list[], size_t num, unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER1, + .mnt_id = mnt_id, + .param = last_mnt_id, + .mnt_ns_id = mnt_ns_id, + }; + + return syscall(__NR_listmount, &req, list, num, flags); +} + +int main(int argc, char *argv[]) +{ +#define LISTMNT_BUFFER 10 + __u64 list[LISTMNT_BUFFER], last_mnt_id = 0; + int ret, pidfd, fd_mntns; + struct mnt_ns_info info = {}; + + pidfd = sys_pidfd_open(getpid(), 0); + if (pidfd < 0) + die_errno("pidfd_open failed"); + + fd_mntns = ioctl(pidfd, PIDFD_GET_MNT_NAMESPACE, 0); + if (fd_mntns < 0) + die_errno("ioctl(PIDFD_GET_MNT_NAMESPACE) failed"); + + ret = ioctl(fd_mntns, NS_MNT_GET_INFO, &info); + if (ret < 0) + die_errno("ioctl(NS_GET_MNTNS_ID) failed"); + + printf("Listing %u mounts for mount namespace %" PRIu64 "\n", + info.nr_mounts, (uint64_t)info.mnt_ns_id); + for (;;) { + ssize_t nr_mounts; +next: + nr_mounts = sys_listmount(LSMT_ROOT, last_mnt_id, + info.mnt_ns_id, list, LISTMNT_BUFFER, + 0); + if (nr_mounts <= 0) { + int fd_mntns_next; + + printf("Finished listing %u mounts for mount namespace %" PRIu64 "\n\n", + info.nr_mounts, (uint64_t)info.mnt_ns_id); + fd_mntns_next = ioctl(fd_mntns, NS_MNT_GET_NEXT, &info); + if (fd_mntns_next < 0) { + if (errno == ENOENT) { + printf("Finished listing all mount namespaces\n"); + exit(0); + } + die_errno("ioctl(NS_MNT_GET_NEXT) failed"); + } + close(fd_mntns); + fd_mntns = fd_mntns_next; + last_mnt_id = 0; + printf("Listing %u mounts for mount namespace %" PRIu64 "\n", + info.nr_mounts, 
(uint64_t)info.mnt_ns_id); + goto next; + } + + for (size_t cur = 0; cur < nr_mounts; cur++) { + struct statmount *stmnt; + + last_mnt_id = list[cur]; + + stmnt = sys_statmount(last_mnt_id, info.mnt_ns_id, + STATMOUNT_SB_BASIC | + STATMOUNT_MNT_BASIC | + STATMOUNT_MNT_ROOT | + STATMOUNT_MNT_POINT | + STATMOUNT_MNT_NS_ID | + STATMOUNT_MNT_OPTS | + STATMOUNT_FS_TYPE | + STATMOUNT_MNT_UIDMAP | + STATMOUNT_MNT_GIDMAP, 0); + if (!stmnt) { + printf("Failed to statmount(%" PRIu64 ") in mount namespace(%" PRIu64 ")\n", + (uint64_t)last_mnt_id, (uint64_t)info.mnt_ns_id); + continue; + } + + printf("mnt_id:\t\t%" PRIu64 "\nmnt_parent_id:\t%" PRIu64 "\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n", + (uint64_t)stmnt->mnt_id, + (uint64_t)stmnt->mnt_parent_id, + (stmnt->mask & STATMOUNT_FS_TYPE) ? stmnt->str + stmnt->fs_type : "", + (stmnt->mask & STATMOUNT_MNT_ROOT) ? stmnt->str + stmnt->mnt_root : "", + (stmnt->mask & STATMOUNT_MNT_POINT) ? stmnt->str + stmnt->mnt_point : "", + (stmnt->mask & STATMOUNT_MNT_OPTS) ? stmnt->str + stmnt->mnt_opts : ""); + + if (stmnt->mask & STATMOUNT_MNT_UIDMAP) { + const char *idmap = stmnt->str + stmnt->mnt_uidmap; + + for (size_t idx = 0; idx < stmnt->mnt_uidmap_num; idx++) { + printf("mnt_uidmap[%zu]:\t%s\n", idx, idmap); + idmap += strlen(idmap) + 1; + } + } + + if (stmnt->mask & STATMOUNT_MNT_GIDMAP) { + const char *idmap = stmnt->str + stmnt->mnt_gidmap; + + for (size_t idx = 0; idx < stmnt->mnt_gidmap_num; idx++) { + printf("mnt_gidmap[%zu]:\t%s\n", idx, idmap); + idmap += strlen(idmap) + 1; + } + } + + printf("\n"); + + free(stmnt); + } + } + + exit(0); +} |