Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cgroup/rstat.c      | 25
-rw-r--r--   kernel/configs/tiny.config |  1
-rw-r--r--   kernel/module/main.c       | 94
-rw-r--r--   kernel/rcu/tree_stall.h    | 26
-rw-r--r--   kernel/sched/ext_idle.c    | 37
-rw-r--r--   kernel/trace/ftrace.c      | 15
6 files changed, 163 insertions(+), 35 deletions(-)
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index ce4752ab9e09..cbeaa499a96a 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -47,8 +47,20 @@ static spinlock_t *ss_rstat_lock(struct cgroup_subsys *ss)
 
 static raw_spinlock_t *ss_rstat_cpu_lock(struct cgroup_subsys *ss, int cpu)
 {
-	if (ss)
+	if (ss) {
+		/*
+		 * Depending on config, the subsystem per-cpu lock type may be an
+		 * empty struct. In environments where this is the case, allocation
+		 * of this field is not performed in ss_rstat_init(). Avoid a
+		 * cpu-based offset relative to NULL by returning early. When the
+		 * lock type is zero in size, the corresponding lock functions are
+		 * no-ops so passing them NULL is acceptable.
+		 */
+		if (sizeof(*ss->rstat_ss_cpu_lock) == 0)
+			return NULL;
+
 		return per_cpu_ptr(ss->rstat_ss_cpu_lock, cpu);
+	}
 
 	return per_cpu_ptr(&rstat_base_cpu_lock, cpu);
 }
@@ -510,20 +522,15 @@ int __init ss_rstat_init(struct cgroup_subsys *ss)
 {
 	int cpu;
 
-#ifdef CONFIG_SMP
 	/*
-	 * On uniprocessor machines, arch_spinlock_t is defined as an empty
-	 * struct. Avoid allocating a size of zero by having this block
-	 * excluded in this case. It's acceptable to leave the subsystem locks
-	 * unitialized since the associated lock functions are no-ops in the
-	 * non-smp case.
+	 * Depending on config, the subsystem per-cpu lock type may be an empty
+	 * struct. Avoid allocating a size of zero in this case.
 	 */
-	if (ss) {
+	if (ss && sizeof(*ss->rstat_ss_cpu_lock)) {
 		ss->rstat_ss_cpu_lock = alloc_percpu(raw_spinlock_t);
 		if (!ss->rstat_ss_cpu_lock)
 			return -ENOMEM;
 	}
-#endif
 
 	spin_lock_init(ss_rstat_lock(ss));
 	for_each_possible_cpu(cpu)
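A note on why the sizeof() guard above can sit in regular code rather than behind an #ifdef: GNU C gives an empty struct a size of zero, so the test folds away at compile time whenever the lock type has real storage. Below is a minimal userspace sketch of the same idea; FAKE_SMP and struct fake_lock are invented stand-ins, not the kernel's definitions.

/*
 * Illustrative only: how a lock type can end up zero-sized depending on
 * configuration. Build with or without -DFAKE_SMP to see both branches.
 */
#include <stdio.h>

#ifdef FAKE_SMP
struct fake_lock { volatile int slock; };
#else
struct fake_lock { };	/* empty struct: sizeof == 0 under GNU C */
#endif

int main(void)
{
	/*
	 * Mirrors the ss_rstat_init()/ss_rstat_cpu_lock() guards: when the
	 * lock type has no storage, skip the per-cpu allocation and return
	 * NULL from the lookup instead of computing NULL + per-cpu offset.
	 */
	if (sizeof(struct fake_lock) == 0)
		printf("zero-sized lock: skip alloc, lock ops are no-ops\n");
	else
		printf("%zu-byte lock: allocate per-cpu storage\n",
		       sizeof(struct fake_lock));
	return 0;
}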
diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config
index b753695c5a8f..5dd0f0a34a73 100644
--- a/kernel/configs/tiny.config
+++ b/kernel/configs/tiny.config
@@ -2,3 +2,4 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_KERNEL_XZ=y
 CONFIG_SLUB=y
 CONFIG_SLUB_TINY=y
+CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 3d64e69cc03e..413ac6ea3702 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -170,6 +170,30 @@ static inline void add_taint_module(struct module *mod, unsigned flag,
 }
 
 /*
+ * Like strncmp(), except s/-/_/g as per scripts/Makefile.lib:name-fix-token rule.
+ */
+static int mod_strncmp(const char *str_a, const char *str_b, size_t n)
+{
+	for (int i = 0; i < n; i++) {
+		char a = str_a[i];
+		char b = str_b[i];
+		int d;
+
+		if (a == '-') a = '_';
+		if (b == '-') b = '_';
+
+		d = a - b;
+		if (d)
+			return d;
+
+		if (!a)
+			break;
+	}
+
+	return 0;
+}
+
+/*
  * A thread that wants to hold a reference to a module only while it
  * is running can call this to safely exit.
  */
@@ -1083,6 +1107,46 @@ static char *get_modinfo(const struct load_info *info, const char *tag)
 	return get_next_modinfo(info, tag, NULL);
 }
 
+/**
+ * verify_module_namespace() - does @modname have access to this symbol's @namespace
+ * @namespace: export symbol namespace
+ * @modname: module name
+ *
+ * If @namespace is prefixed with "module:" to indicate it is a module namespace
+ * then test if @modname matches any of the comma separated patterns.
+ *
+ * The patterns only support tail-glob.
+ */
+static bool verify_module_namespace(const char *namespace, const char *modname)
+{
+	size_t len, modlen = strlen(modname);
+	const char *prefix = "module:";
+	const char *sep;
+	bool glob;
+
+	if (!strstarts(namespace, prefix))
+		return false;
+
+	for (namespace += strlen(prefix); *namespace; namespace = sep) {
+		sep = strchrnul(namespace, ',');
+		len = sep - namespace;
+
+		glob = false;
+		if (sep[-1] == '*') {
+			len--;
+			glob = true;
+		}
+
+		if (*sep)
+			sep++;
+
+		if (mod_strncmp(namespace, modname, len) == 0 && (glob || len == modlen))
+			return true;
+	}
+
+	return false;
+}
+
 static int verify_namespace_is_imported(const struct load_info *info,
 					const struct kernel_symbol *sym,
 					struct module *mod)
@@ -1092,6 +1156,10 @@ static int verify_namespace_is_imported(const struct load_info *info,
 
 	namespace = kernel_symbol_namespace(sym);
 	if (namespace && namespace[0]) {
+
+		if (verify_module_namespace(namespace, mod->name))
+			return 0;
+
 		for_each_modinfo_entry(imported_namespace, info, "import_ns") {
 			if (strcmp(namespace, imported_namespace) == 0)
 				return 0;
@@ -1658,15 +1726,30 @@ static void module_license_taint_check(struct module *mod, const char *license)
 	}
 }
 
-static void setup_modinfo(struct module *mod, struct load_info *info)
+static int setup_modinfo(struct module *mod, struct load_info *info)
 {
 	const struct module_attribute *attr;
+	char *imported_namespace;
 	int i;
 
 	for (i = 0; (attr = modinfo_attrs[i]); i++) {
 		if (attr->setup)
 			attr->setup(mod, get_modinfo(info, attr->attr.name));
 	}
+
+	for_each_modinfo_entry(imported_namespace, info, "import_ns") {
+		/*
+		 * 'module:' prefixed namespaces are implicit, disallow
+		 * explicit imports.
+		 */
+		if (strstarts(imported_namespace, "module:")) {
+			pr_err("%s: module tries to import module namespace: %s\n",
+			       mod->name, imported_namespace);
+			return -EPERM;
+		}
+	}
+
+	return 0;
 }
 
 static void free_modinfo(struct module *mod)
@@ -3323,7 +3406,9 @@ static int load_module(struct load_info *info, const char __user *uargs,
 		goto free_unload;
 
 	/* Set up MODINFO_ATTR fields */
-	setup_modinfo(mod, info);
+	err = setup_modinfo(mod, info);
+	if (err)
+		goto free_modinfo;
 
 	/* Fix up syms, so that st_value is a pointer to location. */
 	err = simplify_symbols(mod, info);
@@ -3386,11 +3471,12 @@ static int load_module(struct load_info *info, const char __user *uargs,
 		goto sysfs_cleanup;
 	}
 
+	if (codetag_load_module(mod))
+		goto sysfs_cleanup;
+
 	/* Get rid of temporary copy. */
 	free_copy(info, flags);
 
-	codetag_load_module(mod);
-
 	/* Done! */
 	trace_module_load(mod);
 
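To make the tail-glob semantics of verify_module_namespace() concrete, here is a hedged userspace re-implementation of the matching logic. chrnul() is a portable stand-in for the kernel's strchrnul(), strstarts() is replaced with strncmp(), and the namespace strings in main() are invented examples, not real exports.

/*
 * Userspace sketch of the "module:" namespace matching: comma-separated
 * patterns, tail-glob only, with '-' and '_' treated as equal.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

static const char *chrnul(const char *s, int c)	/* strchrnul() stand-in */
{
	while (*s && *s != c)
		s++;
	return s;
}

/* Like strncmp(), but '-' and '_' compare equal (name-fix-token rule). */
static int modcmp(const char *a, const char *b, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		char x = (a[i] == '-') ? '_' : a[i];
		char y = (b[i] == '-') ? '_' : b[i];

		if (x != y)
			return x - y;
		if (!x)
			break;
	}
	return 0;
}

static bool match(const char *ns, const char *modname)
{
	const char *prefix = "module:";
	size_t modlen = strlen(modname);

	if (strncmp(ns, prefix, strlen(prefix)) != 0)
		return false;

	for (ns += strlen(prefix); *ns;) {
		const char *sep = chrnul(ns, ',');
		size_t len = sep - ns;
		bool glob = len && sep[-1] == '*';

		if (glob)
			len--;
		if (modcmp(ns, modname, len) == 0 && (glob || len == modlen))
			return true;
		ns = *sep ? sep + 1 : sep;
	}
	return false;
}

int main(void)
{
	/* "module:foo-*,bar": one tail-glob pattern plus one exact name. */
	printf("%d %d %d\n",
	       match("module:foo-*,bar", "foo_bar"),	/* 1: glob, '-' == '_' */
	       match("module:foo-*,bar", "bar"),	/* 1: exact match */
	       match("module:foo-*,bar", "barx"));	/* 0: no match */
	return 0;
}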
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 56b21219442b..486c00536207 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -20,6 +20,28 @@
 int sysctl_panic_on_rcu_stall __read_mostly;
 int sysctl_max_rcu_stall_to_panic __read_mostly;
 
+#ifdef CONFIG_SYSFS
+
+static unsigned int rcu_stall_count;
+
+static ssize_t rcu_stall_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+				    char *page)
+{
+	return sysfs_emit(page, "%u\n", rcu_stall_count);
+}
+
+static struct kobj_attribute rcu_stall_count_attr = __ATTR_RO(rcu_stall_count);
+
+static __init int kernel_rcu_stall_sysfs_init(void)
+{
+	sysfs_add_file_to_group(kernel_kobj, &rcu_stall_count_attr.attr, NULL);
+	return 0;
+}
+
+late_initcall(kernel_rcu_stall_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
 #ifdef CONFIG_PROVE_RCU
 #define RCU_STALL_DELAY_DELTA		(5 * HZ)
 #else
@@ -784,6 +806,10 @@ static void check_cpu_stall(struct rcu_data *rdp)
 	if (kvm_check_and_clear_guest_paused())
 		return;
 
+#ifdef CONFIG_SYSFS
+	++rcu_stall_count;
+#endif
+
 	rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
 	if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
 		pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
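Since the attribute is added to kernel_kobj with a NULL group name, the counter should surface as /sys/kernel/rcu_stall_count. A small hedged sketch of a userspace reader, with the path assumed from the code above:

/* Read the RCU stall counter exposed by the patch above. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/rcu_stall_count", "r");
	unsigned int stalls;

	if (!f) {
		perror("/sys/kernel/rcu_stall_count");
		return 1;
	}
	if (fscanf(f, "%u", &stalls) == 1)
		printf("RCU stalls since boot: %u\n", stalls);
	fclose(f);
	return 0;
}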
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 66da03cc0b33..6d29d3cbc670 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -138,6 +138,7 @@ found:
 	goto retry;
 }
 
+#ifdef CONFIG_NUMA
 /*
  * Tracks nodes that have not yet been visited when searching for an idle
  * CPU across all available nodes.
@@ -186,6 +187,13 @@ static s32 pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, int node, u64 flags)
 
 	return cpu;
 }
+#else
+static inline s32
+pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, int node, u64 flags)
+{
+	return -EBUSY;
+}
+#endif
 
 /*
  * Find an idle CPU in the system, starting from @node.
@@ -447,11 +455,18 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
 	const struct cpumask *llc_cpus = NULL, *numa_cpus = NULL;
 	const struct cpumask *allowed = cpus_allowed ?: p->cpus_ptr;
 	int node = scx_cpu_node_if_enabled(prev_cpu);
+	bool is_prev_allowed;
 	s32 cpu;
 
 	preempt_disable();
 
 	/*
+	 * Check whether @prev_cpu is still within the allowed set. If not,
+	 * we can still try selecting a nearby CPU.
+	 */
+	is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed);
+
+	/*
 	 * Determine the subset of CPUs usable by @p within @cpus_allowed.
 	 */
 	if (allowed != p->cpus_ptr) {
@@ -465,21 +480,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
 			cpu = -EBUSY;
 			goto out_enable;
 		}
-
-		/*
-		 * If @prev_cpu is not in the allowed CPUs, skip topology
-		 * optimizations and try to pick any idle CPU usable by the
-		 * task.
-		 *
-		 * If %SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled, prioritize
-		 * the current node, as it may optimize some waker->wakee
-		 * workloads.
-		 */
-		if (!cpumask_test_cpu(prev_cpu, allowed)) {
-			node = scx_cpu_node_if_enabled(smp_processor_id());
-			cpu = scx_pick_idle_cpu(allowed, node, flags);
-			goto out_enable;
-		}
 	}
 
 	/*
@@ -525,7 +525,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
 	 * then avoid a migration.
 	 */
 	cpu = smp_processor_id();
-	if (cpus_share_cache(cpu, prev_cpu) &&
+	if (is_prev_allowed && cpus_share_cache(cpu, prev_cpu) &&
 	    scx_idle_test_and_clear_cpu(prev_cpu)) {
 		cpu = prev_cpu;
 		goto out_unlock;
@@ -562,7 +562,8 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
 		/*
 		 * Keep using @prev_cpu if it's part of a fully idle core.
 		 */
-		if (cpumask_test_cpu(prev_cpu, idle_cpumask(node)->smt) &&
+		if (is_prev_allowed &&
+		    cpumask_test_cpu(prev_cpu, idle_cpumask(node)->smt) &&
 		    scx_idle_test_and_clear_cpu(prev_cpu)) {
 			cpu = prev_cpu;
 			goto out_unlock;
@@ -611,7 +612,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
 	/*
 	 * Use @prev_cpu if it's idle.
 	 */
-	if (scx_idle_test_and_clear_cpu(prev_cpu)) {
+	if (is_prev_allowed && scx_idle_test_and_clear_cpu(prev_cpu)) {
 		cpu = prev_cpu;
 		goto out_unlock;
 	}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1af952cba48d..4203fad56b6c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -188,7 +188,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
 	op->saved_func(ip, parent_ip, op, fregs);
 }
 
-static void ftrace_sync_ipi(void *data)
+void ftrace_sync_ipi(void *data)
 {
 	/* Probably not needed, but do it anyway */
 	smp_rmb();
@@ -7438,9 +7438,10 @@ void ftrace_release_mod(struct module *mod)
 
 	mutex_lock(&ftrace_lock);
 
-	if (ftrace_disabled)
-		goto out_unlock;
-
+	/*
+	 * To avoid the UAF problem after the module is unloaded, the
+	 * 'mod_map' resource needs to be released unconditionally.
+	 */
 	list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) {
 		if (mod_map->mod == mod) {
 			list_del_rcu(&mod_map->list);
@@ -7449,6 +7450,9 @@ void ftrace_release_mod(struct module *mod)
 		}
 	}
 
+	if (ftrace_disabled)
+		goto out_unlock;
+
 	/*
 	 * Each module has its own ftrace_pages, remove
 	 * them from the list.
@@ -7627,6 +7631,9 @@ allocate_ftrace_mod_map(struct module *mod,
 {
 	struct ftrace_mod_map *mod_map;
 
+	if (ftrace_disabled)
+		return NULL;
+
 	mod_map = kmalloc(sizeof(*mod_map), GFP_KERNEL);
 	if (!mod_map)
 		return NULL;
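The ftrace_release_mod() reordering above follows a general pattern: bookkeeping that points into a module's memory must be torn down even when the subsystem is disabled, otherwise the list retains dangling pointers after the module is freed. Below is a hedged sketch of that pattern; a plain singly-linked list stands in for the kernel's RCU-protected ftrace_mod_maps, and all names are invented.

/*
 * Sketch: release per-module resources unconditionally, then take the
 * disabled early-out, mirroring the ftrace_release_mod() fix.
 */
#include <stdbool.h>
#include <stdlib.h>

struct mod_map {
	struct mod_map *next;
	void *mod;			/* memory owned by the module */
};

static struct mod_map *maps;
static bool tracing_disabled;

static void release_mod(void *mod)
{
	struct mod_map **pp = &maps;

	/* Unconditional: drop every map that points into @mod first... */
	while (*pp) {
		struct mod_map *m = *pp;

		if (m->mod == mod) {
			*pp = m->next;
			free(m);
		} else {
			pp = &m->next;
		}
	}

	/* ...and only then take the disabled early-out, as in the patch. */
	if (tracing_disabled)
		return;

	/* remaining teardown (ftrace_pages removal etc.) would go here */
}

int main(void)
{
	static int module_mem;		/* stands in for the module image */
	struct mod_map *m = calloc(1, sizeof(*m));

	if (!m)
		return 1;
	m->mod = &module_mem;
	maps = m;

	tracing_disabled = true;	/* even disabled, the map is freed */
	release_mod(&module_mem);
	return maps ? 1 : 0;
}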