summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup/rstat.c 25
-rw-r--r-- kernel/configs/tiny.config 1
-rw-r--r-- kernel/module/main.c 94
-rw-r--r-- kernel/rcu/tree_stall.h 26
-rw-r--r-- kernel/sched/ext_idle.c 37
-rw-r--r-- kernel/trace/ftrace.c 15
6 files changed, 163 insertions, 35 deletions
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index ce4752ab9e09..cbeaa499a96a 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -47,8 +47,20 @@ static spinlock_t *ss_rstat_lock(struct cgroup_subsys *ss)
static raw_spinlock_t *ss_rstat_cpu_lock(struct cgroup_subsys *ss, int cpu)
{
- if (ss)
+ if (ss) {
+ /*
+ * Depending on config, the subsystem per-cpu lock type may be an
+ * empty struct. In environments where this is the case, allocation
+ * of this field is not performed in ss_rstat_init(). Avoid a
+ * cpu-based offset relative to NULL by returning early. When the
+ * lock type is zero in size, the corresponding lock functions are
+ * no-ops so passing them NULL is acceptable.
+ */
+ if (sizeof(*ss->rstat_ss_cpu_lock) == 0)
+ return NULL;
+
return per_cpu_ptr(ss->rstat_ss_cpu_lock, cpu);
+ }
return per_cpu_ptr(&rstat_base_cpu_lock, cpu);
}
@@ -510,20 +522,15 @@ int __init ss_rstat_init(struct cgroup_subsys *ss)
{
int cpu;
-#ifdef CONFIG_SMP
/*
- * On uniprocessor machines, arch_spinlock_t is defined as an empty
- * struct. Avoid allocating a size of zero by having this block
- * excluded in this case. It's acceptable to leave the subsystem locks
- * unitialized since the associated lock functions are no-ops in the
- * non-smp case.
+ * Depending on config, the subsystem per-cpu lock type may be an empty
+ * struct. Avoid allocating a size of zero in this case.
*/
- if (ss) {
+ if (ss && sizeof(*ss->rstat_ss_cpu_lock)) {
ss->rstat_ss_cpu_lock = alloc_percpu(raw_spinlock_t);
if (!ss->rstat_ss_cpu_lock)
return -ENOMEM;
}
-#endif
spin_lock_init(ss_rstat_lock(ss));
for_each_possible_cpu(cpu)
diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config
index b753695c5a8f..5dd0f0a34a73 100644
--- a/kernel/configs/tiny.config
+++ b/kernel/configs/tiny.config
@@ -2,3 +2,4 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_KERNEL_XZ=y
CONFIG_SLUB=y
CONFIG_SLUB_TINY=y
+CONFIG_LD_DEAD_CODE_DATA_ELIMINATION=y
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 3d64e69cc03e..413ac6ea3702 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -170,6 +170,30 @@ static inline void add_taint_module(struct module *mod, unsigned flag,
}
/*
+ * Like strncmp(), except s/-/_/g as per scripts/Makefile.lib:name-fix-token rule.
+ *
+ * Compares at most @n characters of @str_a and @str_b, treating '-' as '_' in
+ * both strings. Returns 0 when the compared characters all match (or both
+ * strings end at the same position), otherwise the difference between the
+ * first mismatching pair of characters after that substitution.
+ */
+static int mod_strncmp(const char *str_a, const char *str_b, size_t n)
+{
+ for (int i = 0; i < n; i++) {
+ char a = str_a[i];
+ char b = str_b[i];
+ int d;
+
+ if (a == '-') a = '_'; /* fold '-' to '_' on both sides before comparing */
+ if (b == '-') b = '_';
+
+ d = a - b;
+ if (d)
+ return d; /* first differing (folded) character decides the result */
+
+ if (!a)
+ break; /* a == b here, so both strings ended at this index */
+ }
+
+ return 0;
+}
+
+/*
* A thread that wants to hold a reference to a module only while it
* is running can call this to safely exit.
*/
@@ -1083,6 +1107,46 @@ static char *get_modinfo(const struct load_info *info, const char *tag)
return get_next_modinfo(info, tag, NULL);
}
+/**
+ * verify_module_namespace() - does @modname have access to this symbol's @namespace
+ * @namespace: export symbol namespace
+ * @modname: module name
+ *
+ * If @namespace is prefixed with "module:" to indicate it is a module namespace
+ * then test if @modname matches any of the comma separated patterns.
+ *
+ * The patterns only support tail-glob: a pattern ending in '*' matches any
+ * module name that starts with the text before the '*'; any other pattern
+ * has to match the whole name. Names are compared with mod_strncmp(), so '-'
+ * and '_' are treated as the same character.
+ *
+ * Return: true if @modname matches one of the patterns, false if none match
+ * or if @namespace does not start with "module:".
+ */
+static bool verify_module_namespace(const char *namespace, const char *modname)
+{
+ size_t len, modlen = strlen(modname);
+ const char *prefix = "module:";
+ const char *sep;
+ bool glob;
+
+ if (!strstarts(namespace, prefix))
+ return false; /* not a module namespace */
+
+ for (namespace += strlen(prefix); *namespace; namespace = sep) {
+ sep = strchrnul(namespace, ','); /* end of this pattern: next ',' or the terminating NUL */
+ len = sep - namespace;
+
+ glob = false;
+ if (sep[-1] == '*') { /* trailing '*': compare only the text before it */
+ len--;
+ glob = true;
+ }
+
+ if (*sep)
+ sep++; /* step over the ',' so the next pass starts at the following pattern */
+
+ if (mod_strncmp(namespace, modname, len) == 0 && (glob || len == modlen))
+ return true;
+ }
+
+ return false;
+}
+
static int verify_namespace_is_imported(const struct load_info *info,
const struct kernel_symbol *sym,
struct module *mod)
@@ -1092,6 +1156,10 @@ static int verify_namespace_is_imported(const struct load_info *info,
namespace = kernel_symbol_namespace(sym);
if (namespace && namespace[0]) {
+
+ if (verify_module_namespace(namespace, mod->name))
+ return 0;
+
for_each_modinfo_entry(imported_namespace, info, "import_ns") {
if (strcmp(namespace, imported_namespace) == 0)
return 0;
@@ -1658,15 +1726,30 @@ static void module_license_taint_check(struct module *mod, const char *license)
}
}
-static void setup_modinfo(struct module *mod, struct load_info *info)
+static int setup_modinfo(struct module *mod, struct load_info *info)
{
const struct module_attribute *attr;
+ char *imported_namespace;
int i;
for (i = 0; (attr = modinfo_attrs[i]); i++) {
if (attr->setup)
attr->setup(mod, get_modinfo(info, attr->attr.name));
}
+
+ for_each_modinfo_entry(imported_namespace, info, "import_ns") {
+ /*
+ * 'module:' prefixed namespaces are implicit, disallow
+ * explicit imports: report the entry and fail with -EPERM.
+ */
+ if (strstarts(imported_namespace, "module:")) {
+ pr_err("%s: module tries to import module namespace: %s\n",
+ mod->name, imported_namespace);
+ return -EPERM;
+ }
+ }
+
+ return 0; /* no "import_ns" entry used the "module:" prefix */
}
static void free_modinfo(struct module *mod)
@@ -3323,7 +3406,9 @@ static int load_module(struct load_info *info, const char __user *uargs,
goto free_unload;
/* Set up MODINFO_ATTR fields */
- setup_modinfo(mod, info);
+ err = setup_modinfo(mod, info);
+ if (err)
+ goto free_modinfo;
/* Fix up syms, so that st_value is a pointer to location. */
err = simplify_symbols(mod, info);
@@ -3386,11 +3471,12 @@ static int load_module(struct load_info *info, const char __user *uargs,
goto sysfs_cleanup;
}
+ if (codetag_load_module(mod))
+ goto sysfs_cleanup;
+
/* Get rid of temporary copy. */
free_copy(info, flags);
- codetag_load_module(mod);
-
/* Done! */
trace_module_load(mod);
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 56b21219442b..486c00536207 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -20,6 +20,28 @@
int sysctl_panic_on_rcu_stall __read_mostly;
int sysctl_max_rcu_stall_to_panic __read_mostly;
+#ifdef CONFIG_SYSFS
+
+static unsigned int rcu_stall_count;
+
+static ssize_t rcu_stall_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *page)
+{
+ return sysfs_emit(page, "%u\n", rcu_stall_count); /* current counter as unsigned decimal plus newline */
+}
+
+static struct kobj_attribute rcu_stall_count_attr = __ATTR_RO(rcu_stall_count);
+
+static __init int kernel_rcu_stall_sysfs_init(void)
+{
+ sysfs_add_file_to_group(kernel_kobj, &rcu_stall_count_attr.attr, NULL); /* NOTE(review): return value is discarded */
+ return 0; /* reports success to the initcall regardless of the call above */
+}
+
+late_initcall(kernel_rcu_stall_sysfs_init);
+
+#endif // CONFIG_SYSFS
+
#ifdef CONFIG_PROVE_RCU
#define RCU_STALL_DELAY_DELTA (5 * HZ)
#else
@@ -784,6 +806,10 @@ static void check_cpu_stall(struct rcu_data *rdp)
if (kvm_check_and_clear_guest_paused())
return;
+#ifdef CONFIG_SYSFS
+ ++rcu_stall_count;
+#endif
+
rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_NORM, (void *)j - gps);
if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
pr_err("INFO: %s detected stall, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 66da03cc0b33..6d29d3cbc670 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -138,6 +138,7 @@ found:
goto retry;
}
+#ifdef CONFIG_NUMA
/*
* Tracks nodes that have not yet been visited when searching for an idle
* CPU across all available nodes.
@@ -186,6 +187,13 @@ static s32 pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, i
return cpu;
}
+#else
+static inline s32
+pick_idle_cpu_from_online_nodes(const struct cpumask *cpus_allowed, int node, u64 flags)
+{
+ return -EBUSY; /* #else branch of CONFIG_NUMA: stub ignores its arguments and always yields -EBUSY */
+}
+#endif
/*
* Find an idle CPU in the system, starting from @node.
@@ -447,11 +455,18 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
const struct cpumask *llc_cpus = NULL, *numa_cpus = NULL;
const struct cpumask *allowed = cpus_allowed ?: p->cpus_ptr;
int node = scx_cpu_node_if_enabled(prev_cpu);
+ bool is_prev_allowed;
s32 cpu;
preempt_disable();
/*
+ * Check whether @prev_cpu is still within the allowed set. If not,
+ * we can still try selecting a nearby CPU.
+ */
+ is_prev_allowed = cpumask_test_cpu(prev_cpu, allowed);
+
+ /*
* Determine the subset of CPUs usable by @p within @cpus_allowed.
*/
if (allowed != p->cpus_ptr) {
@@ -465,21 +480,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
cpu = -EBUSY;
goto out_enable;
}
-
- /*
- * If @prev_cpu is not in the allowed CPUs, skip topology
- * optimizations and try to pick any idle CPU usable by the
- * task.
- *
- * If %SCX_OPS_BUILTIN_IDLE_PER_NODE is enabled, prioritize
- * the current node, as it may optimize some waker->wakee
- * workloads.
- */
- if (!cpumask_test_cpu(prev_cpu, allowed)) {
- node = scx_cpu_node_if_enabled(smp_processor_id());
- cpu = scx_pick_idle_cpu(allowed, node, flags);
- goto out_enable;
- }
}
/*
@@ -525,7 +525,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
* then avoid a migration.
*/
cpu = smp_processor_id();
- if (cpus_share_cache(cpu, prev_cpu) &&
+ if (is_prev_allowed && cpus_share_cache(cpu, prev_cpu) &&
scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
goto out_unlock;
@@ -562,7 +562,8 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
/*
* Keep using @prev_cpu if it's part of a fully idle core.
*/
- if (cpumask_test_cpu(prev_cpu, idle_cpumask(node)->smt) &&
+ if (is_prev_allowed &&
+ cpumask_test_cpu(prev_cpu, idle_cpumask(node)->smt) &&
scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
goto out_unlock;
@@ -611,7 +612,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
/*
* Use @prev_cpu if it's idle.
*/
- if (scx_idle_test_and_clear_cpu(prev_cpu)) {
+ if (is_prev_allowed && scx_idle_test_and_clear_cpu(prev_cpu)) {
cpu = prev_cpu;
goto out_unlock;
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1af952cba48d..4203fad56b6c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -188,7 +188,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
op->saved_func(ip, parent_ip, op, fregs);
}
-static void ftrace_sync_ipi(void *data)
+void ftrace_sync_ipi(void *data)
{
/* Probably not needed, but do it anyway */
smp_rmb();
@@ -7438,9 +7438,10 @@ void ftrace_release_mod(struct module *mod)
mutex_lock(&ftrace_lock);
- if (ftrace_disabled)
- goto out_unlock;
-
+ /*
+ * To avoid the UAF problem after the module is unloaded, the
+ * 'mod_map' resource needs to be released unconditionally.
+ */
list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) {
if (mod_map->mod == mod) {
list_del_rcu(&mod_map->list);
@@ -7449,6 +7450,9 @@ void ftrace_release_mod(struct module *mod)
}
}
+ if (ftrace_disabled)
+ goto out_unlock;
+
/*
* Each module has its own ftrace_pages, remove
* them from the list.
@@ -7627,6 +7631,9 @@ allocate_ftrace_mod_map(struct module *mod,
{
struct ftrace_mod_map *mod_map;
+ if (ftrace_disabled)
+ return NULL;
+
mod_map = kmalloc(sizeof(*mod_map), GFP_KERNEL);
if (!mod_map)
return NULL;