diff options
| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-10-04 11:28:45 -0700 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-10-04 11:28:45 -0700 |
| commit | 67da125e30ab17b5b8874eb32882e81cdec17ec8 (patch) | |
| tree | b17cff8995c3156b74a3ab7243e31b6d23219e7b /kernel | |
| parent | 48e3694ae7fae347c1193c84f384f4ea41086075 (diff) | |
| parent | 1d289fc5691c7a970a285bc53292bac9e37c89a6 (diff) | |
Merge tag 'rcu.2025.09.26a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux
Pull RCU updates from Paul McKenney:
"Documentation updates:
- Update whatisRCU.rst and checklist.rst for recent RCU API additions
- Fix RCU documentation formatting and typos
- Replace dead Ottawa Linux Symposium links in RTFP.txt
Miscellaneous RCU updates:
- Document that rcu_barrier() hurries RCU_LAZY callbacks
- Remove redundant interrupt disabling from
rcu_preempt_deferred_qs_handler()
- Move list_for_each_rcu from list.h to rculist.h, and adjust the
include directive in kernel/cgroup/dmem.c accordingly
- Make initial set of changes to accommodate upcoming
system_percpu_wq changes
SRCU updates:
- Create an srcu_read_lock_fast_notrace() for eventual use in
tracing, including adding guards
- Document the reliance on per-CPU operations as implicit RCU readers
in __srcu_read_{,un}lock_fast()
- Document the srcu_flip() function's memory-barrier D's relationship
to SRCU-fast readers
- Remove a redundant preempt_disable() and preempt_enable() pair from
srcu_gp_start_if_needed()
Torture-test updates:
- Fix jitter.sh spin time so that it actually varies as advertised.
It is still quite coarse-grained, but at least it does now vary
- Update torture.sh help text to include the not-so-new --do-normal
parameter, which permits (for example) testing KCSAN kernels
without doing non-debug kernels
- Fix a number of false-positive diagnostics that were being
triggered by rcutorture starting before boot completed. Running
multiple near-CPU-bound rcutorture processes when there is only the
boot CPU is after all a bit excessive
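- Substitute kcalloc() for kzalloc() in the rcutorture and refscale
array allocations, so that the count-times-size multiplication is
overflow-checked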
- Remove a redundant kfree() and NULL out kfree()ed objects"
* tag 'rcu.2025.09.26a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux: (31 commits)
rcu: WQ_UNBOUND added to sync_wq workqueue
rcu: WQ_PERCPU added to alloc_workqueue users
rcu: replace use of system_wq with system_percpu_wq
refperf: Set reader_tasks to NULL after kfree()
refperf: Remove redundant kfree() after torture_stop_kthread()
srcu/tiny: Remove preempt_disable/enable() in srcu_gp_start_if_needed()
srcu: Document srcu_flip() memory-barrier D relation to SRCU-fast
srcu: Document __srcu_read_{,un}lock_fast() implicit RCU readers
rculist: move list_for_each_rcu() to where it belongs
refscale: Use kcalloc() instead of kzalloc()
rcutorture: Use kcalloc() instead of kzalloc()
docs: rcu: Replace multiple dead OLS links in RTFP.txt
doc: Fix typo in RCU's torture.rst documentation
Documentation: RCU: Retitle toctree index
Documentation: RCU: Reduce toctree depth
Documentation: RCU: Wrap kvm-remote.sh rerun snippet in literal code block
rcu: docs: Requirements.rst: Abide by conventions of kernel documentation
doc: Add RCU guards to checklist.rst
doc: Update whatisRCU.rst for recent RCU API additions
rcutorture: Delay forward-progress testing until boot completes
...
Diffstat (limited to 'kernel')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | kernel/cgroup/dmem.c | 1 |
| -rw-r--r-- | kernel/rcu/rcutorture.c | 27 |
| -rw-r--r-- | kernel/rcu/refscale.c | 4 |
| -rw-r--r-- | kernel/rcu/srcutiny.c | 4 |
| -rw-r--r-- | kernel/rcu/srcutree.c | 10 |
| -rw-r--r-- | kernel/rcu/tasks.h | 4 |
| -rw-r--r-- | kernel/rcu/tree.c | 9 |
| -rw-r--r-- | kernel/rcu/tree_plugin.h | 5 |
| -rw-r--r-- | kernel/torture.c | 7 |
9 files changed, 49 insertions, 22 deletions
diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index 10b63433f057..e12b946278b6 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -14,6 +14,7 @@ #include <linux/mutex.h> #include <linux/page_counter.h> #include <linux/parser.h> +#include <linux/rculist.h> #include <linux/slab.h> struct dmem_cgroup_region { diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 7a893d51d02b..29fe3c01312f 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1528,7 +1528,7 @@ static void do_rtws_sync(struct torture_random_state *trsp, void (*sync)(void)) static int rcu_torture_writer(void *arg) { - bool boot_ended; + bool booting_still = false; bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal(); unsigned long cookie; struct rcu_gp_oldstate cookie_full; @@ -1539,6 +1539,7 @@ rcu_torture_writer(void *arg) struct rcu_gp_oldstate gp_snap1_full; int i; int idx; + unsigned long j; int oldnice = task_nice(current); struct rcu_gp_oldstate *rgo = NULL; int rgo_size = 0; @@ -1571,16 +1572,26 @@ rcu_torture_writer(void *arg) return 0; } if (cur_ops->poll_active > 0) { - ulo = kzalloc(cur_ops->poll_active * sizeof(ulo[0]), GFP_KERNEL); + ulo = kcalloc(cur_ops->poll_active, sizeof(*ulo), GFP_KERNEL); if (!WARN_ON(!ulo)) ulo_size = cur_ops->poll_active; } if (cur_ops->poll_active_full > 0) { - rgo = kzalloc(cur_ops->poll_active_full * sizeof(rgo[0]), GFP_KERNEL); + rgo = kcalloc(cur_ops->poll_active_full, sizeof(*rgo), GFP_KERNEL); if (!WARN_ON(!rgo)) rgo_size = cur_ops->poll_active_full; } + // If the system is still booting, let it finish. 
+ j = jiffies; + while (!torture_must_stop() && !rcu_inkernel_boot_has_ended()) { + booting_still = true; + schedule_timeout_interruptible(HZ); + } + if (booting_still) + pr_alert("%s" TORTURE_FLAG " Waited %lu jiffies for boot to complete.\n", + torture_type, jiffies - j); + do { rcu_torture_writer_state = RTWS_FIXED_DELAY; torture_hrtimeout_us(500, 1000, &rand); @@ -1769,13 +1780,11 @@ rcu_torture_writer(void *arg) !rcu_gp_is_normal(); } rcu_torture_writer_state = RTWS_STUTTER; - boot_ended = rcu_inkernel_boot_has_ended(); stutter_waited = stutter_wait("rcu_torture_writer"); if (stutter_waited && !atomic_read(&rcu_fwd_cb_nodelay) && !cur_ops->slow_gps && !torture_must_stop() && - boot_ended && time_after(jiffies, stallsdone)) for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++) if (list_empty(&rcu_tortures[i].rtort_free) && @@ -2437,7 +2446,8 @@ rcu_torture_reader(void *arg) torture_hrtimeout_us(500, 1000, &rand); lastsleep = jiffies + 10; } - while (torture_num_online_cpus() < mynumonline && !torture_must_stop()) + while (!torture_must_stop() && + (torture_num_online_cpus() < mynumonline || !rcu_inkernel_boot_has_ended())) schedule_timeout_interruptible(HZ / 5); stutter_wait("rcu_torture_reader"); } while (!torture_must_stop()); @@ -2756,7 +2766,8 @@ rcu_torture_stats_print(void) cur_ops->stats(); if (rtcv_snap == rcu_torture_current_version && rcu_access_pointer(rcu_torture_current) && - !rcu_stall_is_suppressed()) { + !rcu_stall_is_suppressed() && + rcu_inkernel_boot_has_ended()) { int __maybe_unused flags = 0; unsigned long __maybe_unused gp_seq = 0; @@ -3446,6 +3457,8 @@ static int rcu_torture_fwd_prog(void *args) int tested_tries = 0; VERBOSE_TOROUT_STRING("rcu_torture_fwd_progress task started"); + while (!rcu_inkernel_boot_has_ended()) + schedule_timeout_interruptible(HZ / 10); rcu_bind_current_to_nocb(); if (!IS_ENABLED(CONFIG_SMP) || !IS_ENABLED(CONFIG_RCU_BOOST)) set_user_nice(current, MAX_NICE); diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c 
index df646e0694a8..19841704d8f5 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -1021,7 +1021,7 @@ static int main_func(void *arg) set_user_nice(current, MAX_NICE); VERBOSE_SCALEOUT("main_func task started"); - result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL); + result_avg = kcalloc(nruns, sizeof(*result_avg), GFP_KERNEL); buf = kzalloc(800 + 64, GFP_KERNEL); if (!result_avg || !buf) { SCALEOUT_ERRSTRING("out of memory"); @@ -1133,9 +1133,9 @@ ref_scale_cleanup(void) reader_tasks[i].task); } kfree(reader_tasks); + reader_tasks = NULL; torture_stop_kthread("main_task", main_task); - kfree(main_task); // Do scale-type-specific cleanup operations. if (cur_ops->cleanup != NULL) diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 6e9fe2ce1075..e3b64a5e0ec7 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -176,10 +176,9 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp) { unsigned long cookie; - preempt_disable(); // Needed for PREEMPT_LAZY + lockdep_assert_preemption_disabled(); // Needed for PREEMPT_LAZY cookie = get_state_synchronize_srcu(ssp); if (ULONG_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie)) { - preempt_enable(); return; } WRITE_ONCE(ssp->srcu_idx_max, cookie); @@ -189,7 +188,6 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp) else if (list_empty(&ssp->srcu_work.entry)) list_add(&ssp->srcu_work.entry, &srcu_boot_list); } - preempt_enable(); } /* diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index c5e8ebc493d5..1ff94b76d91f 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1168,6 +1168,16 @@ static void srcu_flip(struct srcu_struct *ssp) * counter update. Note that both this memory barrier and the * one in srcu_readers_active_idx_check() provide the guarantee * for __srcu_read_lock(). 
+ * + * Note that this is a performance optimization, in which we spend + * an otherwise unnecessary smp_mb() in order to reduce the number + * of full per-CPU-variable scans in srcu_readers_lock_idx() and + * srcu_readers_unlock_idx(). But this performance optimization + * is not so optimal for SRCU-fast, where we would be spending + * not smp_mb(), but rather synchronize_rcu(). At the same time, + * the overhead of the smp_mb() is in the noise, so there is no + * point in omitting it in the SRCU-fast case. So the same code + * is executed either way. */ smp_mb(); /* D */ /* Pairs with C. */ } diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index f92443561d36..2dc044fd126e 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -553,13 +553,13 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu rtpcp_next = rtp->rtpcp_array[index]; if (rtpcp_next->cpu < smp_load_acquire(&rtp->percpu_dequeue_lim)) { cpuwq = rcu_cpu_beenfullyonline(rtpcp_next->cpu) ? rtpcp_next->cpu : WORK_CPU_UNBOUND; - queue_work_on(cpuwq, system_wq, &rtpcp_next->rtp_work); + queue_work_on(cpuwq, system_percpu_wq, &rtpcp_next->rtp_work); index++; if (index < num_possible_cpus()) { rtpcp_next = rtp->rtpcp_array[index]; if (rtpcp_next->cpu < smp_load_acquire(&rtp->percpu_dequeue_lim)) { cpuwq = rcu_cpu_beenfullyonline(rtpcp_next->cpu) ? rtpcp_next->cpu : WORK_CPU_UNBOUND; - queue_work_on(cpuwq, system_wq, &rtpcp_next->rtp_work); + queue_work_on(cpuwq, system_percpu_wq, &rtpcp_next->rtp_work); } } } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8eff357b0436..31690ffa452a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3800,6 +3800,11 @@ static void rcu_barrier_handler(void *cpu_in) * to complete. For example, if there are no RCU callbacks queued anywhere * in the system, then rcu_barrier() is within its rights to return * immediately, without waiting for anything, much less an RCU grace period. 
+ * In fact, rcu_barrier() will normally not result in any RCU grace periods + * beyond those that were already destined to be executed. + * + * In kernels built with CONFIG_RCU_LAZY=y, this function also hurries all + * pending lazy RCU callbacks. */ void rcu_barrier(void) { @@ -4885,10 +4890,10 @@ void __init rcu_init(void) rcutree_online_cpu(cpu); /* Create workqueue for Tree SRCU and for expedited GPs. */ - rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0); + rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM | WQ_PERCPU, 0); WARN_ON(!rcu_gp_wq); - sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM, 0); + sync_wq = alloc_workqueue("sync_wq", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); WARN_ON(!sync_wq); /* Respect if explicitly disabled via a boot parameter. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 4cd170b2d655..d85763336b3c 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -626,11 +626,10 @@ notrace void rcu_preempt_deferred_qs(struct task_struct *t) */ static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp) { - unsigned long flags; struct rcu_data *rdp; + lockdep_assert_irqs_disabled(); rdp = container_of(iwp, struct rcu_data, defer_qs_iw); - local_irq_save(flags); /* * If the IRQ work handler happens to run in the middle of RCU read-side @@ -647,8 +646,6 @@ static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp) */ if (rcu_preempt_depth() > 0) WRITE_ONCE(rdp->defer_qs_iw_pending, DEFER_QS_IDLE); - - local_irq_restore(flags); } /* diff --git a/kernel/torture.c b/kernel/torture.c index 3a0a8cc60401..1ea9f67953a7 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -359,6 +359,8 @@ torture_onoff(void *arg) torture_hrtimeout_jiffies(onoff_holdoff, &rand); VERBOSE_TOROUT_STRING("torture_onoff end holdoff"); } + while (!rcu_inkernel_boot_has_ended()) + schedule_timeout_interruptible(HZ / 10); while (!torture_must_stop()) { if (disable_onoff_at_boot && !rcu_inkernel_boot_has_ended()) { 
torture_hrtimeout_jiffies(HZ / 10, &rand); @@ -797,8 +799,9 @@ static unsigned long torture_init_jiffies; static void torture_print_module_parms(void) { - pr_alert("torture module --- %s: disable_onoff_at_boot=%d ftrace_dump_at_shutdown=%d verbose_sleep_frequency=%d verbose_sleep_duration=%d random_shuffle=%d\n", - torture_type, disable_onoff_at_boot, ftrace_dump_at_shutdown, verbose_sleep_frequency, verbose_sleep_duration, random_shuffle); + pr_alert("torture module --- %s: disable_onoff_at_boot=%d ftrace_dump_at_shutdown=%d verbose_sleep_frequency=%d verbose_sleep_duration=%d random_shuffle=%d%s\n", + torture_type, disable_onoff_at_boot, ftrace_dump_at_shutdown, verbose_sleep_frequency, verbose_sleep_duration, random_shuffle, + rcu_inkernel_boot_has_ended() ? "" : " still booting"); } /* |
