summaryrefslogtreecommitdiff
path: root/kernel/watchdog.c
diff options
context:
space:
mode:
authorLi Huafei <lihuafei1@huawei.com>2024-10-22 03:30:03 +0800
committerIngo Molnar <mingo@kernel.org>2025-03-06 12:05:33 +0100
commitd6834d9c990333bfa433bc1816e2417f268eebbe (patch)
treeafcbc9507bc70298519083a3e0f0b6688d6a8c02 /kernel/watchdog.c
parent5e7adc81ae1b27ff565714d2933b291cf1e1271f (diff)
watchdog/hardlockup/perf: Fix perf_event memory leak
During stress-testing, we found a kmemleak report for perf_event: unreferenced object 0xff110001410a33e0 (size 1328): comm "kworker/4:11", pid 288, jiffies 4294916004 hex dump (first 32 bytes): b8 be c2 3b 02 00 11 ff 22 01 00 00 00 00 ad de ...;...."....... f0 33 0a 41 01 00 11 ff f0 33 0a 41 01 00 11 ff .3.A.....3.A.... backtrace (crc 24eb7b3a): [<00000000e211b653>] kmem_cache_alloc_node_noprof+0x269/0x2e0 [<000000009d0985fa>] perf_event_alloc+0x5f/0xcf0 [<00000000084ad4a2>] perf_event_create_kernel_counter+0x38/0x1b0 [<00000000fde96401>] hardlockup_detector_event_create+0x50/0xe0 [<0000000051183158>] watchdog_hardlockup_enable+0x17/0x70 [<00000000ac89727f>] softlockup_start_fn+0x15/0x40 ... Our stress test includes CPU online and offline cycles, and updating the watchdog configuration. After reading the code, I found that there may be a race between cleaning up perf_event after updating watchdog and disabling event when the CPU goes offline: CPU0 CPU1 CPU2 (update watchdog) (hotplug offline CPU1) ... _cpu_down(CPU1) cpus_read_lock() // waiting for cpu lock softlockup_start_all smp_call_on_cpu(CPU1) softlockup_start_fn ... watchdog_hardlockup_enable(CPU1) perf create E1 watchdog_ev[CPU1] = E1 cpus_read_unlock() cpus_write_lock() cpuhp_kick_ap_work(CPU1) cpuhp_thread_fun ... watchdog_hardlockup_disable(CPU1) watchdog_ev[CPU1] = NULL dead_event[CPU1] = E1 __lockup_detector_cleanup for each dead_events_mask release each dead_event /* * CPU1 has not been added to * dead_events_mask, then E1 * will not be released */ CPU1 -> dead_events_mask cpumask_clear(&dead_events_mask) // dead_events_mask is cleared, E1 is leaked In this case, the leaked perf_event E1 matches the perf_event leak reported by kmemleak. Due to the low probability of problem recurrence (only reported once), I added some hack delays in the code: static void __lockup_detector_reconfigure(void) { ... watchdog_hardlockup_start(); cpus_read_unlock(); + mdelay(100); /* * Must be called outside the cpus locked section to prevent * recursive locking in the perf code. ... } void watchdog_hardlockup_disable(unsigned int cpu) { ... perf_event_disable(event); this_cpu_write(watchdog_ev, NULL); this_cpu_write(dead_event, event); + mdelay(100); cpumask_set_cpu(smp_processor_id(), &dead_events_mask); atomic_dec(&watchdog_cpus); ... } void hardlockup_detector_perf_cleanup(void) { ... perf_event_release_kernel(event); per_cpu(dead_event, cpu) = NULL; } + mdelay(100); cpumask_clear(&dead_events_mask); } Then, simultaneously performing CPU on/off and switching watchdog, it is almost certain to reproduce this leak. The problem here is that releasing perf_event is not within the CPU hotplug read-write lock. Commit: 941154bd6937 ("watchdog/hardlockup/perf: Prevent CPU hotplug deadlock") introduced deferred release to solve the deadlock caused by calling get_online_cpus() when releasing perf_event. Later, commit: efe951d3de91 ("perf/x86: Fix perf,x86,cpuhp deadlock") removed the get_online_cpus() call on the perf_event release path to solve another deadlock problem. Therefore, it is now possible to move the release of perf_event back into the CPU hotplug read-write lock, and release the event immediately after disabling it. Fixes: 941154bd6937 ("watchdog/hardlockup/perf: Prevent CPU hotplug deadlock") Signed-off-by: Li Huafei <lihuafei1@huawei.com> Signed-off-by: Ingo Molnar <mingo@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Link: https://lore.kernel.org/r/20241021193004.308303-1-lihuafei1@huawei.com
Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r--kernel/watchdog.c25
1 files changed, 0 insertions, 25 deletions
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b2da7de39d06..18156023e461 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -347,8 +347,6 @@ static int __init watchdog_thresh_setup(char *str)
}
__setup("watchdog_thresh=", watchdog_thresh_setup);
-static void __lockup_detector_cleanup(void);
-
#ifdef CONFIG_SOFTLOCKUP_DETECTOR_INTR_STORM
enum stats_per_group {
STATS_SYSTEM,
@@ -886,11 +884,6 @@ static void __lockup_detector_reconfigure(void)
watchdog_hardlockup_start();
cpus_read_unlock();
- /*
- * Must be called outside the cpus locked section to prevent
- * recursive locking in the perf code.
- */
- __lockup_detector_cleanup();
}
void lockup_detector_reconfigure(void)
@@ -940,24 +933,6 @@ static inline void lockup_detector_setup(void)
}
#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */
-static void __lockup_detector_cleanup(void)
-{
- lockdep_assert_held(&watchdog_mutex);
- hardlockup_detector_perf_cleanup();
-}
-
-/**
- * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
- *
- * Caller must not hold the cpu hotplug rwsem.
- */
-void lockup_detector_cleanup(void)
-{
- mutex_lock(&watchdog_mutex);
- __lockup_detector_cleanup();
- mutex_unlock(&watchdog_mutex);
-}
-
/**
* lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
*