diff options
author | Chen Yu <yu.c.chen@intel.com> | 2025-07-04 21:56:20 +0800 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2025-07-09 21:07:56 -0700 |
commit | db6cc3f4ac2e6cdc898fc9cbc8b32ae1bf56bdad (patch) | |
tree | 844e9ed8cb0d436dac0c3a6346c74bcd4512a81b | |
parent | 82241a83cd15aaaf28200a40ad1a8b480012edaf (diff) |
Revert "sched/numa: add statistics of numa balance task"
This reverts commit ad6b26b6a0a79166b53209df2ca1cf8636296382.
This commit introduces per-memcg/task NUMA balance statistics, but
unfortunately it introduced a NULL pointer exception due to the following
race condition: After a swap task candidate was chosen, its mm_struct
pointer was set to NULL due to task exit. Later, when performing the
actual task swapping, the p->mm caused the problem.
CPU0 CPU1
:
...
task_numa_migrate
task_numa_find_cpu
task_numa_compare
# a normal task p is chosen
env->best_task = p
# p exit:
exit_signals(p);
p->flags |= PF_EXITING
exit_mm
p->mm = NULL;
migrate_swap_stop
__migrate_swap_task((arg->src_task, arg->dst_cpu)
count_memcg_event_mm(p->mm, NUMA_TASK_SWAP)# p->mm is NULL
task_lock() should be held and the PF_EXITING flag needs to be checked to
prevent this from happening. After discussion, the conclusion was that
adding a lock is not worthwhile for some statistics calculations. Revert
the change and rely on the tracepoint for this purpose.
Link: https://lkml.kernel.org/r/20250704135620.685752-1-yu.c.chen@intel.com
Link: https://lkml.kernel.org/r/20250708064917.BBD13C4CEED@smtp.kernel.org
Fixes: ad6b26b6a0a7 ("sched/numa: add statistics of numa balance task")
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Reported-by: Jirka Hladky <jhladky@redhat.com>
Closes: https://lore.kernel.org/all/CAE4VaGBLJxpd=NeRJXpSCuw=REhC5LWJpC29kDy-Zh2ZDyzQZA@mail.gmail.com/
Reported-by: Srikanth Aithal <Srikanth.Aithal@amd.com>
Reported-by: Suneeth D <Suneeth.D@amd.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Hladky <jhladky@redhat.com>
Cc: Libo Chen <libo.chen@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r-- | Documentation/admin-guide/cgroup-v2.rst | 6 | ||||
-rw-r--r-- | include/linux/sched.h | 4 | ||||
-rw-r--r-- | include/linux/vm_event_item.h | 2 | ||||
-rw-r--r-- | kernel/sched/core.c | 9 | ||||
-rw-r--r-- | kernel/sched/debug.c | 4 | ||||
-rw-r--r-- | mm/memcontrol.c | 2 | ||||
-rw-r--r-- | mm/vmstat.c | 2 |
7 files changed, 2 insertions, 27 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 0cc35a14afbe..bd98ea3175ec 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1732,12 +1732,6 @@ The following nested keys are defined. numa_hint_faults (npn) Number of NUMA hinting faults. - numa_task_migrated (npn) - Number of task migration by NUMA balancing. - - numa_task_swapped (npn) - Number of task swap by NUMA balancing. - pgdemote_kswapd Number of pages demoted by kswapd. diff --git a/include/linux/sched.h b/include/linux/sched.h index 4f78a64beb52..aa9c5be7a632 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -548,10 +548,6 @@ struct sched_statistics { u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; -#ifdef CONFIG_NUMA_BALANCING - u64 numa_task_migrated; - u64 numa_task_swapped; -#endif u64 nr_wakeups; u64 nr_wakeups_sync; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 91a3ce9a2687..9e15a088ba38 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -66,8 +66,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NUMA_HINT_FAULTS, NUMA_HINT_FAULTS_LOCAL, NUMA_PAGE_MIGRATE, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif #ifdef CONFIG_MIGRATION PGMIGRATE_SUCCESS, PGMIGRATE_FAIL, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ec68fc686bd7..81c6df746df1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3362,10 +3362,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #ifdef CONFIG_NUMA_BALANCING static void __migrate_swap_task(struct task_struct *p, int cpu) { - __schedstat_inc(p->stats.numa_task_swapped); - count_vm_numa_event(NUMA_TASK_SWAP); - count_memcg_event_mm(p->mm, NUMA_TASK_SWAP); - if (task_on_rq_queued(p)) { struct rq *src_rq, *dst_rq; struct rq_flags srf, drf; @@ -7939,9 +7935,8 @@ int migrate_task_to(struct task_struct *p, int target_cpu) if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) return -EINVAL; - __schedstat_inc(p->stats.numa_task_migrated); - count_vm_numa_event(NUMA_TASK_MIGRATE); - count_memcg_event_mm(p->mm, NUMA_TASK_MIGRATE); + /* TODO: This is not properly updating schedstats */ + trace_sched_move_numa(p, curr_cpu, target_cpu); return stop_one_cpu(curr_cpu, migration_cpu_stop, &arg); } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 9d71baf08075..557246880a7e 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1210,10 +1210,6 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P_SCHEDSTAT(nr_failed_migrations_running); P_SCHEDSTAT(nr_failed_migrations_hot); P_SCHEDSTAT(nr_forced_migrations); -#ifdef CONFIG_NUMA_BALANCING - P_SCHEDSTAT(numa_task_migrated); - P_SCHEDSTAT(numa_task_swapped); -#endif P_SCHEDSTAT(nr_wakeups); P_SCHEDSTAT(nr_wakeups_sync); P_SCHEDSTAT(nr_wakeups_migrate); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 902da8a9c643..70fdeda1120b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -474,8 +474,6 @@ static const unsigned int memcg_vm_event_stat[] = { NUMA_PAGE_MIGRATE, NUMA_PTE_UPDATES, NUMA_HINT_FAULTS, - NUMA_TASK_MIGRATE, - NUMA_TASK_SWAP, #endif }; diff --git a/mm/vmstat.c b/mm/vmstat.c index 429ae5339bfe..a78d70ddeacd 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1346,8 +1346,6 @@ const char * const vmstat_text[] = { "numa_hint_faults", "numa_hint_faults_local", "numa_pages_migrated", - "numa_task_migrated", - "numa_task_swapped", #endif #ifdef CONFIG_MIGRATION "pgmigrate_success", |