From 5f295519b42f100c735a1e8e1a70060e26f30c3f Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Sun, 22 Jun 2025 20:00:06 -0400 Subject: smp: Improve locality in smp_call_function_any() smp_call_function_any() tries to make a local call as it's the cheapest option, or switches to a CPU in the same node. If it's not possible, the algorithm gives up and searches for any CPU, in numerical order. Instead, it can search for the best CPU based on NUMA locality, including the 2nd nearest hop (a set of equidistant nodes), and higher. sched_numa_find_nth_cpu() does exactly that, and also helps to drop most of the housekeeping code. Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250623000010.10124-2-yury.norov@gmail.com --- kernel/smp.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/smp.c b/kernel/smp.c index 974f3a3962e8..7c8cfab0ce55 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -741,32 +741,19 @@ EXPORT_SYMBOL_GPL(smp_call_function_single_async); * * Selection preference: * 1) current cpu if in @mask - * 2) any cpu of current node if in @mask - * 3) any other online cpu in @mask + * 2) nearest cpu in @mask, based on NUMA topology */ int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, void *info, int wait) { unsigned int cpu; - const struct cpumask *nodemask; int ret; /* Try for same CPU (cheapest) */ cpu = get_cpu(); - if (cpumask_test_cpu(cpu, mask)) - goto call; - - /* Try for same node. */ - nodemask = cpumask_of_node(cpu_to_node(cpu)); - for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids; - cpu = cpumask_next_and(cpu, nodemask, mask)) { - if (cpu_online(cpu)) - goto call; - } + if (!cpumask_test_cpu(cpu, mask)) + cpu = sched_numa_find_nth_cpu(mask, 0, cpu_to_node(cpu)); - /* Any online will do: smp_call_function_single handles nr_cpu_ids. 
*/ - cpu = cpumask_any_and(mask, cpu_online_mask); -call: ret = smp_call_function_single(cpu, func, info, wait); put_cpu(); return ret; -- cgit From 976e0e3103e463725e19a5493d02ce7b7b380663 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Sun, 22 Jun 2025 20:00:07 -0400 Subject: smp: Use cpumask_any_but() in smp_call_function_many_cond() smp_call_function_many_cond() open-codes cpumask_any_and_but(), the two-mask variant of cpumask_any_but(). Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250623000010.10124-3-yury.norov@gmail.com --- kernel/smp.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/smp.c b/kernel/smp.c index 7c8cfab0ce55..5871acf3cd45 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -807,13 +807,8 @@ static void smp_call_function_many_cond(const struct cpumask *mask, run_local = true; /* Check if we need remote execution, i.e., any CPU excluding this one. */ - cpu = cpumask_first_and(mask, cpu_online_mask); - if (cpu == this_cpu) - cpu = cpumask_next_and(cpu, mask, cpu_online_mask); - if (cpu < nr_cpu_ids) + if (cpumask_any_and_but(mask, cpu_online_mask, this_cpu) < nr_cpu_ids) { run_remote = true; - - if (run_remote) { cfd = this_cpu_ptr(&cfd_data); cpumask_and(cfd->cpumask, mask, cpu_online_mask); __cpumask_clear_cpu(this_cpu, cfd->cpumask); -- cgit From e0e9506523fea415e0d5abaa103fd67dc8a39696 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Sun, 22 Jun 2025 20:00:09 -0400 Subject: smp: Defer check for local execution in smp_call_function_many_cond() Defer check for local execution to the actual place where it is needed, which removes the extra local variable. 
Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250623000010.10124-5-yury.norov@gmail.com --- kernel/smp.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/smp.c b/kernel/smp.c index 5871acf3cd45..99d1fd0e9e0e 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -779,7 +779,6 @@ static void smp_call_function_many_cond(const struct cpumask *mask, bool wait = scf_flags & SCF_WAIT; int nr_cpus = 0; bool run_remote = false; - bool run_local = false; lockdep_assert_preemption_disabled(); @@ -801,11 +800,6 @@ static void smp_call_function_many_cond(const struct cpumask *mask, */ WARN_ON_ONCE(!in_task()); - /* Check if we need local execution. */ - if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask) && - (!cond_func || cond_func(this_cpu, info))) - run_local = true; - /* Check if we need remote execution, i.e., any CPU excluding this one. */ if (cpumask_any_and_but(mask, cpu_online_mask, this_cpu) < nr_cpu_ids) { run_remote = true; @@ -851,7 +845,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask, send_call_function_ipi_mask(cfd->cpumask_ipi); } - if (run_local) { + /* Check if we need local execution. */ + if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask) && + (!cond_func || cond_func(this_cpu, info))) { unsigned long flags; local_irq_save(flags); -- cgit From 946a7281982530d333eaee62bd1726f25908b3a9 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 2 Jul 2025 13:52:54 -0400 Subject: smp: Wait only if work was enqueued Whenever work is enqueued for a remote CPU, smp_call_function_many_cond() may need to wait for that work to be completed. However, if no work is enqueued for a remote CPU, because the condition func() evaluated to false for all CPUs, there is no need to wait. Set run_remote only if work was enqueued on remote CPUs. 
Document the difference between "work enqueued" and "CPU needs to be woken up". Suggested-by: Jann Horn Signed-off-by: Rik van Riel Signed-off-by: Thomas Gleixner Reviewed-by: Yury Norov (NVIDIA) Link: https://lore.kernel.org/all/20250703203019.11331ac3@fangorn --- kernel/smp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/smp.c b/kernel/smp.c index 99d1fd0e9e0e..c5e1da7a88da 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -802,7 +802,6 @@ static void smp_call_function_many_cond(const struct cpumask *mask, /* Check if we need remote execution, i.e., any CPU excluding this one. */ if (cpumask_any_and_but(mask, cpu_online_mask, this_cpu) < nr_cpu_ids) { - run_remote = true; cfd = this_cpu_ptr(&cfd_data); cpumask_and(cfd->cpumask, mask, cpu_online_mask); __cpumask_clear_cpu(this_cpu, cfd->cpumask); @@ -816,6 +815,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask, continue; } + /* Work is enqueued on a remote CPU. */ + run_remote = true; + csd_lock(csd); if (wait) csd->node.u_flags |= CSD_TYPE_SYNC; @@ -827,6 +829,10 @@ static void smp_call_function_many_cond(const struct cpumask *mask, #endif trace_csd_queue_cpu(cpu, _RET_IP_, func, csd); + /* + * Kick the remote CPU if this is the first work + * item enqueued. + */ if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) { __cpumask_set_cpu(cpu, cfd->cpumask_ipi); nr_cpus++; -- cgit