summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2025-02-07 10:58:23 -1000
committerTejun Heo <tj@kernel.org>2025-02-08 20:32:54 -1000
commit2fa0fbeb69edd367b7c44f484e8dc5a5a1a311ef (patch)
tree5627ad235172c0ed449d20b700e6b21c9a54a474
parent029b6ce733712a41421955194b113f283dcb1026 (diff)
sched_ext: Implement auto local dispatching of migration disabled tasks
Migration disabled tasks are special and pinned to their previous CPUs. They tripped up some unsuspecting BPF schedulers as their ->nr_cpus_allowed may not agree with the bits set in ->cpus_ptr. Make it easier for BPF schedulers by automatically dispatching them to the pinned local DSQs by default. If a BPF scheduler wants to handle migration disabled tasks explicitly, it can set SCX_OPS_ENQ_MIGRATION_DISABLED. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Andrea Righi <arighi@nvidia.com>
-rw-r--r--kernel/sched/ext.c23
1 files changed, 23 insertions, 0 deletions
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index a6d6d6dadde5..efdbf4d85a21 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -123,6 +123,19 @@ enum scx_ops_flags {
SCX_OPS_SWITCH_PARTIAL = 1LLU << 3,
/*
+ * A migration disabled task can only execute on its current CPU. By
+ * default, such tasks are automatically put on the CPU's local DSQ with
+ * the default slice on enqueue. If this ops flag is set, they also go
+ * through ops.enqueue().
+ *
+ * A migration disabled task never invokes ops.select_cpu() as it can
+ * only select the current CPU. Also, p->cpus_ptr will only contain its
+ * current CPU while p->nr_cpus_allowed keeps tracking p->user_cpus_ptr
+ * and thus may disagree with cpumask_weight(p->cpus_ptr).
+ */
+ SCX_OPS_ENQ_MIGRATION_DISABLED = 1LLU << 4,
+
+ /*
* CPU cgroup support flags
*/
SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* cpu.weight */
@@ -130,6 +143,7 @@ enum scx_ops_flags {
SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE |
SCX_OPS_ENQ_LAST |
SCX_OPS_ENQ_EXITING |
+ SCX_OPS_ENQ_MIGRATION_DISABLED |
SCX_OPS_SWITCH_PARTIAL |
SCX_OPS_HAS_CGROUP_WEIGHT,
};
@@ -882,6 +896,7 @@ static bool scx_warned_zero_slice;
static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_last);
static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_exiting);
+static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_migration_disabled);
static DEFINE_STATIC_KEY_FALSE(scx_ops_cpu_preempt);
static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_enabled);
@@ -2014,6 +2029,11 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
unlikely(p->flags & PF_EXITING))
goto local;
+ /* see %SCX_OPS_ENQ_MIGRATION_DISABLED */
+ if (!static_branch_unlikely(&scx_ops_enq_migration_disabled) &&
+ is_migration_disabled(p))
+ goto local;
+
if (!SCX_HAS_OP(enqueue))
goto global;
@@ -5052,6 +5072,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
static_branch_disable(&scx_has_op[i]);
static_branch_disable(&scx_ops_enq_last);
static_branch_disable(&scx_ops_enq_exiting);
+ static_branch_disable(&scx_ops_enq_migration_disabled);
static_branch_disable(&scx_ops_cpu_preempt);
static_branch_disable(&scx_builtin_idle_enabled);
synchronize_rcu();
@@ -5661,6 +5682,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (ops->flags & SCX_OPS_ENQ_EXITING)
static_branch_enable(&scx_ops_enq_exiting);
+ if (ops->flags & SCX_OPS_ENQ_MIGRATION_DISABLED)
+ static_branch_enable(&scx_ops_enq_migration_disabled);
if (scx_ops.cpu_acquire || scx_ops.cpu_release)
static_branch_enable(&scx_ops_cpu_preempt);