summaryrefslogtreecommitdiff
path: root/kernel/sched/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--kernel/sched/core.c74
1 files changed, 73 insertions, 1 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9605db70e671..4cf30e4de653 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -796,6 +796,26 @@ unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
/* All clamps are required to be less or equal than these values */
static struct uclamp_se uclamp_default[UCLAMP_CNT];
+/*
+ * This static key is used to reduce the uclamp overhead in the fast path. It
+ * primarily disables the call to uclamp_rq_{inc, dec}() in
+ * enqueue/dequeue_task().
+ *
+ * This allows users to continue to enable uclamp in their kernel config with
+ * minimum uclamp overhead in the fast path.
+ *
+ * As soon as userspace modifies any of the uclamp knobs, the static key is
+ * enabled, since we have an actual users that make use of uclamp
+ * functionality.
+ *
+ * The knobs that would enable this static key are:
+ *
+ * * A task modifying its uclamp value with sched_setattr().
+ * * An admin modifying the sysctl_sched_uclamp_{min, max} via procfs.
+ * * An admin modifying the cgroup cpu.uclamp.{min, max}
+ */
+DEFINE_STATIC_KEY_FALSE(sched_uclamp_used);
+
/* Integer rounded range for each bucket */
#define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS)
@@ -992,10 +1012,38 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
lockdep_assert_held(&rq->lock);
+ /*
+ * If sched_uclamp_used was enabled after task @p was enqueued,
+ * we could end up with unbalanced call to uclamp_rq_dec_id().
+ *
+ * In this case the uc_se->active flag should be false since no uclamp
+ * accounting was performed at enqueue time and we can just return
+ * here.
+ *
+ * Need to be careful of the following enqeueue/dequeue ordering
+ * problem too
+ *
+ * enqueue(taskA)
+ * // sched_uclamp_used gets enabled
+ * enqueue(taskB)
+ * dequeue(taskA)
+ * // Must not decrement bukcet->tasks here
+ * dequeue(taskB)
+ *
+ * where we could end up with stale data in uc_se and
+ * bucket[uc_se->bucket_id].
+ *
+ * The following check here eliminates the possibility of such race.
+ */
+ if (unlikely(!uc_se->active))
+ return;
+
bucket = &uc_rq->bucket[uc_se->bucket_id];
+
SCHED_WARN_ON(!bucket->tasks);
if (likely(bucket->tasks))
bucket->tasks--;
+
uc_se->active = false;
/*
@@ -1023,6 +1071,15 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
{
enum uclamp_id clamp_id;
+ /*
+ * Avoid any overhead until uclamp is actually used by the userspace.
+ *
+ * The condition is constructed such that a NOP is generated when
+ * sched_uclamp_used is disabled.
+ */
+ if (!static_branch_unlikely(&sched_uclamp_used))
+ return;
+
if (unlikely(!p->sched_class->uclamp_enabled))
return;
@@ -1038,6 +1095,15 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
{
enum uclamp_id clamp_id;
+ /*
+ * Avoid any overhead until uclamp is actually used by the userspace.
+ *
+ * The condition is constructed such that a NOP is generated when
+ * sched_uclamp_used is disabled.
+ */
+ if (!static_branch_unlikely(&sched_uclamp_used))
+ return;
+
if (unlikely(!p->sched_class->uclamp_enabled))
return;
@@ -1146,8 +1212,10 @@ int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
update_root_tg = true;
}
- if (update_root_tg)
+ if (update_root_tg) {
+ static_branch_enable(&sched_uclamp_used);
uclamp_update_root_tg();
+ }
/*
* We update all RUNNABLE tasks only when task groups are in use.
@@ -1212,6 +1280,8 @@ static void __setscheduler_uclamp(struct task_struct *p,
if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
return;
+ static_branch_enable(&sched_uclamp_used);
+
if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
attr->sched_util_min, true);
@@ -7436,6 +7506,8 @@ static ssize_t cpu_uclamp_write(struct kernfs_open_file *of, char *buf,
if (req.ret)
return req.ret;
+ static_branch_enable(&sched_uclamp_used);
+
mutex_lock(&uclamp_mutex);
rcu_read_lock();