summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-19 09:01:17 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-19 09:01:17 -0800
commit8ff6d472ab35d5cb9a3941a1fcd5b7cbc9338c7f (patch)
tree1c7b54f03689239f629e43caa87c7e372ff86147
parentfda5e3f284002ea55dac1c98c1498d6dd684046e (diff)
parent66951e4860d3c688bfa550ea4a19635b57e00eca (diff)
Merge tag 'sched_urgent_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Borislav Petkov: - Do not adjust the weight of empty group entities and avoid scheduling artifacts - Avoid scheduling lag by computing lag properly and thus address an EEVDF entity placement issue * tag 'sched_urgent_for_v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/fair: Fix update_cfs_group() vs DELAY_DEQUEUE sched/fair: Fix EEVDF entity placement bug causing scheduling lag
-rw-r--r--kernel/sched/fair.c151
1 files changed, 23 insertions, 128 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3e9ca38512de..26958431deb7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -689,21 +689,16 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
*
* XXX could add max_slice to the augmented data to track this.
*/
-static s64 entity_lag(u64 avruntime, struct sched_entity *se)
+static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
s64 vlag, limit;
- vlag = avruntime - se->vruntime;
- limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
-
- return clamp(vlag, -limit, limit);
-}
-
-static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
SCHED_WARN_ON(!se->on_rq);
- se->vlag = entity_lag(avg_vruntime(cfs_rq), se);
+ vlag = avg_vruntime(cfs_rq) - se->vruntime;
+ limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
+
+ se->vlag = clamp(vlag, -limit, limit);
}
/*
@@ -3774,137 +3769,32 @@ static inline void
dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
#endif
-static void reweight_eevdf(struct sched_entity *se, u64 avruntime,
- unsigned long weight)
-{
- unsigned long old_weight = se->load.weight;
- s64 vlag, vslice;
-
- /*
- * VRUNTIME
- * --------
- *
- * COROLLARY #1: The virtual runtime of the entity needs to be
- * adjusted if re-weight at !0-lag point.
- *
- * Proof: For contradiction assume this is not true, so we can
- * re-weight without changing vruntime at !0-lag point.
- *
- * Weight VRuntime Avg-VRuntime
- * before w v V
- * after w' v' V'
- *
- * Since lag needs to be preserved through re-weight:
- *
- * lag = (V - v)*w = (V'- v')*w', where v = v'
- * ==> V' = (V - v)*w/w' + v (1)
- *
- * Let W be the total weight of the entities before reweight,
- * since V' is the new weighted average of entities:
- *
- * V' = (WV + w'v - wv) / (W + w' - w) (2)
- *
- * by using (1) & (2) we obtain:
- *
- * (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
- * ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
- * ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
- * ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
- *
- * Since we are doing at !0-lag point which means V != v, we
- * can simplify (3):
- *
- * ==> W / (W + w' - w) = w / w'
- * ==> Ww' = Ww + ww' - ww
- * ==> W * (w' - w) = w * (w' - w)
- * ==> W = w (re-weight indicates w' != w)
- *
- * So the cfs_rq contains only one entity, hence vruntime of
- * the entity @v should always equal to the cfs_rq's weighted
- * average vruntime @V, which means we will always re-weight
- * at 0-lag point, thus breach assumption. Proof completed.
- *
- *
- * COROLLARY #2: Re-weight does NOT affect weighted average
- * vruntime of all the entities.
- *
- * Proof: According to corollary #1, Eq. (1) should be:
- *
- * (V - v)*w = (V' - v')*w'
- * ==> v' = V' - (V - v)*w/w' (4)
- *
- * According to the weighted average formula, we have:
- *
- * V' = (WV - wv + w'v') / (W - w + w')
- * = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
- * = (WV - wv + w'V' - Vw + wv) / (W - w + w')
- * = (WV + w'V' - Vw) / (W - w + w')
- *
- * ==> V'*(W - w + w') = WV + w'V' - Vw
- * ==> V' * (W - w) = (W - w) * V (5)
- *
- * If the entity is the only one in the cfs_rq, then reweight
- * always occurs at 0-lag point, so V won't change. Or else
- * there are other entities, hence W != w, then Eq. (5) turns
- * into V' = V. So V won't change in either case, proof done.
- *
- *
- * So according to corollary #1 & #2, the effect of re-weight
- * on vruntime should be:
- *
- * v' = V' - (V - v) * w / w' (4)
- * = V - (V - v) * w / w'
- * = V - vl * w / w'
- * = V - vl'
- */
- if (avruntime != se->vruntime) {
- vlag = entity_lag(avruntime, se);
- vlag = div_s64(vlag * old_weight, weight);
- se->vruntime = avruntime - vlag;
- }
-
- /*
- * DEADLINE
- * --------
- *
- * When the weight changes, the virtual time slope changes and
- * we should adjust the relative virtual deadline accordingly.
- *
- * d' = v' + (d - v)*w/w'
- * = V' - (V - v)*w/w' + (d - v)*w/w'
- * = V - (V - v)*w/w' + (d - v)*w/w'
- * = V + (d - V)*w/w'
- */
- vslice = (s64)(se->deadline - avruntime);
- vslice = div_s64(vslice * old_weight, weight);
- se->deadline = avruntime + vslice;
-}
+static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags);
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{
bool curr = cfs_rq->curr == se;
- u64 avruntime;
if (se->on_rq) {
/* commit outstanding execution time */
update_curr(cfs_rq);
- avruntime = avg_vruntime(cfs_rq);
+ update_entity_lag(cfs_rq, se);
+ se->deadline -= se->vruntime;
+ se->rel_deadline = 1;
if (!curr)
__dequeue_entity(cfs_rq, se);
update_load_sub(&cfs_rq->load, se->load.weight);
}
dequeue_load_avg(cfs_rq, se);
- if (se->on_rq) {
- reweight_eevdf(se, avruntime, weight);
- } else {
- /*
- * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
- * we need to scale se->vlag when w_i changes.
- */
- se->vlag = div_s64(se->vlag * se->load.weight, weight);
- }
+ /*
+ * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
+ * we need to scale se->vlag when w_i changes.
+ */
+ se->vlag = div_s64(se->vlag * se->load.weight, weight);
+ if (se->rel_deadline)
+ se->deadline = div_s64(se->deadline * se->load.weight, weight);
update_load_set(&se->load, weight);
@@ -3919,6 +3809,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
enqueue_load_avg(cfs_rq, se);
if (se->on_rq) {
update_load_add(&cfs_rq->load, se->load.weight);
+ place_entity(cfs_rq, se, 0);
if (!curr)
__enqueue_entity(cfs_rq, se);
@@ -4065,7 +3956,11 @@ static void update_cfs_group(struct sched_entity *se)
struct cfs_rq *gcfs_rq = group_cfs_rq(se);
long shares;
- if (!gcfs_rq)
+ /*
+ * When a group becomes empty, preserve its weight. This matters for
+ * DELAY_DEQUEUE.
+ */
+ if (!gcfs_rq || !gcfs_rq->load.weight)
return;
if (throttled_hierarchy(gcfs_rq))
@@ -5359,7 +5254,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
se->vruntime = vruntime - lag;
- if (sched_feat(PLACE_REL_DEADLINE) && se->rel_deadline) {
+ if (se->rel_deadline) {
se->deadline += se->vruntime;
se->rel_deadline = 0;
return;