summaryrefslogtreecommitdiff
path: root/kernel/sched/fair.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--kernel/sched/fair.c42
1 files changed, 28 insertions, 14 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0fb9bf995a47..b4326827e326 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3329,6 +3329,15 @@ static void task_numa_work(struct callback_head *work)
if (p->flags & PF_EXITING)
return;
+ /*
+ * Memory is pinned to only one NUMA node via cpuset.mems, naturally
+ * no page can be migrated.
+ */
+ if (cpusets_enabled() && nodes_weight(cpuset_current_mems_allowed) == 1) {
+ trace_sched_skip_cpuset_numa(current, &cpuset_current_mems_allowed);
+ return;
+ }
+
if (!mm->numa_next_scan) {
mm->numa_next_scan = now +
msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
@@ -3795,6 +3804,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
update_entity_lag(cfs_rq, se);
se->deadline -= se->vruntime;
se->rel_deadline = 1;
+ cfs_rq->nr_queued--;
if (!curr)
__dequeue_entity(cfs_rq, se);
update_load_sub(&cfs_rq->load, se->load.weight);
@@ -3821,10 +3831,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
enqueue_load_avg(cfs_rq, se);
if (se->on_rq) {
- update_load_add(&cfs_rq->load, se->load.weight);
place_entity(cfs_rq, se, 0);
+ update_load_add(&cfs_rq->load, se->load.weight);
if (!curr)
__enqueue_entity(cfs_rq, se);
+ cfs_rq->nr_queued++;
/*
* The entity's vruntime has been adjusted, so let's check
@@ -4933,13 +4944,6 @@ static inline void util_est_update(struct cfs_rq *cfs_rq,
goto done;
/*
- * To avoid overestimation of actual task utilization, skip updates if
- * we cannot grant there is idle time in this CPU.
- */
- if (dequeued > arch_scale_cpu_capacity(cpu_of(rq_of(cfs_rq))))
- return;
-
- /*
* To avoid underestimate of task utilization, skip updates of EWMA if
* we cannot grant that thread got all CPU time it wanted.
*/
@@ -6941,7 +6945,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
* Let's add the task's estimated utilization to the cfs_rq's
* estimated utilization, before we update schedutil.
*/
- if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
+ if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
util_est_enqueue(&rq->cfs, p);
if (flags & ENQUEUE_DELAYED) {
@@ -7181,7 +7185,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
*/
static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
- if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
+ if (!p->se.sched_delayed)
util_est_dequeue(&rq->cfs, p);
util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
@@ -7196,6 +7200,11 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
return true;
}
+static inline unsigned int cfs_h_nr_delayed(struct rq *rq)
+{
+ return (rq->cfs.h_nr_queued - rq->cfs.h_nr_runnable);
+}
+
#ifdef CONFIG_SMP
/* Working cpumask for: sched_balance_rq(), sched_balance_newidle(). */
@@ -7357,8 +7366,12 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync)
if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
- if (sync && cpu_rq(this_cpu)->nr_running == 1)
- return this_cpu;
+ if (sync) {
+ struct rq *rq = cpu_rq(this_cpu);
+
+ if ((rq->nr_running - cfs_h_nr_delayed(rq)) == 1)
+ return this_cpu;
+ }
if (available_idle_cpu(prev_cpu))
return prev_cpu;
@@ -10256,7 +10269,7 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group
(sgs->group_weight - sgs->idle_cpus != 1))
return false;
- return sched_asym(env->sd, env->dst_cpu, group->asym_prefer_cpu);
+ return sched_asym(env->sd, env->dst_cpu, READ_ONCE(group->asym_prefer_cpu));
}
/* One group has more than one SMT CPU while the other group does not */
@@ -10493,7 +10506,8 @@ static bool update_sd_pick_busiest(struct lb_env *env,
case group_asym_packing:
/* Prefer to move from lowest priority CPU's work */
- return sched_asym_prefer(sds->busiest->asym_prefer_cpu, sg->asym_prefer_cpu);
+ return sched_asym_prefer(READ_ONCE(sds->busiest->asym_prefer_cpu),
+ READ_ONCE(sg->asym_prefer_cpu));
case group_misfit_task:
/*