-rw-r--r--  fs/exec.c                |  2
-rw-r--r--  include/linux/mm_types.h | 72
-rw-r--r--  kernel/fork.c            |  2
-rw-r--r--  kernel/sched/core.c      | 22
-rw-r--r--  kernel/sched/sched.h     | 48
5 files changed, 112 insertions, 34 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 6c53920795c2..aaa605529a75 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -990,7 +990,7 @@ static int exec_mmap(struct mm_struct *mm)
active_mm = tsk->active_mm;
tsk->active_mm = mm;
tsk->mm = mm;
- mm_init_cid(mm);
+ mm_init_cid(mm, tsk);
/*
* This prevents preemption while active_mm is being loaded and
* it and mm are being updated, which could cause problems for
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6e3bdf8e38bc..381d22eba088 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -782,6 +782,7 @@ struct vm_area_struct {
struct mm_cid {
u64 time;
int cid;
+ int recent_cid;
};
#endif
@@ -852,6 +853,27 @@ struct mm_struct {
* When the next mm_cid scan is due (in jiffies).
*/
unsigned long mm_cid_next_scan;
+ /**
+ * @nr_cpus_allowed: Number of CPUs allowed for mm.
+ *
+ * Number of CPUs in the union of the allowed CPU
+ * masks of all the mm's threads.
+ */
+ unsigned int nr_cpus_allowed;
+ /**
+ * @max_nr_cid: Maximum number of concurrency IDs allocated.
+ *
+ * Track the highest number of concurrency IDs allocated for the
+ * mm.
+ */
+ atomic_t max_nr_cid;
+ /**
+ * @cpus_allowed_lock: Lock protecting mm cpus_allowed.
+ *
+ * Provide mutual exclusion for mm cpus_allowed and
+ * mm nr_cpus_allowed updates.
+ */
+ raw_spinlock_t cpus_allowed_lock;
#endif
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* size of all page tables */
@@ -1170,18 +1192,30 @@ static inline int mm_cid_clear_lazy_put(int cid)
return cid & ~MM_CID_LAZY_PUT;
}
+/*
+ * mm_cpus_allowed: Union of the allowed CPU masks of all the mm's threads.
+ */
+static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm)
+{
+ unsigned long bitmap = (unsigned long)mm;
+
+ bitmap += offsetof(struct mm_struct, cpu_bitmap);
+ /* Skip cpu_bitmap */
+ bitmap += cpumask_size();
+ return (struct cpumask *)bitmap;
+}
+
/* Accessor for struct mm_struct's cidmask. */
static inline cpumask_t *mm_cidmask(struct mm_struct *mm)
{
- unsigned long cid_bitmap = (unsigned long)mm;
+ unsigned long cid_bitmap = (unsigned long)mm_cpus_allowed(mm);
- cid_bitmap += offsetof(struct mm_struct, cpu_bitmap);
- /* Skip cpu_bitmap */
+ /* Skip mm_cpus_allowed */
cid_bitmap += cpumask_size();
return (struct cpumask *)cid_bitmap;
}
-static inline void mm_init_cid(struct mm_struct *mm)
+static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
{
int i;
@@ -1189,17 +1223,22 @@ static inline void mm_init_cid(struct mm_struct *mm)
struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i);
pcpu_cid->cid = MM_CID_UNSET;
+ pcpu_cid->recent_cid = MM_CID_UNSET;
pcpu_cid->time = 0;
}
+ mm->nr_cpus_allowed = p->nr_cpus_allowed;
+ atomic_set(&mm->max_nr_cid, 0);
+ raw_spin_lock_init(&mm->cpus_allowed_lock);
+ cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask);
cpumask_clear(mm_cidmask(mm));
}
-static inline int mm_alloc_cid_noprof(struct mm_struct *mm)
+static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *p)
{
mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid);
if (!mm->pcpu_cid)
return -ENOMEM;
- mm_init_cid(mm);
+ mm_init_cid(mm, p);
return 0;
}
#define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__))
@@ -1212,16 +1251,31 @@ static inline void mm_destroy_cid(struct mm_struct *mm)
static inline unsigned int mm_cid_size(void)
{
- return cpumask_size();
+ return 2 * cpumask_size(); /* mm_cpus_allowed(), mm_cidmask(). */
+}
+
+static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask)
+{
+ struct cpumask *mm_allowed = mm_cpus_allowed(mm);
+
+ if (!mm)
+ return;
+ /* mm_cpus_allowed is the union of each thread's allowed CPUs mask. */
+ raw_spin_lock(&mm->cpus_allowed_lock);
+ cpumask_or(mm_allowed, mm_allowed, cpumask);
+ WRITE_ONCE(mm->nr_cpus_allowed, cpumask_weight(mm_allowed));
+ raw_spin_unlock(&mm->cpus_allowed_lock);
}
#else /* CONFIG_SCHED_MM_CID */
-static inline void mm_init_cid(struct mm_struct *mm) { }
-static inline int mm_alloc_cid(struct mm_struct *mm) { return 0; }
+static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { }
+static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; }
static inline void mm_destroy_cid(struct mm_struct *mm) { }
+
static inline unsigned int mm_cid_size(void)
{
return 0;
}
+static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { }
#endif /* CONFIG_SCHED_MM_CID */
struct mmu_gather;
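
The accessors above rely on how mm_struct is over-allocated: the cpu_bitmap flexible array at the end of the structure is now followed first by the mm_cpus_allowed mask and then by the cidmask, which is why mm_cid_size() grows to 2 * cpumask_size(). The following is a minimal userspace sketch of that offset arithmetic, not kernel code; the fake_mm type, the NR_CPUS value and the helper names are assumptions for illustration.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_CPUS      8                        /* assumed CPU count for the sketch */
#define CPUMASK_SIZE ((NR_CPUS + 7) / 8)      /* bytes per CPU mask */

struct fake_mm {
	int some_field;                       /* stands in for the fixed part of mm_struct */
	unsigned char cpu_bitmap[];           /* flexible trailing storage */
};

static unsigned char *fake_mm_cpus_allowed(struct fake_mm *mm)
{
	/* Skip cpu_bitmap. */
	return mm->cpu_bitmap + CPUMASK_SIZE;
}

static unsigned char *fake_mm_cidmask(struct fake_mm *mm)
{
	/* Skip mm_cpus_allowed. */
	return fake_mm_cpus_allowed(mm) + CPUMASK_SIZE;
}

int main(void)
{
	/* Fixed part + cpu_bitmap + mm_cpus_allowed + cidmask. */
	struct fake_mm *mm = calloc(1, sizeof(*mm) + 3 * CPUMASK_SIZE);

	if (!mm)
		return 1;
	memset(fake_mm_cpus_allowed(mm), 0xff, CPUMASK_SIZE);
	printf("cpus_allowed at offset %zu, cidmask at offset %zu\n",
	       (size_t)(fake_mm_cpus_allowed(mm) - (unsigned char *)mm),
	       (size_t)(fake_mm_cidmask(mm) - (unsigned char *)mm));
	free(mm);
	return 0;
}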
diff --git a/kernel/fork.c b/kernel/fork.c
index 89ceb4a68af2..7d950e93f080 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1298,7 +1298,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
if (init_new_context(p, mm))
goto fail_nocontext;
- if (mm_alloc_cid(mm))
+ if (mm_alloc_cid(mm, p))
goto fail_cid;
if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7db711ba6d12..f5ec452e2c5e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2696,6 +2696,7 @@ __do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx)
put_prev_task(rq, p);
p->sched_class->set_cpus_allowed(p, ctx);
+ mm_set_cpus_allowed(p->mm, ctx->new_mask);
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
@@ -10243,6 +10244,7 @@ int __sched_mm_cid_migrate_from_try_steal_cid(struct rq *src_rq,
*/
if (!try_cmpxchg(&src_pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
return -1;
+ WRITE_ONCE(src_pcpu_cid->recent_cid, MM_CID_UNSET);
return src_cid;
}
@@ -10255,7 +10257,8 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
{
struct mm_cid *src_pcpu_cid, *dst_pcpu_cid;
struct mm_struct *mm = t->mm;
- int src_cid, dst_cid, src_cpu;
+ int src_cid, src_cpu;
+ bool dst_cid_is_set;
struct rq *src_rq;
lockdep_assert_rq_held(dst_rq);
@@ -10272,9 +10275,9 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
* allocation closest to 0 in cases where few threads migrate around
* many CPUs.
*
- * If destination cid is already set, we may have to just clear
- * the src cid to ensure compactness in frequent migrations
- * scenarios.
+ * If destination cid or recent cid is already set, we may have
+ * to just clear the src cid to ensure compactness in frequent
+ * migration scenarios.
*
* It is not useful to clear the src cid when the number of threads is
* greater or equal to the number of allowed CPUs, because user-space
@@ -10282,9 +10285,9 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
* allowed CPUs.
*/
dst_pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(dst_rq));
- dst_cid = READ_ONCE(dst_pcpu_cid->cid);
- if (!mm_cid_is_unset(dst_cid) &&
- atomic_read(&mm->mm_users) >= t->nr_cpus_allowed)
+ dst_cid_is_set = !mm_cid_is_unset(READ_ONCE(dst_pcpu_cid->cid)) ||
+ !mm_cid_is_unset(READ_ONCE(dst_pcpu_cid->recent_cid));
+ if (dst_cid_is_set && atomic_read(&mm->mm_users) >= READ_ONCE(mm->nr_cpus_allowed))
return;
src_pcpu_cid = per_cpu_ptr(mm->pcpu_cid, src_cpu);
src_rq = cpu_rq(src_cpu);
@@ -10295,13 +10298,14 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
src_cid);
if (src_cid == -1)
return;
- if (!mm_cid_is_unset(dst_cid)) {
+ if (dst_cid_is_set) {
__mm_cid_put(mm, src_cid);
return;
}
/* Move src_cid to dst cpu. */
mm_cid_snapshot_time(dst_rq, mm);
WRITE_ONCE(dst_pcpu_cid->cid, src_cid);
+ WRITE_ONCE(dst_pcpu_cid->recent_cid, src_cid);
}
static void sched_mm_cid_remote_clear(struct mm_struct *mm, struct mm_cid *pcpu_cid,
@@ -10540,7 +10544,7 @@ void sched_mm_cid_after_execve(struct task_struct *t)
* Matches barrier in sched_mm_cid_remote_clear_old().
*/
smp_mb();
- t->last_mm_cid = t->mm_cid = mm_cid_get(rq, mm);
+ t->last_mm_cid = t->mm_cid = mm_cid_get(rq, t, mm);
}
rseq_set_notify_resume(t);
}
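
The mm_set_cpus_allowed() call added to __do_set_cpus_allowed() above feeds every new per-thread affinity into the per-mm union, so mm->nr_cpus_allowed only ever grows and ends up covering every CPU any thread of the process was allowed on. A minimal userspace sketch of that rule, assuming a toy toy_mm type and fixed 8-bit masks instead of cpumask_t:

#include <stdint.h>
#include <stdio.h>

struct toy_mm {
	uint8_t cpus_allowed;            /* union of all thread affinities */
	unsigned int nr_cpus_allowed;    /* weight of that union */
};

static unsigned int count_bits(uint8_t m)
{
	unsigned int n = 0;

	for (; m; m &= m - 1)
		n++;
	return n;
}

static void toy_mm_set_cpus_allowed(struct toy_mm *mm, uint8_t new_mask)
{
	/* Union, never intersection: restricting one thread later does not shrink it. */
	mm->cpus_allowed |= new_mask;
	mm->nr_cpus_allowed = count_bits(mm->cpus_allowed);
}

int main(void)
{
	struct toy_mm mm = { 0, 0 };

	toy_mm_set_cpus_allowed(&mm, 0x03);   /* thread A allowed on CPUs 0-1 */
	toy_mm_set_cpus_allowed(&mm, 0x0c);   /* thread B allowed on CPUs 2-3 */
	toy_mm_set_cpus_allowed(&mm, 0x01);   /* thread A later restricted to CPU 0 */
	printf("nr_cpus_allowed = %u\n", mm.nr_cpus_allowed);   /* prints 4 */
	return 0;
}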
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fba524c81c63..20b6e75604ec 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3596,24 +3596,41 @@ static inline void mm_cid_put(struct mm_struct *mm)
__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
}
-static inline int __mm_cid_try_get(struct mm_struct *mm)
+static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm)
{
- struct cpumask *cpumask;
- int cid;
+ struct cpumask *cidmask = mm_cidmask(mm);
+ struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
+ int cid = __this_cpu_read(pcpu_cid->recent_cid);
- cpumask = mm_cidmask(mm);
+ /* Try to re-use recent cid. This improves cache locality. */
+ if (!mm_cid_is_unset(cid) && !cpumask_test_and_set_cpu(cid, cidmask))
+ return cid;
+ /*
+ * Expand cid allocation if the maximum number of concurrency
+ * IDs allocated (max_nr_cid) is below both the number of allowed
+ * CPUs and the number of threads. Expanding cid allocation as much as
+ * possible improves cache locality.
+ */
+ cid = atomic_read(&mm->max_nr_cid);
+ while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) {
+ if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1))
+ continue;
+ if (!cpumask_test_and_set_cpu(cid, cidmask))
+ return cid;
+ }
/*
+ * Find the first available concurrency id.
* Retry finding first zero bit if the mask is temporarily
* filled. This only happens during concurrent remote-clear
* which owns a cid without holding a rq lock.
*/
for (;;) {
- cid = cpumask_first_zero(cpumask);
- if (cid < nr_cpu_ids)
+ cid = cpumask_first_zero(cidmask);
+ if (cid < READ_ONCE(mm->nr_cpus_allowed))
break;
cpu_relax();
}
- if (cpumask_test_and_set_cpu(cid, cpumask))
+ if (cpumask_test_and_set_cpu(cid, cidmask))
return -1;
return cid;
@@ -3631,7 +3648,8 @@ static inline void mm_cid_snapshot_time(struct rq *rq, struct mm_struct *mm)
WRITE_ONCE(pcpu_cid->time, rq->clock);
}
-static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
+static inline int __mm_cid_get(struct rq *rq, struct task_struct *t,
+ struct mm_struct *mm)
{
int cid;
@@ -3641,13 +3659,13 @@ static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
* guarantee forward progress.
*/
if (!READ_ONCE(use_cid_lock)) {
- cid = __mm_cid_try_get(mm);
+ cid = __mm_cid_try_get(t, mm);
if (cid >= 0)
goto end;
raw_spin_lock(&cid_lock);
} else {
raw_spin_lock(&cid_lock);
- cid = __mm_cid_try_get(mm);
+ cid = __mm_cid_try_get(t, mm);
if (cid >= 0)
goto unlock;
}
@@ -3667,7 +3685,7 @@ static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
* all newcoming allocations observe the use_cid_lock flag set.
*/
do {
- cid = __mm_cid_try_get(mm);
+ cid = __mm_cid_try_get(t, mm);
cpu_relax();
} while (cid < 0);
/*
@@ -3684,7 +3702,8 @@ end:
return cid;
}
-static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
+static inline int mm_cid_get(struct rq *rq, struct task_struct *t,
+ struct mm_struct *mm)
{
struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
struct cpumask *cpumask;
@@ -3701,8 +3720,9 @@ static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
__mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
}
- cid = __mm_cid_get(rq, mm);
+ cid = __mm_cid_get(rq, t, mm);
__this_cpu_write(pcpu_cid->cid, cid);
+ __this_cpu_write(pcpu_cid->recent_cid, cid);
return cid;
}
@@ -3755,7 +3775,7 @@ static inline void switch_mm_cid(struct rq *rq,
prev->mm_cid = -1;
}
if (next->mm_cid_active)
- next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm);
+ next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next, next->mm);
}
#else /* !CONFIG_SCHED_MM_CID: */
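
The allocation order implemented by __mm_cid_try_get() above can be read as three steps: re-use this CPU's recent cid when its bit is still free, otherwise hand out a new cid while max_nr_cid stays below both the number of allowed CPUs and the number of threads, otherwise fall back to the first free bit in the cid mask. Below is a single-threaded userspace sketch of that policy, not kernel code; the sim_mm type, the 64-bit mask and the helper names are assumptions, and the kernel performs the max_nr_cid update with atomic_try_cmpxchg() under concurrency.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct sim_mm {
	uint64_t cidmask;          /* bit i set => cid i is in use */
	int max_nr_cid;            /* number of cids handed out so far */
	int nr_cpus_allowed;       /* weight of the per-mm allowed mask */
	int nr_users;              /* number of threads using the mm */
};

/* Returns true if the cid was already taken, claiming it otherwise. */
static bool test_and_set_cid(struct sim_mm *mm, int cid)
{
	uint64_t bit = UINT64_C(1) << cid;
	bool was_set = mm->cidmask & bit;

	mm->cidmask |= bit;
	return was_set;
}

static int sim_cid_try_get(struct sim_mm *mm, int recent_cid)
{
	int cid;

	/* 1) Re-use the recent cid to keep cache locality. */
	if (recent_cid >= 0 && !test_and_set_cid(mm, recent_cid))
		return recent_cid;

	/* 2) Expand allocation while below min(nr_cpus_allowed, nr_users). */
	cid = mm->max_nr_cid;
	while (cid < mm->nr_cpus_allowed && cid < mm->nr_users) {
		mm->max_nr_cid = ++cid;
		if (!test_and_set_cid(mm, cid - 1))
			return cid - 1;
	}

	/* 3) Fall back to the first free cid below nr_cpus_allowed. */
	for (cid = 0; cid < mm->nr_cpus_allowed; cid++)
		if (!test_and_set_cid(mm, cid))
			return cid;
	return -1;
}

int main(void)
{
	struct sim_mm mm = { .cidmask = 0, .max_nr_cid = 0,
			     .nr_cpus_allowed = 4, .nr_users = 2 };
	int a = sim_cid_try_get(&mm, -1);   /* expansion step hands out cid 0 */
	int b = sim_cid_try_get(&mm, -1);   /* expansion step hands out cid 1 */

	printf("allocated cids %d and %d\n", a, b);
	mm.cidmask &= ~(UINT64_C(1) << a);  /* cid a is put back */
	printf("recent cid re-used: %d\n", sim_cid_try_get(&mm, a));
	return 0;
}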