Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--  mm/mempolicy.c | 92
1 file changed, 83 insertions(+), 9 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index eb83cff7db8c..68a98ba57882 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -85,6 +85,7 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/numa_balancing.h>
+#include <linux/sched/sysctl.h>
#include <linux/sched/task.h>
#include <linux/nodemask.h>
#include <linux/cpuset.h>
@@ -99,6 +100,7 @@
#include <linux/swap.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
+#include <linux/memory-tiers.h>
#include <linux/migrate.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
@@ -108,7 +110,7 @@
#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>
#include <linux/printk.h>
-#include <linux/swapops.h>
+#include <linux/leafops.h>
#include <linux/gcd.h>
#include <asm/tlbflush.h>
@@ -354,6 +356,7 @@ struct mempolicy *get_task_policy(struct task_struct *p)
return &default_policy;
}
+EXPORT_SYMBOL_FOR_MODULES(get_task_policy, "kvm");
static const struct mempolicy_operations {
int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
@@ -487,6 +490,7 @@ void __mpol_put(struct mempolicy *pol)
return;
kmem_cache_free(policy_cache, pol);
}
+EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes)
{
@@ -645,7 +649,7 @@ static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk)
struct folio *folio;
struct queue_pages *qp = walk->private;
- if (unlikely(is_pmd_migration_entry(*pmd))) {
+ if (unlikely(pmd_is_migration_entry(*pmd))) {
qp->nr_failed++;
return;
}
@@ -703,7 +707,9 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
if (pte_none(ptent))
continue;
if (!pte_present(ptent)) {
- if (is_migration_entry(pte_to_swp_entry(ptent)))
+ const softleaf_t entry = softleaf_from_pte(ptent);
+
+ if (softleaf_is_migration(entry))
qp->nr_failed++;
continue;
}
@@ -766,16 +772,21 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask,
unsigned long flags = qp->flags;
struct folio *folio;
spinlock_t *ptl;
- pte_t entry;
+ pte_t ptep;
ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
- entry = huge_ptep_get(walk->mm, addr, pte);
- if (!pte_present(entry)) {
- if (unlikely(is_hugetlb_entry_migration(entry)))
- qp->nr_failed++;
+ ptep = huge_ptep_get(walk->mm, addr, pte);
+ if (!pte_present(ptep)) {
+ if (!huge_pte_none(ptep)) {
+ const softleaf_t entry = softleaf_from_pte(ptep);
+
+ if (unlikely(softleaf_is_migration(entry)))
+ qp->nr_failed++;
+ }
+
goto unlock;
}
- folio = pfn_folio(pte_pfn(entry));
+ folio = pfn_folio(pte_pfn(ptep));
if (!queue_folio_required(folio, qp))
goto unlock;
if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ||
@@ -803,6 +814,65 @@ unlock:
}
#ifdef CONFIG_NUMA_BALANCING
+/**
+ * folio_can_map_prot_numa() - check whether the folio should be mapped prot numa
+ * @folio: The folio whose mapping is considered for being made NUMA hintable.
+ * @vma: The VMA that the folio belongs to.
+ * @is_private_single_threaded: True for a single-threaded process's private VMA.
+ *
+ * Check whether the folio actually needs its mapping changed to one that
+ * causes a NUMA hinting fault, as there are cases where this is simply
+ * unnecessary. If prot numa is needed, the folio's access time is also
+ * adjusted for memory tiering.
+ *
+ * Return: True if the mapping of the folio needs to be changed, false otherwise.
+ */
+bool folio_can_map_prot_numa(struct folio *folio, struct vm_area_struct *vma,
+ bool is_private_single_threaded)
+{
+ int nid;
+
+ if (!folio || folio_is_zone_device(folio) || folio_test_ksm(folio))
+ return false;
+
+ /* Also skip shared copy-on-write folios */
+ if (is_cow_mapping(vma->vm_flags) && folio_maybe_mapped_shared(folio))
+ return false;
+
+ /* Pinned folios can't be migrated, skip them */
+ if (folio_maybe_dma_pinned(folio))
+ return false;
+
+ /*
+ * While migration can move some dirty folios,
+ * it cannot move them all from MIGRATE_ASYNC
+ * context.
+ */
+ if (folio_is_file_lru(folio) && folio_test_dirty(folio))
+ return false;
+
+ /*
+ * Don't mess with PTEs if folio is already on the node
+ * a single-threaded process is running on.
+ */
+ nid = folio_nid(folio);
+ if (is_private_single_threaded && (nid == numa_node_id()))
+ return false;
+
+ /*
+ * Skip scanning top tier node if normal numa
+ * balancing is disabled
+ */
+ if (!(sysctl_numa_balancing_mode & NUMA_BALANCING_NORMAL) &&
+ node_is_toptier(nid))
+ return false;
+
+ if (folio_use_access_time(folio))
+ folio_xchg_access_time(folio, jiffies_to_msecs(jiffies));
+
+ return true;
+}
+
/*
* This is used to mark a range of virtual addresses to be inaccessible.
* These are later cleared by a NUMA hinting fault. Depending on these
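
Note: folio_can_map_prot_numa() is introduced as a non-static helper, so it is presumably consulted from the prot_numa PTE-update path elsewhere in the series (a change_pte_range()-style walk). The sketch below is illustrative only and not part of this patch: prot_numa_should_change_pte() is a hypothetical name, and the real caller's PTE rewriting, TLB flushing and locking are elided.

/*
 * Illustrative sketch (not part of the patch): how a prot_numa page-table
 * walk might consult folio_can_map_prot_numa() before making a PTE
 * inaccessible. prot_numa_should_change_pte() is a hypothetical helper.
 */
static bool prot_numa_should_change_pte(struct vm_area_struct *vma,
					unsigned long addr, pte_t oldpte,
					bool is_private_single_threaded)
{
	struct folio *folio;

	/* Already NUMA-protected: nothing to do for this PTE. */
	if (pte_protnone(oldpte))
		return false;

	folio = vm_normal_folio(vma, addr, oldpte);

	/*
	 * The helper rejects the cases where a hinting fault is pointless
	 * (NULL/zone-device/KSM folios, pinned folios, shared COW folios,
	 * dirty file folios, ...) and, when it returns true, also updates
	 * the folio's access time for memory tiering.
	 */
	return folio_can_map_prot_numa(folio, vma, is_private_single_threaded);
}
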
@@ -2885,6 +2955,7 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
read_unlock(&sp->lock);
return pol;
}
+EXPORT_SYMBOL_FOR_MODULES(mpol_shared_policy_lookup, "kvm");
static void sp_free(struct sp_node *n)
{
@@ -3170,6 +3241,7 @@ put_mpol:
mpol_put(mpol); /* drop our incoming ref on sb mpol */
}
}
+EXPORT_SYMBOL_FOR_MODULES(mpol_shared_policy_init, "kvm");
int mpol_set_shared_policy(struct shared_policy *sp,
struct vm_area_struct *vma, struct mempolicy *pol)
@@ -3188,6 +3260,7 @@ int mpol_set_shared_policy(struct shared_policy *sp,
sp_free(new);
return err;
}
+EXPORT_SYMBOL_FOR_MODULES(mpol_set_shared_policy, "kvm");
/* Free a backing policy store on inode delete. */
void mpol_free_shared_policy(struct shared_policy *sp)
@@ -3206,6 +3279,7 @@ void mpol_free_shared_policy(struct shared_policy *sp)
}
write_unlock(&sp->lock);
}
+EXPORT_SYMBOL_FOR_MODULES(mpol_free_shared_policy, "kvm");
#ifdef CONFIG_NUMA_BALANCING
static int __initdata numabalancing_override;
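
The queue_folios_pte_range() and queue_folios_hugetlb() hunks above follow the same leafops.h pattern: decode a non-present PTE once with softleaf_from_pte(), then classify the result (here with softleaf_is_migration()) instead of round-tripping through pte_to_swp_entry(). A minimal sketch of that pattern, not part of the patch, using the hypothetical helper name count_migration_ptes() and assuming the caller holds the PTE lock for the range:

/*
 * Illustrative sketch (not part of the patch): the softleaf decode-then-
 * classify pattern used in the hunks above. count_migration_ptes() is a
 * hypothetical helper name.
 */
static unsigned long count_migration_ptes(pte_t *pte, unsigned long nr)
{
	unsigned long i, nr_migration = 0;

	for (i = 0; i < nr; i++) {
		pte_t ptent = ptep_get(pte + i);

		if (pte_none(ptent) || pte_present(ptent))
			continue;

		/* Non-present and not none: decode the software leaf entry once... */
		const softleaf_t entry = softleaf_from_pte(ptent);

		/* ...then classify it, as queue_folios_pte_range() now does. */
		if (softleaf_is_migration(entry))
			nr_migration++;
	}

	return nr_migration;
}
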