diff options
Diffstat (limited to 'mm/hugetlb.c')
| -rw-r--r-- | mm/hugetlb.c | 596 |
1 files changed, 11 insertions, 585 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index ac5ce2b2b87d..26b2a319b002 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -19,7 +19,6 @@ #include <linux/mutex.h> #include <linux/memblock.h> #include <linux/minmax.h> -#include <linux/sysfs.h> #include <linux/slab.h> #include <linux/sched/mm.h> #include <linux/mmdebug.h> @@ -46,13 +45,12 @@ #include <asm/setup.h> #include <linux/io.h> -#include <linux/hugetlb.h> -#include <linux/hugetlb_cgroup.h> #include <linux/node.h> #include <linux/page_owner.h> #include "internal.h" #include "hugetlb_vmemmap.h" #include "hugetlb_cma.h" +#include "hugetlb_internal.h" #include <linux/page-isolation.h> int hugetlb_max_hstate __read_mostly; @@ -134,17 +132,6 @@ static void hugetlb_free_folio(struct folio *folio) folio_put(folio); } -/* - * Check if the hstate represents gigantic pages but gigantic page - * runtime support is not available. This is a common condition used to - * skip operations that cannot be performed on gigantic pages when runtime - * support is disabled. - */ -static inline bool hstate_is_gigantic_no_runtime(struct hstate *h) -{ - return hstate_is_gigantic(h) && !gigantic_page_runtime_supported(); -} - static inline bool subpool_is_free(struct hugepage_subpool *spool) { if (spool->count) @@ -1431,77 +1418,6 @@ err: return NULL; } -/* - * common helper functions for hstate_next_node_to_{alloc|free}. - * We may have allocated or freed a huge page based on a different - * nodes_allowed previously, so h->next_node_to_{alloc|free} might - * be outside of *nodes_allowed. Ensure that we use an allowed - * node for alloc or free. - */ -static int next_node_allowed(int nid, nodemask_t *nodes_allowed) -{ - nid = next_node_in(nid, *nodes_allowed); - VM_BUG_ON(nid >= MAX_NUMNODES); - - return nid; -} - -static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed) -{ - if (!node_isset(nid, *nodes_allowed)) - nid = next_node_allowed(nid, nodes_allowed); - return nid; -} - -/* - * returns the previously saved node ["this node"] from which to - * allocate a persistent huge page for the pool and advance the - * next node from which to allocate, handling wrap at end of node - * mask. - */ -static int hstate_next_node_to_alloc(int *next_node, - nodemask_t *nodes_allowed) -{ - int nid; - - VM_BUG_ON(!nodes_allowed); - - nid = get_valid_node_allowed(*next_node, nodes_allowed); - *next_node = next_node_allowed(nid, nodes_allowed); - - return nid; -} - -/* - * helper for remove_pool_hugetlb_folio() - return the previously saved - * node ["this node"] from which to free a huge page. Advance the - * next node id whether or not we find a free huge page to free so - * that the next attempt to free addresses the next node. - */ -static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed) -{ - int nid; - - VM_BUG_ON(!nodes_allowed); - - nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed); - h->next_nid_to_free = next_node_allowed(nid, nodes_allowed); - - return nid; -} - -#define for_each_node_mask_to_alloc(next_node, nr_nodes, node, mask) \ - for (nr_nodes = nodes_weight(*mask); \ - nr_nodes > 0 && \ - ((node = hstate_next_node_to_alloc(next_node, mask)) || 1); \ - nr_nodes--) - -#define for_each_node_mask_to_free(hs, nr_nodes, node, mask) \ - for (nr_nodes = nodes_weight(*mask); \ - nr_nodes > 0 && \ - ((node = hstate_next_node_to_free(hs, mask)) || 1); \ - nr_nodes--) - #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE #ifdef CONFIG_CONTIG_ALLOC static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, @@ -1557,8 +1473,8 @@ static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid, * * Must be called with hugetlb lock held. */ -static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, - bool adjust_surplus) +void remove_hugetlb_folio(struct hstate *h, struct folio *folio, + bool adjust_surplus) { int nid = folio_nid(folio); @@ -1593,8 +1509,8 @@ static void remove_hugetlb_folio(struct hstate *h, struct folio *folio, h->nr_huge_pages_node[nid]--; } -static void add_hugetlb_folio(struct hstate *h, struct folio *folio, - bool adjust_surplus) +void add_hugetlb_folio(struct hstate *h, struct folio *folio, + bool adjust_surplus) { int nid = folio_nid(folio); @@ -1925,7 +1841,7 @@ static void account_new_hugetlb_folio(struct hstate *h, struct folio *folio) h->nr_huge_pages_node[folio_nid(folio)]++; } -static void init_new_hugetlb_folio(struct folio *folio) +void init_new_hugetlb_folio(struct folio *folio) { __folio_set_hugetlb(folio); INIT_LIST_HEAD(&folio->lru); @@ -2037,8 +1953,8 @@ static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h, return folio; } -static void prep_and_add_allocated_folios(struct hstate *h, - struct list_head *folio_list) +void prep_and_add_allocated_folios(struct hstate *h, + struct list_head *folio_list) { unsigned long flags; struct folio *folio, *tmp_f; @@ -4093,8 +4009,8 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst, return rc; } -static long demote_pool_huge_page(struct hstate *src, nodemask_t *nodes_allowed, - unsigned long nr_to_demote) +long demote_pool_huge_page(struct hstate *src, nodemask_t *nodes_allowed, + unsigned long nr_to_demote) __must_hold(&hugetlb_lock) { int nr_nodes, node; @@ -4162,51 +4078,7 @@ static long demote_pool_huge_page(struct hstate *src, nodemask_t *nodes_allowed, return -EBUSY; } -#define HSTATE_ATTR_RO(_name) \ - static struct kobj_attribute _name##_attr = __ATTR_RO(_name) - -#define HSTATE_ATTR_WO(_name) \ - static struct kobj_attribute _name##_attr = __ATTR_WO(_name) - -#define HSTATE_ATTR(_name) \ - static struct kobj_attribute _name##_attr = __ATTR_RW(_name) - -static struct kobject *hugepages_kobj; -static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE]; - -static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp); - -static struct hstate *kobj_to_hstate(struct kobject *kobj, int *nidp) -{ - int i; - - for (i = 0; i < HUGE_MAX_HSTATE; i++) - if (hstate_kobjs[i] == kobj) { - if (nidp) - *nidp = NUMA_NO_NODE; - return &hstates[i]; - } - - return kobj_to_node_hstate(kobj, nidp); -} - -static ssize_t nr_hugepages_show_common(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h; - unsigned long nr_huge_pages; - int nid; - - h = kobj_to_hstate(kobj, &nid); - if (nid == NUMA_NO_NODE) - nr_huge_pages = h->nr_huge_pages; - else - nr_huge_pages = h->nr_huge_pages_node[nid]; - - return sysfs_emit(buf, "%lu\n", nr_huge_pages); -} - -static ssize_t __nr_hugepages_store_common(bool obey_mempolicy, +ssize_t __nr_hugepages_store_common(bool obey_mempolicy, struct hstate *h, int nid, unsigned long count, size_t len) { @@ -4239,452 +4111,6 @@ static ssize_t __nr_hugepages_store_common(bool obey_mempolicy, return err ? err : len; } -static ssize_t nr_hugepages_store_common(bool obey_mempolicy, - struct kobject *kobj, const char *buf, - size_t len) -{ - struct hstate *h; - unsigned long count; - int nid; - int err; - - err = kstrtoul(buf, 10, &count); - if (err) - return err; - - h = kobj_to_hstate(kobj, &nid); - return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len); -} - -static ssize_t nr_hugepages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return nr_hugepages_show_common(kobj, attr, buf); -} - -static ssize_t nr_hugepages_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t len) -{ - return nr_hugepages_store_common(false, kobj, buf, len); -} -HSTATE_ATTR(nr_hugepages); - -#ifdef CONFIG_NUMA - -/* - * hstate attribute for optionally mempolicy-based constraint on persistent - * huge page alloc/free. - */ -static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - return nr_hugepages_show_common(kobj, attr, buf); -} - -static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t len) -{ - return nr_hugepages_store_common(true, kobj, buf, len); -} -HSTATE_ATTR(nr_hugepages_mempolicy); -#endif - - -static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h = kobj_to_hstate(kobj, NULL); - return sysfs_emit(buf, "%lu\n", h->nr_overcommit_huge_pages); -} - -static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t count) -{ - int err; - unsigned long input; - struct hstate *h = kobj_to_hstate(kobj, NULL); - - if (hstate_is_gigantic_no_runtime(h)) - return -EINVAL; - - err = kstrtoul(buf, 10, &input); - if (err) - return err; - - spin_lock_irq(&hugetlb_lock); - h->nr_overcommit_huge_pages = input; - spin_unlock_irq(&hugetlb_lock); - - return count; -} -HSTATE_ATTR(nr_overcommit_hugepages); - -static ssize_t free_hugepages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h; - unsigned long free_huge_pages; - int nid; - - h = kobj_to_hstate(kobj, &nid); - if (nid == NUMA_NO_NODE) - free_huge_pages = h->free_huge_pages; - else - free_huge_pages = h->free_huge_pages_node[nid]; - - return sysfs_emit(buf, "%lu\n", free_huge_pages); -} -HSTATE_ATTR_RO(free_hugepages); - -static ssize_t resv_hugepages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h = kobj_to_hstate(kobj, NULL); - return sysfs_emit(buf, "%lu\n", h->resv_huge_pages); -} -HSTATE_ATTR_RO(resv_hugepages); - -static ssize_t surplus_hugepages_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h; - unsigned long surplus_huge_pages; - int nid; - - h = kobj_to_hstate(kobj, &nid); - if (nid == NUMA_NO_NODE) - surplus_huge_pages = h->surplus_huge_pages; - else - surplus_huge_pages = h->surplus_huge_pages_node[nid]; - - return sysfs_emit(buf, "%lu\n", surplus_huge_pages); -} -HSTATE_ATTR_RO(surplus_hugepages); - -static ssize_t demote_store(struct kobject *kobj, - struct kobj_attribute *attr, const char *buf, size_t len) -{ - unsigned long nr_demote; - unsigned long nr_available; - nodemask_t nodes_allowed, *n_mask; - struct hstate *h; - int err; - int nid; - - err = kstrtoul(buf, 10, &nr_demote); - if (err) - return err; - h = kobj_to_hstate(kobj, &nid); - - if (nid != NUMA_NO_NODE) { - init_nodemask_of_node(&nodes_allowed, nid); - n_mask = &nodes_allowed; - } else { - n_mask = &node_states[N_MEMORY]; - } - - /* Synchronize with other sysfs operations modifying huge pages */ - mutex_lock(&h->resize_lock); - spin_lock_irq(&hugetlb_lock); - - while (nr_demote) { - long rc; - - /* - * Check for available pages to demote each time thorough the - * loop as demote_pool_huge_page will drop hugetlb_lock. - */ - if (nid != NUMA_NO_NODE) - nr_available = h->free_huge_pages_node[nid]; - else - nr_available = h->free_huge_pages; - nr_available -= h->resv_huge_pages; - if (!nr_available) - break; - - rc = demote_pool_huge_page(h, n_mask, nr_demote); - if (rc < 0) { - err = rc; - break; - } - - nr_demote -= rc; - } - - spin_unlock_irq(&hugetlb_lock); - mutex_unlock(&h->resize_lock); - - if (err) - return err; - return len; -} -HSTATE_ATTR_WO(demote); - -static ssize_t demote_size_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - struct hstate *h = kobj_to_hstate(kobj, NULL); - unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K; - - return sysfs_emit(buf, "%lukB\n", demote_size); -} - -static ssize_t demote_size_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t count) -{ - struct hstate *h, *demote_hstate; - unsigned long demote_size; - unsigned int demote_order; - - demote_size = (unsigned long)memparse(buf, NULL); - - demote_hstate = size_to_hstate(demote_size); - if (!demote_hstate) - return -EINVAL; - demote_order = demote_hstate->order; - if (demote_order < HUGETLB_PAGE_ORDER) - return -EINVAL; - - /* demote order must be smaller than hstate order */ - h = kobj_to_hstate(kobj, NULL); - if (demote_order >= h->order) - return -EINVAL; - - /* resize_lock synchronizes access to demote size and writes */ - mutex_lock(&h->resize_lock); - h->demote_order = demote_order; - mutex_unlock(&h->resize_lock); - - return count; -} -HSTATE_ATTR(demote_size); - -static struct attribute *hstate_attrs[] = { - &nr_hugepages_attr.attr, - &nr_overcommit_hugepages_attr.attr, - &free_hugepages_attr.attr, - &resv_hugepages_attr.attr, - &surplus_hugepages_attr.attr, -#ifdef CONFIG_NUMA - &nr_hugepages_mempolicy_attr.attr, -#endif - NULL, -}; - -static const struct attribute_group hstate_attr_group = { - .attrs = hstate_attrs, -}; - -static struct attribute *hstate_demote_attrs[] = { - &demote_size_attr.attr, - &demote_attr.attr, - NULL, -}; - -static const struct attribute_group hstate_demote_attr_group = { - .attrs = hstate_demote_attrs, -}; - -static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, - struct kobject **hstate_kobjs, - const struct attribute_group *hstate_attr_group) -{ - int retval; - int hi = hstate_index(h); - - hstate_kobjs[hi] = kobject_create_and_add(h->name, parent); - if (!hstate_kobjs[hi]) - return -ENOMEM; - - retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group); - if (retval) { - kobject_put(hstate_kobjs[hi]); - hstate_kobjs[hi] = NULL; - return retval; - } - - if (h->demote_order) { - retval = sysfs_create_group(hstate_kobjs[hi], - &hstate_demote_attr_group); - if (retval) { - pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name); - sysfs_remove_group(hstate_kobjs[hi], hstate_attr_group); - kobject_put(hstate_kobjs[hi]); - hstate_kobjs[hi] = NULL; - return retval; - } - } - - return 0; -} - -#ifdef CONFIG_NUMA -static bool hugetlb_sysfs_initialized __ro_after_init; - -/* - * node_hstate/s - associate per node hstate attributes, via their kobjects, - * with node devices in node_devices[] using a parallel array. The array - * index of a node device or _hstate == node id. - * This is here to avoid any static dependency of the node device driver, in - * the base kernel, on the hugetlb module. - */ -struct node_hstate { - struct kobject *hugepages_kobj; - struct kobject *hstate_kobjs[HUGE_MAX_HSTATE]; -}; -static struct node_hstate node_hstates[MAX_NUMNODES]; - -/* - * A subset of global hstate attributes for node devices - */ -static struct attribute *per_node_hstate_attrs[] = { - &nr_hugepages_attr.attr, - &free_hugepages_attr.attr, - &surplus_hugepages_attr.attr, - NULL, -}; - -static const struct attribute_group per_node_hstate_attr_group = { - .attrs = per_node_hstate_attrs, -}; - -/* - * kobj_to_node_hstate - lookup global hstate for node device hstate attr kobj. - * Returns node id via non-NULL nidp. - */ -static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp) -{ - int nid; - - for (nid = 0; nid < nr_node_ids; nid++) { - struct node_hstate *nhs = &node_hstates[nid]; - int i; - for (i = 0; i < HUGE_MAX_HSTATE; i++) - if (nhs->hstate_kobjs[i] == kobj) { - if (nidp) - *nidp = nid; - return &hstates[i]; - } - } - - BUG(); - return NULL; -} - -/* - * Unregister hstate attributes from a single node device. - * No-op if no hstate attributes attached. - */ -void hugetlb_unregister_node(struct node *node) -{ - struct hstate *h; - struct node_hstate *nhs = &node_hstates[node->dev.id]; - - if (!nhs->hugepages_kobj) - return; /* no hstate attributes */ - - for_each_hstate(h) { - int idx = hstate_index(h); - struct kobject *hstate_kobj = nhs->hstate_kobjs[idx]; - - if (!hstate_kobj) - continue; - if (h->demote_order) - sysfs_remove_group(hstate_kobj, &hstate_demote_attr_group); - sysfs_remove_group(hstate_kobj, &per_node_hstate_attr_group); - kobject_put(hstate_kobj); - nhs->hstate_kobjs[idx] = NULL; - } - - kobject_put(nhs->hugepages_kobj); - nhs->hugepages_kobj = NULL; -} - - -/* - * Register hstate attributes for a single node device. - * No-op if attributes already registered. - */ -void hugetlb_register_node(struct node *node) -{ - struct hstate *h; - struct node_hstate *nhs = &node_hstates[node->dev.id]; - int err; - - if (!hugetlb_sysfs_initialized) - return; - - if (nhs->hugepages_kobj) - return; /* already allocated */ - - nhs->hugepages_kobj = kobject_create_and_add("hugepages", - &node->dev.kobj); - if (!nhs->hugepages_kobj) - return; - - for_each_hstate(h) { - err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj, - nhs->hstate_kobjs, - &per_node_hstate_attr_group); - if (err) { - pr_err("HugeTLB: Unable to add hstate %s for node %d\n", - h->name, node->dev.id); - hugetlb_unregister_node(node); - break; - } - } -} - -/* - * hugetlb init time: register hstate attributes for all registered node - * devices of nodes that have memory. All on-line nodes should have - * registered their associated device by this time. - */ -static void __init hugetlb_register_all_nodes(void) -{ - int nid; - - for_each_online_node(nid) - hugetlb_register_node(node_devices[nid]); -} -#else /* !CONFIG_NUMA */ - -static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp) -{ - BUG(); - if (nidp) - *nidp = -1; - return NULL; -} - -static void hugetlb_register_all_nodes(void) { } - -#endif - -static void __init hugetlb_sysfs_init(void) -{ - struct hstate *h; - int err; - - hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj); - if (!hugepages_kobj) - return; - - for_each_hstate(h) { - err = hugetlb_sysfs_add_hstate(h, hugepages_kobj, - hstate_kobjs, &hstate_attr_group); - if (err) - pr_err("HugeTLB: Unable to add hstate %s\n", h->name); - } - -#ifdef CONFIG_NUMA - hugetlb_sysfs_initialized = true; -#endif - hugetlb_register_all_nodes(); -} - #ifdef CONFIG_SYSCTL static void hugetlb_sysctl_init(void); #else |
