summaryrefslogtreecommitdiff
path: root/mm/vmalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r--mm/vmalloc.c208
1 files changed, 107 insertions, 101 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 340edee108c0..ab986dd09b6a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -932,6 +932,11 @@ static struct vmap_node *vmap_nodes = &single;
static __read_mostly unsigned int nr_vmap_nodes = 1;
static __read_mostly unsigned int vmap_zone_size = 1;
+/* A simple iterator over all vmap-nodes. */
+#define for_each_vmap_node(vn) \
+ for ((vn) = &vmap_nodes[0]; \
+ (vn) < &vmap_nodes[nr_vmap_nodes]; (vn)++)
+
static inline unsigned int
addr_to_node_id(unsigned long addr)
{
@@ -950,6 +955,19 @@ id_to_node(unsigned int id)
return &vmap_nodes[id % nr_vmap_nodes];
}
+static inline unsigned int
+node_to_id(struct vmap_node *node)
+{
+ /* Pointer arithmetic. */
+ unsigned int id = node - vmap_nodes;
+
+ if (likely(id < nr_vmap_nodes))
+ return id;
+
+ WARN_ONCE(1, "An address 0x%p is out-of-bounds.\n", node);
+ return 0;
+}
+
/*
* We use the value 0 to represent "no node", that is why
* an encoded value will be the node-id incremented by 1.
@@ -1022,7 +1040,8 @@ static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
static void drain_vmap_area_work(struct work_struct *work);
static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
-static atomic_long_t nr_vmalloc_pages;
+static __cacheline_aligned_in_smp atomic_long_t nr_vmalloc_pages;
+static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr;
unsigned long vmalloc_nr_pages(void)
{
@@ -1088,12 +1107,11 @@ find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va)
{
unsigned long va_start_lowest;
struct vmap_node *vn;
- int i;
repeat:
- for (i = 0, va_start_lowest = 0; i < nr_vmap_nodes; i++) {
- vn = &vmap_nodes[i];
+ va_start_lowest = 0;
+ for_each_vmap_node(vn) {
spin_lock(&vn->busy.lock);
*va = __find_vmap_area_exceed_addr(addr, &vn->busy.root);
@@ -1730,7 +1748,7 @@ va_clip(struct rb_root *root, struct list_head *head,
*/
lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
if (!lva)
- return -1;
+ return -ENOMEM;
}
/*
@@ -1744,7 +1762,7 @@ va_clip(struct rb_root *root, struct list_head *head,
*/
va->va_start = nva_start_addr + size;
} else {
- return -1;
+ return -EINVAL;
}
if (type != FL_FIT_TYPE) {
@@ -1773,19 +1791,19 @@ va_alloc(struct vmap_area *va,
/* Check the "vend" restriction. */
if (nva_start_addr + size > vend)
- return vend;
+ return -ERANGE;
/* Update the free vmap_area. */
ret = va_clip(root, head, va, nva_start_addr, size);
if (WARN_ON_ONCE(ret))
- return vend;
+ return ret;
return nva_start_addr;
}
/*
* Returns a start address of the newly allocated area, if success.
- * Otherwise a vend is returned that indicates failure.
+ * Otherwise an error value is returned that indicates failure.
*/
static __always_inline unsigned long
__alloc_vmap_area(struct rb_root *root, struct list_head *head,
@@ -1810,14 +1828,13 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size);
if (unlikely(!va))
- return vend;
+ return -ENOENT;
nva_start_addr = va_alloc(va, root, head, size, align, vstart, vend);
- if (nva_start_addr == vend)
- return vend;
#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
- find_vmap_lowest_match_check(root, head, size, align);
+ if (!IS_ERR_VALUE(nva_start_addr))
+ find_vmap_lowest_match_check(root, head, size, align);
#endif
return nva_start_addr;
@@ -1947,7 +1964,7 @@ node_alloc(unsigned long size, unsigned long align,
struct vmap_area *va;
*vn_id = 0;
- *addr = vend;
+ *addr = -EINVAL;
/*
* Fallback to a global heap if not vmalloc or there
@@ -2027,20 +2044,20 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
}
retry:
- if (addr == vend) {
+ if (IS_ERR_VALUE(addr)) {
preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
size, align, vstart, vend);
spin_unlock(&free_vmap_area_lock);
}
- trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend);
+ trace_alloc_vmap_area(addr, size, align, vstart, vend, IS_ERR_VALUE(addr));
/*
- * If an allocation fails, the "vend" address is
+ * If an allocation fails, the error value is
* returned. Therefore trigger the overflow path.
*/
- if (unlikely(addr == vend))
+ if (IS_ERR_VALUE(addr))
goto overflow;
va->va_start = addr;
@@ -2132,8 +2149,6 @@ static unsigned long lazy_max_pages(void)
return log * (32UL * 1024 * 1024 / PAGE_SIZE);
}
-static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
-
/*
* Serialize vmap purging. There is no actual critical section protected
* by this lock, but we want to avoid concurrent calls for performance
@@ -2143,7 +2158,6 @@ static DEFINE_MUTEX(vmap_purge_lock);
/* for per-CPU blocks */
static void purge_fragmented_blocks_allcpus(void);
-static cpumask_t purge_nodes;
static void
reclaim_list_global(struct list_head *head)
@@ -2166,7 +2180,7 @@ decay_va_pool_node(struct vmap_node *vn, bool full_decay)
LIST_HEAD(decay_list);
struct rb_root decay_root = RB_ROOT;
struct vmap_area *va, *nva;
- unsigned long n_decay;
+ unsigned long n_decay, pool_len;
int i;
for (i = 0; i < MAX_VA_SIZE_PAGES; i++) {
@@ -2180,22 +2194,20 @@ decay_va_pool_node(struct vmap_node *vn, bool full_decay)
list_replace_init(&vn->pool[i].head, &tmp_list);
spin_unlock(&vn->pool_lock);
- if (full_decay)
- WRITE_ONCE(vn->pool[i].len, 0);
+ pool_len = n_decay = vn->pool[i].len;
+ WRITE_ONCE(vn->pool[i].len, 0);
/* Decay a pool by ~25% out of left objects. */
- n_decay = vn->pool[i].len >> 2;
+ if (!full_decay)
+ n_decay >>= 2;
+ pool_len -= n_decay;
list_for_each_entry_safe(va, nva, &tmp_list, list) {
+ if (!n_decay--)
+ break;
+
list_del_init(&va->list);
merge_or_add_vmap_area(va, &decay_root, &decay_list);
-
- if (!full_decay) {
- WRITE_ONCE(vn->pool[i].len, vn->pool[i].len - 1);
-
- if (!--n_decay)
- break;
- }
}
/*
@@ -2204,9 +2216,10 @@ decay_va_pool_node(struct vmap_node *vn, bool full_decay)
* can populate the pool therefore a simple list replace
* operation takes place here.
*/
- if (!full_decay && !list_empty(&tmp_list)) {
+ if (!list_empty(&tmp_list)) {
spin_lock(&vn->pool_lock);
list_replace_init(&tmp_list, &vn->pool[i].head);
+ WRITE_ONCE(vn->pool[i].len, pool_len);
spin_unlock(&vn->pool_lock);
}
}
@@ -2276,6 +2289,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end,
{
unsigned long nr_purged_areas = 0;
unsigned int nr_purge_helpers;
+ static cpumask_t purge_nodes;
unsigned int nr_purge_nodes;
struct vmap_node *vn;
int i;
@@ -2287,9 +2301,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end,
*/
purge_nodes = CPU_MASK_NONE;
- for (i = 0; i < nr_vmap_nodes; i++) {
- vn = &vmap_nodes[i];
-
+ for_each_vmap_node(vn) {
INIT_LIST_HEAD(&vn->purge_list);
vn->skip_populate = full_pool_decay;
decay_va_pool_node(vn, full_pool_decay);
@@ -2308,7 +2320,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end,
end = max(end, list_last_entry(&vn->purge_list,
struct vmap_area, list)->va_end);
- cpumask_set_cpu(i, &purge_nodes);
+ cpumask_set_cpu(node_to_id(vn), &purge_nodes);
}
nr_purge_nodes = cpumask_weight(&purge_nodes);
@@ -2387,7 +2399,7 @@ static void free_vmap_area_noflush(struct vmap_area *va)
if (WARN_ON_ONCE(!list_empty(&va->list)))
return;
- nr_lazy = atomic_long_add_return(va_size(va) >> PAGE_SHIFT,
+ nr_lazy = atomic_long_add_return_relaxed(va_size(va) >> PAGE_SHIFT,
&vmap_lazy_nr);
/*
@@ -2453,7 +2465,7 @@ struct vmap_area *find_vmap_area(unsigned long addr)
if (va)
return va;
- } while ((i = (i + 1) % nr_vmap_nodes) != j);
+ } while ((i = (i + nr_vmap_nodes - 1) % nr_vmap_nodes) != j);
return NULL;
}
@@ -2479,7 +2491,7 @@ static struct vmap_area *find_unlink_vmap_area(unsigned long addr)
if (va)
return va;
- } while ((i = (i + 1) % nr_vmap_nodes) != j);
+ } while ((i = (i + nr_vmap_nodes - 1) % nr_vmap_nodes) != j);
return NULL;
}
@@ -2948,10 +2960,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
*/
void vm_unmap_aliases(void)
{
- unsigned long start = ULONG_MAX, end = 0;
- int flush = 0;
-
- _vm_unmap_aliases(start, end, flush);
+ _vm_unmap_aliases(ULONG_MAX, 0, 0);
}
EXPORT_SYMBOL_GPL(vm_unmap_aliases);
@@ -3132,7 +3141,7 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm)
/*
* Before removing VM_UNINITIALIZED,
* we should make sure that vm has proper values.
- * Pair with smp_rmb() in show_numa_info().
+ * Pair with smp_rmb() in vread_iter() and vmalloc_info_show().
*/
smp_wmb();
vm->flags &= ~VM_UNINITIALIZED;
@@ -3403,12 +3412,13 @@ void vfree(const void *addr)
if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
vm_reset_perms(vm);
+ /* All pages of vm should be charged to same memcg, so use first one. */
+ if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES))
+ mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages);
for (i = 0; i < vm->nr_pages; i++) {
struct page *page = vm->pages[i];
BUG_ON(!page);
- if (!(vm->flags & VM_MAP_PUT_PAGES))
- mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
/*
* High-order allocs for huge vmallocs are split, so
* can be freed as an array of order-0 allocations
@@ -3704,12 +3714,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
node, page_order, nr_small_pages, area->pages);
atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
- if (gfp_mask & __GFP_ACCOUNT) {
- int i;
-
- for (i = 0; i < area->nr_pages; i++)
- mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
- }
+ /* All pages of vm should be charged to same memcg, so use first one. */
+ if (gfp_mask & __GFP_ACCOUNT && area->nr_pages)
+ mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC,
+ area->nr_pages);
/*
* If not enough pages were obtained to accomplish an
@@ -4967,39 +4975,37 @@ bool vmalloc_dump_obj(void *object)
#endif
#ifdef CONFIG_PROC_FS
-static void show_numa_info(struct seq_file *m, struct vm_struct *v)
-{
- if (IS_ENABLED(CONFIG_NUMA)) {
- unsigned int nr, *counters = m->private;
- unsigned int step = 1U << vm_area_page_order(v);
- if (!counters)
- return;
+/*
+ * Print number of pages allocated on each memory node.
+ *
+ * This function can only be called if CONFIG_NUMA is enabled
+ * and VM_UNINITIALIZED bit in v->flags is disabled.
+ */
+static void show_numa_info(struct seq_file *m, struct vm_struct *v,
+ unsigned int *counters)
+{
+ unsigned int nr;
+ unsigned int step = 1U << vm_area_page_order(v);
- if (v->flags & VM_UNINITIALIZED)
- return;
- /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
- smp_rmb();
+ if (!counters)
+ return;
- memset(counters, 0, nr_node_ids * sizeof(unsigned int));
+ memset(counters, 0, nr_node_ids * sizeof(unsigned int));
- for (nr = 0; nr < v->nr_pages; nr += step)
- counters[page_to_nid(v->pages[nr])] += step;
- for_each_node_state(nr, N_HIGH_MEMORY)
- if (counters[nr])
- seq_printf(m, " N%u=%u", nr, counters[nr]);
- }
+ for (nr = 0; nr < v->nr_pages; nr += step)
+ counters[page_to_nid(v->pages[nr])] += step;
+ for_each_node_state(nr, N_HIGH_MEMORY)
+ if (counters[nr])
+ seq_printf(m, " N%u=%u", nr, counters[nr]);
}
static void show_purge_info(struct seq_file *m)
{
struct vmap_node *vn;
struct vmap_area *va;
- int i;
-
- for (i = 0; i < nr_vmap_nodes; i++) {
- vn = &vmap_nodes[i];
+ for_each_vmap_node(vn) {
spin_lock(&vn->lazy.lock);
list_for_each_entry(va, &vn->lazy.head, list) {
seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
@@ -5015,11 +5021,12 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
struct vmap_node *vn;
struct vmap_area *va;
struct vm_struct *v;
- int i;
+ unsigned int *counters;
- for (i = 0; i < nr_vmap_nodes; i++) {
- vn = &vmap_nodes[i];
+ if (IS_ENABLED(CONFIG_NUMA))
+ counters = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
+ for_each_vmap_node(vn) {
spin_lock(&vn->busy.lock);
list_for_each_entry(va, &vn->busy.head, list) {
if (!va->vm) {
@@ -5032,6 +5039,11 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
}
v = va->vm;
+ if (v->flags & VM_UNINITIALIZED)
+ continue;
+
+ /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
+ smp_rmb();
seq_printf(m, "0x%pK-0x%pK %7ld",
v->addr, v->addr + v->size, v->size);
@@ -5066,7 +5078,9 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
if (is_vmalloc_addr(v->pages))
seq_puts(m, " vpages");
- show_numa_info(m, v);
+ if (IS_ENABLED(CONFIG_NUMA))
+ show_numa_info(m, v, counters);
+
seq_putc(m, '\n');
}
spin_unlock(&vn->busy.lock);
@@ -5076,19 +5090,14 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
* As a final step, dump "unpurged" areas.
*/
show_purge_info(m);
+ if (IS_ENABLED(CONFIG_NUMA))
+ kfree(counters);
return 0;
}
static int __init proc_vmalloc_init(void)
{
- void *priv_data = NULL;
-
- if (IS_ENABLED(CONFIG_NUMA))
- priv_data = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
-
- proc_create_single_data("vmallocinfo",
- 0400, NULL, vmalloc_info_show, priv_data);
-
+ proc_create_single("vmallocinfo", 0400, NULL, vmalloc_info_show);
return 0;
}
module_init(proc_vmalloc_init);
@@ -5140,7 +5149,7 @@ static void __init vmap_init_free_space(void)
static void vmap_init_nodes(void)
{
struct vmap_node *vn;
- int i, n;
+ int i;
#if BITS_PER_LONG == 64
/*
@@ -5157,7 +5166,7 @@ static void vmap_init_nodes(void)
* set of cores. Therefore a per-domain purging is supposed to
* be added as well as a per-domain balancing.
*/
- n = clamp_t(unsigned int, num_possible_cpus(), 1, 128);
+ int n = clamp_t(unsigned int, num_possible_cpus(), 1, 128);
if (n > 1) {
vn = kmalloc_array(n, sizeof(*vn), GFP_NOWAIT | __GFP_NOWARN);
@@ -5172,8 +5181,7 @@ static void vmap_init_nodes(void)
}
#endif
- for (n = 0; n < nr_vmap_nodes; n++) {
- vn = &vmap_nodes[n];
+ for_each_vmap_node(vn) {
vn->busy.root = RB_ROOT;
INIT_LIST_HEAD(&vn->busy.head);
spin_lock_init(&vn->busy.lock);
@@ -5194,15 +5202,13 @@ static void vmap_init_nodes(void)
static unsigned long
vmap_node_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
- unsigned long count;
+ unsigned long count = 0;
struct vmap_node *vn;
- int i, j;
-
- for (count = 0, i = 0; i < nr_vmap_nodes; i++) {
- vn = &vmap_nodes[i];
+ int i;
- for (j = 0; j < MAX_VA_SIZE_PAGES; j++)
- count += READ_ONCE(vn->pool[j].len);
+ for_each_vmap_node(vn) {
+ for (i = 0; i < MAX_VA_SIZE_PAGES; i++)
+ count += READ_ONCE(vn->pool[i].len);
}
return count ? count : SHRINK_EMPTY;
@@ -5211,10 +5217,10 @@ vmap_node_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
static unsigned long
vmap_node_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
- int i;
+ struct vmap_node *vn;
- for (i = 0; i < nr_vmap_nodes; i++)
- decay_va_pool_node(&vmap_nodes[i], true);
+ for_each_vmap_node(vn)
+ decay_va_pool_node(vn, true);
return SHRINK_STOP;
}