diff options
Diffstat (limited to 'mm/vmalloc.c')
-rw-r--r-- | mm/vmalloc.c | 208 |
1 files changed, 107 insertions, 101 deletions
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 340edee108c0..ab986dd09b6a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -932,6 +932,11 @@ static struct vmap_node *vmap_nodes = &single; static __read_mostly unsigned int nr_vmap_nodes = 1; static __read_mostly unsigned int vmap_zone_size = 1; +/* A simple iterator over all vmap-nodes. */ +#define for_each_vmap_node(vn) \ + for ((vn) = &vmap_nodes[0]; \ + (vn) < &vmap_nodes[nr_vmap_nodes]; (vn)++) + static inline unsigned int addr_to_node_id(unsigned long addr) { @@ -950,6 +955,19 @@ id_to_node(unsigned int id) return &vmap_nodes[id % nr_vmap_nodes]; } +static inline unsigned int +node_to_id(struct vmap_node *node) +{ + /* Pointer arithmetic. */ + unsigned int id = node - vmap_nodes; + + if (likely(id < nr_vmap_nodes)) + return id; + + WARN_ONCE(1, "An address 0x%p is out-of-bounds.\n", node); + return 0; +} + /* * We use the value 0 to represent "no node", that is why * an encoded value will be the node-id incremented by 1. @@ -1022,7 +1040,8 @@ static BLOCKING_NOTIFIER_HEAD(vmap_notify_list); static void drain_vmap_area_work(struct work_struct *work); static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work); -static atomic_long_t nr_vmalloc_pages; +static __cacheline_aligned_in_smp atomic_long_t nr_vmalloc_pages; +static __cacheline_aligned_in_smp atomic_long_t vmap_lazy_nr; unsigned long vmalloc_nr_pages(void) { @@ -1088,12 +1107,11 @@ find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va) { unsigned long va_start_lowest; struct vmap_node *vn; - int i; repeat: - for (i = 0, va_start_lowest = 0; i < nr_vmap_nodes; i++) { - vn = &vmap_nodes[i]; + va_start_lowest = 0; + for_each_vmap_node(vn) { spin_lock(&vn->busy.lock); *va = __find_vmap_area_exceed_addr(addr, &vn->busy.root); @@ -1730,7 +1748,7 @@ va_clip(struct rb_root *root, struct list_head *head, */ lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT); if (!lva) - return -1; + return -ENOMEM; } /* @@ -1744,7 +1762,7 @@ va_clip(struct rb_root *root, struct list_head *head, */ va->va_start = nva_start_addr + size; } else { - return -1; + return -EINVAL; } if (type != FL_FIT_TYPE) { @@ -1773,19 +1791,19 @@ va_alloc(struct vmap_area *va, /* Check the "vend" restriction. */ if (nva_start_addr + size > vend) - return vend; + return -ERANGE; /* Update the free vmap_area. */ ret = va_clip(root, head, va, nva_start_addr, size); if (WARN_ON_ONCE(ret)) - return vend; + return ret; return nva_start_addr; } /* * Returns a start address of the newly allocated area, if success. - * Otherwise a vend is returned that indicates failure. + * Otherwise an error value is returned that indicates failure. */ static __always_inline unsigned long __alloc_vmap_area(struct rb_root *root, struct list_head *head, @@ -1810,14 +1828,13 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head, va = find_vmap_lowest_match(root, size, align, vstart, adjust_search_size); if (unlikely(!va)) - return vend; + return -ENOENT; nva_start_addr = va_alloc(va, root, head, size, align, vstart, vend); - if (nva_start_addr == vend) - return vend; #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK - find_vmap_lowest_match_check(root, head, size, align); + if (!IS_ERR_VALUE(nva_start_addr)) + find_vmap_lowest_match_check(root, head, size, align); #endif return nva_start_addr; @@ -1947,7 +1964,7 @@ node_alloc(unsigned long size, unsigned long align, struct vmap_area *va; *vn_id = 0; - *addr = vend; + *addr = -EINVAL; /* * Fallback to a global heap if not vmalloc or there @@ -2027,20 +2044,20 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, } retry: - if (addr == vend) { + if (IS_ERR_VALUE(addr)) { preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node); addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list, size, align, vstart, vend); spin_unlock(&free_vmap_area_lock); } - trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend); + trace_alloc_vmap_area(addr, size, align, vstart, vend, IS_ERR_VALUE(addr)); /* - * If an allocation fails, the "vend" address is + * If an allocation fails, the error value is * returned. Therefore trigger the overflow path. */ - if (unlikely(addr == vend)) + if (IS_ERR_VALUE(addr)) goto overflow; va->va_start = addr; @@ -2132,8 +2149,6 @@ static unsigned long lazy_max_pages(void) return log * (32UL * 1024 * 1024 / PAGE_SIZE); } -static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0); - /* * Serialize vmap purging. There is no actual critical section protected * by this lock, but we want to avoid concurrent calls for performance @@ -2143,7 +2158,6 @@ static DEFINE_MUTEX(vmap_purge_lock); /* for per-CPU blocks */ static void purge_fragmented_blocks_allcpus(void); -static cpumask_t purge_nodes; static void reclaim_list_global(struct list_head *head) @@ -2166,7 +2180,7 @@ decay_va_pool_node(struct vmap_node *vn, bool full_decay) LIST_HEAD(decay_list); struct rb_root decay_root = RB_ROOT; struct vmap_area *va, *nva; - unsigned long n_decay; + unsigned long n_decay, pool_len; int i; for (i = 0; i < MAX_VA_SIZE_PAGES; i++) { @@ -2180,22 +2194,20 @@ decay_va_pool_node(struct vmap_node *vn, bool full_decay) list_replace_init(&vn->pool[i].head, &tmp_list); spin_unlock(&vn->pool_lock); - if (full_decay) - WRITE_ONCE(vn->pool[i].len, 0); + pool_len = n_decay = vn->pool[i].len; + WRITE_ONCE(vn->pool[i].len, 0); /* Decay a pool by ~25% out of left objects. */ - n_decay = vn->pool[i].len >> 2; + if (!full_decay) + n_decay >>= 2; + pool_len -= n_decay; list_for_each_entry_safe(va, nva, &tmp_list, list) { + if (!n_decay--) + break; + list_del_init(&va->list); merge_or_add_vmap_area(va, &decay_root, &decay_list); - - if (!full_decay) { - WRITE_ONCE(vn->pool[i].len, vn->pool[i].len - 1); - - if (!--n_decay) - break; - } } /* @@ -2204,9 +2216,10 @@ decay_va_pool_node(struct vmap_node *vn, bool full_decay) * can populate the pool therefore a simple list replace * operation takes place here. */ - if (!full_decay && !list_empty(&tmp_list)) { + if (!list_empty(&tmp_list)) { spin_lock(&vn->pool_lock); list_replace_init(&tmp_list, &vn->pool[i].head); + WRITE_ONCE(vn->pool[i].len, pool_len); spin_unlock(&vn->pool_lock); } } @@ -2276,6 +2289,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end, { unsigned long nr_purged_areas = 0; unsigned int nr_purge_helpers; + static cpumask_t purge_nodes; unsigned int nr_purge_nodes; struct vmap_node *vn; int i; @@ -2287,9 +2301,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end, */ purge_nodes = CPU_MASK_NONE; - for (i = 0; i < nr_vmap_nodes; i++) { - vn = &vmap_nodes[i]; - + for_each_vmap_node(vn) { INIT_LIST_HEAD(&vn->purge_list); vn->skip_populate = full_pool_decay; decay_va_pool_node(vn, full_pool_decay); @@ -2308,7 +2320,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end, end = max(end, list_last_entry(&vn->purge_list, struct vmap_area, list)->va_end); - cpumask_set_cpu(i, &purge_nodes); + cpumask_set_cpu(node_to_id(vn), &purge_nodes); } nr_purge_nodes = cpumask_weight(&purge_nodes); @@ -2387,7 +2399,7 @@ static void free_vmap_area_noflush(struct vmap_area *va) if (WARN_ON_ONCE(!list_empty(&va->list))) return; - nr_lazy = atomic_long_add_return(va_size(va) >> PAGE_SHIFT, + nr_lazy = atomic_long_add_return_relaxed(va_size(va) >> PAGE_SHIFT, &vmap_lazy_nr); /* @@ -2453,7 +2465,7 @@ struct vmap_area *find_vmap_area(unsigned long addr) if (va) return va; - } while ((i = (i + 1) % nr_vmap_nodes) != j); + } while ((i = (i + nr_vmap_nodes - 1) % nr_vmap_nodes) != j); return NULL; } @@ -2479,7 +2491,7 @@ static struct vmap_area *find_unlink_vmap_area(unsigned long addr) if (va) return va; - } while ((i = (i + 1) % nr_vmap_nodes) != j); + } while ((i = (i + nr_vmap_nodes - 1) % nr_vmap_nodes) != j); return NULL; } @@ -2948,10 +2960,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) */ void vm_unmap_aliases(void) { - unsigned long start = ULONG_MAX, end = 0; - int flush = 0; - - _vm_unmap_aliases(start, end, flush); + _vm_unmap_aliases(ULONG_MAX, 0, 0); } EXPORT_SYMBOL_GPL(vm_unmap_aliases); @@ -3132,7 +3141,7 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm) /* * Before removing VM_UNINITIALIZED, * we should make sure that vm has proper values. - * Pair with smp_rmb() in show_numa_info(). + * Pair with smp_rmb() in vread_iter() and vmalloc_info_show(). */ smp_wmb(); vm->flags &= ~VM_UNINITIALIZED; @@ -3403,12 +3412,13 @@ void vfree(const void *addr) if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS)) vm_reset_perms(vm); + /* All pages of vm should be charged to same memcg, so use first one. */ + if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES)) + mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages); for (i = 0; i < vm->nr_pages; i++) { struct page *page = vm->pages[i]; BUG_ON(!page); - if (!(vm->flags & VM_MAP_PUT_PAGES)) - mod_memcg_page_state(page, MEMCG_VMALLOC, -1); /* * High-order allocs for huge vmallocs are split, so * can be freed as an array of order-0 allocations @@ -3704,12 +3714,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, node, page_order, nr_small_pages, area->pages); atomic_long_add(area->nr_pages, &nr_vmalloc_pages); - if (gfp_mask & __GFP_ACCOUNT) { - int i; - - for (i = 0; i < area->nr_pages; i++) - mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1); - } + /* All pages of vm should be charged to same memcg, so use first one. */ + if (gfp_mask & __GFP_ACCOUNT && area->nr_pages) + mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC, + area->nr_pages); /* * If not enough pages were obtained to accomplish an @@ -4967,39 +4975,37 @@ bool vmalloc_dump_obj(void *object) #endif #ifdef CONFIG_PROC_FS -static void show_numa_info(struct seq_file *m, struct vm_struct *v) -{ - if (IS_ENABLED(CONFIG_NUMA)) { - unsigned int nr, *counters = m->private; - unsigned int step = 1U << vm_area_page_order(v); - if (!counters) - return; +/* + * Print number of pages allocated on each memory node. + * + * This function can only be called if CONFIG_NUMA is enabled + * and VM_UNINITIALIZED bit in v->flags is disabled. + */ +static void show_numa_info(struct seq_file *m, struct vm_struct *v, + unsigned int *counters) +{ + unsigned int nr; + unsigned int step = 1U << vm_area_page_order(v); - if (v->flags & VM_UNINITIALIZED) - return; - /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ - smp_rmb(); + if (!counters) + return; - memset(counters, 0, nr_node_ids * sizeof(unsigned int)); + memset(counters, 0, nr_node_ids * sizeof(unsigned int)); - for (nr = 0; nr < v->nr_pages; nr += step) - counters[page_to_nid(v->pages[nr])] += step; - for_each_node_state(nr, N_HIGH_MEMORY) - if (counters[nr]) - seq_printf(m, " N%u=%u", nr, counters[nr]); - } + for (nr = 0; nr < v->nr_pages; nr += step) + counters[page_to_nid(v->pages[nr])] += step; + for_each_node_state(nr, N_HIGH_MEMORY) + if (counters[nr]) + seq_printf(m, " N%u=%u", nr, counters[nr]); } static void show_purge_info(struct seq_file *m) { struct vmap_node *vn; struct vmap_area *va; - int i; - - for (i = 0; i < nr_vmap_nodes; i++) { - vn = &vmap_nodes[i]; + for_each_vmap_node(vn) { spin_lock(&vn->lazy.lock); list_for_each_entry(va, &vn->lazy.head, list) { seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n", @@ -5015,11 +5021,12 @@ static int vmalloc_info_show(struct seq_file *m, void *p) struct vmap_node *vn; struct vmap_area *va; struct vm_struct *v; - int i; + unsigned int *counters; - for (i = 0; i < nr_vmap_nodes; i++) { - vn = &vmap_nodes[i]; + if (IS_ENABLED(CONFIG_NUMA)) + counters = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); + for_each_vmap_node(vn) { spin_lock(&vn->busy.lock); list_for_each_entry(va, &vn->busy.head, list) { if (!va->vm) { @@ -5032,6 +5039,11 @@ static int vmalloc_info_show(struct seq_file *m, void *p) } v = va->vm; + if (v->flags & VM_UNINITIALIZED) + continue; + + /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */ + smp_rmb(); seq_printf(m, "0x%pK-0x%pK %7ld", v->addr, v->addr + v->size, v->size); @@ -5066,7 +5078,9 @@ static int vmalloc_info_show(struct seq_file *m, void *p) if (is_vmalloc_addr(v->pages)) seq_puts(m, " vpages"); - show_numa_info(m, v); + if (IS_ENABLED(CONFIG_NUMA)) + show_numa_info(m, v, counters); + seq_putc(m, '\n'); } spin_unlock(&vn->busy.lock); @@ -5076,19 +5090,14 @@ static int vmalloc_info_show(struct seq_file *m, void *p) * As a final step, dump "unpurged" areas. */ show_purge_info(m); + if (IS_ENABLED(CONFIG_NUMA)) + kfree(counters); return 0; } static int __init proc_vmalloc_init(void) { - void *priv_data = NULL; - - if (IS_ENABLED(CONFIG_NUMA)) - priv_data = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); - - proc_create_single_data("vmallocinfo", - 0400, NULL, vmalloc_info_show, priv_data); - + proc_create_single("vmallocinfo", 0400, NULL, vmalloc_info_show); return 0; } module_init(proc_vmalloc_init); @@ -5140,7 +5149,7 @@ static void __init vmap_init_free_space(void) static void vmap_init_nodes(void) { struct vmap_node *vn; - int i, n; + int i; #if BITS_PER_LONG == 64 /* @@ -5157,7 +5166,7 @@ static void vmap_init_nodes(void) * set of cores. Therefore a per-domain purging is supposed to * be added as well as a per-domain balancing. */ - n = clamp_t(unsigned int, num_possible_cpus(), 1, 128); + int n = clamp_t(unsigned int, num_possible_cpus(), 1, 128); if (n > 1) { vn = kmalloc_array(n, sizeof(*vn), GFP_NOWAIT | __GFP_NOWARN); @@ -5172,8 +5181,7 @@ static void vmap_init_nodes(void) } #endif - for (n = 0; n < nr_vmap_nodes; n++) { - vn = &vmap_nodes[n]; + for_each_vmap_node(vn) { vn->busy.root = RB_ROOT; INIT_LIST_HEAD(&vn->busy.head); spin_lock_init(&vn->busy.lock); @@ -5194,15 +5202,13 @@ static void vmap_init_nodes(void) static unsigned long vmap_node_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - unsigned long count; + unsigned long count = 0; struct vmap_node *vn; - int i, j; - - for (count = 0, i = 0; i < nr_vmap_nodes; i++) { - vn = &vmap_nodes[i]; + int i; - for (j = 0; j < MAX_VA_SIZE_PAGES; j++) - count += READ_ONCE(vn->pool[j].len); + for_each_vmap_node(vn) { + for (i = 0; i < MAX_VA_SIZE_PAGES; i++) + count += READ_ONCE(vn->pool[i].len); } return count ? count : SHRINK_EMPTY; @@ -5211,10 +5217,10 @@ vmap_node_shrink_count(struct shrinker *shrink, struct shrink_control *sc) static unsigned long vmap_node_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - int i; + struct vmap_node *vn; - for (i = 0; i < nr_vmap_nodes; i++) - decay_va_pool_node(&vmap_nodes[i], true); + for_each_vmap_node(vn) + decay_va_pool_node(vn, true); return SHRINK_STOP; } |