-rw-r--r--  Documentation/ABI/testing/sysfs-kernel-slab                                     |  5
-rw-r--r--  Documentation/admin-guide/kdump/vmcoreinfo.rst                                  |  8
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt                                 | 12
-rw-r--r--  Documentation/admin-guide/mm/index.rst                                          |  1
-rw-r--r--  Documentation/admin-guide/mm/slab.rst (renamed from Documentation/mm/slub.rst)  | 19
-rw-r--r--  Documentation/mm/index.rst                                                      |  1
-rw-r--r--  Documentation/mm/slab.rst                                                       |  7
-rw-r--r--  MAINTAINERS                                                                     | 17
-rw-r--r--  include/linux/mm.h                                                              |  4
-rw-r--r--  mm/kfence/core.c                                                                |  4
-rw-r--r--  mm/slab.h                                                                       | 28
-rw-r--r--  mm/slub.c                                                                       | 80
-rw-r--r--  tools/cgroup/memcg_slabinfo.py                                                  |  4

13 files changed, 110 insertions(+), 80 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
index 658999be5164..b26e4299f822 100644
--- a/Documentation/ABI/testing/sysfs-kernel-slab
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -37,7 +37,8 @@ Description:
 		The alloc_calls file is read-only and lists the kernel code
 		locations from which allocations for this cache were performed.
 		The alloc_calls file only contains information if debugging is
-		enabled for that cache (see Documentation/mm/slub.rst).
+		enabled for that cache (see
+		Documentation/admin-guide/mm/slab.rst).
 
 What:		/sys/kernel/slab/<cache>/alloc_fastpath
 Date:		February 2008
@@ -219,7 +220,7 @@ Contact:	Pekka Enberg <penberg@cs.helsinki.fi>,
 Description:
 		The free_calls file is read-only and lists the locations of
 		object frees if slab debugging is enabled (see
-		Documentation/mm/slub.rst).
+		Documentation/admin-guide/mm/slab.rst).
 
 What:		/sys/kernel/slab/<cache>/free_fastpath
 Date:		February 2008
diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index 8cf4614385b7..404a15f6782c 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -325,14 +325,14 @@ NR_FREE_PAGES
 On linux-2.6.21 or later, the number of free pages is in
 vm_stat[NR_FREE_PAGES]. Used to get the number of free pages.
 
-PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision|PG_head_mask|PG_hugetlb
-------------------------------------------------------------------------------------------
+PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_hwpoison|PG_head_mask
+--------------------------------------------------------------------------
 
 Page attributes. These flags are used to filter various unnecessary for
 dumping pages.
 
-PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_unaccepted)
--------------------------------------------------------------------------------------------------------------------------
+PAGE_SLAB_MAPCOUNT_VALUE|PAGE_BUDDY_MAPCOUNT_VALUE|PAGE_OFFLINE_MAPCOUNT_VALUE|PAGE_HUGETLB_MAPCOUNT_VALUE|PAGE_UNACCEPTED_MAPCOUNT_VALUE
+------------------------------------------------------------------------------------------------------------------------------------------
 
 More page attributes. These flags are used to filter various unnecessary for
 dumping pages.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index d3f5a1c69dab..4943fc845a15 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6587,14 +6587,14 @@
 			slab_debug can create guard zones around objects and
 			may poison objects when not in use. Also tracks the
 			last alloc / free. For more information see
-			Documentation/mm/slub.rst.
+			Documentation/admin-guide/mm/slab.rst.
 			(slub_debug legacy name also accepted for now)
 
 	slab_max_order=	[MM]
 			Determines the maximum allowed order for slabs.
 			A high setting may cause OOMs due to memory
 			fragmentation. For more information see
-			Documentation/mm/slub.rst.
+			Documentation/admin-guide/mm/slab.rst.
 			(slub_max_order legacy name also accepted for now)
 
 	slab_merge	[MM]
@@ -6609,13 +6609,14 @@
 			the number of objects indicated. The higher the number
 			of objects the smaller the overhead of tracking slabs
 			and the less frequently locks need to be acquired.
-			For more information see Documentation/mm/slub.rst.
+			For more information see
+			Documentation/admin-guide/mm/slab.rst.
 			(slub_min_objects legacy name also accepted for now)
 
 	slab_min_order=	[MM]
 			Determines the minimum page order for slabs. Must be
 			lower or equal to slab_max_order. For more information see
-			Documentation/mm/slub.rst.
+			Documentation/admin-guide/mm/slab.rst.
 			(slub_min_order legacy name also accepted for now)
 
 	slab_nomerge	[MM]
@@ -6629,7 +6630,8 @@
 			cache (risks via metadata attacks are mostly
 			unchanged). Debug options disable merging on their
 			own.
-			For more information see Documentation/mm/slub.rst.
+			For more information see
+			Documentation/admin-guide/mm/slab.rst.
 			(slub_nomerge legacy name also accepted for now)
 
 	slab_strict_numa	[MM]
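As a concrete illustration of the boot parameters touched above (an editorial example, not part of the patch), the renamed options could be combined on the kernel command line like this, using the debug-flag letters documented in the users guide further down:

    slab_debug=FZPU,dentry,kmalloc-64 slab_nomerge slab_max_order=2

That would enable sanity checks (F), red zoning (Z), poisoning (P) and user tracking (U) only for the dentry and kmalloc-64 caches, keep all caches unmerged, and cap the slab page order at 2; see Documentation/admin-guide/mm/slab.rst for the full list of flag letters.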
diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst
index 2d2f6c222308..ebc83ca20fdc 100644
--- a/Documentation/admin-guide/mm/index.rst
+++ b/Documentation/admin-guide/mm/index.rst
@@ -37,6 +37,7 @@ the Linux memory management.
    numaperf
    pagemap
    shrinker_debugfs
+   slab
    soft-dirty
    swap_numa
    transhuge
diff --git a/Documentation/mm/slub.rst b/Documentation/admin-guide/mm/slab.rst
index 84ca1dc94e5e..14429ab90611 100644
--- a/Documentation/mm/slub.rst
+++ b/Documentation/admin-guide/mm/slab.rst
@@ -1,13 +1,12 @@
-==========================
-Short users guide for SLUB
-==========================
-
-The basic philosophy of SLUB is very different from SLAB. SLAB
-requires rebuilding the kernel to activate debug options for all
-slab caches. SLUB always includes full debugging but it is off by default.
-SLUB can enable debugging only for selected slabs in order to avoid
-an impact on overall system performance which may make a bug more
-difficult to find.
+========================================
+Short users guide for the slab allocator
+========================================
+
+The slab allocator includes full debugging support (when built with
+CONFIG_SLUB_DEBUG=y) but it is off by default (unless built with
+CONFIG_SLUB_DEBUG_ON=y). You can enable debugging only for selected
+slabs in order to avoid an impact on overall system performance which
+may make a bug more difficult to find.
 
 In order to switch debugging on one can add an option ``slab_debug``
 to the kernel command line. That will enable full debugging for
diff --git a/Documentation/mm/index.rst b/Documentation/mm/index.rst
index d3ada3e45e10..fb45acba16ac 100644
--- a/Documentation/mm/index.rst
+++ b/Documentation/mm/index.rst
@@ -56,7 +56,6 @@ documentation, or deleted if it has served its purpose.
    page_owner
    page_table_check
    remap_file_pages
-   slub
    split_page_table_lock
    transhuge
    unevictable-lru
diff --git a/Documentation/mm/slab.rst b/Documentation/mm/slab.rst
index 87d5a5bb172f..2bcc58ada302 100644
--- a/Documentation/mm/slab.rst
+++ b/Documentation/mm/slab.rst
@@ -3,3 +3,10 @@
 ===============
 Slab Allocation
 ===============
+
+Functions and structures
+========================
+
+.. kernel-doc:: mm/slab.h
+.. kernel-doc:: mm/slub.c
+   :internal:
diff --git a/MAINTAINERS b/MAINTAINERS
index b968bc6959d1..8c1bc089cf51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -23015,17 +23015,24 @@ F:	Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml
 F:	drivers/nvmem/layouts/sl28vpd.c
 
 SLAB ALLOCATOR
-M:	Christoph Lameter <cl@gentwo.org>
-M:	David Rientjes <rientjes@google.com>
-M:	Andrew Morton <akpm@linux-foundation.org>
 M:	Vlastimil Babka <vbabka@suse.cz>
+M:	Andrew Morton <akpm@linux-foundation.org>
+R:	Christoph Lameter <cl@gentwo.org>
+R:	David Rientjes <rientjes@google.com>
 R:	Roman Gushchin <roman.gushchin@linux.dev>
 R:	Harry Yoo <harry.yoo@oracle.com>
 L:	linux-mm@kvack.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git
-F:	include/linux/sl?b*.h
-F:	mm/sl?b*
+F:	Documentation/admin-guide/mm/slab.rst
+F:	Documentation/mm/slab.rst
+F:	include/linux/mempool.h
+F:	include/linux/slab.h
+F:	mm/failslab.c
+F:	mm/mempool.c
+F:	mm/slab.h
+F:	mm/slab_common.c
+F:	mm/slub.c
 
 SLCAN CAN NETWORK DRIVER
 M:	Dario Binacchi <dario.binacchi@amarulasolutions.com>
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 00cd8415c0a0..0c44bb8ce544 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1325,6 +1325,8 @@ static inline void get_page(struct page *page)
 	struct folio *folio = page_folio(page);
 	if (WARN_ON_ONCE(folio_test_slab(folio)))
 		return;
+	if (WARN_ON_ONCE(folio_test_large_kmalloc(folio)))
+		return;
 	folio_get(folio);
 }
 
@@ -1419,7 +1421,7 @@ static inline void put_page(struct page *page)
 {
 	struct folio *folio = page_folio(page);
 
-	if (folio_test_slab(folio))
+	if (folio_test_slab(folio) || folio_test_large_kmalloc(folio))
 		return;
 
 	folio_put(folio);
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 102048821c22..0ed3be100963 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -605,8 +605,8 @@ static unsigned long kfence_init_pool(void)
 	pages = virt_to_page(__kfence_pool);
 
 	/*
-	 * Set up object pages: they must have PG_slab set, to avoid freeing
-	 * these as real pages.
+	 * Set up object pages: they must have PGTY_slab set to avoid freeing
+	 * them as real pages.
 	 *
 	 * We also want to avoid inserting kfence_free() in the kfree()
 	 * fast-path in SLUB, and therefore need to ensure kfree() correctly
diff --git a/mm/slab.h b/mm/slab.h
index 05a21dc796e0..248b34c839b7 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -50,7 +50,7 @@ typedef union {
 
 /* Reuses the bits in struct page */
 struct slab {
-	unsigned long __page_flags;
+	unsigned long flags;
 
 	struct kmem_cache *slab_cache;
 	union {
@@ -99,7 +99,7 @@ struct slab {
 
 #define SLAB_MATCH(pg, sl)						\
 	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
-SLAB_MATCH(flags, __page_flags);
+SLAB_MATCH(flags, flags);
 SLAB_MATCH(compound_head, slab_cache);	/* Ensure bit 0 is clear */
 SLAB_MATCH(_refcount, __page_refcount);
 #ifdef CONFIG_MEMCG
@@ -167,30 +167,6 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)
  */
 #define slab_page(s) folio_page(slab_folio(s), 0)
 
-/*
- * If network-based swap is enabled, sl*b must keep track of whether pages
- * were allocated from pfmemalloc reserves.
- */
-static inline bool slab_test_pfmemalloc(const struct slab *slab)
-{
-	return folio_test_active(slab_folio(slab));
-}
-
-static inline void slab_set_pfmemalloc(struct slab *slab)
-{
-	folio_set_active(slab_folio(slab));
-}
-
-static inline void slab_clear_pfmemalloc(struct slab *slab)
-{
-	folio_clear_active(slab_folio(slab));
-}
-
-static inline void __slab_clear_pfmemalloc(struct slab *slab)
-{
-	__folio_clear_active(slab_folio(slab));
-}
-
 static inline void *slab_address(const struct slab *slab)
 {
 	return folio_address(slab_folio(slab));
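The mm/slab.h hunk above works because struct slab is a field-for-field overlay of struct page, and SLAB_MATCH() turns any layout drift into a build error. A stand-alone sketch of the same technique (plain C11 with made-up struct names, not kernel code) shows why renaming __page_flags to flags is safe as long as the offsets keep matching:

    #include <assert.h>
    #include <stddef.h>

    struct backing {                /* stand-in for struct page */
            unsigned long flags;
            void *first_word;
    };

    struct view {                   /* stand-in for struct slab */
            unsigned long flags;    /* was "__page_flags" before the rename */
            void *cache;
    };

    /* Compile-time check that the overlay cannot silently drift apart. */
    #define MATCH(a, b) \
            static_assert(offsetof(struct backing, a) == offsetof(struct view, b), \
                          "layout mismatch")

    MATCH(flags, flags);
    MATCH(first_word, cache);

If a field ever moves in one struct but not the other, compilation fails instead of the slab code corrupting page metadata at run time.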
diff --git a/mm/slub.c b/mm/slub.c
index 31e11ef256f9..70327dc70ee5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -91,14 +91,14 @@
  * The partially empty slabs cached on the CPU partial list are used
  * for performance reasons, which speeds up the allocation process.
  * These slabs are not frozen, but are also exempt from list management,
- * by clearing the PG_workingset flag when moving out of the node
+ * by clearing the SL_partial flag when moving out of the node
  * partial list. Please see __slab_free() for more details.
  *
  * To sum up, the current scheme is:
- * - node partial slab: PG_Workingset && !frozen
- * - cpu partial slab:  !PG_Workingset && !frozen
- * - cpu slab:          !PG_Workingset && frozen
- * - full slab:         !PG_Workingset && !frozen
+ * - node partial slab: SL_partial && !frozen
+ * - cpu partial slab:  !SL_partial && !frozen
+ * - cpu slab:          !SL_partial && frozen
+ * - full slab:         !SL_partial && !frozen
 *
 * list_lock
 *
@@ -183,6 +183,22 @@
 * the fast path and disables lockless freelists.
 */
 
+/**
+ * enum slab_flags - How the slab flags bits are used.
+ * @SL_locked: Is locked with slab_lock()
+ * @SL_partial: On the per-node partial list
+ * @SL_pfmemalloc: Was allocated from PF_MEMALLOC reserves
+ *
+ * The slab flags share space with the page flags but some bits have
+ * different interpretations. The high bits are used for information
+ * like zone/node/section.
+ */
+enum slab_flags {
+	SL_locked = PG_locked,
+	SL_partial = PG_workingset,	/* Historical reasons for this bit */
+	SL_pfmemalloc = PG_active,	/* Historical reasons for this bit */
+};
+
 /*
  * We could simply use migrate_disable()/enable() but as long as it's a
  * function call even on !PREEMPT_RT, use inline preempt_disable() there.
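Read together, the reworded state table and the new enum mean that a slab's role follows from two bits: SL_partial in slab->flags and the frozen bit in the counters word. A purely illustrative helper (not part of the patch, assuming the struct slab layout from mm/slab.h) that names those states could look roughly like this:

    static const char *slab_state_name(const struct slab *slab)
    {
            if (slab->frozen)
                    return "cpu slab";              /* !SL_partial &&  frozen */
            if (test_bit(SL_partial, &slab->flags))
                    return "node partial slab";     /*  SL_partial && !frozen */
            /*
             * cpu partial and full slabs look identical here
             * (!SL_partial && !frozen); only list membership under
             * list_lock tells them apart.
             */
            return "cpu partial or full slab";
    }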
@@ -635,16 +651,35 @@ static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s)
 #endif /* CONFIG_SLUB_CPU_PARTIAL */
 
 /*
+ * If network-based swap is enabled, slub must keep track of whether memory
+ * were allocated from pfmemalloc reserves.
+ */
+static inline bool slab_test_pfmemalloc(const struct slab *slab)
+{
+	return test_bit(SL_pfmemalloc, &slab->flags);
+}
+
+static inline void slab_set_pfmemalloc(struct slab *slab)
+{
+	set_bit(SL_pfmemalloc, &slab->flags);
+}
+
+static inline void __slab_clear_pfmemalloc(struct slab *slab)
+{
+	__clear_bit(SL_pfmemalloc, &slab->flags);
+}
+
+/*
  * Per slab locking using the pagelock
  */
 static __always_inline void slab_lock(struct slab *slab)
 {
-	bit_spin_lock(PG_locked, &slab->__page_flags);
+	bit_spin_lock(SL_locked, &slab->flags);
 }
 
 static __always_inline void slab_unlock(struct slab *slab)
 {
-	bit_spin_unlock(PG_locked, &slab->__page_flags);
+	bit_spin_unlock(SL_locked, &slab->flags);
 }
 
 static inline bool
@@ -1010,7 +1045,7 @@ static void print_slab_info(const struct slab *slab)
 {
 	pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
 	       slab, slab->objects, slab->inuse, slab->freelist,
-	       &slab->__page_flags);
+	       &slab->flags);
 }
 
 void skip_orig_size_check(struct kmem_cache *s, const void *object)
@@ -2717,23 +2752,19 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab)
 	free_slab(s, slab);
 }
 
-/*
- * SLUB reuses PG_workingset bit to keep track of whether it's on
- * the per-node partial list.
- */
 static inline bool slab_test_node_partial(const struct slab *slab)
 {
-	return folio_test_workingset(slab_folio(slab));
+	return test_bit(SL_partial, &slab->flags);
 }
 
 static inline void slab_set_node_partial(struct slab *slab)
 {
-	set_bit(PG_workingset, folio_flags(slab_folio(slab), 0));
+	set_bit(SL_partial, &slab->flags);
 }
 
 static inline void slab_clear_node_partial(struct slab *slab)
 {
-	clear_bit(PG_workingset, folio_flags(slab_folio(slab), 0));
+	clear_bit(SL_partial, &slab->flags);
 }
 
 /*
@@ -4269,7 +4300,12 @@ static void *___kmalloc_large_node(size_t size, gfp_t flags, int node)
 		flags = kmalloc_fix_flags(flags);
 
 	flags |= __GFP_COMP;
-	folio = (struct folio *)alloc_pages_node_noprof(node, flags, order);
+
+	if (node == NUMA_NO_NODE)
+		folio = (struct folio *)alloc_frozen_pages_noprof(flags, order);
+	else
+		folio = (struct folio *)__alloc_frozen_pages_noprof(flags, order, node, NULL);
+
 	if (folio) {
 		ptr = folio_address(folio);
 		lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
@@ -4765,7 +4801,7 @@ static void free_large_kmalloc(struct folio *folio, void *object)
 	lruvec_stat_mod_folio(folio, NR_SLAB_UNRECLAIMABLE_B,
			      -(PAGE_SIZE << order));
 	__folio_clear_large_kmalloc(folio);
-	folio_put(folio);
+	free_frozen_pages(&folio->page, order);
 }
 
 /*
@@ -4930,12 +4966,12 @@ alloc_new:
  * When slub_debug_orig_size() is off, krealloc() only knows about the bucket
  * size of an allocation (but not the exact size it was allocated with) and
  * hence implements the following semantics for shrinking and growing buffers
- * with __GFP_ZERO.
+ * with __GFP_ZERO::
 *
- *        new             bucket
- * 0      size            size
- * |--------|----------------|
- * |  keep  |      zero      |
+ *	new		bucket
+ * 0	size		size
+ *	|--------|----------------|
+ *	|  keep  |      zero      |
 *
 * Otherwise, the original allocation size 'orig_size' could be used to
 * precisely clear the requested size, and the new size will also be stored
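The reflowed __GFP_ZERO diagram in the final hunk is easier to follow with a concrete call sequence. The following is illustrative only (the bucket size and fill pattern are assumptions, and error handling is omitted):

    char *p = kmalloc(48, GFP_KERNEL);      /* lands in the kmalloc-64 bucket */
    memset(p, 0xaa, 48);

    /* Shrink with __GFP_ZERO while slub_debug_orig_size() is off. */
    p = krealloc(p, 32, GFP_KERNEL | __GFP_ZERO);
    /*
     * bytes [0, 32)  - "keep": still 0xaa
     * bytes [32, 64) - "zero": cleared up to the bucket size, because
     * krealloc() only knows the bucket, not the original 48-byte request
     */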
diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py
index 270c28a0d098..6bf4bde77903 100644
--- a/tools/cgroup/memcg_slabinfo.py
+++ b/tools/cgroup/memcg_slabinfo.py
@@ -146,11 +146,11 @@ def detect_kernel_config():
 
 
 def for_each_slab(prog):
-    PGSlab = ~prog.constant('PG_slab')
+    slabtype = prog.constant('PGTY_slab')
 
     for page in for_each_page(prog):
         try:
-            if page.page_type.value_() == PGSlab:
+            if (page.page_type.value_() >> 24) == slabtype:
                 yield cast('struct slab *', page)
         except FaultError:
             pass
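The script change at the end mirrors how the kernel now records a page's type: PG_slab is no longer a page flag bit, and the PGTY_slab value lives in the top byte of page->page_type, with the lower bits left for type-specific use. In kernel code one would use PageSlab()/folio_test_slab(); the open-coded check below is only a sketch, in C, of what the drgn script does:

    /* Sketch of the (page_type >> 24) == PGTY_slab test used by the script. */
    static inline bool page_type_is_slab(const struct page *page)
    {
            return (page->page_type >> 24) == PGTY_slab;
    }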
