From 23baf831a32c04f9a968812511540b1b3e648bf5 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Wed, 15 Mar 2023 14:31:33 +0300
Subject: mm, treewide: redefine MAX_ORDER sanely

MAX_ORDER is currently defined as the number of orders the page allocator
supports: a user can ask the buddy allocator for page orders between 0 and
MAX_ORDER-1. This definition is counter-intuitive and has led to a number
of bugs all over the kernel.

Change the definition of MAX_ORDER to be inclusive: the range of orders a
user can ask the buddy allocator for is now 0..MAX_ORDER.

[kirill@shutemov.name: fix min() warning]
  Link: https://lkml.kernel.org/r/20230315153800.32wib3n5rickolvh@box
[akpm@linux-foundation.org: fix another min_t warning]
[kirill@shutemov.name: fixups per Zi Yan]
  Link: https://lkml.kernel.org/r/20230316232144.b7ic4cif4kjiabws@box.shutemov.name
[akpm@linux-foundation.org: fix underlining in docs]
  Link: https://lore.kernel.org/oe-kbuild-all/202303191025.VRCTk6mP-lkp@intel.com/
Link: https://lkml.kernel.org/r/20230315113133.11326-11-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov
Reviewed-by: Michael Ellerman [powerpc]
Cc: "Kirill A. Shutemov"
Cc: Zi Yan
Signed-off-by: Andrew Morton
---
 drivers/gpu/drm/ttm/ttm_pool.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'drivers/gpu/drm/ttm/ttm_pool.c')

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index aa116a7bbae3..6c8585abe08d 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -65,11 +65,11 @@ module_param(page_pool_size, ulong, 0644);
 
 static atomic_long_t allocated_pages;
 
-static struct ttm_pool_type global_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_uncached[MAX_ORDER];
+static struct ttm_pool_type global_write_combined[MAX_ORDER + 1];
+static struct ttm_pool_type global_uncached[MAX_ORDER + 1];
 
-static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
+static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER + 1];
+static struct ttm_pool_type global_dma32_uncached[MAX_ORDER + 1];
 
 static spinlock_t shrinker_lock;
 static struct list_head shrinker_list;
@@ -405,7 +405,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	else
 		gfp_flags |= GFP_HIGHUSER;
 
-	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
+	for (order = min_t(unsigned int, MAX_ORDER, __fls(num_pages));
 	     num_pages;
 	     order = min_t(unsigned int, order, __fls(num_pages))) {
 		struct ttm_pool_type *pt;
@@ -542,7 +542,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 
 	if (use_dma_alloc) {
 		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < MAX_ORDER; ++j)
+			for (j = 0; j <= MAX_ORDER; ++j)
 				ttm_pool_type_init(&pool->caching[i].orders[j],
 						   pool, i, j);
 	}
@@ -562,7 +562,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
 
 	if (pool->use_dma_alloc) {
 		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < MAX_ORDER; ++j)
+			for (j = 0; j <= MAX_ORDER; ++j)
 				ttm_pool_type_fini(&pool->caching[i].orders[j]);
 	}
 
@@ -616,7 +616,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
 	unsigned int i;
 
 	seq_puts(m, "\t ");
-	for (i = 0; i < MAX_ORDER; ++i)
+	for (i = 0; i <= MAX_ORDER; ++i)
 		seq_printf(m, " ---%2u---", i);
 	seq_puts(m, "\n");
 }
@@ -627,7 +627,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
 {
 	unsigned int i;
 
-	for (i = 0; i < MAX_ORDER; ++i)
+	for (i = 0; i <= MAX_ORDER; ++i)
 		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
 	seq_puts(m, "\n");
 }
@@ -736,7 +736,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
 	spin_lock_init(&shrinker_lock);
 	INIT_LIST_HEAD(&shrinker_list);
 
-	for (i = 0; i < MAX_ORDER; ++i) {
+	for (i = 0; i <= MAX_ORDER; ++i) {
 		ttm_pool_type_init(&global_write_combined[i], NULL,
 				   ttm_write_combined, i);
 		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
@@ -769,7 +769,7 @@ void ttm_pool_mgr_fini(void)
 {
 	unsigned int i;
 
-	for (i = 0; i < MAX_ORDER; ++i) {
+	for (i = 0; i <= MAX_ORDER; ++i) {
 		ttm_pool_type_fini(&global_write_combined[i]);
 		ttm_pool_type_fini(&global_uncached[i]);
-- cgit

From 379989e7cbdc7aa7496a00ee286ec146c7599cf0 Mon Sep 17 00:00:00 2001
From: Thomas Hellström
Date: Tue, 4 Apr 2023 22:06:48 +0200
Subject: drm/ttm/pool: Fix ttm_pool_alloc error path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When hitting an error, the error path forgot to unmap DMA mappings and
could call set_pages_wb() on already uncached pages.

Fix this by introducing a common ttm_pool_free_range() function that
does the right thing.

v2:
- Simplify that common function (Christian König)
v3:
- Rename that common function to ttm_pool_free_range() (Christian König)

Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3")
Cc: Christian König
Cc: Dave Airlie
Cc: Christian Koenig
Cc: Huang Rui
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Thomas Hellström
Reviewed-by: Christian König
Link: https://patchwork.freedesktop.org/patch/msgid/20230404200650.11043-2-thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/ttm/ttm_pool.c | 81 ++++++++++++++++++++++++++----------------
 1 file changed, 51 insertions(+), 30 deletions(-)

(limited to 'drivers/gpu/drm/ttm/ttm_pool.c')

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index aa116a7bbae3..dfce896c4bae 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -367,6 +367,43 @@ static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
 	return 0;
 }
 
+/**
+ * ttm_pool_free_range() - Free a range of TTM pages
+ * @pool: The pool used for allocating.
+ * @tt: The struct ttm_tt holding the page pointers.
+ * @caching: The page caching mode used by the range.
+ * @start_page: index for first page to free.
+ * @end_page: index for last page to free + 1.
+ *
+ * During allocation the ttm_tt page-vector may be populated with ranges of
+ * pages with different attributes if allocation hit an error without being
+ * able to completely fulfill the allocation. This function can be used
+ * to free these individual ranges.
+ */
+static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
+				enum ttm_caching caching,
+				pgoff_t start_page, pgoff_t end_page)
+{
+	struct page **pages = tt->pages;
+	unsigned int order;
+	pgoff_t i, nr;
+
+	for (i = start_page; i < end_page; i += nr, pages += nr) {
+		struct ttm_pool_type *pt = NULL;
+
+		order = ttm_pool_page_order(pool, *pages);
+		nr = (1UL << order);
+		if (tt->dma_address)
+			ttm_pool_unmap(pool, tt->dma_address[i], nr);
+
+		pt = ttm_pool_select_type(pool, caching, order);
+		if (pt)
+			ttm_pool_type_give(pt, *pages);
+		else
+			ttm_pool_free_page(pool, caching, order, *pages);
+	}
+}
+
 /**
  * ttm_pool_alloc - Fill a ttm_tt object
  *
@@ -382,12 +419,14 @@ static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
 int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 		   struct ttm_operation_ctx *ctx)
 {
-	unsigned long num_pages = tt->num_pages;
+	pgoff_t num_pages = tt->num_pages;
 	dma_addr_t *dma_addr = tt->dma_address;
 	struct page **caching = tt->pages;
 	struct page **pages = tt->pages;
+	enum ttm_caching page_caching;
 	gfp_t gfp_flags = GFP_USER;
-	unsigned int i, order;
+	pgoff_t caching_divide;
+	unsigned int order;
 	struct page *p;
 	int r;
 
@@ -410,6 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	     order = min_t(unsigned int, order, __fls(num_pages))) {
 		struct ttm_pool_type *pt;
 
+		page_caching = tt->caching;
 		pt = ttm_pool_select_type(pool, tt->caching, order);
 		p = pt ? ttm_pool_type_take(pt) : NULL;
 		if (p) {
@@ -418,6 +458,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 			if (r)
 				goto error_free_page;
 
+			caching = pages;
 			do {
 				r = ttm_pool_page_allocated(pool, order, p,
 							    &dma_addr,
@@ -426,14 +467,15 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 				if (r)
 					goto error_free_page;
 
+				caching = pages;
 				if (num_pages < (1 << order))
 					break;
 
 				p = ttm_pool_type_take(pt);
 			} while (p);
-			caching = pages;
 		}
 
+		page_caching = ttm_cached;
 		while (num_pages >= (1 << order) &&
 		       (p = ttm_pool_alloc_page(pool, gfp_flags, order))) {
 
@@ -442,6 +484,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 						   tt->caching);
 				if (r)
 					goto error_free_page;
+				caching = pages;
 			}
 			r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
 						    &num_pages, &pages);
@@ -468,15 +511,13 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	return 0;
 
 error_free_page:
-	ttm_pool_free_page(pool, tt->caching, order, p);
+	ttm_pool_free_page(pool, page_caching, order, p);
 
 error_free_all:
 	num_pages = tt->num_pages - num_pages;
-	for (i = 0; i < num_pages; ) {
-		order = ttm_pool_page_order(pool, tt->pages[i]);
-		ttm_pool_free_page(pool, tt->caching, order, tt->pages[i]);
-		i += 1 << order;
-	}
+	caching_divide = caching - tt->pages;
+	ttm_pool_free_range(pool, tt, tt->caching, 0, caching_divide);
+	ttm_pool_free_range(pool, tt, ttm_cached, caching_divide, num_pages);
 
 	return r;
 }
@@ -492,27 +533,7 @@ EXPORT_SYMBOL(ttm_pool_alloc);
  */
 void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
 {
-	unsigned int i;
-
-	for (i = 0; i < tt->num_pages; ) {
-		struct page *p = tt->pages[i];
-		unsigned int order, num_pages;
-		struct ttm_pool_type *pt;
-
-		order = ttm_pool_page_order(pool, p);
-		num_pages = 1ULL << order;
-		if (tt->dma_address)
-			ttm_pool_unmap(pool, tt->dma_address[i], num_pages);
-
-		pt = ttm_pool_select_type(pool, tt->caching, order);
-		if (pt)
-			ttm_pool_type_give(pt, tt->pages[i]);
-		else
-			ttm_pool_free_page(pool, tt->caching, order,
-					   tt->pages[i]);
-
-		i += num_pages;
-	}
+	ttm_pool_free_range(pool, tt, tt->caching, 0, tt->num_pages);
 
 	while (atomic_long_read(&allocated_pages) > page_pool_size)
 		ttm_pool_shrink();
-- cgit

From 322458c2bb1a0398c5775333e1e71e1ece8a461f Mon Sep 17 00:00:00 2001
From: Thomas Hellström
Date: Tue, 4 Apr 2023 22:06:49 +0200
Subject: drm/ttm: Reduce the number of used allocation orders for TTM pages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When swapping out, we will split multi-order pages both in order to move
them to the swap-cache and to be able to return memory to the swap cache
as soon as possible on a page-by-page basis.

Reduce the page max order to the system PMD size, as we can then be
nicer to the system and avoid splitting gigantic pages.

Looking ahead to when we might be able to swap out PMD-size folios
without splitting, this will also be a benefit.

v2:
- Include all orders up to the PMD size (Christian König)
v3:
- Avoid compilation errors for architectures with special PFN_SHIFTs

Signed-off-by: Thomas Hellström
Reviewed-by: Christian König
Link: https://patchwork.freedesktop.org/patch/msgid/20230404200650.11043-3-thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

(limited to 'drivers/gpu/drm/ttm/ttm_pool.c')

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index dfce896c4bae..18c342a919a2 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -47,6 +47,11 @@
 
 #include "ttm_module.h"
 
+#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
+#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
+/* Some architectures have a weird PMD_SHIFT */
+#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
+
 /**
  * struct ttm_pool_dma - Helper object for coherent DMA mappings
  *
@@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
 
 static atomic_long_t allocated_pages;
 
-static struct ttm_pool_type global_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_uncached[MAX_ORDER];
+static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
+static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
 
-static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
+static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
+static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
 
 static spinlock_t shrinker_lock;
 static struct list_head shrinker_list;
@@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	else
 		gfp_flags |= GFP_HIGHUSER;
 
-	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
+	for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
 	     num_pages;
 	     order = min_t(unsigned int, order, __fls(num_pages))) {
 		struct ttm_pool_type *pt;
@@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 
 	if (use_dma_alloc) {
 		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < MAX_ORDER; ++j)
+			for (j = 0; j < TTM_DIM_ORDER; ++j)
 				ttm_pool_type_init(&pool->caching[i].orders[j],
 						   pool, i, j);
 	}
@@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
 
 	if (pool->use_dma_alloc) {
 		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < MAX_ORDER; ++j)
+			for (j = 0; j < TTM_DIM_ORDER; ++j)
 				ttm_pool_type_fini(&pool->caching[i].orders[j]);
 	}
 
@@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
 	unsigned int i;
 
 	seq_puts(m, "\t ");
-	for (i = 0; i < MAX_ORDER; ++i)
+	for (i = 0; i < TTM_DIM_ORDER; ++i)
 		seq_printf(m, " ---%2u---", i);
 	seq_puts(m, "\n");
 }
@@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
 {
 	unsigned int i;
 
-	for (i = 0; i < MAX_ORDER; ++i)
+	for (i = 0; i < TTM_DIM_ORDER; ++i)
 		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
 	seq_puts(m, "\n");
 }
@@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
 {
 	unsigned int i;
 
+	BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
+	BUILD_BUG_ON(TTM_DIM_ORDER < 1);
+
 	if (!page_pool_size)
 		page_pool_size = num_pages;
 
 	spin_lock_init(&shrinker_lock);
 	INIT_LIST_HEAD(&shrinker_list);
 
-	for (i = 0; i < MAX_ORDER; ++i) {
+	for (i = 0; i < TTM_DIM_ORDER; ++i) {
 		ttm_pool_type_init(&global_write_combined[i], NULL,
 				   ttm_write_combined, i);
 		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
@@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
 {
 	unsigned int i;
 
-	for (i = 0; i < MAX_ORDER; ++i) {
+	for (i = 0; i < TTM_DIM_ORDER; ++i) {
 		ttm_pool_type_fini(&global_write_combined[i]);
 		ttm_pool_type_fini(&global_uncached[i]);
-- cgit

From 56e51681246e574dcb2e13fc071c2945c7667c83 Mon Sep 17 00:00:00 2001
From: Christian König
Date: Thu, 13 Apr 2023 17:02:01 +0200
Subject: drm/ttm: revert "Reduce the number of used allocation orders for TTM pages"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 322458c2bb1a0398c5775333e1e71e1ece8a461f.

PMD_SHIFT is not necessarily constant on all architectures, resulting in
build failures.

Signed-off-by: Christian König
Acked-by: Daniel Vetter
Link: https://patchwork.freedesktop.org/patch/msgid/CAKMK7uHgUuqWJuqmZKrxi2mNiqExhmMif-naYnzUSj-puW-x+A@mail.gmail.com
---
 drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

(limited to 'drivers/gpu/drm/ttm/ttm_pool.c')

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 18c342a919a2..dfce896c4bae 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -47,11 +47,6 @@
 
 #include "ttm_module.h"
 
-#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
-#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
-/* Some architectures have a weird PMD_SHIFT */
-#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
-
 /**
  * struct ttm_pool_dma - Helper object for coherent DMA mappings
  *
@@ -70,11 +65,11 @@ module_param(page_pool_size, ulong, 0644);
 
 static atomic_long_t allocated_pages;
 
-static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
-static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
+static struct ttm_pool_type global_write_combined[MAX_ORDER];
+static struct ttm_pool_type global_uncached[MAX_ORDER];
 
-static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
-static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
+static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
+static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
 
 static spinlock_t shrinker_lock;
 static struct list_head shrinker_list;
@@ -449,7 +444,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	else
 		gfp_flags |= GFP_HIGHUSER;
 
-	for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
+	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
 	     num_pages;
 	     order = min_t(unsigned int, order, __fls(num_pages))) {
 		struct ttm_pool_type *pt;
@@ -568,7 +563,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 
 	if (use_dma_alloc) {
 		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < TTM_DIM_ORDER; ++j)
+			for (j = 0; j < MAX_ORDER; ++j)
 				ttm_pool_type_init(&pool->caching[i].orders[j],
 						   pool, i, j);
 	}
@@ -588,7 +583,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
 
 	if (pool->use_dma_alloc) {
 		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < TTM_DIM_ORDER; ++j)
+			for (j = 0; j < MAX_ORDER; ++j)
 				ttm_pool_type_fini(&pool->caching[i].orders[j]);
 	}
 
@@ -642,7 +637,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
 	unsigned int i;
 
 	seq_puts(m, "\t ");
-	for (i = 0; i < TTM_DIM_ORDER; ++i)
+	for (i = 0; i < MAX_ORDER; ++i)
 		seq_printf(m, " ---%2u---", i);
 	seq_puts(m, "\n");
 }
@@ -653,7 +648,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
 {
 	unsigned int i;
 
-	for (i = 0; i < TTM_DIM_ORDER; ++i)
+	for (i = 0; i < MAX_ORDER; ++i)
 		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
 	seq_puts(m, "\n");
 }
@@ -756,16 +751,13 @@ int ttm_pool_mgr_init(unsigned long num_pages)
 {
 	unsigned int i;
 
-	BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
-	BUILD_BUG_ON(TTM_DIM_ORDER < 1);
-
 	if (!page_pool_size)
 		page_pool_size = num_pages;
 
 	spin_lock_init(&shrinker_lock);
 	INIT_LIST_HEAD(&shrinker_list);
 
-	for (i = 0; i < TTM_DIM_ORDER; ++i) {
+	for (i = 0; i < MAX_ORDER; ++i) {
 		ttm_pool_type_init(&global_write_combined[i], NULL,
 				   ttm_write_combined, i);
 		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
@@ -798,7 +790,7 @@ void ttm_pool_mgr_fini(void)
 {
 	unsigned int i;
 
-	for (i = 0; i < TTM_DIM_ORDER; ++i) {
+	for (i = 0; i < MAX_ORDER; ++i) {
 		ttm_pool_type_fini(&global_write_combined[i]);
 		ttm_pool_type_fini(&global_uncached[i]);
-- cgit
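
A note on the order arithmetic these four patches keep adjusting: an
inclusive maximum order means MAX_ORDER + 1 distinct orders, so every
per-order array needs one extra slot and every iteration must use <=
rather than <. The sketch below is illustrative only (plain userspace C;
the MAX_ORDER value and the pool_count array are stand-ins, not TTM or
kernel code):

	#include <stdio.h>

	/* Inclusive convention from the first commit: valid orders are
	 * 0..MAX_ORDER, i.e. MAX_ORDER + 1 values. 10 is an arbitrary
	 * stand-in for the kernel's architecture-dependent constant. */
	#define MAX_ORDER 10

	/* One pool per order: without the "+ 1", pool_count[MAX_ORDER]
	 * would be an out-of-bounds access. */
	static unsigned int pool_count[MAX_ORDER + 1];

	int main(void)
	{
		/* Iterate the way the patched TTM code does: <=, not <. */
		for (unsigned int order = 0; order <= MAX_ORDER; ++order)
			pool_count[order] = 0;

		/* Largest single allocation under this convention:
		 * 2^MAX_ORDER pages. */
		printf("max pages per allocation: %lu\n", 1UL << MAX_ORDER);
		return 0;
	}

The TTM_DIM_ORDER macro in the third patch makes the same
count-versus-maximum distinction explicit: it dimensions the pools with
TTM_MAX_ORDER + 1 slots and clamps the result so it never exceeds what
the buddy allocator supports, which its BUILD_BUG_ON() checks then
enforce at compile time.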