From 25176ad09ca395fcc83b1fc78adf25c8eb1bd964 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Tue, 2 Apr 2024 14:55:15 +0200
Subject: mm/treewide: rename CONFIG_HAVE_FAST_GUP to CONFIG_HAVE_GUP_FAST

Nowadays, we call it "GUP-fast", the external interface includes
functions like "get_user_pages_fast()", and we renamed all internal
functions to reflect that as well.  Let's make the config option
reflect that.

Link: https://lkml.kernel.org/r/20240402125516.223131-3-david@redhat.com
Signed-off-by: David Hildenbrand
Reviewed-by: Mike Rapoport (IBM)
Reviewed-by: Jason Gunthorpe
Reviewed-by: John Hubbard
Cc: Peter Xu
Signed-off-by: Andrew Morton
---
 include/linux/rmap.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux/rmap.h')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b7944a833668..9bf9324214fc 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -284,7 +284,7 @@ static inline int hugetlb_try_share_anon_rmap(struct folio *folio)
         VM_WARN_ON_FOLIO(!PageAnonExclusive(&folio->page), folio);
 
         /* Paired with the memory barrier in try_grab_folio(). */
-        if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+        if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
                 smp_mb();
 
         if (unlikely(folio_maybe_dma_pinned(folio)))
@@ -295,7 +295,7 @@ static inline int hugetlb_try_share_anon_rmap(struct folio *folio)
          * This is conceptually a smp_wmb() paired with the smp_rmb() in
          * gup_must_unshare().
          */
-        if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+        if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
                 smp_mb__after_atomic();
         return 0;
 }
@@ -541,7 +541,7 @@ static __always_inline int __folio_try_share_anon_rmap(struct folio *folio,
          */
 
         /* Paired with the memory barrier in try_grab_folio(). */
-        if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+        if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
                 smp_mb();
 
         if (unlikely(folio_maybe_dma_pinned(folio)))
@@ -552,7 +552,7 @@ static __always_inline int __folio_try_share_anon_rmap(struct folio *folio,
          * This is conceptually a smp_wmb() paired with the smp_rmb() in
          * gup_must_unshare().
          */
-        if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+        if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
                 smp_mb__after_atomic();
         return 0;
 }
-- cgit
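A side note on the IS_ENABLED() pattern in the hunks above: it expands to
a compile-time constant 1 or 0, so on configurations without GUP-fast the
branch, and the smp_mb() it guards, are compiled out entirely.  The
barrier is only needed to pair with the one in try_grab_folio(), i.e.
with the lockless GUP-fast path.  A minimal userspace analogue of the
pattern (illustrative names, with a C11 fence standing in for smp_mb();
not kernel code):

#include <stdatomic.h>

/* Hypothetical stand-in for IS_ENABLED(CONFIG_HAVE_GUP_FAST). */
#define HAVE_GUP_FAST 1

static inline void fence_if_gup_fast(void)
{
        /* Constant condition: the dead branch is removed at compile time. */
        if (HAVE_GUP_FAST)
                atomic_thread_fence(memory_order_seq_cst);
}
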
From c2e65ebc02fb60b64a1c5689fe2c6f60d0fc1626 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Tue, 9 Apr 2024 21:22:45 +0200
Subject: mm/rmap: always inline anon/file rmap duplication of a single PTE

As we grow the code, the compiler might make stupid decisions and
unnecessarily degrade fork() performance.  Let's make sure to always
inline functions that operate on a single PTE so the compiler will
always optimize out the loop and avoid a function call.

This is a preparation for maintaining a total mapcount for large folios.

Link: https://lkml.kernel.org/r/20240409192301.907377-3-david@redhat.com
Signed-off-by: David Hildenbrand
Reviewed-by: Yin Fengwei
Cc: Chris Zankel
Cc: Hugh Dickins
Cc: John Paul Adrian Glaubitz
Cc: Jonathan Corbet
Cc: Matthew Wilcox (Oracle)
Cc: Max Filippov
Cc: Miaohe Lin
Cc: Muchun Song
Cc: Naoya Horiguchi
Cc: Peter Xu
Cc: Richard Chang
Cc: Rich Felker
Cc: Ryan Roberts
Cc: Yang Shi
Cc: Yoshinori Sato
Cc: Zi Yan
Signed-off-by: Andrew Morton
---
 include/linux/rmap.h | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/linux/rmap.h')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 9bf9324214fc..9549d78928bb 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -347,8 +347,12 @@ static inline void folio_dup_file_rmap_ptes(struct folio *folio,
 {
         __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE);
 }
-#define folio_dup_file_rmap_pte(folio, page) \
-        folio_dup_file_rmap_ptes(folio, page, 1)
+
+static __always_inline void folio_dup_file_rmap_pte(struct folio *folio,
+                struct page *page)
+{
+        __folio_dup_file_rmap(folio, page, 1, RMAP_LEVEL_PTE);
+}
 
 /**
  * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio
@@ -448,8 +452,13 @@ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio,
         return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma,
                                          RMAP_LEVEL_PTE);
 }
-#define folio_try_dup_anon_rmap_pte(folio, page, vma) \
-        folio_try_dup_anon_rmap_ptes(folio, page, 1, vma)
+
+static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio,
+                struct page *page, struct vm_area_struct *src_vma)
+{
+        return __folio_try_dup_anon_rmap(folio, page, 1, src_vma,
+                                         RMAP_LEVEL_PTE);
+}
 
 /**
  * folio_try_dup_anon_rmap_pmd - try duplicating a PMD mapping of a page range
-- cgit
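The gain from the conversion above comes from constant propagation: the
single-PTE wrapper now passes the literal 1 for nr_pages into an
__always_inline helper, so the compiler can prove the do/while loop runs
exactly once and reduce it to a single increment with no out-of-line
call, something a plain "static inline" only encourages but does not
guarantee.  A userspace sketch of the mechanism (illustrative names,
GCC/Clang attribute syntax assumed; not kernel code):

#include <stdatomic.h>

/* Generic helper, forced inline so that callers passing a constant
 * nr_pages get the loop folded away instead of a function call. */
static inline __attribute__((always_inline))
void bump_mapcounts(atomic_int *mapcount, int nr_pages)
{
        do {
                atomic_fetch_add(mapcount, 1);
        } while (mapcount++, --nr_pages > 0);
}

/* Single-PTE case: nr_pages is the constant 1, so after inlining the
 * loop collapses to one atomic increment. */
static inline __attribute__((always_inline))
void bump_one_mapcount(atomic_int *mapcount)
{
        bump_mapcounts(mapcount, 1);
}
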
From 46d62de7ad1286854e0c2944ad26a1c1b1a5f191 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Tue, 9 Apr 2024 21:22:46 +0200
Subject: mm/rmap: add fast-path for small folios when adding/removing/duplicating

Let's add a fast-path for small folios to all relevant rmap functions.
Note that only RMAP_LEVEL_PTE applies.

This is a preparation for tracking the mapcount of large folios in a
single value.

Link: https://lkml.kernel.org/r/20240409192301.907377-4-david@redhat.com
Signed-off-by: David Hildenbrand
Reviewed-by: Yin Fengwei
Cc: Chris Zankel
Cc: Hugh Dickins
Cc: John Paul Adrian Glaubitz
Cc: Jonathan Corbet
Cc: Matthew Wilcox (Oracle)
Cc: Max Filippov
Cc: Miaohe Lin
Cc: Muchun Song
Cc: Naoya Horiguchi
Cc: Peter Xu
Cc: Richard Chang
Cc: Rich Felker
Cc: Ryan Roberts
Cc: Yang Shi
Cc: Yoshinori Sato
Cc: Zi Yan
Signed-off-by: Andrew Morton
---
 include/linux/rmap.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux/rmap.h')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 9549d78928bb..327f1ca5a487 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -322,6 +322,11 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio,
 
         switch (level) {
         case RMAP_LEVEL_PTE:
+                if (!folio_test_large(folio)) {
+                        atomic_inc(&page->_mapcount);
+                        break;
+                }
+
                 do {
                         atomic_inc(&page->_mapcount);
                 } while (page++, --nr_pages > 0);
@@ -405,6 +410,14 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
                         if (PageAnonExclusive(page + i))
                                 return -EBUSY;
                 }
+
+                if (!folio_test_large(folio)) {
+                        if (PageAnonExclusive(page))
+                                ClearPageAnonExclusive(page);
+                        atomic_inc(&page->_mapcount);
+                        break;
+                }
+
                 do {
                         if (PageAnonExclusive(page))
                                 ClearPageAnonExclusive(page);
-- cgit
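A toy model of the fast-path added above (hypothetical types and names;
not kernel code): a small folio consists of exactly one page and can only
be mapped by a single PTE, so the subpage loop is skipped outright.

#include <stdatomic.h>
#include <stdbool.h>

struct toy_folio {
        bool large;                     /* folio_test_large() stand-in */
        atomic_int mapcount[512];       /* per-subpage _mapcount stand-ins */
};

static void toy_dup_rmap_ptes(struct toy_folio *folio, int first, int nr_pages)
{
        if (!folio->large) {
                /* Fast path: one page, one PTE, no loop. */
                atomic_fetch_add(&folio->mapcount[0], 1);
                return;
        }
        /* Slow path: large folio, walk every subpage in the batch. */
        do {
                atomic_fetch_add(&folio->mapcount[first], 1);
        } while (first++, --nr_pages > 0);
}
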
From 05c5323b2a344c19c51cd1b91a4ab9ae90853794 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Tue, 9 Apr 2024 21:22:47 +0200
Subject: mm: track mapcount of large folios in single value

Let's track the mapcount of large folios in a single value.  The mapcount
of a large folio currently corresponds to the sum of the entire mapcount
and all page mapcounts.  This sum is what we actually want to know in
folio_mapcount() and it is also sufficient for implementing
folio_mapped().

With PTE-mapped THP becoming more important and more widely used, we want
to avoid looping over all pages of a folio just to obtain the mapcount of
large folios.  The comment "In the common case, avoid the loop when no
pages mapped by PTE" in folio_total_mapcount() no longer holds for mTHP
that are always mapped by PTE.

Further, we are planning on using folio_mapcount() more frequently, and
might even want to remove page mapcounts for large folios in some kernel
configs.  Therefore, allow for reading the mapcount of large folios
efficiently and atomically without looping over any pages.  Maintain the
mapcount also for hugetlb pages for simplicity.  Use the new mapcount to
implement folio_mapcount() and folio_mapped().  Make page_mapped() simply
call folio_mapped().  We can now get rid of folio_large_is_mapped().

_nr_pages_mapped is now only used in rmap code and for debugging
purposes.  Keep folio_nr_pages_mapped() around, but document that its use
should be limited to rmap internals and debugging purposes.

This change implies one additional atomic add/sub whenever
mapping/unmapping (parts of) a large folio.  As we now batch RMAP
operations for PTE-mapped THP during fork(), during unmap/zap, and when
PTE-remapping a PMD-mapped THP, and we adjust the large mapcount for a
PTE batch only once, the added overhead in the common case is small.
Only when unmapping individual pages of a large folio (e.g., during COW),
the overhead might be bigger in comparison, but it's essentially one
additional atomic operation.

Note that before the new mapcount would overflow, already our refcount
would overflow: each mapping requires a folio reference.  Extend the
documentation of folio_mapcount().

Link: https://lkml.kernel.org/r/20240409192301.907377-5-david@redhat.com
Signed-off-by: David Hildenbrand
Reviewed-by: Yin Fengwei
Cc: Chris Zankel
Cc: Hugh Dickins
Cc: John Paul Adrian Glaubitz
Cc: Jonathan Corbet
Cc: Matthew Wilcox (Oracle)
Cc: Max Filippov
Cc: Miaohe Lin
Cc: Muchun Song
Cc: Naoya Horiguchi
Cc: Peter Xu
Cc: Richard Chang
Cc: Rich Felker
Cc: Ryan Roberts
Cc: Yang Shi
Cc: Yoshinori Sato
Cc: Zi Yan
Signed-off-by: Andrew Morton
---
 include/linux/rmap.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux/rmap.h')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 327f1ca5a487..0f906dc6d280 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -273,6 +273,7 @@ static inline int hugetlb_try_dup_anon_rmap(struct folio *folio,
                 ClearPageAnonExclusive(&folio->page);
         }
         atomic_inc(&folio->_entire_mapcount);
+        atomic_inc(&folio->_large_mapcount);
         return 0;
 }
 
@@ -306,6 +307,7 @@ static inline void hugetlb_add_file_rmap(struct folio *folio)
         VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
 
         atomic_inc(&folio->_entire_mapcount);
+        atomic_inc(&folio->_large_mapcount);
 }
 
 static inline void hugetlb_remove_rmap(struct folio *folio)
@@ -313,11 +315,14 @@ static inline void hugetlb_remove_rmap(struct folio *folio)
         VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
 
         atomic_dec(&folio->_entire_mapcount);
+        atomic_dec(&folio->_large_mapcount);
 }
 
 static __always_inline void __folio_dup_file_rmap(struct folio *folio,
                 struct page *page, int nr_pages, enum rmap_level level)
 {
+        const int orig_nr_pages = nr_pages;
+
         __folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
         switch (level) {
@@ -330,9 +335,11 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio,
                 do {
                         atomic_inc(&page->_mapcount);
                 } while (page++, --nr_pages > 0);
+                atomic_add(orig_nr_pages, &folio->_large_mapcount);
                 break;
         case RMAP_LEVEL_PMD:
                 atomic_inc(&folio->_entire_mapcount);
+                atomic_inc(&folio->_large_mapcount);
                 break;
         }
 }
@@ -382,6 +389,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
                 struct page *page, int nr_pages, struct vm_area_struct *src_vma,
                 enum rmap_level level)
 {
+        const int orig_nr_pages = nr_pages;
         bool maybe_pinned;
         int i;
 
@@ -423,6 +431,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
                         ClearPageAnonExclusive(page);
                         atomic_inc(&page->_mapcount);
                 } while (page++, --nr_pages > 0);
+                atomic_add(orig_nr_pages, &folio->_large_mapcount);
                 break;
         case RMAP_LEVEL_PMD:
                 if (PageAnonExclusive(page)) {
@@ -431,6 +440,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio,
                         ClearPageAnonExclusive(page);
                 }
                 atomic_inc(&folio->_entire_mapcount);
+                atomic_inc(&folio->_large_mapcount);
                 break;
         }
         return 0;
-- cgit
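To see what the new counter buys, here is a sketch of how folio_mapcount()
can now be implemented with a single read instead of a loop over subpages.
It is based on the commit message rather than on a hunk shown here, so
treat it as an approximation of the real definition (which lives in
include/linux/mm.h); the mapcount fields are stored offset by -1, hence
the "+ 1":

/* Sketch only: approximates the real folio_mapcount(). */
static inline int folio_mapcount_sketch(const struct folio *folio)
{
        if (likely(!folio_test_large(folio)))
                return atomic_read(&folio->_mapcount) + 1;
        /* One atomic read, no matter how many pages the folio has. */
        return atomic_read(&folio->_large_mapcount) + 1;
}
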
From 37bc2ff506b184411e4cc80f111c638b2b4c83d4 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Fri, 12 Apr 2024 20:35:00 +0100
Subject: mm: return the address from page_mapped_in_vma()

The only user of this function calls page_address_in_vma() immediately
after page_mapped_in_vma() calculates it and uses it to return
true/false.  Return the address instead, allowing memory-failure to skip
the call to page_address_in_vma().

Link: https://lkml.kernel.org/r/20240412193510.2356957-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Acked-by: Miaohe Lin
Reviewed-by: Jane Chu
Cc: Dan Williams
Cc: Oscar Salvador
Signed-off-by: Andrew Morton
---
 include/linux/rmap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/rmap.h')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 0f906dc6d280..7229b9baf20d 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -730,7 +730,7 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
 
 void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked);
 
-int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
+unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
 
 /*
  * rmap_walk_control: To control rmap traversing for specific needs
-- cgit
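Sketched below is the caller shape this enables (a hypothetical function;
the real user lives in mm/memory-failure.c and may differ, and -EFAULT as
the "not mapped" return value is an assumption mirroring vma_address()):

/* One rmap walk now yields both "is it mapped?" and the address. */
static void note_mapping(struct page *page, struct vm_area_struct *vma)
{
        unsigned long addr = page_mapped_in_vma(page, vma);

        if (addr == -EFAULT)
                return;         /* page is not mapped in this VMA */
        /* Use addr directly; no separate page_address_in_vma() call. */
        pr_info("page mapped at %#lx\n", addr);
}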