From dea2434c23c102b3e7d320849ec1cfeb432edb60 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Sep 2018 10:43:14 +0200 Subject: asm-generic/tlb: Provide a comment Write a comment explaining some of this.. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 119 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 6be86c1c5c58..f1594ba8b2de 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -22,6 +22,118 @@ #ifdef CONFIG_MMU +/* + * Generic MMU-gather implementation. + * + * The mmu_gather data structure is used by the mm code to implement the + * correct and efficient ordering of freeing pages and TLB invalidations. + * + * This correct ordering is: + * + * 1) unhook page + * 2) TLB invalidate page + * 3) free page + * + * That is, we must never free a page before we have ensured there are no live + * translations left to it. Otherwise it might be possible to observe (or + * worse, change) the page content after it has been reused. + * + * The mmu_gather API consists of: + * + * - tlb_gather_mmu() / tlb_finish_mmu(); start and finish a mmu_gather + * + * Finish in particular will issue a (final) TLB invalidate and free + * all (remaining) queued pages. + * + * - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA + * + * Defaults to flushing at tlb_end_vma() to reset the range; helps when + * there's large holes between the VMAs. + * + * - tlb_remove_page() / __tlb_remove_page() + * - tlb_remove_page_size() / __tlb_remove_page_size() + * + * __tlb_remove_page_size() is the basic primitive that queues a page for + * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a + * boolean indicating if the queue is (now) full and a call to + * tlb_flush_mmu() is required. + * + * tlb_remove_page() and tlb_remove_page_size() imply the call to + * tlb_flush_mmu() when required and has no return value. + * + * - tlb_remove_check_page_size_change() + * + * call before __tlb_remove_page*() to set the current page-size; implies a + * possible tlb_flush_mmu() call. + * + * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly() / tlb_flush_mmu_free() + * + * tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets + * related state, like the range) + * + * tlb_flush_mmu_free() - frees the queued pages; make absolutely + * sure no additional tlb_remove_page() + * calls happen between _tlbonly() and this. + * + * tlb_flush_mmu() - the above two calls. + * + * - mmu_gather::fullmm + * + * A flag set by tlb_gather_mmu() to indicate we're going to free + * the entire mm; this allows a number of optimizations. + * + * - We can ignore tlb_{start,end}_vma(); because we don't + * care about ranges. Everything will be shot down. + * + * - (RISC) architectures that use ASIDs can cycle to a new ASID + * and delay the invalidation until ASID space runs out. + * + * - mmu_gather::need_flush_all + * + * A flag that can be set by the arch code if it wants to force + * flush the entire TLB irrespective of the range. For instance + * x86-PAE needs this when changing top-level entries. + * + * And requires the architecture to provide and implement tlb_flush(). 
+ * + * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make + * use of: + * + * - mmu_gather::start / mmu_gather::end + * + * which provides the range that needs to be flushed to cover the pages to + * be freed. + * + * - mmu_gather::freed_tables + * + * set when we freed page table pages + * + * - tlb_get_unmap_shift() / tlb_get_unmap_size() + * + * returns the smallest TLB entry size unmapped in this range + * + * Additionally there are a few opt-in features: + * + * HAVE_RCU_TABLE_FREE + * + * This provides tlb_remove_table(), to be used instead of tlb_remove_page() + * for page directores (__p*_free_tlb()). This provides separate freeing of + * the page-table pages themselves in a semi-RCU fashion (see comment below). + * Useful if your architecture doesn't use IPIs for remote TLB invalidates + * and therefore doesn't naturally serialize with software page-table walkers. + * + * When used, an architecture is expected to provide __tlb_remove_table() + * which does the actual freeing of these pages. + * + * HAVE_RCU_TABLE_INVALIDATE + * + * This makes HAVE_RCU_TABLE_FREE call tlb_flush_mmu_tlbonly() before freeing + * the page-table pages. Required if you use HAVE_RCU_TABLE_FREE and your + * architecture uses the Linux page-tables natively. + * + */ +#define HAVE_GENERIC_MMU_GATHER + #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* * Semi RCU freeing of the page directories. @@ -89,14 +201,17 @@ struct mmu_gather_batch { */ #define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH) -/* struct mmu_gather is an opaque type used by the mm code for passing around +/* + * struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page. */ struct mmu_gather { struct mm_struct *mm; + #ifdef CONFIG_HAVE_RCU_TABLE_FREE struct mmu_table_batch *batch; #endif + unsigned long start; unsigned long end; /* @@ -131,8 +246,6 @@ struct mmu_gather { int page_size; }; -#define HAVE_GENERIC_MMU_GATHER - void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); -- cgit From ed6a79352cad00e9a49d6e438be40e45107207bf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 31 Aug 2018 14:46:08 +0200 Subject: asm-generic/tlb, arch: Provide CONFIG_HAVE_MMU_GATHER_PAGE_SIZE Move the mmu_gather::page_size things into the generic code instead of PowerPC specific bits. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index f1594ba8b2de..e75620e41ba4 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -61,7 +61,7 @@ * tlb_remove_page() and tlb_remove_page_size() imply the call to * tlb_flush_mmu() when required and has no return value. * - * - tlb_remove_check_page_size_change() + * - tlb_change_page_size() * * call before __tlb_remove_page*() to set the current page-size; implies a * possible tlb_flush_mmu() call. 
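[ Illustration, not part of the patch above: with the rename in place, a caller
  that frees pages of a non-default size is expected to switch the gather's
  page size before queueing them. A minimal sketch, assuming a transparent
  hugepage configuration where HPAGE_PMD_SIZE is defined; the helper name is
  made up: ]

#include <linux/mm.h>
#include <asm/tlb.h>

/*
 * Sketch only: queue one PMD-sized page on the gather.  When
 * HAVE_MMU_GATHER_PAGE_SIZE is selected, tlb_change_page_size() issues
 * tlb_flush_mmu() first if the gather was batching a different page size;
 * otherwise it is a no-op.
 */
static void example_remove_pmd_page(struct mmu_gather *tlb, struct page *page)
{
	tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
	tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);
}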
@@ -114,6 +114,11 @@ * * Additionally there are a few opt-in features: * + * HAVE_MMU_GATHER_PAGE_SIZE + * + * This ensures we call tlb_flush() every time tlb_change_page_size() actually + * changes the size and provides mmu_gather::page_size to tlb_flush(). + * * HAVE_RCU_TABLE_FREE * * This provides tlb_remove_table(), to be used instead of tlb_remove_page() @@ -239,11 +244,15 @@ struct mmu_gather { unsigned int cleared_puds : 1; unsigned int cleared_p4ds : 1; + unsigned int batch_count; + struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; - unsigned int batch_count; - int page_size; + +#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE + unsigned int page_size; +#endif }; void arch_tlb_gather_mmu(struct mmu_gather *tlb, @@ -309,21 +318,18 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) return tlb_remove_page_size(tlb, page, PAGE_SIZE); } -#ifndef tlb_remove_check_page_size_change -#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change -static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, +static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { - /* - * We don't care about page size change, just update - * mmu_gather page size here so that debug checks - * doesn't throw false warning. - */ -#ifdef CONFIG_DEBUG_VM +#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE + if (tlb->page_size && tlb->page_size != page_size) { + if (!tlb->fullmm) + tlb_flush_mmu(tlb); + } + tlb->page_size = page_size; #endif } -#endif static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb) { -- cgit From e7fd28a706bfaf9cd65dccf18140187f7ad04839 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 27 Aug 2018 13:00:17 +0200 Subject: asm-generic/tlb, arch: Provide generic VIPT cache flush The one obvious thing SH and ARM want is a sensible default for tlb_start_vma(). (also: https://lkml.org/lkml/2004/1/15/6 ) Avoid all VIPT architectures providing their own tlb_start_vma() implementation and rely on architectures to provide a no-op flush_cache_range() when it is not relevant. This patch makes tlb_start_vma() default to flush_cache_range(), which should be right and sufficient. The only exceptions that I found where (oddly): - m68k-mmu - sparc64 - unicore Those architectures appear to have flush_cache_range(), but their current tlb_start_vma() does not call it. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: David Miller Cc: Guan Xuetao Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index e75620e41ba4..f0aa53db5e60 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_MMU @@ -356,17 +357,19 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) * the vmas are adjusted to only cover the region to be torn down. 
*/ #ifndef tlb_start_vma -#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_start_vma(tlb, vma) \ +do { \ + if (!tlb->fullmm) \ + flush_cache_range(vma, vma->vm_start, vma->vm_end); \ +} while (0) #endif -#define __tlb_end_vma(tlb, vma) \ - do { \ - if (!tlb->fullmm) \ - tlb_flush_mmu_tlbonly(tlb); \ - } while (0) - #ifndef tlb_end_vma -#define tlb_end_vma __tlb_end_vma +#define tlb_end_vma(tlb, vma) \ +do { \ + if (!tlb->fullmm) \ + tlb_flush_mmu_tlbonly(tlb); \ +} while (0) #endif #ifndef __tlb_remove_tlb_entry -- cgit From 5f307be18b32aeff7bbad540c0d3897ecedbeb56 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Sep 2018 13:18:15 +0200 Subject: asm-generic/tlb, arch: Provide generic tlb_flush() based on flush_tlb_range() Provide a generic tlb_flush() implementation that relies on flush_tlb_range(). This is a little awkward because flush_tlb_range() assumes a VMA for range invalidation, but we no longer have one. Audit of all flush_tlb_range() implementations shows only vma->vm_mm and vma->vm_flags are used, and of the latter only VM_EXEC (I-TLB invalidates) and VM_HUGETLB (large TLB invalidate) are used. Therefore, track VM_EXEC and VM_HUGETLB in two more bits, and create a 'fake' VMA. This allows architectures that have a reasonably efficient flush_tlb_range() to not require any additional effort. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 95 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 83 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index f0aa53db5e60..e6a4c407be6c 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -95,7 +95,7 @@ * flush the entire TLB irrespective of the range. For instance * x86-PAE needs this when changing top-level entries. * - * And requires the architecture to provide and implement tlb_flush(). + * And allows the architecture to provide and implement tlb_flush(): * * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make * use of: @@ -111,7 +111,10 @@ * * - tlb_get_unmap_shift() / tlb_get_unmap_size() * - * returns the smallest TLB entry size unmapped in this range + * returns the smallest TLB entry size unmapped in this range. + * + * If an architecture does not provide tlb_flush() a default implementation + * based on flush_tlb_range() will be used. * * Additionally there are a few opt-in features: * @@ -245,6 +248,12 @@ struct mmu_gather { unsigned int cleared_puds : 1; unsigned int cleared_p4ds : 1; + /* + * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma + */ + unsigned int vma_exec : 1; + unsigned int vma_huge : 1; + unsigned int batch_count; struct mmu_gather_batch *active; @@ -286,8 +295,59 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) tlb->cleared_pmds = 0; tlb->cleared_puds = 0; tlb->cleared_p4ds = 0; + /* + * Do not reset mmu_gather::vma_* fields here, we do not + * call into tlb_start_vma() again to set them if there is an + * intermediate flush. 
+ */ +} + +#ifndef tlb_flush + +#if defined(tlb_start_vma) || defined(tlb_end_vma) +#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma() +#endif + +static inline void tlb_flush(struct mmu_gather *tlb) +{ + if (tlb->fullmm || tlb->need_flush_all) { + flush_tlb_mm(tlb->mm); + } else if (tlb->end) { + struct vm_area_struct vma = { + .vm_mm = tlb->mm, + .vm_flags = (tlb->vma_exec ? VM_EXEC : 0) | + (tlb->vma_huge ? VM_HUGETLB : 0), + }; + + flush_tlb_range(&vma, tlb->start, tlb->end); + } +} + +static inline void +tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + /* + * flush_tlb_range() implementations that look at VM_HUGETLB (tile, + * mips-4k) flush only large pages. + * + * flush_tlb_range() implementations that flush I-TLB also flush D-TLB + * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing + * range. + * + * We rely on tlb_end_vma() to issue a flush, such that when we reset + * these values the batch is empty. + */ + tlb->vma_huge = !!(vma->vm_flags & VM_HUGETLB); + tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); } +#else + +static inline void +tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } + +#endif + static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { if (!tlb->end) @@ -357,19 +417,30 @@ static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) * the vmas are adjusted to only cover the region to be torn down. */ #ifndef tlb_start_vma -#define tlb_start_vma(tlb, vma) \ -do { \ - if (!tlb->fullmm) \ - flush_cache_range(vma, vma->vm_start, vma->vm_end); \ -} while (0) +static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + if (tlb->fullmm) + return; + + tlb_update_vma_flags(tlb, vma); + flush_cache_range(vma, vma->vm_start, vma->vm_end); +} #endif #ifndef tlb_end_vma -#define tlb_end_vma(tlb, vma) \ -do { \ - if (!tlb->fullmm) \ - tlb_flush_mmu_tlbonly(tlb); \ -} while (0) +static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + if (tlb->fullmm) + return; + + /* + * Do a TLB flush and reset the range at VMA boundaries; this avoids + * the ranges growing with the unused space between consecutive VMAs, + * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on + * this. + */ + tlb_flush_mmu_tlbonly(tlb); +} #endif #ifndef __tlb_remove_tlb_entry -- cgit From a30e32bd79e924f460b8b83408d88af34a402f6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Oct 2018 16:51:51 +0200 Subject: asm-generic/tlb: Provide generic tlb_flush() based on flush_tlb_mm() When an architecture does not have (an efficient) flush_tlb_range(), but instead always uses full TLB invalidates, the current generic tlb_flush() is sub-optimal, for it will generate extra flushes in order to keep the range small. But if we cannot do range flushes, that is a moot concern. Optionally provide this simplified default. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index e6a4c407be6c..6850d8bab6d3 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -114,7 +114,8 @@ * returns the smallest TLB entry size unmapped in this range. * * If an architecture does not provide tlb_flush() a default implementation - * based on flush_tlb_range() will be used. + * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is + * specified, in which case we'll default to flush_tlb_mm(). * * Additionally there are a few opt-in features: * @@ -140,6 +141,9 @@ * the page-table pages. Required if you use HAVE_RCU_TABLE_FREE and your * architecture uses the Linux page-tables natively. * + * MMU_GATHER_NO_RANGE + * + * Use this if your architecture lacks an efficient flush_tlb_range(). */ #define HAVE_GENERIC_MMU_GATHER @@ -302,12 +306,45 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) */ } +#ifdef CONFIG_MMU_GATHER_NO_RANGE + +#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma) +#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma() +#endif + +/* + * When an architecture does not have efficient means of range flushing TLBs + * there is no point in doing intermediate flushes on tlb_end_vma() to keep the + * range small. We equally don't have to worry about page granularity or other + * things. + * + * All we need to do is issue a full flush for any !0 range. + */ +static inline void tlb_flush(struct mmu_gather *tlb) +{ + if (tlb->end) + flush_tlb_mm(tlb->mm); +} + +static inline void +tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } + +#define tlb_end_vma tlb_end_vma +static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { } + +#else /* CONFIG_MMU_GATHER_NO_RANGE */ + #ifndef tlb_flush #if defined(tlb_start_vma) || defined(tlb_end_vma) #error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma() #endif +/* + * When an architecture does not provide its own tlb_flush() implementation + * but does have a reasonably efficient flush_vma_range() implementation + * use that. + */ static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->fullmm || tlb->need_flush_all) { @@ -348,6 +385,8 @@ tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { } #endif +#endif /* CONFIG_MMU_GATHER_NO_RANGE */ + static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { if (!tlb->end) -- cgit From 8b6dd0c47894e2c190560914318aa4181bc8c2f2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 Sep 2018 16:00:53 +0200 Subject: asm-generic/tlb, ia64: Conditionally provide tlb_migrate_finish() Needed for ia64 -- alternatively we drop the entire hook. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 6850d8bab6d3..1c861989b704 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -604,6 +604,8 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #endif /* CONFIG_MMU */ +#ifndef tlb_migrate_finish #define tlb_migrate_finish(mm) do {} while (0) +#endif #endif /* _ASM_GENERIC__TLB_H */ -- cgit From 96bc9567cbe112e9320250f01b9c060c882e8619 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Sep 2018 13:24:41 +0200 Subject: asm-generic/tlb, arch: Invert CONFIG_HAVE_RCU_TABLE_INVALIDATE Make issuing a TLB invalidate for page-table pages the normal case. The reason is twofold: - too many invalidates is safer than too few, - most architectures use the linux page-tables natively and would thus require this. Make it an opt-out, instead of an opt-in. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 1c861989b704..81799e6a4304 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -135,11 +135,12 @@ * When used, an architecture is expected to provide __tlb_remove_table() * which does the actual freeing of these pages. * - * HAVE_RCU_TABLE_INVALIDATE + * HAVE_RCU_TABLE_NO_INVALIDATE * - * This makes HAVE_RCU_TABLE_FREE call tlb_flush_mmu_tlbonly() before freeing - * the page-table pages. Required if you use HAVE_RCU_TABLE_FREE and your - * architecture uses the Linux page-tables natively. + * This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before + * freeing the page-table pages. This can be avoided if you use + * HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux + * page-tables natively. * * MMU_GATHER_NO_RANGE * -- cgit From 952a31c9e6fa963eabf3692f31a769e59f4c8303 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 18 Sep 2018 14:51:50 +0200 Subject: asm-generic/tlb: Introduce CONFIG_HAVE_MMU_GATHER_NO_GATHER=y Add the Kconfig option HAVE_MMU_GATHER_NO_GATHER to the generic mmu_gather code. If the option is set the mmu_gather will not track individual pages for delayed page free anymore. A platform that enables the option needs to provide its own implementation of the __tlb_remove_page_size() function to free pages. No change in behavior intended. Signed-off-by: Martin Schwidefsky Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: aneesh.kumar@linux.vnet.ibm.com Cc: heiko.carstens@de.ibm.com Cc: linux@armlinux.org.uk Cc: npiggin@gmail.com Link: http://lkml.kernel.org/r/20180918125151.31744-2-schwidefsky@de.ibm.com Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 81799e6a4304..af20aa8255cd 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -191,6 +191,7 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #endif +#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER /* * If we can't allocate a page to make a big batch of page pointers * to work on, then just handle a few from the on-stack structure. @@ -215,6 +216,10 @@ struct mmu_gather_batch { */ #define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH) +extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, + int page_size); +#endif + /* * struct mmu_gather is an opaque type used by the mm code for passing around * any data needed by arch specific code for tlb_remove_page. @@ -261,6 +266,7 @@ struct mmu_gather { unsigned int batch_count; +#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; @@ -268,6 +274,7 @@ struct mmu_gather { #ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE unsigned int page_size; #endif +#endif }; void arch_tlb_gather_mmu(struct mmu_gather *tlb, @@ -276,8 +283,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force); void tlb_flush_mmu_free(struct mmu_gather *tlb); -extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, - int page_size); static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address, -- cgit From b3fa8ed4e48802e6ba0aa5f3283313a27dcbf46f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Sep 2018 10:51:01 +0200 Subject: asm-generic/tlb: Remove CONFIG_HAVE_GENERIC_MMU_GATHER Since all architectures are now using it, it is redundant. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index af20aa8255cd..2648a02a6b1b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -146,7 +146,6 @@ * * Use this if your architecture lacks an efficient flush_tlb_range(). */ -#define HAVE_GENERIC_MMU_GATHER #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* -- cgit From fa0aafb8acb684e68231ff0a547ed249f8dc31a5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Sep 2018 10:54:04 +0200 Subject: asm-generic/tlb: Remove tlb_flush_mmu_free() As the comment notes; it is a potentially dangerous operation. Just use tlb_flush_mmu(), that will skip the (double) TLB invalidate if it really isn't needed anyway. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 2648a02a6b1b..ddd3d02be93d 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -67,16 +67,13 @@ * call before __tlb_remove_page*() to set the current page-size; implies a * possible tlb_flush_mmu() call. * - * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly() / tlb_flush_mmu_free() + * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly() * * tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets * related state, like the range) * - * tlb_flush_mmu_free() - frees the queued pages; make absolutely - * sure no additional tlb_remove_page() - * calls happen between _tlbonly() and this. - * - * tlb_flush_mmu() - the above two calls. + * tlb_flush_mmu() - in addition to the above TLB invalidate, also frees + * whatever pages are still batched. * * - mmu_gather::fullmm * @@ -281,7 +278,6 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force); -void tlb_flush_mmu_free(struct mmu_gather *tlb); static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address, -- cgit From 0a8caf211bcf52cbb59e100ead4908fe88d2a510 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Sep 2018 10:55:10 +0200 Subject: asm-generic/tlb: Remove tlb_table_flush() There are no external users of this API (nor should there be); remove it. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index ddd3d02be93d..fc661b4675ff 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -182,7 +182,6 @@ struct mmu_table_batch { #define MAX_TABLE_BATCH \ ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) -extern void tlb_table_flush(struct mmu_gather *tlb); extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #endif -- cgit From 6455959819bf2469190ae9f6b4ccebaa9827e884 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Feb 2019 14:38:37 +0100 Subject: ia64/tlb: Eradicate tlb_migrate_finish() callback Only ia64-sn2 uses this as an optimization, and there it is of questionable correctness due to the mm_users==1 test. Remove it entirely. No change in behavior intended. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/asm-generic/tlb.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index fc661b4675ff..b9edc7608d90 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -604,8 +604,4 @@ static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vm #endif /* CONFIG_MMU */ -#ifndef tlb_migrate_finish -#define tlb_migrate_finish(mm) do {} while (0) -#endif - #endif /* _ASM_GENERIC__TLB_H */ -- cgit
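[ Illustration, not part of the series: after these changes, core mm code uses
  the generic mmu_gather roughly as sketched below.  The sketch is against the
  interfaces as they stand at the end of the series (tlb_gather_mmu() and
  tlb_finish_mmu() still take an explicit start/end here), the page-table walk
  and present-pte / special-page checks are elided, and the example_*() names
  are made up: ]

#include <linux/mm.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>

/* Sketch: step 1) unhook one page and queue it for freeing. */
static void example_zap_pte(struct mmu_gather *tlb, struct vm_area_struct *vma,
			    pte_t *pte, unsigned long addr)
{
	pte_t ptent = ptep_get_and_clear(vma->vm_mm, addr, pte);
	struct page *page = pte_page(ptent);

	tlb_remove_tlb_entry(tlb, pte, addr);	/* track the address for the later invalidate */
	tlb_remove_page(tlb, page);		/* queue the page; flushes itself if the batch fills up */
}

/* Sketch: unmap part of one VMA in the 1) unhook, 2) invalidate, 3) free order. */
static void example_unmap_vma(struct vm_area_struct *vma,
			      unsigned long start, unsigned long end)
{
	struct mmu_gather tlb;

	tlb_gather_mmu(&tlb, vma->vm_mm, start, end);
	tlb_start_vma(&tlb, vma);	/* cache flush; records VM_EXEC/VM_HUGETLB for tlb_flush() */

	/* ... walk the page tables, calling example_zap_pte() for each present pte ... */

	tlb_end_vma(&tlb, vma);			/* 2) TLB invalidate for this VMA's range */
	tlb_finish_mmu(&tlb, start, end);	/* final invalidate, then 3) free the queued pages */
}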