From b57b98d147ef98758886a39efb94f3254542c39b Mon Sep 17 00:00:00 2001
From: OGAWA Hirofumi
Date: Sat, 29 Oct 2005 18:15:50 -0700
Subject: [PATCH] mm/msync.c cleanup

This is not actually a problem, but sync_page_range() is already used as
the name of a function exported to filesystems.  The msync_xxx naming is
more readable, at least to me.

Signed-off-by: OGAWA Hirofumi
Acked-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/msync.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'mm/msync.c')

diff --git a/mm/msync.c b/mm/msync.c
index d0f5a1bce7cb..9cab3f2d5863 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -22,7 +22,7 @@
  * threads/the swapper from ripping pte's out from under us.
  */
 
-static void sync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end)
 {
 	pte_t *pte;
@@ -50,7 +50,7 @@ static void sync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	pte_unmap(pte - 1);
 }
 
-static inline void sync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+static inline void msync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end)
 {
 	pmd_t *pmd;
@@ -61,11 +61,11 @@ static inline void sync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		sync_pte_range(vma, pmd, addr, next);
+		msync_pte_range(vma, pmd, addr, next);
 	} while (pmd++, addr = next, addr != end);
 }
 
-static inline void sync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+static inline void msync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end)
 {
 	pud_t *pud;
@@ -76,11 +76,11 @@ static inline void sync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		sync_pmd_range(vma, pud, addr, next);
+		msync_pmd_range(vma, pud, addr, next);
 	} while (pud++, addr = next, addr != end);
 }
 
-static void sync_page_range(struct vm_area_struct *vma,
+static void msync_page_range(struct vm_area_struct *vma,
 				unsigned long addr, unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -101,14 +101,14 @@ static void sync_page_range(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		sync_pud_range(vma, pgd, addr, next);
+		msync_pud_range(vma, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 	spin_unlock(&mm->page_table_lock);
 }
 
 #ifdef CONFIG_PREEMPT
-static inline void filemap_sync(struct vm_area_struct *vma,
-				unsigned long addr, unsigned long end)
+static inline void filemap_msync(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long end)
 {
 	const size_t chunk = 64 * 1024;	/* bytes */
 	unsigned long next;
@@ -117,15 +117,15 @@ static inline void filemap_sync(struct vm_area_struct *vma,
 		next = addr + chunk;
 		if (next > end || next < addr)
 			next = end;
-		sync_page_range(vma, addr, next);
+		msync_page_range(vma, addr, next);
 		cond_resched();
 	} while (addr = next, addr != end);
 }
 #else
-static inline void filemap_sync(struct vm_area_struct *vma,
-				unsigned long addr, unsigned long end)
+static inline void filemap_msync(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long end)
 {
-	sync_page_range(vma, addr, end);
+	msync_page_range(vma, addr, end);
 }
 #endif
 
@@ -150,7 +150,7 @@ static int msync_interval(struct vm_area_struct *vma,
 		return -EBUSY;
 
 	if (file && (vma->vm_flags & VM_SHARED)) {
-		filemap_sync(vma, addr, end);
+		filemap_msync(vma, addr, end);
 
 		if (flags & MS_SYNC) {
 			struct address_space *mapping = file->f_mapping;
--
cgit

From 0c942a4539c09adf09097315cc174aefd0eeedf7 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Sat, 29 Oct 2005 18:15:53 -0700
Subject: [PATCH] mm: msync_pte_range progress

Use latency breaking in msync_pte_range like that in copy_pte_range,
instead of the ugly CONFIG_PREEMPT filemap_msync alternatives.

Signed-off-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/msync.c | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

(limited to 'mm/msync.c')

diff --git a/mm/msync.c b/mm/msync.c
index 9cab3f2d5863..3b5f1c521d4b 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -26,12 +26,21 @@ static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end)
 {
 	pte_t *pte;
+	int progress = 0;
 
+again:
 	pte = pte_offset_map(pmd, addr);
 	do {
 		unsigned long pfn;
 		struct page *page;
 
+		if (progress >= 64) {
+			progress = 0;
+			if (need_resched() ||
+			    need_lockbreak(&vma->vm_mm->page_table_lock))
+				break;
+		}
+		progress++;
 		if (!pte_present(*pte))
 			continue;
 		if (!pte_maybe_dirty(*pte))
@@ -46,8 +55,12 @@ static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		if (ptep_clear_flush_dirty(vma, addr, pte) ||
 		    page_test_and_clear_dirty(page))
 			set_page_dirty(page);
+		progress += 3;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap(pte - 1);
+	cond_resched_lock(&vma->vm_mm->page_table_lock);
+	if (addr != end)
+		goto again;
 }
 
 static inline void msync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
@@ -106,29 +119,6 @@ static void msync_page_range(struct vm_area_struct *vma,
 	spin_unlock(&mm->page_table_lock);
 }
 
-#ifdef CONFIG_PREEMPT
-static inline void filemap_msync(struct vm_area_struct *vma,
-				unsigned long addr, unsigned long end)
-{
-	const size_t chunk = 64 * 1024;	/* bytes */
-	unsigned long next;
-
-	do {
-		next = addr + chunk;
-		if (next > end || next < addr)
-			next = end;
-		msync_page_range(vma, addr, next);
-		cond_resched();
-	} while (addr = next, addr != end);
-}
-#else
-static inline void filemap_msync(struct vm_area_struct *vma,
-				unsigned long addr, unsigned long end)
-{
-	msync_page_range(vma, addr, end);
-}
-#endif
-
 /*
  * MS_SYNC syncs the entire file - including mappings.
  *
@@ -150,7 +140,7 @@ static int msync_interval(struct vm_area_struct *vma,
 		return -EBUSY;
 
 	if (file && (vma->vm_flags & VM_SHARED)) {
-		filemap_msync(vma, addr, end);
+		msync_page_range(vma, addr, end);
 
 		if (flags & MS_SYNC) {
 			struct address_space *mapping = file->f_mapping;
--
cgit
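The latency-breaking idiom introduced above is easier to study outside the
kernel.  The sketch below is a userspace analogue only, under stated
assumptions: a pthread mutex stands in for mm->page_table_lock, a flat array
stands in for a pte page, and the lock is dropped unconditionally every 64
entries rather than only when need_resched()/need_lockbreak() report pressure.
Every identifier in it is illustrative, not kernel API.

/*
 * Userspace sketch (illustrative only) of the progress/lock-break idiom:
 * walk a table under a lock, but back out every 64 entries so waiters
 * and the scheduler are not held off for the whole walk.
 */
#include <pthread.h>
#include <sched.h>
#include <stddef.h>

#define NENTRIES 4096

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static int entry[NENTRIES];
static int dirty[NENTRIES];

static void sync_dirty_range(size_t start, size_t end)
{
	size_t i = start;

again:
	pthread_mutex_lock(&table_lock);
	for (int progress = 0; i < end; i++) {
		if (++progress >= 64) {
			/* lock break: let waiters in, then resume at entry i */
			pthread_mutex_unlock(&table_lock);
			sched_yield();
			goto again;
		}
		if (!dirty[i])
			continue;
		entry[i] = 0;	/* stand-in for the real flush work */
		dirty[i] = 0;
		progress += 3;	/* flushing is charged more than scanning */
	}
	pthread_mutex_unlock(&table_lock);
}

int main(void)
{
	for (size_t i = 0; i < NENTRIES; i += 7)
		dirty[i] = entry[i] = 1;
	sync_dirty_range(0, NENTRIES);
	return 0;
}

Build with cc -pthread.  The in-tree version only breaks out when
need_resched() or need_lockbreak() actually fire, and charges a cleaned pte
three extra units of progress, exactly as the hunk above does.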
From b5810039a54e5babf428e9a1e89fc1940fabff11 Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Sat, 29 Oct 2005 18:16:12 -0700
Subject: [PATCH] core remove PageReserved

Remove PageReserved() calls from core code by tightening VM_RESERVED
handling in mm/ to cover PageReserved functionality.  PageReserved
special casing is removed from get_page and put_page.

All setting and clearing of PageReserved is retained, and it is now flagged
in the page_alloc checks to help ensure we don't introduce any refcount
based freeing of Reserved pages.

MAP_PRIVATE, PROT_WRITE of VM_RESERVED regions is tentatively being
deprecated.  We never completely handled it correctly anyway, and it is to
be reintroduced in future if required (Hugh has a proof of concept).

Once PageReserved() calls are removed from kernel/power/swsusp.c, and all
arch/ and driver code, the Set and Clear calls, and the PG_reserved bit can
be trivially removed.

The last real user of PageReserved is swsusp, which uses it to determine
whether a struct page points to valid memory or not.  This still needs to
be addressed (a generic page_is_ram() should work).

A last caveat: the ZERO_PAGE is now refcounted and managed with rmap (and
thus mapcounted and counted towards shared rss).  These writes to the
struct page could cause excessive cacheline bouncing on big systems.  There
are a number of ways this could be addressed if it is an issue.

Signed-off-by: Nick Piggin

Refcount bug fix for filemap_xip.c

Signed-off-by: Carsten Otte
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/msync.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'mm/msync.c')

diff --git a/mm/msync.c b/mm/msync.c
index 3b5f1c521d4b..860395486060 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -25,6 +25,7 @@
 static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte;
 	int progress = 0;
 
@@ -37,7 +38,7 @@ again:
 		if (progress >= 64) {
 			progress = 0;
 			if (need_resched() ||
-			    need_lockbreak(&vma->vm_mm->page_table_lock))
+			    need_lockbreak(&mm->page_table_lock))
 				break;
 		}
 		progress++;
@@ -46,11 +47,11 @@ again:
 		if (!pte_maybe_dirty(*pte))
 			continue;
 		pfn = pte_pfn(*pte);
-		if (!pfn_valid(pfn))
+		if (unlikely(!pfn_valid(pfn))) {
+			print_bad_pte(vma, *pte, addr);
 			continue;
+		}
 		page = pfn_to_page(pfn);
-		if (PageReserved(page))
-			continue;
 
 		if (ptep_clear_flush_dirty(vma, addr, pte) ||
 		    page_test_and_clear_dirty(page))
@@ -58,7 +59,7 @@ again:
 		progress += 3;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap(pte - 1);
-	cond_resched_lock(&vma->vm_mm->page_table_lock);
+	cond_resched_lock(&mm->page_table_lock);
 	if (addr != end)
 		goto again;
 }
@@ -102,8 +103,10 @@ static void msync_page_range(struct vm_area_struct *vma,
 
 	/* For hugepages we can't go walking the page table normally,
 	 * but that's ok, hugetlbfs is memory based, so we don't need
-	 * to do anything more on an msync() */
-	if (is_vm_hugetlb_page(vma))
+	 * to do anything more on an msync().
+	 * Can't do anything with VM_RESERVED regions either.
+	 */
+	if (vma->vm_flags & (VM_HUGETLB|VM_RESERVED))
 		return;
 
 	BUG_ON(addr >= end);
--
cgit
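For context on what ultimately drives this pte walk: below is a minimal
userspace sketch of the msync(2) call these patches service.  The file name
and length are made up for the example; msync(2) on a shared file mapping
sends msync_interval() down the page-table walk shown above, and with
MS_SYNC it then also waits for the resulting writeback to finish.

/* Minimal msync(2) user: dirty a shared file mapping, then force the
 * dirty pte bits to be propagated and the data written back. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 4096;		/* one page, illustrative */
	int fd = open("data.bin", O_RDWR | O_CREAT, 0644);

	if (fd < 0 || ftruncate(fd, (off_t)len) < 0) {
		perror("open/ftruncate");
		return 1;
	}

	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	memcpy(p, "hello", 5);		/* dirties the page through the pte */

	/* The kernel side (msync_page_range() above) transfers pte dirty
	 * bits to each struct page; MS_SYNC additionally waits for the
	 * writeback to complete. */
	if (msync(p, len, MS_SYNC) < 0) {
		perror("msync");
		return 1;
	}

	munmap(p, len);
	close(fd);
	return 0;
}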
From 705e87c0c3c38424f7f30556c85bc20e808d2f59 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Sat, 29 Oct 2005 18:16:27 -0700
Subject: [PATCH] mm: pte_offset_map_lock loops

Convert those common loops using page_table_lock on the outside and
pte_offset_map within to use just pte_offset_map_lock within instead.

These all hold mmap_sem (some exclusively, some not), so at no level can a
page table be whipped away from beneath them.  But whereas pte_alloc loops
tested with the "atomic" pmd_present, these loops are testing with pmd_none,
which on i386 PAE tests both lower and upper halves.

That's now unsafe, so add a cast into pmd_none to test only the vital lower
half: we lose a little sensitivity to a corrupt middle directory, but not
enough to worry about.

It appears that i386 and UML were the only architectures vulnerable in this
way, and that pgd and pud present no problem.

Signed-off-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/msync.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

(limited to 'mm/msync.c')

diff --git a/mm/msync.c b/mm/msync.c
index 860395486060..0e040e9c39d8 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -17,28 +17,22 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
-/*
- * Called with mm->page_table_lock held to protect against other
- * threads/the swapper from ripping pte's out from under us.
- */
-
 static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end)
 {
-	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte;
+	spinlock_t *ptl;
 	int progress = 0;
 
 again:
-	pte = pte_offset_map(pmd, addr);
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	do {
 		unsigned long pfn;
 		struct page *page;
 
 		if (progress >= 64) {
 			progress = 0;
-			if (need_resched() ||
-			    need_lockbreak(&mm->page_table_lock))
+			if (need_resched() || need_lockbreak(ptl))
 				break;
 		}
 		progress++;
@@ -58,8 +52,8 @@ again:
 			set_page_dirty(page);
 		progress += 3;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
-	pte_unmap(pte - 1);
-	cond_resched_lock(&mm->page_table_lock);
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
 	if (addr != end)
 		goto again;
 }
@@ -97,7 +91,6 @@ static inline void msync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 static void msync_page_range(struct vm_area_struct *vma,
 				unsigned long addr, unsigned long end)
 {
-	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
 	unsigned long next;
 
@@ -110,16 +103,14 @@ static void msync_page_range(struct vm_area_struct *vma,
 		return;
 
 	BUG_ON(addr >= end);
-	pgd = pgd_offset(mm, addr);
+	pgd = pgd_offset(vma->vm_mm, addr);
 	flush_cache_range(vma, addr, end);
-	spin_lock(&mm->page_table_lock);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
 		msync_pud_range(vma, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
-	spin_unlock(&mm->page_table_lock);
 }
 
 /*
--
cgit
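The conversion in this last patch - drop the single mm-wide page_table_lock
around the whole walk and instead hold the lock handed back by
pte_offset_map_lock() only while one pte page is scanned - is the same move
as going from one global mutex to per-bucket locks in userspace.  The sketch
below is an analogue under that assumption only; none of it is kernel API,
and the bucket/entry names are invented for the example.

/* Userspace analogue of scoped, per-leaf locking: each bucket carries its
 * own lock, held only while that bucket is walked, so walkers of different
 * buckets proceed in parallel and a long walk can yield between buckets
 * (the kernel calls cond_resched() at the equivalent point). */
#include <pthread.h>
#include <sched.h>
#include <stddef.h>

#define NBUCKETS    64
#define BUCKET_SIZE 128

struct bucket {
	pthread_mutex_t lock;	/* analogue of the ptl from pte_offset_map_lock() */
	int entry[BUCKET_SIZE];
};

static struct bucket table[NBUCKETS];

static void table_init(void)
{
	for (size_t b = 0; b < NBUCKETS; b++)
		pthread_mutex_init(&table[b].lock, NULL);
}

static long sum_all_entries(void)
{
	long sum = 0;

	for (size_t b = 0; b < NBUCKETS; b++) {
		pthread_mutex_lock(&table[b].lock);	/* lock only this leaf */
		for (size_t i = 0; i < BUCKET_SIZE; i++)
			sum += table[b].entry[i];
		pthread_mutex_unlock(&table[b].lock);
		sched_yield();		/* natural preemption point between leaves */
	}
	return sum;
}

int main(void)
{
	table_init();
	table[3].entry[5] = 7;
	return sum_all_entries() == 7 ? 0 : 1;
}

The old pattern corresponds to taking one process-wide mutex around the whole
outer loop, which is what the removed spin_lock(&mm->page_table_lock) in
msync_page_range() did.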