From 202d182a92c60416680e31baa697faa60b0882f5 Mon Sep 17 00:00:00 2001
From: Bjorn Steinbrink
Date: Mon, 16 May 2005 21:53:17 -0700
Subject: [PATCH] mm: fix rss counter being incremented when unmapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes a bug introduced by the "mm counter operations through
macros" patch, which replaced a decrement operation with an increment
macro in try_to_unmap_one().

Signed-off-by: Björn Steinbrink
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/rmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 378de234c12b..a6203b4e1278 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -586,7 +586,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 		dec_mm_counter(mm, anon_rss);
 	}
 
-	inc_mm_counter(mm, rss);
+	dec_mm_counter(mm, rss);
 	page_remove_rmap(page);
 	page_cache_release(page);
 
-- cgit

From 7179906293ebdc333f14a03d3e58b03604848f3c Mon Sep 17 00:00:00 2001
From: Kirill Korotaev
Date: Mon, 16 May 2005 21:53:18 -0700
Subject: [PATCH] mm acct accounting fix

This patch fixes mm->total_vm and mm->locked_vm accounting in the case
where move_page_tables() fails inside move_vma().

Signed-Off-By: Kirill Korotaev
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/mremap.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mm/mremap.c b/mm/mremap.c
index 0dd7ace94e51..ec7238a78f36 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -224,6 +224,12 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		split = 1;
 	}
 
+	/*
+	 * if we failed to move page tables we still do total_vm increment
+	 * since do_munmap() will decrement it by old_len == new_len
+	 */
+	mm->total_vm += new_len >> PAGE_SHIFT;
+
 	if (do_munmap(mm, old_addr, old_len) < 0) {
 		/* OOM: unable to split vma, just get accounts right */
 		vm_unacct_memory(excess >> PAGE_SHIFT);
@@ -237,7 +243,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		vma->vm_next->vm_flags |= VM_ACCOUNT;
 	}
 
-	mm->total_vm += new_len >> PAGE_SHIFT;
 	__vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += new_len >> PAGE_SHIFT;
-- cgit

From 7a019225c797a1047470accee950d69cfe7c59c5 Mon Sep 17 00:00:00 2001
From: Adrian Bunk
Date: Mon, 16 May 2005 21:53:37 -0700
Subject: [PATCH] mm/nommu.c: try to fix __vmalloc

Linus changed the second argument of __vmalloc from int to unsigned int,
breaking the compilation for CONFIG_MMU=n configurations (since he only
changed vmalloc.c but not nommu.c).

Signed-off-by: Adrian Bunk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/nommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/nommu.c b/mm/nommu.c
index b293ec1cc4e6..c53e9c8f6b4a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -150,7 +150,8 @@ void vfree(void *addr)
 	kfree(addr);
 }
 
-void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
+void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask,
+		pgprot_t prot)
 {
 	/*
 	 * kmalloc doesn't like __GFP_HIGHMEM for some reason
-- cgit
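
The rss-counter fix at the top of this group is a one-token change with a
cumulative effect. A minimal userspace sketch of the failure mode follows;
the struct and helper names (mm_mock, map_page, and so on) are stand-ins
for this demo, not kernel code. With the increment left in the unmap path,
rss drifts upward by two per map/unmap cycle instead of returning to zero.

	#include <stdio.h>

	struct mm_mock { long rss; };	/* stand-in for struct mm_struct */

	#define inc_mm_counter(mm, member) ((mm)->member++)
	#define dec_mm_counter(mm, member) ((mm)->member--)

	static void map_page(struct mm_mock *mm)
	{
		inc_mm_counter(mm, rss);
	}

	/* buggy conversion: the unmap path *increments* rss */
	static void unmap_page_buggy(struct mm_mock *mm)
	{
		inc_mm_counter(mm, rss);
	}

	static void unmap_page_fixed(struct mm_mock *mm)
	{
		dec_mm_counter(mm, rss);
	}

	int main(void)
	{
		struct mm_mock mm = { 0 };
		int i;

		for (i = 0; i < 3; i++) {
			map_page(&mm);
			unmap_page_buggy(&mm);
		}
		printf("buggy: rss = %ld, expected 0\n", mm.rss);	/* prints 6 */

		mm.rss = 0;
		for (i = 0; i < 3; i++) {
			map_page(&mm);
			unmap_page_fixed(&mm);
		}
		printf("fixed: rss = %ld\n", mm.rss);			/* prints 0 */
		return 0;
	}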
From ba32311eb73f624a85a5fc2e043cda8e076f86ef Mon Sep 17 00:00:00 2001
From: "McMullan, Jason"
Date: Mon, 16 May 2005 21:53:40 -0700
Subject: [PATCH] swapout oops fix

Fix OOPS when swapping on a device that doesn't have an unplug_io_fn
defined (e.g., ATA over Ethernet).

Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/swapfile.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index a60e0075d55b..da48405cd9a3 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -79,7 +79,7 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
 		WARN_ON(page_count(page) <= 1);
 
 		bdi = bdev->bd_inode->i_mapping->backing_dev_info;
-		bdi->unplug_io_fn(bdi, page);
+		blk_run_backing_dev(bdi, page);
 	}
 	up_read(&swap_unplug_sem);
 }
-- cgit

From b81074800b98ac50b64d4c8d34e8abf0fda5e3d1 Mon Sep 17 00:00:00 2001
From: Kirill Korotaev
Date: Mon, 16 May 2005 21:53:50 -0700
Subject: [PATCH] do_swap_page() can map random data if swap read fails

There is a bug in do_swap_page(): when a swap page happens to be
unreadable, a page filled with random data is mapped into the user
address space.  The fix is to check PageUptodate and send SIGBUS in
case of error.

Signed-Off-By: Kirill Korotaev
Signed-Off-By: Alexey Kuznetsov
Acked-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 6bad4c4064e7..d209f745db7f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1701,12 +1701,13 @@ static int do_swap_page(struct mm_struct * mm,
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (unlikely(!pte_same(*page_table, orig_pte))) {
-		pte_unmap(page_table);
-		spin_unlock(&mm->page_table_lock);
-		unlock_page(page);
-		page_cache_release(page);
 		ret = VM_FAULT_MINOR;
-		goto out;
+		goto out_nomap;
+	}
+
+	if (unlikely(!PageUptodate(page))) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_nomap;
 	}
 
 	/* The page isn't present yet, go ahead with the fault. */
@@ -1741,6 +1742,12 @@ static int do_swap_page(struct mm_struct * mm,
 	spin_unlock(&mm->page_table_lock);
 out:
 	return ret;
+out_nomap:
+	pte_unmap(page_table);
+	spin_unlock(&mm->page_table_lock);
+	unlock_page(page);
+	page_cache_release(page);
+	goto out;
 }
 
 /*
-- cgit

From 49a43876b935c811cfd29d8fe998a6912a1cc5c4 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Wed, 18 May 2005 15:39:33 -0700
Subject: [PATCH] prevent NULL mmap in topdown model

Prevent the topdown allocator from allocating mmap areas all the way
down to address zero.

We still allow a MAP_FIXED mapping of page 0 (needed for various things,
ranging from Wine and DOSEMU to people who want to allow speculative
loads off a NULL pointer).

Tested by Chris Wright.

Signed-off-by: Linus Torvalds
---
 mm/mmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 01f9793591f6..63df2d698414 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1244,7 +1244,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		addr = mm->free_area_cache;
 
 	/* make sure it can fit in the remaining address space */
-	if (addr >= len) {
+	if (addr > len) {
 		vma = find_vma(mm, addr-len);
 		if (!vma || addr <= vma->vm_start)
 			/* remember the address as a hint for next time */
@@ -1266,7 +1266,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 		/* try just below the current vma->vm_start */
 		addr = vma->vm_start-len;
-	} while (len <= vma->vm_start);
+	} while (len < vma->vm_start);
 
 	/*
	 * A failed mmap() very likely causes application failure,
-- cgit
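
The two relational operators changed in the topdown patch above are easiest
to check in isolation. Here is a small userspace sketch of just the boundary
arithmetic; the addresses are invented for the demo. With the old test, a
candidate base of exactly addr - len == 0 is accepted, which is a mapping at
the NULL page.

	#include <stdio.h>

	int main(void)
	{
		unsigned long addr = 0x1000, len = 0x1000;

		/* old test: addr >= len accepts a base of 0 (the NULL page) */
		if (addr >= len)
			printf("old: tries base 0x%lx\n", addr - len);

		/* new test: addr > len keeps the base strictly above 0 */
		if (addr > len)
			printf("new: tries base 0x%lx\n", addr - len);
		else
			printf("new: refuses to map the NULL page\n");
		return 0;
	}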
From 07ab67c8d0d7c1021343b7d5c045033d6bf7be69 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Thu, 19 May 2005 22:43:37 -0700
Subject: Fix get_unmapped_area sanity tests

As noted by Chris Wright, we need to do the full range of tests
regardless of whether MAP_FIXED is set or not, so re-organize
get_unmapped_area() slightly to do the sanity checks unconditionally.
---
 mm/mmap.c | 59 +++++++++++++++++++++++++++++++----------------------------
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 63df2d698414..de54acd9942f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1302,37 +1302,40 @@ unsigned long
 get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 		unsigned long pgoff, unsigned long flags)
 {
-	if (flags & MAP_FIXED) {
-		unsigned long ret;
+	unsigned long ret;
 
-		if (addr > TASK_SIZE - len)
-			return -ENOMEM;
-		if (addr & ~PAGE_MASK)
-			return -EINVAL;
-		if (file && is_file_hugepages(file)) {
-			/*
-			 * Check if the given range is hugepage aligned, and
-			 * can be made suitable for hugepages.
-			 */
-			ret = prepare_hugepage_range(addr, len);
-		} else {
-			/*
-			 * Ensure that a normal request is not falling in a
-			 * reserved hugepage range.  For some archs like IA-64,
-			 * there is a separate region for hugepages.
-			 */
-			ret = is_hugepage_only_range(current->mm, addr, len);
-		}
-		if (ret)
-			return -EINVAL;
-		return addr;
-	}
+	if (!(flags & MAP_FIXED)) {
+		unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
-	if (file && file->f_op && file->f_op->get_unmapped_area)
-		return file->f_op->get_unmapped_area(file, addr, len,
-						pgoff, flags);
+		get_area = current->mm->get_unmapped_area;
+		if (file && file->f_op && file->f_op->get_unmapped_area)
+			get_area = file->f_op->get_unmapped_area;
+		addr = get_area(file, addr, len, pgoff, flags);
+		if (IS_ERR_VALUE(addr))
+			return addr;
+	}
 
-	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+	if (addr > TASK_SIZE - len)
+		return -ENOMEM;
+	if (addr & ~PAGE_MASK)
+		return -EINVAL;
+	if (file && is_file_hugepages(file)) {
+		/*
+		 * Check if the given range is hugepage aligned, and
+		 * can be made suitable for hugepages.
+		 */
+		ret = prepare_hugepage_range(addr, len);
+	} else {
+		/*
+		 * Ensure that a normal request is not falling in a
+		 * reserved hugepage range.  For some archs like IA-64,
+		 * there is a separate region for hugepages.
+		 */
+		ret = is_hugepage_only_range(current->mm, addr, len);
+	}
+	if (ret)
+		return -EINVAL;
+	return addr;
 }
 
 EXPORT_SYMBOL(get_unmapped_area);
-- cgit
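
The reorganized flow above leans on the kernel's convention of carrying a
negative errno inside an unsigned long address and detecting it with an
IS_ERR_VALUE()-style test. A self-contained userspace sketch of the same
shape follows; TASK_SIZE_DEMO, PAGE_MASK_DEMO, and the pick_area() fallback
allocator are invented demo values, not kernel ones.

	#include <errno.h>
	#include <stdio.h>

	#define MAX_ERRNO	4095
	#define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

	#define TASK_SIZE_DEMO	0xc0000000UL	/* assumed 3G/1G split, demo only */
	#define PAGE_MASK_DEMO	(~0xfffUL)	/* assumed 4K pages, demo only */

	/* stand-in allocator: fails on a zero hint, else honours the hint */
	static unsigned long pick_area(unsigned long hint)
	{
		return hint ? hint : (unsigned long)-ENOMEM;
	}

	static long demo_get_unmapped_area(unsigned long addr,
					   unsigned long len, int map_fixed)
	{
		if (!map_fixed) {
			addr = pick_area(addr);
			if (IS_ERR_VALUE(addr))
				return (long)addr;	/* propagate -errno */
		}
		/* the sanity checks now run for both paths, as in the patch */
		if (addr > TASK_SIZE_DEMO - len)
			return -ENOMEM;
		if (addr & ~PAGE_MASK_DEMO)
			return -EINVAL;
		return (long)addr;
	}

	int main(void)
	{
		printf("MAP_FIXED, aligned:   %ld\n",
		       demo_get_unmapped_area(0x10000, 0x1000, 1));
		printf("MAP_FIXED, unaligned: %ld\n",
		       demo_get_unmapped_area(0x10001, 0x1000, 1));
		printf("hint honoured:        %ld\n",
		       demo_get_unmapped_area(0x10000, 0x1000, 0));
		printf("allocator failure:    %ld\n",
		       demo_get_unmapped_area(0, 0x1000, 0));
		return 0;
	}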
From 7856dfeb23c16ef3d8dac8871b4d5b93c70b59b9 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Fri, 20 May 2005 14:27:57 -0700
Subject: [PATCH] x86_64: Fixed guard page handling again in iounmap

Caused oopses again.  Also fix a potential mismatch in checking whether
change_page_attr() was needed.

To do it without races I needed to change mm/vmalloc.c to export a
__remove_vm_area() that does not take the vmlist lock.

Noticed by Terence Ripperda and based on a patch of his.

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmalloc.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2bd83e5c2bbf..8ff16a1eee6a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -248,31 +248,20 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 	return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
 }
 
-/**
- *	remove_vm_area  -  find and remove a contingous kernel virtual area
- *
- *	@addr:		base address
- *
- *	Search for the kernel VM area starting at @addr, and remove it.
- *	This function returns the found VM area, but using it is NOT safe
- *	on SMP machines.
- */
-struct vm_struct *remove_vm_area(void *addr)
+/* Caller must hold vmlist_lock */
+struct vm_struct *__remove_vm_area(void *addr)
 {
 	struct vm_struct **p, *tmp;
 
-	write_lock(&vmlist_lock);
 	for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
 		if (tmp->addr == addr)
 			goto found;
 	}
-	write_unlock(&vmlist_lock);
 	return NULL;
 
found:
 	unmap_vm_area(tmp);
 	*p = tmp->next;
-	write_unlock(&vmlist_lock);
 
 	/*
	 * Remove the guard page.
@@ -281,6 +270,24 @@ found:
 	return tmp;
 }
 
+/**
+ *	remove_vm_area  -  find and remove a contingous kernel virtual area
+ *
+ *	@addr:		base address
+ *
+ *	Search for the kernel VM area starting at @addr, and remove it.
+ *	This function returns the found VM area, but using it is NOT safe
+ *	on SMP machines, except for its size or flags.
+ */
+struct vm_struct *remove_vm_area(void *addr)
+{
+	struct vm_struct *v;
+	write_lock(&vmlist_lock);
+	v = __remove_vm_area(addr);
+	write_unlock(&vmlist_lock);
+	return v;
+}
+
 void __vunmap(void *addr, int deallocate_pages)
 {
 	struct vm_struct *area;
-- cgit

From b5c44c2147a447f77e07fecdb087ae288e1f4e40 Mon Sep 17 00:00:00 2001
From: Suparna Bhattacharya
Date: Sat, 21 May 2005 16:33:36 -0700
Subject: [PATCH] fix for __generic_file_aio_read() to return 0 on EOF

I came across the following problem while running ltp-aiodio test cases
from ltp-full-20050405 on linux-2.6.12-rc3-mm3.  I tried running the
tests with EXT3 as well as JFS filesystems.

One or two fsx-linux test cases hung after some time.  These test cases
were hanging at wait_for_all_aios().

Debugging shows that there were some iocbs which were not getting
completed even though the last retry for those returned -EIOCBQUEUED.
Also, all such pending iocbs represented a READ operation.

Further debugging revealed that all such iocbs hit EOF in the DIO layer.
To be more precise, the "pos" from which they were trying to read was
greater than the "size" of the file, so generic_file_direct_IO()
returned 0.

This happens rarely, as there is already a check in
__generic_file_aio_read() for whether "pos" < "size" before calling the
direct IO routine:

	>	size = i_size_read(inode);
	>	if (pos < size) {
	>		retval = generic_file_direct_IO(READ, iocb,
	>					iov, pos, nr_segs);

But for READ we take the inode->i_sem only in the DIO layer, so it is
possible that some other process changes the size of the file before we
take the i_sem.  In such a case (when "pos" > "size"),
__generic_file_aio_read() would return -EIOCBQUEUED even though no I/O
requests were submitted by the DIO layer.  This would cause the AIO
layer to expect an aio_complete() for that iocb, which does not happen.
Thus the test hangs forever, waiting for an I/O completion when no
requests were submitted at all.

The following patch makes __generic_file_aio_read() return 0 (instead
of -EIOCBQUEUED) on getting 0 from generic_file_direct_IO(), so that
the AIO layer does the aio_complete().

Testing: I have tested the patch on an SMP machine (with 2 Pentium 4
(HT) processors) running linux-2.6.12-rc3-mm3.  I ran the ltp-aiodio
test cases and none of the fsx-linux tests hung.  The aio-stress tests
also ran without any problem.

Signed-off-by: Suzuki K P
Signed-off-by: Suparna Bhattacharya
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/filemap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 47263ac3e4ea..1d33fec7bac6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1004,7 +1004,7 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 		if (pos < size) {
 			retval = generic_file_direct_IO(READ, iocb,
 						iov, pos, nr_segs);
-			if (retval >= 0 && !is_sync_kiocb(iocb))
+			if (retval > 0 && !is_sync_kiocb(iocb))
 				retval = -EIOCBQUEUED;
 			if (retval > 0)
 				*ppos = pos + retval;
-- cgit
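
The one-character change above encodes a return-value convention: only a
positive byte count from the direct-IO path means requests were actually
queued, while 0 means EOF and must complete synchronously. A userspace
sketch of that convention follows; direct_io_read() is a stand-in, the
is_sync_kiocb() test is dropped (an async iocb is assumed throughout), and
only EIOCBQUEUED's value matches the kernel's.

	#include <stdio.h>

	#define EIOCBQUEUED	529	/* kernel-internal errno value */

	/* stand-in for generic_file_direct_IO(): at or past EOF it submits
	 * nothing and returns 0; otherwise it returns the byte count */
	static long direct_io_read(long pos, long file_size)
	{
		return pos < file_size ? file_size - pos : 0;
	}

	/* old logic: retval >= 0 turns even an EOF result into "queued" */
	static long aio_read_buggy(long pos, long file_size)
	{
		long retval = direct_io_read(pos, file_size);

		if (retval >= 0)
			retval = -EIOCBQUEUED;
		return retval;
	}

	/* new logic: only a positive byte count means requests were queued */
	static long aio_read_fixed(long pos, long file_size)
	{
		long retval = direct_io_read(pos, file_size);

		if (retval > 0)
			retval = -EIOCBQUEUED;
		return retval;
	}

	int main(void)
	{
		printf("read past EOF, old: %ld (waits for a completion "
		       "that never comes)\n", aio_read_buggy(4096, 1024));
		printf("read past EOF, new: %ld (caller sees EOF at once)\n",
		       aio_read_fixed(4096, 1024));
		return 0;
	}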
From cafdd8ba08935d9b161bb781851dc4c0e6f70427 Mon Sep 17 00:00:00 2001
From: William Lee Irwin III
Date: Tue, 24 May 2005 19:31:09 -0700
Subject: [PATCH] try_to_unmap_cluster() passes out-of-bounds pte to pte_unmap()

try_to_unmap_cluster() does:

	for (pte = pte_offset_map(pmd, address);
			address < end; pte++, address += PAGE_SIZE) {
		...
	}

	pte_unmap(pte);

It may take a little staring to notice, but pte can actually fall off
the end of the pte page in this iteration, which makes life difficult
for kmap_atomic() and the users not expecting it to BUG().  Of course,
we're somewhat lucky in that arithmetic elsewhere in the function
guarantees that at least one iteration is made, lest this force larger
rearrangements to be made.

This issue and patch also apply to non-mm mainline and, with trivial
adjustments, to at least two related kernels.

Discovered during internal testing at Oracle.

Signed-off-by: William Irwin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/rmap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index a6203b4e1278..9827409eb7c7 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -626,7 +626,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte;
+	pte_t *pte, *original_pte;
 	pte_t pteval;
 	struct page *page;
 	unsigned long address;
@@ -658,7 +658,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	if (!pmd_present(*pmd))
 		goto out_unlock;
 
-	for (pte = pte_offset_map(pmd, address);
+	for (original_pte = pte = pte_offset_map(pmd, address);
 			address < end; pte++, address += PAGE_SIZE) {
 
 		if (!pte_present(*pte))
@@ -694,7 +694,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		(*mapcount)--;
 	}
 
-	pte_unmap(pte);
+	pte_unmap(original_pte);
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 }
-- cgit
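
The fix above is the classic save-the-base-pointer pattern: the loop cursor
legitimately ends up one element past the mapped page, so the teardown call
must receive the pointer that was originally mapped. A userspace sketch with
stand-in map/unmap helpers (a plain array plays the pte page):

	#include <stdio.h>

	#define NENTRIES 4

	static int table[NENTRIES];	/* plays the role of the pte page */

	/* stand-in for pte_offset_map(): hands out the mapped base */
	static int *map_entries(void)
	{
		return table;
	}

	/* stand-in for pte_unmap(): must receive the mapped base */
	static void unmap_entries(int *p)
	{
		if (p != table)
			printf("BUG: unmap of %p, mapped base was %p\n",
			       (void *)p, (void *)table);
		else
			printf("unmapped the correct base\n");
	}

	int main(void)
	{
		int *pte, *original_pte;

		for (original_pte = pte = map_entries();
				pte < table + NENTRIES; pte++)
			*pte = 0;

		unmap_entries(pte);		/* buggy: one past the end */
		unmap_entries(original_pte);	/* fixed: the mapped base */
		return 0;
	}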