From 202d182a92c60416680e31baa697faa60b0882f5 Mon Sep 17 00:00:00 2001
From: Bjorn Steinbrink
Date: Mon, 16 May 2005 21:53:17 -0700
Subject: [PATCH] mm: fix rss counter being incremented when unmapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes a bug introduced by the "mm counter operations through
macros" patch, which replaced a decrement operation with an increment
macro in try_to_unmap_one().

Signed-off-by: Björn Steinbrink
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/rmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 378de234c12b..a6203b4e1278 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -586,7 +586,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 		dec_mm_counter(mm, anon_rss);
 	}
 
-	inc_mm_counter(mm, rss);
+	dec_mm_counter(mm, rss);
 	page_remove_rmap(page);
 	page_cache_release(page);
 
-- cgit

From 7179906293ebdc333f14a03d3e58b03604848f3c Mon Sep 17 00:00:00 2001
From: Kirill Korotaev
Date: Mon, 16 May 2005 21:53:18 -0700
Subject: [PATCH] mm acct accounting fix

This patch fixes mm->total_vm and mm->locked_vm accounting in the case
where move_page_tables() fails inside move_vma().

Signed-Off-By: Kirill Korotaev
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/mremap.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mm/mremap.c b/mm/mremap.c
index 0dd7ace94e51..ec7238a78f36 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -224,6 +224,12 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		split = 1;
 	}
 
+	/*
+	 * if we failed to move page tables we still do total_vm increment
+	 * since do_munmap() will decrement it by old_len == new_len
+	 */
+	mm->total_vm += new_len >> PAGE_SHIFT;
+
 	if (do_munmap(mm, old_addr, old_len) < 0) {
 		/* OOM: unable to split vma, just get accounts right */
 		vm_unacct_memory(excess >> PAGE_SHIFT);
@@ -237,7 +243,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		vma->vm_next->vm_flags |= VM_ACCOUNT;
 	}
 
-	mm->total_vm += new_len >> PAGE_SHIFT;
 	__vm_stat_account(mm, vma->vm_flags, vma->vm_file, new_len>>PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += new_len >> PAGE_SHIFT;
-- cgit

From 7a019225c797a1047470accee950d69cfe7c59c5 Mon Sep 17 00:00:00 2001
From: Adrian Bunk
Date: Mon, 16 May 2005 21:53:37 -0700
Subject: [PATCH] mm/nommu.c: try to fix __vmalloc

Linus changed the second argument of __vmalloc from int to unsigned int,
breaking the compilation for CONFIG_MMU=n configurations (since he only
changed vmalloc.c but not nommu.c).

Signed-off-by: Adrian Bunk
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/nommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/nommu.c b/mm/nommu.c
index b293ec1cc4e6..c53e9c8f6b4a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -150,7 +150,8 @@ void vfree(void *addr)
 	kfree(addr);
 }
 
-void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
+void *__vmalloc(unsigned long size, unsigned int __nocast gfp_mask,
+		pgprot_t prot)
 {
 	/*
 	 * kmalloc doesn't like __GFP_HIGHMEM for some reason
-- cgit
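
The rss-counter fix at the top of this group is a one-token change with a
cumulative effect. A minimal userspace sketch of the failure mode follows;
the struct and helper names (mm_mock, map_page, and so on) are stand-ins
for this demo, not kernel code. With the increment left in the unmap path,
rss drifts upward by two per map/unmap cycle instead of returning to zero.

	#include <stdio.h>

	struct mm_mock { long rss; };	/* stand-in for struct mm_struct */

	#define inc_mm_counter(mm, member) ((mm)->member++)
	#define dec_mm_counter(mm, member) ((mm)->member--)

	static void map_page(struct mm_mock *mm)
	{
		inc_mm_counter(mm, rss);
	}

	/* buggy conversion: the unmap path *increments* rss */
	static void unmap_page_buggy(struct mm_mock *mm)
	{
		inc_mm_counter(mm, rss);
	}

	static void unmap_page_fixed(struct mm_mock *mm)
	{
		dec_mm_counter(mm, rss);
	}

	int main(void)
	{
		struct mm_mock mm = { 0 };
		int i;

		for (i = 0; i < 3; i++) {
			map_page(&mm);
			unmap_page_buggy(&mm);
		}
		printf("buggy: rss = %ld, expected 0\n", mm.rss);	/* prints 6 */

		mm.rss = 0;
		for (i = 0; i < 3; i++) {
			map_page(&mm);
			unmap_page_fixed(&mm);
		}
		printf("fixed: rss = %ld\n", mm.rss);			/* prints 0 */
		return 0;
	}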
From ba32311eb73f624a85a5fc2e043cda8e076f86ef Mon Sep 17 00:00:00 2001
From: "McMullan, Jason"
Date: Mon, 16 May 2005 21:53:40 -0700
Subject: [PATCH] swapout oops fix

Fix OOPS when swapping on a device that doesn't have an unplug_io_fn
defined (e.g., ATA over Ethernet).

Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/swapfile.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index a60e0075d55b..da48405cd9a3 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -79,7 +79,7 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
 		WARN_ON(page_count(page) <= 1);
 
 		bdi = bdev->bd_inode->i_mapping->backing_dev_info;
-		bdi->unplug_io_fn(bdi, page);
+		blk_run_backing_dev(bdi, page);
 	}
 	up_read(&swap_unplug_sem);
 }
-- cgit

From b81074800b98ac50b64d4c8d34e8abf0fda5e3d1 Mon Sep 17 00:00:00 2001
From: Kirill Korotaev
Date: Mon, 16 May 2005 21:53:50 -0700
Subject: [PATCH] do_swap_page() can map random data if swap read fails

There is a bug in do_swap_page(): when a swap page happens to be
unreadable, a page filled with random data is mapped into the user
address space.  The fix is to check PageUptodate and send SIGBUS in
case of error.

Signed-Off-By: Kirill Korotaev
Signed-Off-By: Alexey Kuznetsov
Acked-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/memory.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 6bad4c4064e7..d209f745db7f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1701,12 +1701,13 @@ static int do_swap_page(struct mm_struct * mm,
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (unlikely(!pte_same(*page_table, orig_pte))) {
-		pte_unmap(page_table);
-		spin_unlock(&mm->page_table_lock);
-		unlock_page(page);
-		page_cache_release(page);
 		ret = VM_FAULT_MINOR;
-		goto out;
+		goto out_nomap;
+	}
+
+	if (unlikely(!PageUptodate(page))) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_nomap;
 	}
 
 	/* The page isn't present yet, go ahead with the fault. */
@@ -1741,6 +1742,12 @@ static int do_swap_page(struct mm_struct * mm,
 	spin_unlock(&mm->page_table_lock);
 out:
 	return ret;
+out_nomap:
+	pte_unmap(page_table);
+	spin_unlock(&mm->page_table_lock);
+	unlock_page(page);
+	page_cache_release(page);
+	goto out;
 }
 
 /*
-- cgit

From 49a43876b935c811cfd29d8fe998a6912a1cc5c4 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Wed, 18 May 2005 15:39:33 -0700
Subject: [PATCH] prevent NULL mmap in topdown model

Prevent the topdown allocator from allocating mmap areas all the way
down to address zero.

We still allow a MAP_FIXED mapping of page 0 (needed for various things,
ranging from Wine and DOSEMU to people who want to allow speculative
loads off a NULL pointer).

Tested by Chris Wright.

Signed-off-by: Linus Torvalds
---
 mm/mmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 01f9793591f6..63df2d698414 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1244,7 +1244,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		addr = mm->free_area_cache;
 
 	/* make sure it can fit in the remaining address space */
-	if (addr >= len) {
+	if (addr > len) {
 		vma = find_vma(mm, addr-len);
 		if (!vma || addr <= vma->vm_start)
 			/* remember the address as a hint for next time */
@@ -1266,7 +1266,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 
 		/* try just below the current vma->vm_start */
 		addr = vma->vm_start-len;
-	} while (len <= vma->vm_start);
+	} while (len < vma->vm_start);
 
 	/*
	 * A failed mmap() very likely causes application failure,
-- cgit
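
The two relational operators changed in the topdown patch above are easiest
to check in isolation. Here is a small userspace sketch of just the boundary
arithmetic; the addresses are invented for the demo. With the old test, a
candidate base of exactly addr - len == 0 is accepted, which is a mapping at
the NULL page.

	#include <stdio.h>

	int main(void)
	{
		unsigned long addr = 0x1000, len = 0x1000;

		/* old test: addr >= len accepts a base of 0 (the NULL page) */
		if (addr >= len)
			printf("old: tries base 0x%lx\n", addr - len);

		/* new test: addr > len keeps the base strictly above 0 */
		if (addr > len)
			printf("new: tries base 0x%lx\n", addr - len);
		else
			printf("new: refuses to map the NULL page\n");
		return 0;
	}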
From 07ab67c8d0d7c1021343b7d5c045033d6bf7be69 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Thu, 19 May 2005 22:43:37 -0700
Subject: Fix get_unmapped_area sanity tests

As noted by Chris Wright, we need to do the full range of tests
regardless of whether MAP_FIXED is set or not, so re-organize
get_unmapped_area() slightly to do the sanity checks unconditionally.
---
 mm/mmap.c | 59 +++++++++++++++++++++++++++++++----------------------------
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 63df2d698414..de54acd9942f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1302,37 +1302,40 @@ unsigned long
 get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 		unsigned long pgoff, unsigned long flags)
 {
-	if (flags & MAP_FIXED) {
-		unsigned long ret;
+	unsigned long ret;
 
-		if (addr > TASK_SIZE - len)
-			return -ENOMEM;
-		if (addr & ~PAGE_MASK)
-			return -EINVAL;
-		if (file && is_file_hugepages(file)) {
-			/*
-			 * Check if the given range is hugepage aligned, and
-			 * can be made suitable for hugepages.
-			 */
-			ret = prepare_hugepage_range(addr, len);
-		} else {
-			/*
-			 * Ensure that a normal request is not falling in a
-			 * reserved hugepage range.  For some archs like IA-64,
-			 * there is a separate region for hugepages.
-			 */
-			ret = is_hugepage_only_range(current->mm, addr, len);
-		}
-		if (ret)
-			return -EINVAL;
-		return addr;
-	}
+	if (!(flags & MAP_FIXED)) {
+		unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
-	if (file && file->f_op && file->f_op->get_unmapped_area)
-		return file->f_op->get_unmapped_area(file, addr, len,
-						pgoff, flags);
+		get_area = current->mm->get_unmapped_area;
+		if (file && file->f_op && file->f_op->get_unmapped_area)
+			get_area = file->f_op->get_unmapped_area;
+		addr = get_area(file, addr, len, pgoff, flags);
+		if (IS_ERR_VALUE(addr))
+			return addr;
+	}
 
-	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+	if (addr > TASK_SIZE - len)
+		return -ENOMEM;
+	if (addr & ~PAGE_MASK)
+		return -EINVAL;
+	if (file && is_file_hugepages(file)) {
+		/*
+		 * Check if the given range is hugepage aligned, and
+		 * can be made suitable for hugepages.
+		 */
+		ret = prepare_hugepage_range(addr, len);
+	} else {
+		/*
+		 * Ensure that a normal request is not falling in a
+		 * reserved hugepage range.  For some archs like IA-64,
+		 * there is a separate region for hugepages.
+		 */
+		ret = is_hugepage_only_range(current->mm, addr, len);
+	}
+	if (ret)
+		return -EINVAL;
+	return addr;
 }
 
 EXPORT_SYMBOL(get_unmapped_area);
-- cgit
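
The reorganized flow above leans on the kernel's convention of carrying a
negative errno inside an unsigned long address and detecting it with an
IS_ERR_VALUE()-style test. A self-contained userspace sketch of the same
shape follows; TASK_SIZE_DEMO, PAGE_MASK_DEMO, and the pick_area() fallback
allocator are invented demo values, not kernel ones.

	#include <errno.h>
	#include <stdio.h>

	#define MAX_ERRNO	4095
	#define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

	#define TASK_SIZE_DEMO	0xc0000000UL	/* assumed 3G/1G split, demo only */
	#define PAGE_MASK_DEMO	(~0xfffUL)	/* assumed 4K pages, demo only */

	/* stand-in allocator: fails on a zero hint, else honours the hint */
	static unsigned long pick_area(unsigned long hint)
	{
		return hint ? hint : (unsigned long)-ENOMEM;
	}

	static long demo_get_unmapped_area(unsigned long addr,
					   unsigned long len, int map_fixed)
	{
		if (!map_fixed) {
			addr = pick_area(addr);
			if (IS_ERR_VALUE(addr))
				return (long)addr;	/* propagate -errno */
		}
		/* the sanity checks now run for both paths, as in the patch */
		if (addr > TASK_SIZE_DEMO - len)
			return -ENOMEM;
		if (addr & ~PAGE_MASK_DEMO)
			return -EINVAL;
		return (long)addr;
	}

	int main(void)
	{
		printf("MAP_FIXED, aligned:   %ld\n",
		       demo_get_unmapped_area(0x10000, 0x1000, 1));
		printf("MAP_FIXED, unaligned: %ld\n",
		       demo_get_unmapped_area(0x10001, 0x1000, 1));
		printf("hint honoured:        %ld\n",
		       demo_get_unmapped_area(0x10000, 0x1000, 0));
		printf("allocator failure:    %ld\n",
		       demo_get_unmapped_area(0, 0x1000, 0));
		return 0;
	}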
From 7856dfeb23c16ef3d8dac8871b4d5b93c70b59b9 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Fri, 20 May 2005 14:27:57 -0700
Subject: [PATCH] x86_64: Fixed guard page handling again in iounmap

Caused oopses again.  Also fix a potential mismatch in checking whether
change_page_attr() was needed.

To do it without races I needed to change mm/vmalloc.c to export a
__remove_vm_area() that does not take the vmlist lock.

Noticed by Terence Ripperda and based on a patch of his.

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmalloc.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2bd83e5c2bbf..8ff16a1eee6a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -248,31 +248,20 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 	return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
 }
 
-/**
- *	remove_vm_area  -  find and remove a contingous kernel virtual area
- *
- *	@addr:		base address
- *
- *	Search for the kernel VM area starting at @addr, and remove it.
- *	This function returns the found VM area, but using it is NOT safe
- *	on SMP machines.
- */
-struct vm_struct *remove_vm_area(void *addr)
+/* Caller must hold vmlist_lock */
+struct vm_struct *__remove_vm_area(void *addr)
 {
 	struct vm_struct **p, *tmp;
 
-	write_lock(&vmlist_lock);
 	for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) {
 		if (tmp->addr == addr)
 			goto found;
 	}
-	write_unlock(&vmlist_lock);
 	return NULL;
 
found:
 	unmap_vm_area(tmp);
 	*p = tmp->next;
-	write_unlock(&vmlist_lock);
 
 	/*
	 * Remove the guard page.
@@ -281,6 +270,24 @@ found:
 	return tmp;
 }
 
+/**
+ *	remove_vm_area  -  find and remove a contingous kernel virtual area
+ *
+ *	@addr:		base address
+ *
+ *	Search for the kernel VM area starting at @addr, and remove it.
+ *	This function returns the found VM area, but using it is NOT safe
+ *	on SMP machines, except for its size or flags.
+ */
+struct vm_struct *remove_vm_area(void *addr)
+{
+	struct vm_struct *v;
+	write_lock(&vmlist_lock);
+	v = __remove_vm_area(addr);
+	write_unlock(&vmlist_lock);
+	return v;
+}
+
 void __vunmap(void *addr, int deallocate_pages)
 {
 	struct vm_struct *area;
-- cgit

From b5c44c2147a447f77e07fecdb087ae288e1f4e40 Mon Sep 17 00:00:00 2001
From: Suparna Bhattacharya
Date: Sat, 21 May 2005 16:33:36 -0700
Subject: [PATCH] fix for __generic_file_aio_read() to return 0 on EOF

I came across the following problem while running ltp-aiodio test cases
from ltp-full-20050405 on linux-2.6.12-rc3-mm3.  I tried running the
tests with EXT3 as well as JFS filesystems.

One or two fsx-linux test cases hung after some time.  These test cases
were hanging at wait_for_all_aios().

Debugging shows that there were some iocbs which were not getting
completed even though the last retry for those returned -EIOCBQUEUED.
Also, all such pending iocbs represented a READ operation.

Further debugging revealed that all such iocbs hit EOF in the DIO layer.
To be more precise, the "pos" from which they were trying to read was
greater than the "size" of the file, so generic_file_direct_IO()
returned 0.

This happens rarely, as there is already a check in
__generic_file_aio_read() for whether "pos" < "size" before calling the
direct IO routine:

	>	size = i_size_read(inode);
	>	if (pos < size) {
	>		retval = generic_file_direct_IO(READ, iocb,
	>					iov, pos, nr_segs);

But for READ we take the inode->i_sem only in the DIO layer, so it is
possible that some other process changes the size of the file before we
take the i_sem.  In such a case (when "pos" > "size"),
__generic_file_aio_read() would return -EIOCBQUEUED even though no I/O
requests were submitted by the DIO layer.  This would cause the AIO
layer to expect an aio_complete() for that iocb, which does not happen.
Thus the test hangs forever, waiting for an I/O completion when no
requests were submitted at all.

The following patch makes __generic_file_aio_read() return 0 (instead
of -EIOCBQUEUED) on getting 0 from generic_file_direct_IO(), so that
the AIO layer does the aio_complete().

Testing: I have tested the patch on an SMP machine (with 2 Pentium 4
(HT) processors) running linux-2.6.12-rc3-mm3.  I ran the ltp-aiodio
test cases and none of the fsx-linux tests hung.  The aio-stress tests
also ran without any problem.

Signed-off-by: Suzuki K P
Signed-off-by: Suparna Bhattacharya
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/filemap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 47263ac3e4ea..1d33fec7bac6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1004,7 +1004,7 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 		if (pos < size) {
 			retval = generic_file_direct_IO(READ, iocb,
 						iov, pos, nr_segs);
-			if (retval >= 0 && !is_sync_kiocb(iocb))
+			if (retval > 0 && !is_sync_kiocb(iocb))
 				retval = -EIOCBQUEUED;
 			if (retval > 0)
 				*ppos = pos + retval;
-- cgit
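
The one-character change above encodes a return-value convention: only a
positive byte count from the direct-IO path means requests were actually
queued, while 0 means EOF and must complete synchronously. A userspace
sketch of that convention follows; direct_io_read() is a stand-in, the
is_sync_kiocb() test is dropped (an async iocb is assumed throughout), and
only EIOCBQUEUED's value matches the kernel's.

	#include <stdio.h>

	#define EIOCBQUEUED	529	/* kernel-internal errno value */

	/* stand-in for generic_file_direct_IO(): at or past EOF it submits
	 * nothing and returns 0; otherwise it returns the byte count */
	static long direct_io_read(long pos, long file_size)
	{
		return pos < file_size ? file_size - pos : 0;
	}

	/* old logic: retval >= 0 turns even an EOF result into "queued" */
	static long aio_read_buggy(long pos, long file_size)
	{
		long retval = direct_io_read(pos, file_size);

		if (retval >= 0)
			retval = -EIOCBQUEUED;
		return retval;
	}

	/* new logic: only a positive byte count means requests were queued */
	static long aio_read_fixed(long pos, long file_size)
	{
		long retval = direct_io_read(pos, file_size);

		if (retval > 0)
			retval = -EIOCBQUEUED;
		return retval;
	}

	int main(void)
	{
		printf("read past EOF, old: %ld (waits for a completion "
		       "that never comes)\n", aio_read_buggy(4096, 1024));
		printf("read past EOF, new: %ld (caller sees EOF at once)\n",
		       aio_read_fixed(4096, 1024));
		return 0;
	}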
From cafdd8ba08935d9b161bb781851dc4c0e6f70427 Mon Sep 17 00:00:00 2001
From: William Lee Irwin III
Date: Tue, 24 May 2005 19:31:09 -0700
Subject: [PATCH] try_to_unmap_cluster() passes out-of-bounds pte to pte_unmap()

try_to_unmap_cluster() does:

	for (pte = pte_offset_map(pmd, address);
			address < end; pte++, address += PAGE_SIZE) {
		...
	}

	pte_unmap(pte);

It may take a little staring to notice, but pte can actually fall off
the end of the pte page in this iteration, which makes life difficult
for kmap_atomic() and the users not expecting it to BUG().  Of course,
we're somewhat lucky in that arithmetic elsewhere in the function
guarantees that at least one iteration is made, lest this force larger
rearrangements to be made.

This issue and patch also apply to non-mm mainline and, with trivial
adjustments, to at least two related kernels.

Discovered during internal testing at Oracle.

Signed-off-by: William Irwin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/rmap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index a6203b4e1278..9827409eb7c7 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -626,7 +626,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte;
+	pte_t *pte, *original_pte;
 	pte_t pteval;
 	struct page *page;
 	unsigned long address;
@@ -658,7 +658,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 	if (!pmd_present(*pmd))
 		goto out_unlock;
 
-	for (pte = pte_offset_map(pmd, address);
+	for (original_pte = pte = pte_offset_map(pmd, address);
 			address < end; pte++, address += PAGE_SIZE) {
 
 		if (!pte_present(*pte))
@@ -694,7 +694,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
 		(*mapcount)--;
 	}
 
-	pte_unmap(pte);
+	pte_unmap(original_pte);
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 }
-- cgit
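
The fix above is the classic save-the-base-pointer pattern: the loop cursor
legitimately ends up one element past the mapped page, so the teardown call
must receive the pointer that was originally mapped. A userspace sketch with
stand-in map/unmap helpers (a plain array plays the pte page):

	#include <stdio.h>

	#define NENTRIES 4

	static int table[NENTRIES];	/* plays the role of the pte page */

	/* stand-in for pte_offset_map(): hands out the mapped base */
	static int *map_entries(void)
	{
		return table;
	}

	/* stand-in for pte_unmap(): must receive the mapped base */
	static void unmap_entries(int *p)
	{
		if (p != table)
			printf("BUG: unmap of %p, mapped base was %p\n",
			       (void *)p, (void *)table);
		else
			printf("unmapped the correct base\n");
	}

	int main(void)
	{
		int *pte, *original_pte;

		for (original_pte = pte = map_entries();
				pte < table + NENTRIES; pte++)
			*pte = 0;

		unmap_entries(pte);		/* buggy: one past the end */
		unmap_entries(original_pte);	/* fixed: the mapped base */
		return 0;
	}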