From ab50b8ed818016cfecd747d6d4bb9139986bc029 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Sat, 29 Oct 2005 18:15:56 -0700
Subject: [PATCH] mm: vm_stat_account unshackled

The original vm_stat_account has fallen into disuse, with only one user,
and only one user of vm_stat_unaccount.  It's easier to keep track if we
convert them all to __vm_stat_account, then free it from its __shackles.

Signed-off-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/fork.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index 280bd44ac441..e2ff11f8c1b0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -212,7 +212,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
         if (mpnt->vm_flags & VM_DONTCOPY) {
             long pages = vma_pages(mpnt);
             mm->total_vm -= pages;
-            __vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
+            vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
                                 -pages);
             continue;
         }
-- cgit

From 404351e67a9facb475abf1492245374a28d13e90 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Sat, 29 Oct 2005 18:16:04 -0700
Subject: [PATCH] mm: mm_init set_mm_counters

How is anon_rss initialized?  In dup_mmap, and by mm_alloc's memset; but
that's not so good if an mm_counter_t is a special type.  And how is rss
initialized?  By set_mm_counter, all over the place.  Come on, we just need
to initialize them both at once by set_mm_counter in mm_init (which follows
the memcpy when forking).

Signed-off-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/fork.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index e2ff11f8c1b0..25caa02e2eac 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -198,8 +198,6 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
     mm->free_area_cache = oldmm->mmap_base;
     mm->cached_hole_size = ~0UL;
     mm->map_count = 0;
-    set_mm_counter(mm, rss, 0);
-    set_mm_counter(mm, anon_rss, 0);
     cpus_clear(mm->cpu_vm_mask);
     mm->mm_rb = RB_ROOT;
     rb_link = &mm->mm_rb.rb_node;
@@ -323,6 +321,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
     INIT_LIST_HEAD(&mm->mmlist);
     mm->core_waiters = 0;
     mm->nr_ptes = 0;
+    set_mm_counter(mm, rss, 0);
+    set_mm_counter(mm, anon_rss, 0);
     spin_lock_init(&mm->page_table_lock);
     rwlock_init(&mm->ioctx_list_lock);
     mm->ioctx_list = NULL;
-- cgit

From 4294621f41a85497019fae64341aa5351a1921b7 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Sat, 29 Oct 2005 18:16:05 -0700
Subject: [PATCH] mm: rss = file_rss + anon_rss

I was lazy when we added anon_rss, and chose to change as few places as
possible.  So currently each anonymous page has to be counted twice, in rss
and in anon_rss.  Which won't be so good if those are atomic counts in some
configurations.

Change that around: keep file_rss and anon_rss separately, and add them
together (with get_mm_rss macro) when the total is needed - reading two
atomics is much cheaper than updating two atomics.  And update anon_rss
upfront, typically in memory.c, not tucked away in page_add_anon_rmap.

Signed-off-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/fork.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index 25caa02e2eac..2048ed7b5872 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -321,7 +321,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
     INIT_LIST_HEAD(&mm->mmlist);
     mm->core_waiters = 0;
     mm->nr_ptes = 0;
-    set_mm_counter(mm, rss, 0);
+    set_mm_counter(mm, file_rss, 0);
     set_mm_counter(mm, anon_rss, 0);
     spin_lock_init(&mm->page_table_lock);
     rwlock_init(&mm->ioctx_list_lock);
@@ -499,7 +499,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
     if (retval)
         goto free_pt;

-    mm->hiwater_rss = get_mm_counter(mm,rss);
+    mm->hiwater_rss = get_mm_rss(mm);
     mm->hiwater_vm = mm->total_vm;

 good_mm:
-- cgit
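A small aside on the counter split above: the sketch below shows, in plain
user-space C, the shape of the scheme the patch describes - two per-mm
counters kept separately and summed by a get_mm_rss-style macro only when
the total is read.  The struct mm_demo type, the inc_mm_counter helper and
the _##member field layout are illustrative assumptions for the demo, not
the kernel's actual definitions from that era.

#include <stdio.h>

/* Hypothetical stand-ins for the per-mm counters discussed above. */
typedef unsigned long mm_counter_t;

struct mm_demo {
    mm_counter_t _file_rss;    /* pages backed by files */
    mm_counter_t _anon_rss;    /* anonymous pages */
};

/* Accessors in the style the commit message describes. */
#define set_mm_counter(mm, member, value)  ((mm)->_##member = (value))
#define get_mm_counter(mm, member)         ((mm)->_##member)
#define inc_mm_counter(mm, member)         ((mm)->_##member++)

/* The total is derived on read rather than maintained as a third counter:
 * reading two counters is cheaper than updating two on every fault. */
#define get_mm_rss(mm) \
    (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))

int main(void)
{
    struct mm_demo mm;

    set_mm_counter(&mm, file_rss, 0);
    set_mm_counter(&mm, anon_rss, 0);
    inc_mm_counter(&mm, anon_rss);    /* e.g. one anonymous page faulted in */

    printf("rss = %lu\n", get_mm_rss(&mm));
    return 0;
}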
From fd3e42fcc888a773572282575d2fdbf5cfd6216e Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Sat, 29 Oct 2005 18:16:06 -0700
Subject: [PATCH] mm: dup_mmap use oldmm more

Use the parent's oldmm throughout dup_mmap, instead of perversely going back
to current->mm.  (Can you hear the sigh of relief from those mpnts?  Usually
I squash them, but not today.)

Signed-off-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/fork.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index 2048ed7b5872..0e7fe4a8a8df 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -182,16 +182,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 }

 #ifdef CONFIG_MMU
-static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
+static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 {
-    struct vm_area_struct * mpnt, *tmp, **pprev;
+    struct vm_area_struct *mpnt, *tmp, **pprev;
     struct rb_node **rb_link, *rb_parent;
     int retval;
     unsigned long charge;
     struct mempolicy *pol;

     down_write(&oldmm->mmap_sem);
-    flush_cache_mm(current->mm);
+    flush_cache_mm(oldmm);
     mm->locked_vm = 0;
     mm->mmap = NULL;
     mm->mmap_cache = NULL;
@@ -204,7 +204,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
     rb_parent = NULL;
     pprev = &mm->mmap;

-    for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
+    for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
         struct file *file;

         if (mpnt->vm_flags & VM_DONTCOPY) {
@@ -265,7 +265,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
         rb_parent = &tmp->vm_rb;
         mm->map_count++;
-        retval = copy_page_range(mm, current->mm, tmp);
+        retval = copy_page_range(mm, oldmm, tmp);
         spin_unlock(&mm->page_table_lock);

         if (tmp->vm_ops && tmp->vm_ops->open)
             tmp->vm_ops->open(tmp);
@@ -277,7 +277,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
     retval = 0;

 out:
-    flush_tlb_mm(current->mm);
+    flush_tlb_mm(oldmm);
     up_write(&oldmm->mmap_sem);
     return retval;
 fail_nomem_policy:
-- cgit
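To make the point of the patch above concrete, here is a stripped-down,
user-space sketch of the loop shape dup_mmap ends up with once everything is
read from the parent's oldmm rather than re-derived from current->mm.  The
vma_demo/mm_demo types and the dup_mmap_demo function are invented for the
illustration; only the mpnt/oldmm/mm naming follows the patch.

#include <stdlib.h>

/* Toy stand-ins, not kernel types: just enough to show the walk. */
struct vma_demo {
    unsigned long vm_start, vm_end;
    struct vma_demo *vm_next;
};

struct mm_demo {
    struct vma_demo *mmap;    /* head of the vma list */
    int map_count;
};

static int dup_mmap_demo(struct mm_demo *mm, struct mm_demo *oldmm)
{
    struct vma_demo *mpnt, *tmp, **pprev = &mm->mmap;

    mm->mmap = NULL;
    mm->map_count = 0;

    /* Everything is read from oldmm; the child (mm) is only written. */
    for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
        tmp = malloc(sizeof(*tmp));
        if (!tmp)
            return -1;
        *tmp = *mpnt;          /* copy the parent's vma */
        tmp->vm_next = NULL;

        *pprev = tmp;          /* link it at the tail of the child's list */
        pprev = &tmp->vm_next;
        mm->map_count++;
    }
    return 0;
}

int main(void)
{
    struct vma_demo b = { 0x3000, 0x5000, NULL };
    struct vma_demo a = { 0x1000, 0x2000, &b };
    struct mm_demo parent = { &a, 2 };
    struct mm_demo child = { NULL, 0 };

    return dup_mmap_demo(&child, &parent);
}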
From 7ee78232501ea9de2b6c8f10d32c9a0fee541357 Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Sat, 29 Oct 2005 18:16:08 -0700
Subject: [PATCH] mm: dup_mmap down new mmap_sem

One anomaly remains from when Andrea rationalized the responsibilities of
mmap_sem and page_table_lock: in dup_mmap we add vmas to the child holding
its page_table_lock, but not the mmap_sem which normally guards the vma list
and rbtree.  Which could be an issue for unuse_mm: though since it just
walks down the list (today with page_table_lock, tomorrow not), it's
probably okay.  Will need a memory barrier?  Oh, keep it simple, Nick and I
agreed, no harm in taking child's mmap_sem here.

Signed-off-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/fork.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index 0e7fe4a8a8df..2a587b3224e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -192,6 +192,8 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)

     down_write(&oldmm->mmap_sem);
     flush_cache_mm(oldmm);
+    down_write(&mm->mmap_sem);
+
     mm->locked_vm = 0;
     mm->mmap = NULL;
     mm->mmap_cache = NULL;
@@ -251,10 +253,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
         }

         /*
-         * Link in the new vma and copy the page table entries:
-         * link in first so that swapoff can see swap entries.
-         * Note that, exceptionally, here the vma is inserted
-         * without holding mm->mmap_sem.
+         * Link in the new vma and copy the page table entries.
          */
         spin_lock(&mm->page_table_lock);
         *pprev = tmp;
@@ -275,8 +274,8 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
             goto out;
     }
     retval = 0;
-
 out:
+    up_write(&mm->mmap_sem);
     flush_tlb_mm(oldmm);
     up_write(&oldmm->mmap_sem);
     return retval;
-- cgit

From c74df32c724a1652ad8399b4891bb02c9d43743a Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Sat, 29 Oct 2005 18:16:23 -0700
Subject: [PATCH] mm: ptd_alloc take ptlock

Second step in pushing down the page_table_lock.  Remove the temporary
bridging hack from __pud_alloc, __pmd_alloc, __pte_alloc: expect callers not
to hold page_table_lock, whether it's on init_mm or a user mm; take
page_table_lock internally to check if a racing task already allocated.

Convert their callers from common code.  But avoid coming back to change
them again later: instead of moving the spin_lock(&mm->page_table_lock)
down, switch over to new macros pte_alloc_map_lock and pte_unmap_unlock,
which encapsulate the mapping+locking and unlocking+unmapping together, and
in the end may use alternatives to the mm page_table_lock itself.

These callers all hold mmap_sem (some exclusively, some not), so at no level
can a page table be whipped away from beneath them; and pte_alloc uses the
"atomic" pmd_present to test whether it needs to allocate.  It appears that
on all arches we can safely descend without page_table_lock.

Signed-off-by: Hugh Dickins
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/fork.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index 2a587b3224e3..8a069612eac3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -255,7 +255,6 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
         /*
          * Link in the new vma and copy the page table entries.
          */
-        spin_lock(&mm->page_table_lock);
         *pprev = tmp;
         pprev = &tmp->vm_next;

@@ -265,7 +264,6 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
         rb_parent = &tmp->vm_rb;
         mm->map_count++;
         retval = copy_page_range(mm, oldmm, tmp);
-        spin_unlock(&mm->page_table_lock);

         if (tmp->vm_ops && tmp->vm_ops->open)
             tmp->vm_ops->open(tmp);
-- cgit
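For readers unfamiliar with the macros named in the last patch, the
user-space sketch below illustrates the pairing idea behind
pte_alloc_map_lock and pte_unmap_unlock - bundling "map + lock" into one
step and "unlock + unmap" into another, so callers cannot get the ordering
wrong and the lock used underneath can later be swapped out.  Every type and
helper here (demo_mm, demo_pte_map_lock, demo_pte_unmap_unlock) is invented
for the demo; it is not the kernel's implementation.

#include <pthread.h>
#include <stdio.h>

/* Toy model: a "page table" guarded by a single lock. */
struct demo_mm {
    pthread_mutex_t page_table_lock;
    long pte[16];    /* pretend page-table entries */
};

/* "Map" an entry and take the lock in one step; hand the lock back to the
 * caller through ptlp so the matching release stays symmetric. */
static long *demo_pte_map_lock(struct demo_mm *mm, int idx,
                               pthread_mutex_t **ptlp)
{
    *ptlp = &mm->page_table_lock;
    pthread_mutex_lock(*ptlp);
    return &mm->pte[idx];
}

/* Undo both steps together: unlock, then "unmap" (a no-op in this toy). */
static void demo_pte_unmap_unlock(long *pte, pthread_mutex_t *ptl)
{
    (void)pte;
    pthread_mutex_unlock(ptl);
}

int main(void)
{
    struct demo_mm mm = { .page_table_lock = PTHREAD_MUTEX_INITIALIZER };
    pthread_mutex_t *ptl;
    long *pte;

    pte = demo_pte_map_lock(&mm, 3, &ptl);
    *pte = 42;    /* modify the entry under the lock */
    demo_pte_unmap_unlock(pte, ptl);

    printf("pte[3] = %ld\n", mm.pte[3]);
    return 0;
}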