From 2c3daa722b624eaf0c5ea60e4f180bd0684542e2 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Tue, 29 Apr 2008 00:59:58 -0700 Subject: CGroup API files: use read_u64 in memory controller Update the memory controller to use read_u64 for its limit/usage/failcnt control files, calling the new res_counter_read_u64() function. Signed-off-by: Paul Menage Cc: "Li Zefan" Cc: Balbir Singh Cc: Paul Jackson Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: "YAMAMOTO Takashi" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2e0bfc93484b..e227d7c5989f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -853,13 +853,10 @@ static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) return 0; } -static ssize_t mem_cgroup_read(struct cgroup *cont, - struct cftype *cft, struct file *file, - char __user *userbuf, size_t nbytes, loff_t *ppos) +static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) { - return res_counter_read(&mem_cgroup_from_cont(cont)->res, - cft->private, userbuf, nbytes, ppos, - NULL); + return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, + cft->private); } static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, @@ -950,18 +947,18 @@ static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", .private = RES_USAGE, - .read = mem_cgroup_read, + .read_u64 = mem_cgroup_read, }, { .name = "limit_in_bytes", .private = RES_LIMIT, .write = mem_cgroup_write, - .read = mem_cgroup_read, + .read_u64 = mem_cgroup_read, }, { .name = "failcnt", .private = RES_FAILCNT, - .read = mem_cgroup_read, + .read_u64 = mem_cgroup_read, }, { .name = "force_empty", -- cgit From c64745cf0f34f2cb08fc28c93d844e583d0d591d Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Tue, 29 Apr 2008 01:00:02 -0700 Subject: CGroup API files: use cgroup map for memcontrol stats file Remove the seq_file boilerplate used to construct the memcontrol stats map, and instead use the new map representation for cgroup control files Signed-off-by: Paul Menage Cc: "Li Zefan" Cc: Balbir Singh Cc: Paul Jackson Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: "YAMAMOTO Takashi" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e227d7c5989f..496ab700e0a4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -899,9 +899,9 @@ static const struct mem_cgroup_stat_desc { [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, }, }; -static int mem_control_stat_show(struct seq_file *m, void *arg) +static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, + struct cgroup_map_cb *cb) { - struct cgroup *cont = m->private; struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont); struct mem_cgroup_stat *stat = &mem_cont->stat; int i; @@ -911,8 +911,7 @@ static int mem_control_stat_show(struct seq_file *m, void *arg) val = mem_cgroup_read_stat(stat, i); val *= mem_cgroup_stat_desc[i].unit; - seq_printf(m, "%s %lld\n", mem_cgroup_stat_desc[i].msg, - (long long)val); + cb->fill(cb, mem_cgroup_stat_desc[i].msg, val); } /* showing # of active pages */ { @@ -922,27 +921,12 @@ static int mem_control_stat_show(struct seq_file *m, void *arg) MEM_CGROUP_ZSTAT_INACTIVE); active = mem_cgroup_get_all_zonestat(mem_cont, MEM_CGROUP_ZSTAT_ACTIVE); - seq_printf(m, "active %ld\n", (active) * PAGE_SIZE); - seq_printf(m, "inactive %ld\n", (inactive) * PAGE_SIZE); + cb->fill(cb, "active", (active) * PAGE_SIZE); + cb->fill(cb, "inactive", (inactive) * PAGE_SIZE); } return 0; } -static const struct file_operations mem_control_stat_file_operations = { - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int mem_control_stat_open(struct inode *unused, struct file *file) -{ - /* XXX __d_cont */ - struct cgroup *cont = file->f_dentry->d_parent->d_fsdata; - - file->f_op = &mem_control_stat_file_operations; - return single_open(file, mem_control_stat_show, cont); -} - static struct cftype mem_cgroup_files[] = { { .name = "usage_in_bytes", @@ -967,7 +951,7 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "stat", - .open = mem_control_stat_open, + .read_map = mem_control_stat_show, }, }; -- cgit From c27e8818a09bbdfe7c07c629cb2c27e1a742e156 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Tue, 29 Apr 2008 01:00:03 -0700 Subject: CGroup API files: drop mem_cgroup_force_empty() This function isn't needed - a NULL pointer in the cftype read function will result in the same EINVAL response to userspace. Signed-off-by: Paul Menage Cc: "Li Zefan" Cc: Balbir Singh Cc: Paul Jackson Cc: Pavel Emelyanov Cc: KAMEZAWA Hiroyuki Cc: "YAMAMOTO Takashi" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 496ab700e0a4..d12795cc7622 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -880,17 +880,6 @@ static ssize_t mem_force_empty_write(struct cgroup *cont, return ret; } -/* - * Note: This should be removed if cgroup supports write-only file. - */ -static ssize_t mem_force_empty_read(struct cgroup *cont, - struct cftype *cft, - struct file *file, char __user *userbuf, - size_t nbytes, loff_t *ppos) -{ - return -EINVAL; -} - static const struct mem_cgroup_stat_desc { const char *msg; u64 unit; @@ -947,7 +936,6 @@ static struct cftype mem_cgroup_files[] = { { .name = "force_empty", .write = mem_force_empty_write, - .read = mem_force_empty_read, }, { .name = "stat", -- cgit From cf475ad28ac35cc9ba612d67158f29b73b38b05d Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 29 Apr 2008 01:00:16 -0700 Subject: cgroups: add an owner to the mm_struct Remove the mem_cgroup member from mm_struct and instead adds an owner. This approach was suggested by Paul Menage. The advantage of this approach is that, once the mm->owner is known, using the subsystem id, the cgroup can be determined. It also allows several control groups that are virtually grouped by mm_struct, to exist independent of the memory controller i.e., without adding mem_cgroup's for each controller, to mm_struct. A new config option CONFIG_MM_OWNER is added and the memory resource controller selects this config option. This patch also adds cgroup callbacks to notify subsystems when mm->owner changes. The mm_cgroup_changed callback is called with the task_lock() of the new task held and is called just prior to changing the mm->owner. I am indebted to Paul Menage for the several reviews of this patchset and helping me make it lighter and simpler. This patch was tested on a powerpc box, it was compiled with both the MM_OWNER config turned on and off. After the thread group leader exits, it's moved to init_css_state by cgroup_exit(), thus all future charges from runnings threads would be redirected to the init_css_set's subsystem. Signed-off-by: Balbir Singh Cc: Pavel Emelianov Cc: Hugh Dickins Cc: Sudhir Kumar Cc: YAMAMOTO Takashi Cc: Hirokazu Takahashi Cc: David Rientjes , Cc: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Acked-by: Pekka Enberg Reviewed-by: Paul Menage Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d12795cc7622..49d80814798b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -236,26 +236,12 @@ static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) css); } -static struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) +struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) { return container_of(task_subsys_state(p, mem_cgroup_subsys_id), struct mem_cgroup, css); } -void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) -{ - struct mem_cgroup *mem; - - mem = mem_cgroup_from_task(p); - css_get(&mem->css); - mm->mem_cgroup = mem; -} - -void mm_free_cgroup(struct mm_struct *mm) -{ - css_put(&mm->mem_cgroup->css); -} - static inline int page_cgroup_locked(struct page *page) { return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); @@ -476,6 +462,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, int zid = zone_idx(z); struct mem_cgroup_per_zone *mz; + BUG_ON(!mem_cont); mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); if (active) src = &mz->active_list; @@ -574,7 +561,7 @@ retry: mm = &init_mm; rcu_read_lock(); - mem = rcu_dereference(mm->mem_cgroup); + mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); /* * For every charge from the cgroup, increment reference count */ @@ -985,10 +972,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) struct mem_cgroup *mem; int node; - if (unlikely((cont->parent) == NULL)) { + if (unlikely((cont->parent) == NULL)) mem = &init_mem_cgroup; - init_mm.mem_cgroup = mem; - } else + else mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL); if (mem == NULL) @@ -1067,10 +1053,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, if (!thread_group_leader(p)) goto out; - css_get(&mem->css); - rcu_assign_pointer(mm->mem_cgroup, mem); - css_put(&old_mem->css); - out: mmput(mm); } -- cgit From c84872e168d10926acd2dee975d19172eef79252 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 01:00:17 -0700 Subject: memcgroup: add the max_usage member on the res_counter This field is the maximal value of the usage one since the counter creation (or since the latest reset). To reset this to the usage value simply write anything to the appropriate cgroup file. Signed-off-by: Pavel Emelyanov Acked-by: Balbir Singh Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 49d80814798b..350a14da6525 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -855,6 +855,17 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, mem_cgroup_write_strategy); } +static ssize_t mem_cgroup_max_reset(struct cgroup *cont, struct cftype *cft, + struct file *file, const char __user *userbuf, + size_t nbytes, loff_t *ppos) +{ + struct mem_cgroup *mem; + + mem = mem_cgroup_from_cont(cont); + res_counter_reset_max(&mem->res); + return nbytes; +} + static ssize_t mem_force_empty_write(struct cgroup *cont, struct cftype *cft, struct file *file, const char __user *userbuf, @@ -909,6 +920,12 @@ static struct cftype mem_cgroup_files[] = { .private = RES_USAGE, .read_u64 = mem_cgroup_read, }, + { + .name = "max_usage_in_bytes", + .private = RES_MAX_USAGE, + .write = mem_cgroup_max_reset, + .read_u64 = mem_cgroup_read, + }, { .name = "limit_in_bytes", .private = RES_LIMIT, -- cgit From b6ac57d50a375aa2f267e1b2b56c46564a936d00 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 29 Apr 2008 01:00:19 -0700 Subject: memcgroup: move memory controller allocations to their own slabs Move the memory controller data structure page_cgroup to its own slab cache. It saves space on the system, allocations are not necessarily pushed to order of 2 and should provide performance benefits. Users who disable the memory controller can also double check that the memory controller is not allocating page_cgroup's. NOTE: Hugh Dickins brought up the issue of whether we want to mark page_cgroup as __GFP_MOVABLE or __GFP_RECLAIMABLE. I don't think there is an easy answer at the moment. page_cgroup's are associated with user pages, they can be reclaimed once the user page has been reclaimed, so it might make sense to mark them as __GFP_RECLAIMABLE. For now, I am leaving the marking to default values that the slab allocator uses. Signed-off-by: Balbir Singh Cc: Pavel Emelianov Cc: Hugh Dickins Cc: Sudhir Kumar Cc: YAMAMOTO Takashi Cc: Paul Menage Cc: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 350a14da6525..f4079692cbf5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,7 @@ struct cgroup_subsys mem_cgroup_subsys; static const int MEM_CGROUP_RECLAIM_RETRIES = 5; +static struct kmem_cache *page_cgroup_cache; /* * Statistics for memory cgroup. @@ -547,7 +549,7 @@ retry: } unlock_page_cgroup(page); - pc = kzalloc(sizeof(struct page_cgroup), gfp_mask); + pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask); if (pc == NULL) goto err; @@ -609,7 +611,7 @@ retry: */ res_counter_uncharge(&mem->res, PAGE_SIZE); css_put(&mem->css); - kfree(pc); + kmem_cache_free(page_cgroup_cache, pc); goto retry; } page_assign_page_cgroup(page, pc); @@ -624,7 +626,7 @@ done: return 0; out: css_put(&mem->css); - kfree(pc); + kmem_cache_free(page_cgroup_cache, pc); err: return -ENOMEM; } @@ -682,7 +684,7 @@ void mem_cgroup_uncharge_page(struct page *page) res_counter_uncharge(&mem->res, PAGE_SIZE); css_put(&mem->css); - kfree(pc); + kmem_cache_free(page_cgroup_cache, pc); return; } @@ -989,10 +991,12 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) struct mem_cgroup *mem; int node; - if (unlikely((cont->parent) == NULL)) + if (unlikely((cont->parent) == NULL)) { mem = &init_mem_cgroup; - else + page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC); + } else { mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL); + } if (mem == NULL) return ERR_PTR(-ENOMEM); -- cgit From 85cc59db12724e1248f5e4841e61339cf485d5c7 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 01:00:20 -0700 Subject: memcgroup: use triggers in force_empty and max_usage files These two files are essentially event callbacks. They do not care about the contents of the string, but only about the fact of the write itself. Signed-off-by: Pavel Emelyanov Acked-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f4079692cbf5..dc3472f9f68c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -857,27 +857,18 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, mem_cgroup_write_strategy); } -static ssize_t mem_cgroup_max_reset(struct cgroup *cont, struct cftype *cft, - struct file *file, const char __user *userbuf, - size_t nbytes, loff_t *ppos) +static int mem_cgroup_max_reset(struct cgroup *cont, unsigned int event) { struct mem_cgroup *mem; mem = mem_cgroup_from_cont(cont); res_counter_reset_max(&mem->res); - return nbytes; + return 0; } -static ssize_t mem_force_empty_write(struct cgroup *cont, - struct cftype *cft, struct file *file, - const char __user *userbuf, - size_t nbytes, loff_t *ppos) +static int mem_force_empty_write(struct cgroup *cont, unsigned int event) { - struct mem_cgroup *mem = mem_cgroup_from_cont(cont); - int ret = mem_cgroup_force_empty(mem); - if (!ret) - ret = nbytes; - return ret; + return mem_cgroup_force_empty(mem_cgroup_from_cont(cont)); } static const struct mem_cgroup_stat_desc { @@ -925,7 +916,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "max_usage_in_bytes", .private = RES_MAX_USAGE, - .write = mem_cgroup_max_reset, + .trigger = mem_cgroup_max_reset, .read_u64 = mem_cgroup_read, }, { @@ -941,7 +932,7 @@ static struct cftype mem_cgroup_files[] = { }, { .name = "force_empty", - .write = mem_force_empty_write, + .trigger = mem_force_empty_write, }, { .name = "stat", -- cgit From 29f2a4dac856e9433a502b05b40e8e90385d8e27 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 29 Apr 2008 01:00:21 -0700 Subject: memcgroup: implement failcounter reset This is a very common requirement from people using the resource accounting facilities (not only memcgroup but also OpenVZ beancounters). They want to put the cgroup in an initial state without re-creating it. For example after re-configuring a group people want to observe how this new configuration fits the group needs without saving the previous failcnt value. Merge two resets into one mem_cgroup_reset() function to demonstrate how multiplexing work. Besides, I have plans to move the files, that correspond to res_counter to the res_counter.c file and somehow "import" them into controller. I don't know how to make it gracefully yet, but merging resets of max_usage and failcnt in one function will be there for sure. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Pavel Emelyanov Acked-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dc3472f9f68c..f891876efee1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -857,12 +857,19 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, mem_cgroup_write_strategy); } -static int mem_cgroup_max_reset(struct cgroup *cont, unsigned int event) +static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) { struct mem_cgroup *mem; mem = mem_cgroup_from_cont(cont); - res_counter_reset_max(&mem->res); + switch (event) { + case RES_MAX_USAGE: + res_counter_reset_max(&mem->res); + break; + case RES_FAILCNT: + res_counter_reset_failcnt(&mem->res); + break; + } return 0; } @@ -916,7 +923,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "max_usage_in_bytes", .private = RES_MAX_USAGE, - .trigger = mem_cgroup_max_reset, + .trigger = mem_cgroup_reset, .read_u64 = mem_cgroup_read, }, { @@ -928,6 +935,7 @@ static struct cftype mem_cgroup_files[] = { { .name = "failcnt", .private = RES_FAILCNT, + .trigger = mem_cgroup_reset, .read_u64 = mem_cgroup_read, }, { -- cgit From 3eae90c3cdd4e762d0f4f5e939c98780fccded57 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 29 Apr 2008 01:00:22 -0700 Subject: memcg: remove redundant function calls remove_list/add_list uses page_cgroup_zoneinfo() in it. So, it's called twice before and after lock. mz = page_cgroup_zoneinfo(); lock(); mz = page_cgroup_zoneinfo(); .... unlock(); And address of mz never changes. This is not good. This patch fixes this behavior. Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f891876efee1..395fd8e4166a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -275,10 +275,10 @@ static void unlock_page_cgroup(struct page *page) bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); } -static void __mem_cgroup_remove_list(struct page_cgroup *pc) +static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, + struct page_cgroup *pc) { int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; - struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); if (from) MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1; @@ -289,10 +289,10 @@ static void __mem_cgroup_remove_list(struct page_cgroup *pc) list_del_init(&pc->lru); } -static void __mem_cgroup_add_list(struct page_cgroup *pc) +static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, + struct page_cgroup *pc) { int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; - struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); if (!to) { MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1; @@ -618,7 +618,7 @@ retry: mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_add_list(pc); + __mem_cgroup_add_list(mz, pc); spin_unlock_irqrestore(&mz->lru_lock, flags); unlock_page_cgroup(page); @@ -674,7 +674,7 @@ void mem_cgroup_uncharge_page(struct page *page) if (--(pc->ref_cnt) == 0) { mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_remove_list(pc); + __mem_cgroup_remove_list(mz, pc); spin_unlock_irqrestore(&mz->lru_lock, flags); page_assign_page_cgroup(page, NULL); @@ -736,7 +736,7 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage) mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_remove_list(pc); + __mem_cgroup_remove_list(mz, pc); spin_unlock_irqrestore(&mz->lru_lock, flags); page_assign_page_cgroup(page, NULL); @@ -748,7 +748,7 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage) mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_add_list(pc); + __mem_cgroup_add_list(mz, pc); spin_unlock_irqrestore(&mz->lru_lock, flags); unlock_page_cgroup(newpage); -- cgit From 4a56d02e34baedbea5eb1fd558f2b856b8c7db1e Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 29 Apr 2008 01:00:23 -0700 Subject: memcgroup: make the memory controller more desktop responsive This patch makes the memory controller more responsive on my desktop. 1. Set all cached pages as inactive. We were by default marking all pages as active, thus forcing us to go through two passes for reclaiming pages 2. Remove congestion_wait(), since we already have that logic in do_try_to_free_pages() Signed-off-by: Balbir Singh Reviewed-by: KOSAKI Motohiro Cc: YAMAMOTO Takashi Cc: Paul Menage Cc: Pavel Emelianov Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 395fd8e4166a..c5285afe2048 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -591,7 +591,6 @@ retry: mem_cgroup_out_of_memory(mem, gfp_mask); goto out; } - congestion_wait(WRITE, HZ/10); } pc->ref_cnt = 1; @@ -599,7 +598,7 @@ retry: pc->page = page; pc->flags = PAGE_CGROUP_FLAG_ACTIVE; if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) - pc->flags |= PAGE_CGROUP_FLAG_CACHE; + pc->flags = PAGE_CGROUP_FLAG_CACHE; lock_page_cgroup(page); if (page_get_page_cgroup(page)) { -- cgit From 33327948782bcef89c78eb47af86b6a2df9fd4a5 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 29 Apr 2008 01:00:24 -0700 Subject: memcgroup: use vmalloc for mem_cgroup allocation On ia64, this kmalloc() requires order-4 pages. But this is not necessary to be physically contiguous. For big mem_cgroup, vmalloc is better. For small ones, kmalloc is used. [akpm@linux-foundation.org: simplification] Signed-off-by: KAMEZAWA Hiroyuki Cc: Pavel Emelyanov Cc: Li Zefan Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c5285afe2048..15aa34b11e88 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -983,6 +984,29 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) kfree(mem->info.nodeinfo[node]); } +static struct mem_cgroup *mem_cgroup_alloc(void) +{ + struct mem_cgroup *mem; + + if (sizeof(*mem) < PAGE_SIZE) + mem = kmalloc(sizeof(*mem), GFP_KERNEL); + else + mem = vmalloc(sizeof(*mem)); + + if (mem) + memset(mem, 0, sizeof(*mem)); + return mem; +} + +static void mem_cgroup_free(struct mem_cgroup *mem) +{ + if (sizeof(*mem) < PAGE_SIZE) + kfree(mem); + else + vfree(mem); +} + + static struct cgroup_subsys_state * mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) { @@ -993,12 +1017,11 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) mem = &init_mem_cgroup; page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC); } else { - mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL); + mem = mem_cgroup_alloc(); + if (!mem) + return ERR_PTR(-ENOMEM); } - if (mem == NULL) - return ERR_PTR(-ENOMEM); - res_counter_init(&mem->res); memset(&mem->info, 0, sizeof(mem->info)); @@ -1012,7 +1035,7 @@ free_out: for_each_node_state(node, N_POSSIBLE) free_mem_cgroup_per_zone_info(mem, node); if (cont->parent != NULL) - kfree(mem); + mem_cgroup_free(mem); return ERR_PTR(-ENOMEM); } @@ -1032,7 +1055,7 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss, for_each_node_state(node, N_POSSIBLE) free_mem_cgroup_per_zone_info(mem, node); - kfree(mem_cgroup_from_cont(cont)); + mem_cgroup_free(mem_cgroup_from_cont(cont)); } static int mem_cgroup_populate(struct cgroup_subsys *ss, -- cgit From 1faf8e40a8ab12ae1f7f474965e6fb031e43f8d6 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 29 Apr 2008 01:00:24 -0700 Subject: memcg: remove redundant initialization in mem_cgroup_create() *mem has been zeroed, that means mem->info has already been filled with 0. Signed-off-by: Li Zefan Acked-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 15aa34b11e88..33add96cd5fb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1024,8 +1024,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) res_counter_init(&mem->res); - memset(&mem->info, 0, sizeof(mem->info)); - for_each_node_state(node, N_POSSIBLE) if (alloc_mem_cgroup_per_zone_info(mem, node)) goto free_out; -- cgit From 55e462b05b5df4fd113c4a304c4f487d44b0898e Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Thu, 1 May 2008 04:35:12 -0700 Subject: memcg: simple stats for memory resource controller Implement trivial statistics for the memory resource controller. Signed-off-by: Balaji Rao Acked-by: Balbir Singh Cc: Dhaval Giani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 33add96cd5fb..e46451e1d9b7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -48,6 +48,8 @@ enum mem_cgroup_stat_index { */ MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ MEM_CGROUP_STAT_RSS, /* # of pages charged as rss */ + MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ + MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ MEM_CGROUP_STAT_NSTATS, }; @@ -199,6 +201,13 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags, __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val); else __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val); + + if (charge) + __mem_cgroup_stat_add_safe(stat, + MEM_CGROUP_STAT_PGPGIN_COUNT, 1); + else + __mem_cgroup_stat_add_safe(stat, + MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); } static struct mem_cgroup_per_zone * @@ -884,6 +893,8 @@ static const struct mem_cgroup_stat_desc { } mem_cgroup_stat_desc[] = { [MEM_CGROUP_STAT_CACHE] = { "cache", PAGE_SIZE, }, [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, }, + [MEM_CGROUP_STAT_PGPGIN_COUNT] = {"pgpgin", 1, }, + [MEM_CGROUP_STAT_PGPGOUT_COUNT] = {"pgpgout", 1, }, }; static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, -- cgit