| field | value | date |
|---|---|---|
| author | Mark Brown <broonie@kernel.org> | 2020-08-25 11:01:46 +0100 |
| committer | Mark Brown <broonie@kernel.org> | 2020-08-25 11:01:46 +0100 |
| commit | 3bec5b6aae830355e786e204b20a7cea38c3a8ed (patch) | |
| tree | fd597b87faf55ceb2a207ee94f4feca6276696db /include/linux/memcontrol.h | |
| parent | a577f3456c0a2fac3dee037c483753e6e68f3e49 (diff) | |
| parent | d012a7190fc1fd72ed48911e77ca97ba4521bccd (diff) | |
Merge tag 'v5.9-rc2' into regulator-5.9
Linux 5.9-rc2
Diffstat (limited to 'include/linux/memcontrol.h')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | include/linux/memcontrol.h | 217 |

1 file changed, 193 insertions, 24 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e77197a62809..d0b036123c6a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -23,6 +23,7 @@
 #include <linux/page-flags.h>
 
 struct mem_cgroup;
+struct obj_cgroup;
 struct page;
 struct mm_struct;
 struct kmem_cache;
@@ -31,8 +32,7 @@ struct kmem_cache;
 enum memcg_stat_item {
 	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
 	MEMCG_SOCK,
-	/* XXX: why are these zone and not node counters? */
-	MEMCG_KERNEL_STACK_KB,
+	MEMCG_PERCPU_B,
 	MEMCG_NR_STAT,
 };
 
@@ -48,12 +48,6 @@
 	MEMCG_NR_MEMORY_EVENTS,
 };
 
-enum mem_cgroup_protection {
-	MEMCG_PROT_NONE,
-	MEMCG_PROT_LOW,
-	MEMCG_PROT_MIN,
-};
-
 struct mem_cgroup_reclaim_cookie {
 	pg_data_t *pgdat;
 	unsigned int generation;
@@ -71,8 +65,8 @@ struct mem_cgroup_id {
 
 /*
  * Per memcg event counter is incremented at every pagein/pageout. With THP,
- * it will be incremated by the number of pages. This counter is used for
- * for trigger some periodic events. This is straightforward and better
+ * it will be incremented by the number of pages. This counter is used
+ * to trigger some periodic events. This is straightforward and better
  * than using jiffies etc. to handle periodic memcg event.
  */
 enum mem_cgroup_events_target {
@@ -193,6 +187,22 @@ struct memcg_cgwb_frn {
 };
 
 /*
+ * Bucket for arbitrarily byte-sized objects charged to a memory
+ * cgroup. The bucket can be reparented in one piece when the cgroup
+ * is destroyed, without having to round up the individual references
+ * of all live memory objects in the wild.
+ */
+struct obj_cgroup {
+	struct percpu_ref refcnt;
+	struct mem_cgroup *memcg;
+	atomic_t nr_charged_bytes;
+	union {
+		struct list_head list;
+		struct rcu_head rcu;
+	};
+};
+
+/*
  * The memory controller data structure. The memory controller controls both
  * page cache and RSS per cgroup. We would eventually like to provide
  * statistics based on the statistics developed by Rik Van Riel for clock-pro,
@@ -300,7 +310,8 @@ struct mem_cgroup {
         /* Index in the kmem_cache->memcg_params.memcg_caches array */
 	int kmemcg_id;
 	enum memcg_kmem_state kmem_state;
-	struct list_head kmem_caches;
+	struct obj_cgroup __rcu *objcg;
+	struct list_head objcg_list; /* list of inherited objcgs */
 #endif
 
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -329,6 +340,13 @@ struct mem_cgroup {
 
 extern struct mem_cgroup *root_mem_cgroup;
 
+static __always_inline bool memcg_stat_item_in_bytes(int idx)
+{
+	if (idx == MEMCG_PERCPU_B)
+		return true;
+	return vmstat_item_in_bytes(idx);
+}
+
 static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 {
 	return (memcg == root_mem_cgroup);
@@ -339,12 +357,49 @@ static inline bool mem_cgroup_disabled(void)
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }
 
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
+						  struct mem_cgroup *memcg,
 						  bool in_low_reclaim)
 {
 	if (mem_cgroup_disabled())
 		return 0;
 
+	/*
+	 * There is no reclaim protection applied to a targeted reclaim.
+	 * We are special casing this specific case here because
+	 * mem_cgroup_protected calculation is not robust enough to keep
+	 * the protection invariant for calculated effective values for
+	 * parallel reclaimers with different reclaim target. This is
+	 * especially a problem for tail memcgs (as they have pages on LRU)
+	 * which would want to have effective values 0 for targeted reclaim
+	 * but a different value for external reclaim.
+	 *
+	 * Example
+	 * Let's have global and A's reclaim in parallel:
+	 *  |
+	 *  A (low=2G, usage = 3G, max = 3G, children_low_usage = 1.5G)
+	 *  |\
+	 *  | C (low = 1G, usage = 2.5G)
+	 *  B (low = 1G, usage = 0.5G)
+	 *
+	 * For the global reclaim
+	 * A.elow = A.low
+	 * B.elow = min(B.usage, B.low) because children_low_usage <= A.elow
+	 * C.elow = min(C.usage, C.low)
+	 *
+	 * With the effective values resetting we have A reclaim
+	 * A.elow = 0
+	 * B.elow = B.low
+	 * C.elow = C.low
+	 *
+	 * If the global reclaim races with A's reclaim then
+	 * B.elow = C.elow = 0 because children_low_usage > A.elow)
+	 * is possible and reclaiming B would be violating the protection.
+	 *
+	 */
+	if (root == memcg)
+		return 0;
+
 	if (in_low_reclaim)
 		return READ_ONCE(memcg->memory.emin);
 
@@ -352,8 +407,36 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
 		   READ_ONCE(memcg->memory.elow));
 }
 
-enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
-						struct mem_cgroup *memcg);
+void mem_cgroup_calculate_protection(struct mem_cgroup *root,
+				     struct mem_cgroup *memcg);
+
+static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg)
+{
+	/*
+	 * The root memcg doesn't account charges, and doesn't support
+	 * protection.
+	 */
+	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg);
+
+}
+
+static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+{
+	if (!mem_cgroup_supports_protection(memcg))
+		return false;
+
+	return READ_ONCE(memcg->memory.elow) >=
+		page_counter_read(&memcg->memory);
+}
+
+static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
+{
+	if (!mem_cgroup_supports_protection(memcg))
+		return false;
+
+	return READ_ONCE(memcg->memory.emin) >=
+		page_counter_read(&memcg->memory);
+}
 
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask);
 
@@ -416,6 +499,33 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 	return css ? container_of(css, struct mem_cgroup, css) : NULL;
 }
 
+static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
+{
+	return percpu_ref_tryget(&objcg->refcnt);
+}
+
+static inline void obj_cgroup_get(struct obj_cgroup *objcg)
+{
+	percpu_ref_get(&objcg->refcnt);
+}
+
+static inline void obj_cgroup_put(struct obj_cgroup *objcg)
+{
+	percpu_ref_put(&objcg->refcnt);
+}
+
+/*
+ * After the initialization objcg->memcg is always pointing at
+ * a valid memcg, but can be atomically swapped to the parent memcg.
+ *
+ * The caller must ensure that the returned memcg won't be released:
+ * e.g. acquire the rcu_read_lock or css_set_lock.
+ */
+static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
+{
+	return READ_ONCE(objcg->memcg);
+}
+
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 	if (memcg)
@@ -520,7 +630,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
 	struct mem_cgroup_per_node *mz;
 
 	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	return mz->lru_zone_size[zone_idx][lru];
+	return READ_ONCE(mz->lru_zone_size[zone_idx][lru]);
 }
 
 void mem_cgroup_handle_over_high(void);
@@ -679,11 +789,34 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 	return x;
 }
 
+void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+			      int val);
 void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 			int val);
 void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
+
 void mod_memcg_obj_state(void *p, int idx, int val);
 
+static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+					 int val)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__mod_lruvec_slab_state(p, idx, val);
+	local_irq_restore(flags);
+}
+
+static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
+					  enum node_stat_item idx, int val)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__mod_memcg_lruvec_state(lruvec, idx, val);
+	local_irq_restore(flags);
+}
+
 static inline void mod_lruvec_state(struct lruvec *lruvec,
 				    enum node_stat_item idx, int val)
 {
@@ -825,16 +958,26 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 {
 }
 
-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
+						  struct mem_cgroup *memcg,
 						  bool in_low_reclaim)
 {
 	return 0;
 }
 
-static inline enum mem_cgroup_protection mem_cgroup_protected(
-	struct mem_cgroup *root, struct mem_cgroup *memcg)
+static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
+						   struct mem_cgroup *memcg)
+{
+}
+
+static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+{
+	return false;
+}
+
+static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
 {
-	return MEMCG_PROT_NONE;
+	return false;
 }
 
 static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
@@ -1057,6 +1200,11 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 	return node_page_state(lruvec_pgdat(lruvec), idx);
 }
 
+static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
+					    enum node_stat_item idx, int val)
+{
+}
+
 static inline void __mod_lruvec_state(struct lruvec *lruvec,
 				      enum node_stat_item idx, int val)
 {
@@ -1089,6 +1237,14 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
 	__mod_node_page_state(page_pgdat(page), idx, val);
 }
 
+static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+					 int val)
+{
+	struct page *page = virt_to_head_page(p);
+
+	mod_node_page_state(page_pgdat(page), idx, val);
+}
+
 static inline void mod_memcg_obj_state(void *p, int idx, int val)
 {
 }
@@ -1341,9 +1497,6 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
 }
 #endif
 
-struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
-void memcg_kmem_put_cache(struct kmem_cache *cachep);
-
 #ifdef CONFIG_MEMCG_KMEM
 int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
 			unsigned int nr_pages);
@@ -1351,8 +1504,12 @@ void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages);
 int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
 void __memcg_kmem_uncharge_page(struct page *page, int order);
 
+struct obj_cgroup *get_obj_cgroup_from_current(void);
+
+int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
+void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
+
 extern struct static_key_false memcg_kmem_enabled_key;
-extern struct workqueue_struct *memcg_kmem_cache_wq;
 
 extern int memcg_nr_cache_ids;
 void memcg_get_cache_ids(void);
@@ -1368,7 +1525,19 @@ void memcg_put_cache_ids(void);
 
 static inline bool memcg_kmem_enabled(void)
 {
-	return static_branch_unlikely(&memcg_kmem_enabled_key);
+	return static_branch_likely(&memcg_kmem_enabled_key);
+}
+
+static inline bool memcg_kmem_bypass(void)
+{
+	if (in_interrupt())
+		return true;
+
+	/* Allow remote memcg charging in kthread contexts. */
+	if ((!current->mm || (current->flags & PF_KTHREAD)) &&
+	     !current->active_memcg)
+		return true;
+	return false;
 }
 
 static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
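
The memcontrol.h hunks pulled in by this merge add the byte-granular `struct obj_cgroup` accounting interface (`get_obj_cgroup_from_current()`, `obj_cgroup_charge()`, `obj_cgroup_uncharge()`, plus the `obj_cgroup_get()/put()` helpers and the new `memcg_kmem_bypass()` check). The sketch below is not part of the merged patch; it is only a rough illustration of how a hypothetical accounted allocator might drive that API, and the `accounted_alloc()`/`accounted_free()` helpers and their error handling are assumptions made for the example.

```c
/*
 * Illustrative sketch only (not from this merge): charge a byte-sized
 * allocation to the current task's obj_cgroup using the v5.9-rc2
 * interfaces declared in the diff above.
 */
#include <linux/memcontrol.h>
#include <linux/slab.h>

static void *accounted_alloc(size_t size, gfp_t gfp, struct obj_cgroup **objcgp)
{
	struct obj_cgroup *objcg = NULL;
	void *ptr;

	/* May return NULL (e.g. root cgroup or bypass contexts). */
	if (memcg_kmem_enabled())
		objcg = get_obj_cgroup_from_current();

	if (objcg && obj_cgroup_charge(objcg, gfp, size)) {
		obj_cgroup_put(objcg);		/* charge refused by the limit */
		return NULL;
	}

	ptr = kmalloc(size, gfp);
	if (!ptr && objcg) {
		obj_cgroup_uncharge(objcg, size);	/* roll back the charge */
		obj_cgroup_put(objcg);
		objcg = NULL;
	}

	*objcgp = objcg;	/* caller keeps the reference until free time */
	return ptr;
}

static void accounted_free(void *ptr, size_t size, struct obj_cgroup *objcg)
{
	kfree(ptr);
	if (objcg) {
		obj_cgroup_uncharge(objcg, size);
		obj_cgroup_put(objcg);
	}
}
```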
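
The merge also replaces the `mem_cgroup_protected()` tri-state with `mem_cgroup_calculate_protection()` plus the `mem_cgroup_below_min()`/`mem_cgroup_below_low()` predicates. The following is a hedged sketch of the calling pattern a reclaim path might use with these helpers; `skip_this_memcg()` and its arguments are hypothetical names for illustration, not code from this merge.

```c
/*
 * Illustrative sketch only (not from this merge): the calling pattern
 * for the reworked reclaim-protection helpers.
 */
#include <linux/memcontrol.h>

static bool skip_this_memcg(struct mem_cgroup *target_memcg,
			    struct mem_cgroup *memcg, bool memcg_low_reclaim)
{
	/* Refresh memcg->memory.emin/elow relative to the reclaim target. */
	mem_cgroup_calculate_protection(target_memcg, memcg);

	if (mem_cgroup_below_min(memcg))
		return true;	/* hard protection: never reclaim from here */

	if (mem_cgroup_below_low(memcg) && !memcg_low_reclaim)
		return true;	/* soft protection: skip unless low is being ignored */

	return false;
}
```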
