diff options
Diffstat (limited to 'kernel/cgroup/cgroup.c')
-rw-r--r-- | kernel/cgroup/cgroup.c | 148 |
1 files changed, 79 insertions, 69 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 63e5b90da1f3..a723b7dc6e4e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -95,6 +95,9 @@ EXPORT_SYMBOL_GPL(cgroup_mutex); EXPORT_SYMBOL_GPL(css_set_lock); #endif +struct blocking_notifier_head cgroup_lifetime_notifier = + BLOCKING_NOTIFIER_INIT(cgroup_lifetime_notifier); + DEFINE_SPINLOCK(trace_cgroup_path_lock); char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; static bool cgroup_debug __read_mostly; @@ -161,10 +164,14 @@ static struct static_key_true *cgroup_subsys_on_dfl_key[] = { }; #undef SUBSYS -static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu); +static DEFINE_PER_CPU(struct css_rstat_cpu, root_rstat_cpu); +static DEFINE_PER_CPU(struct cgroup_rstat_base_cpu, root_rstat_base_cpu); /* the default hierarchy */ -struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu }; +struct cgroup_root cgrp_dfl_root = { + .cgrp.self.rstat_cpu = &root_rstat_cpu, + .cgrp.rstat_base_cpu = &root_rstat_base_cpu, +}; EXPORT_SYMBOL_GPL(cgrp_dfl_root); /* @@ -1335,6 +1342,7 @@ static void cgroup_destroy_root(struct cgroup_root *root) { struct cgroup *cgrp = &root->cgrp; struct cgrp_cset_link *link, *tmp_link; + int ret; trace_cgroup_destroy_root(root); @@ -1343,6 +1351,10 @@ static void cgroup_destroy_root(struct cgroup_root *root) BUG_ON(atomic_read(&root->nr_cgrps)); BUG_ON(!list_empty(&cgrp->self.children)); + ret = blocking_notifier_call_chain(&cgroup_lifetime_notifier, + CGROUP_LIFETIME_OFFLINE, cgrp); + WARN_ON_ONCE(notifier_to_errno(ret)); + /* Rebind all subsystems back to the default hierarchy */ WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask)); @@ -1371,7 +1383,6 @@ static void cgroup_destroy_root(struct cgroup_root *root) cgroup_unlock(); - cgroup_rstat_exit(cgrp); kernfs_destroy_root(root->kf_root); cgroup_free_root(root); } @@ -1715,7 +1726,7 @@ static void css_clear_dir(struct cgroup_subsys_state *css) css->flags &= ~CSS_VISIBLE; - if (!css->ss) { + if (css_is_self(css)) { if (cgroup_on_dfl(cgrp)) { cgroup_addrm_files(css, cgrp, cgroup_base_files, false); @@ -1747,7 +1758,7 @@ static int css_populate_dir(struct cgroup_subsys_state *css) if (css->flags & CSS_VISIBLE) return 0; - if (!css->ss) { + if (css_is_self(css)) { if (cgroup_on_dfl(cgrp)) { ret = cgroup_addrm_files(css, cgrp, cgroup_base_files, true); @@ -1876,13 +1887,6 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) } spin_unlock_irq(&css_set_lock); - if (ss->css_rstat_flush) { - list_del_rcu(&css->rstat_css_node); - synchronize_rcu(); - list_add_rcu(&css->rstat_css_node, - &dcgrp->rstat_css_list); - } - /* default hierarchy doesn't enable controllers by default */ dst_root->subsys_mask |= 1 << ssid; if (dst_root == &cgrp_dfl_root) { @@ -2065,7 +2069,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) cgrp->dom_cgrp = cgrp; cgrp->max_descendants = INT_MAX; cgrp->max_depth = INT_MAX; - INIT_LIST_HEAD(&cgrp->rstat_css_list); prev_cputime_init(&cgrp->prev_cputime); for_each_subsys(ss, ssid) @@ -2146,7 +2149,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) if (ret) goto destroy_root; - ret = cgroup_rstat_init(root_cgrp); + ret = css_rstat_init(&root_cgrp->self); if (ret) goto destroy_root; @@ -2154,10 +2157,9 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) if (ret) goto exit_stats; - if (root == &cgrp_dfl_root) { - ret = cgroup_bpf_inherit(root_cgrp); - WARN_ON_ONCE(ret); - } + ret = blocking_notifier_call_chain(&cgroup_lifetime_notifier, + CGROUP_LIFETIME_ONLINE, root_cgrp); + WARN_ON_ONCE(notifier_to_errno(ret)); trace_cgroup_setup_root(root); @@ -2188,7 +2190,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) goto out; exit_stats: - cgroup_rstat_exit(root_cgrp); + css_rstat_exit(&root_cgrp->self); destroy_root: kernfs_destroy_root(root->kf_root); root->kf_root = NULL; @@ -5444,8 +5446,9 @@ static void css_free_rwork_fn(struct work_struct *work) struct cgroup *cgrp = css->cgroup; percpu_ref_exit(&css->refcnt); + css_rstat_exit(css); - if (ss) { + if (!css_is_self(css)) { /* css free path */ struct cgroup_subsys_state *parent = css->parent; int id = css->id; @@ -5474,7 +5477,6 @@ static void css_free_rwork_fn(struct work_struct *work) cgroup_put(cgroup_parent(cgrp)); kernfs_put(cgrp->kn); psi_cgroup_free(cgrp); - cgroup_rstat_exit(cgrp); kfree(cgrp); } else { /* @@ -5499,14 +5501,10 @@ static void css_release_work_fn(struct work_struct *work) css->flags |= CSS_RELEASED; list_del_rcu(&css->sibling); - if (ss) { + if (!css_is_self(css)) { struct cgroup *parent_cgrp; - /* css release path */ - if (!list_empty(&css->rstat_css_node)) { - cgroup_rstat_flush(cgrp); - list_del_rcu(&css->rstat_css_node); - } + css_rstat_flush(css); cgroup_idr_replace(&ss->css_idr, NULL, css->id); if (ss->css_released) @@ -5532,7 +5530,7 @@ static void css_release_work_fn(struct work_struct *work) /* cgroup release path */ TRACE_CGROUP_PATH(release, cgrp); - cgroup_rstat_flush(cgrp); + css_rstat_flush(&cgrp->self); spin_lock_irq(&css_set_lock); for (tcgrp = cgroup_parent(cgrp); tcgrp; @@ -5580,7 +5578,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css, css->id = -1; INIT_LIST_HEAD(&css->sibling); INIT_LIST_HEAD(&css->children); - INIT_LIST_HEAD(&css->rstat_css_node); css->serial_nr = css_serial_nr_next++; atomic_set(&css->online_cnt, 0); @@ -5589,9 +5586,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css, css_get(css->parent); } - if (ss->css_rstat_flush) - list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list); - BUG_ON(cgroup_css(cgrp, ss)); } @@ -5684,6 +5678,10 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, goto err_free_css; css->id = err; + err = css_rstat_init(css); + if (err) + goto err_free_css; + /* @css is ready to be brought online now, make it visible */ list_add_tail_rcu(&css->sibling, &parent_css->children); cgroup_idr_replace(&ss->css_idr, css, css->id); @@ -5697,7 +5695,6 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, err_list_del: list_del_rcu(&css->sibling); err_free_css: - list_del_rcu(&css->rstat_css_node); INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); return ERR_PTR(err); @@ -5713,7 +5710,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, struct cgroup_root *root = parent->root; struct cgroup *cgrp, *tcgrp; struct kernfs_node *kn; - int level = parent->level + 1; + int i, level = parent->level + 1; int ret; /* allocate the cgroup and its ID, 0 is reserved for the root */ @@ -5725,17 +5722,13 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, if (ret) goto out_free_cgrp; - ret = cgroup_rstat_init(cgrp); - if (ret) - goto out_cancel_ref; - /* create the directory */ kn = kernfs_create_dir_ns(parent->kn, name, mode, current_fsuid(), current_fsgid(), cgrp, NULL); if (IS_ERR(kn)) { ret = PTR_ERR(kn); - goto out_stat_exit; + goto out_cancel_ref; } cgrp->kn = kn; @@ -5745,15 +5738,20 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, cgrp->root = root; cgrp->level = level; - ret = psi_cgroup_alloc(cgrp); + /* + * Now that init_cgroup_housekeeping() has been called and cgrp->self + * is setup, it is safe to perform rstat initialization on it. + */ + ret = css_rstat_init(&cgrp->self); if (ret) goto out_kernfs_remove; - if (cgrp->root == &cgrp_dfl_root) { - ret = cgroup_bpf_inherit(cgrp); - if (ret) - goto out_psi_free; - } + ret = psi_cgroup_alloc(cgrp); + if (ret) + goto out_stat_exit; + + for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) + cgrp->ancestors[tcgrp->level] = tcgrp; /* * New cgroup inherits effective freeze counter, and @@ -5771,24 +5769,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, set_bit(CGRP_FROZEN, &cgrp->flags); } - spin_lock_irq(&css_set_lock); - for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { - cgrp->ancestors[tcgrp->level] = tcgrp; - - if (tcgrp != cgrp) { - tcgrp->nr_descendants++; - - /* - * If the new cgroup is frozen, all ancestor cgroups - * get a new frozen descendant, but their state can't - * change because of this. - */ - if (cgrp->freezer.e_freeze) - tcgrp->freezer.nr_frozen_descendants++; - } - } - spin_unlock_irq(&css_set_lock); - if (notify_on_release(parent)) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); @@ -5797,7 +5777,29 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, cgrp->self.serial_nr = css_serial_nr_next++; + ret = blocking_notifier_call_chain_robust(&cgroup_lifetime_notifier, + CGROUP_LIFETIME_ONLINE, + CGROUP_LIFETIME_OFFLINE, cgrp); + ret = notifier_to_errno(ret); + if (ret) + goto out_psi_free; + /* allocation complete, commit to creation */ + spin_lock_irq(&css_set_lock); + for (i = 0; i < level; i++) { + tcgrp = cgrp->ancestors[i]; + tcgrp->nr_descendants++; + + /* + * If the new cgroup is frozen, all ancestor cgroups get a new + * frozen descendant, but their state can't change because of + * this. + */ + if (cgrp->freezer.e_freeze) + tcgrp->freezer.nr_frozen_descendants++; + } + spin_unlock_irq(&css_set_lock); + list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children); atomic_inc(&root->nr_cgrps); cgroup_get_live(parent); @@ -5815,10 +5817,10 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, out_psi_free: psi_cgroup_free(cgrp); +out_stat_exit: + css_rstat_exit(&cgrp->self); out_kernfs_remove: kernfs_remove(cgrp->kn); -out_stat_exit: - cgroup_rstat_exit(cgrp); out_cancel_ref: percpu_ref_exit(&cgrp->self.refcnt); out_free_cgrp: @@ -6015,7 +6017,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) struct cgroup *tcgrp, *parent = cgroup_parent(cgrp); struct cgroup_subsys_state *css; struct cgrp_cset_link *link; - int ssid; + int ssid, ret; lockdep_assert_held(&cgroup_mutex); @@ -6073,8 +6075,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) cgroup1_check_for_release(parent); - if (cgrp->root == &cgrp_dfl_root) - cgroup_bpf_offline(cgrp); + ret = blocking_notifier_call_chain(&cgroup_lifetime_notifier, + CGROUP_LIFETIME_OFFLINE, cgrp); + WARN_ON_ONCE(notifier_to_errno(ret)); /* put the base reference */ percpu_ref_kill(&cgrp->self.refcnt); @@ -6136,6 +6139,9 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) } else { css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL); BUG_ON(css->id < 0); + + BUG_ON(ss_rstat_init(ss)); + BUG_ON(css_rstat_init(css)); } /* Update the init_css_set to contain a subsys @@ -6184,6 +6190,8 @@ int __init cgroup_init_early(void) ss->id, ss->name); WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN, "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]); + WARN(ss->early_init && ss->css_rstat_flush, + "cgroup rstat cannot be used with early init subsystem\n"); ss->id = i; ss->name = cgroup_subsys_name[i]; @@ -6212,7 +6220,7 @@ int __init cgroup_init(void) BUG_ON(cgroup_init_cftypes(NULL, cgroup_psi_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); - cgroup_rstat_boot(); + BUG_ON(ss_rstat_init(NULL)); get_user_ns(init_cgroup_ns.user_ns); @@ -6225,6 +6233,8 @@ int __init cgroup_init(void) hash_add(css_set_table, &init_css_set.hlist, css_set_hash(init_css_set.subsys)); + cgroup_bpf_lifetime_notifier_init(); + BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0)); cgroup_unlock(); |