diff options
Diffstat (limited to 'kernel/cgroup/cgroup.c')
| -rw-r--r-- | kernel/cgroup/cgroup.c | 55 |
1 files changed, 54 insertions, 1 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index aae180d56c8c..48019a661c08 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -290,6 +290,7 @@ static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup_subsys_state *css, struct cgroup *cgrp, struct cftype cfts[], bool is_add); +static void cgroup_rt_init(void); #ifdef CONFIG_DEBUG_CGROUP_REF #define CGROUP_REF_FN_ATTRS noinline @@ -6360,6 +6361,7 @@ int __init cgroup_init(void) BUG_ON(ss_rstat_init(NULL)); get_user_ns(init_cgroup_ns.user_ns); + cgroup_rt_init(); cgroup_lock(); @@ -6990,7 +6992,7 @@ void cgroup_task_exit(struct task_struct *tsk) } while_each_subsys_mask(); } -void cgroup_task_dead(struct task_struct *tsk) +static void do_cgroup_task_dead(struct task_struct *tsk) { struct css_set *cset; unsigned long flags; @@ -7016,6 +7018,57 @@ void cgroup_task_dead(struct task_struct *tsk) spin_unlock_irqrestore(&css_set_lock, flags); } +#ifdef CONFIG_PREEMPT_RT +/* + * cgroup_task_dead() is called from finish_task_switch() which doesn't allow + * scheduling even in RT. As the task_dead path requires grabbing css_set_lock, + * this lead to sleeping in the invalid context warning bug. css_set_lock is too + * big to become a raw_spinlock. The task_dead path doesn't need to run + * synchronously but can't be delayed indefinitely either as the dead task pins + * the cgroup and task_struct can be pinned indefinitely. Bounce through lazy + * irq_work to allow batching while ensuring timely completion. + */ +static DEFINE_PER_CPU(struct llist_head, cgrp_dead_tasks); +static DEFINE_PER_CPU(struct irq_work, cgrp_dead_tasks_iwork); + +static void cgrp_dead_tasks_iwork_fn(struct irq_work *iwork) +{ + struct llist_node *lnode; + struct task_struct *task, *next; + + lnode = llist_del_all(this_cpu_ptr(&cgrp_dead_tasks)); + llist_for_each_entry_safe(task, next, lnode, cg_dead_lnode) { + do_cgroup_task_dead(task); + put_task_struct(task); + } +} + +static void __init cgroup_rt_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + init_llist_head(per_cpu_ptr(&cgrp_dead_tasks, cpu)); + per_cpu(cgrp_dead_tasks_iwork, cpu) = + IRQ_WORK_INIT_LAZY(cgrp_dead_tasks_iwork_fn); + } +} + +void cgroup_task_dead(struct task_struct *task) +{ + get_task_struct(task); + llist_add(&task->cg_dead_lnode, this_cpu_ptr(&cgrp_dead_tasks)); + irq_work_queue(this_cpu_ptr(&cgrp_dead_tasks_iwork)); +} +#else /* CONFIG_PREEMPT_RT */ +static void __init cgroup_rt_init(void) {} + +void cgroup_task_dead(struct task_struct *task) +{ + do_cgroup_task_dead(task); +} +#endif /* CONFIG_PREEMPT_RT */ + void cgroup_task_release(struct task_struct *task) { struct cgroup_subsys *ss; |
