diff options
| -rw-r--r-- | include/linux/sched/ext.h | 2 | ||||
| -rw-r--r-- | kernel/rcu/tree_stall.h | 7 | ||||
| -rw-r--r-- | kernel/sched/ext.c | 35 |
3 files changed, 44 insertions, 0 deletions
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 8b92842776cb..0cf0915572c9 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -206,12 +206,14 @@ struct sched_ext_entity {
 void sched_ext_free(struct task_struct *p);
 void print_scx_info(const char *log_lvl, struct task_struct *p);
 void scx_softlockup(u32 dur_s);
+bool scx_rcu_cpu_stall(void);
 
 #else /* !CONFIG_SCHED_CLASS_EXT */
 
 static inline void sched_ext_free(struct task_struct *p) {}
 static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {}
 static inline void scx_softlockup(u32 dur_s) {}
+static inline bool scx_rcu_cpu_stall(void) { return false; }
 
 #endif /* CONFIG_SCHED_CLASS_EXT */
 
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 486c00536207..af61b2d0d311 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -134,6 +134,13 @@ static void panic_on_rcu_stall(void)
 {
 	static int cpu_stall;
 
+	/*
+	 * Attempt to kick out the BPF scheduler if it's installed and defer
+	 * the panic to give the system a chance to recover.
+	 */
+	if (scx_rcu_cpu_stall())
+		return;
+
 	if (++cpu_stall < sysctl_max_rcu_stall_to_panic)
 		return;
 
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index bee98fdcdd01..df5b2c952cf7 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4673,6 +4673,41 @@ bool scx_allow_ttwu_queue(const struct task_struct *p)
 }
 
 /**
+ * scx_rcu_cpu_stall - sched_ext RCU CPU stall handler
+ *
+ * While there are various reasons why RCU CPU stalls can occur on a system
+ * that may not be caused by the current BPF scheduler, try kicking out the
+ * current scheduler in an attempt to recover the system to a good state before
+ * issuing panics.
+ */
+bool scx_rcu_cpu_stall(void)
+{
+	struct scx_sched *sch;
+
+	rcu_read_lock();
+
+	sch = rcu_dereference(scx_root);
+	if (unlikely(!sch)) {
+		rcu_read_unlock();
+		return false;
+	}
+
+	switch (scx_enable_state()) {
+	case SCX_ENABLING:
+	case SCX_ENABLED:
+		break;
+	default:
+		rcu_read_unlock();
+		return false;
+	}
+
+	scx_error(sch, "RCU CPU stall detected!");
+	rcu_read_unlock();
+
+	return true;
+}
+
+/**
  * scx_softlockup - sched_ext softlockup handler
  * @dur_s: number of seconds of CPU stuck due to soft lockup
  *
|
