summaryrefslogtreecommitdiff
path: root/kernel/rcu
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rcu')
-rw-r--r--kernel/rcu/rcu.h18
-rw-r--r--kernel/rcu/rcuscale.c2
-rw-r--r--kernel/rcu/rcutorture.c208
-rw-r--r--kernel/rcu/srcutree.c2
-rw-r--r--kernel/rcu/tree.c84
-rw-r--r--kernel/rcu/tree.h3
-rw-r--r--kernel/rcu/tree_exp.h2
-rw-r--r--kernel/rcu/tree_nocb.h10
-rw-r--r--kernel/rcu/tree_plugin.h2
-rw-r--r--kernel/rcu/tree_stall.h4
10 files changed, 233 insertions, 102 deletions
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index eed2951a4962..9cf01832a6c3 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -57,6 +57,9 @@
/* Low-order bit definition for polled grace-period APIs. */
#define RCU_GET_STATE_COMPLETED 0x1
+/* A complete grace period count */
+#define RCU_SEQ_GP (RCU_SEQ_STATE_MASK + 1)
+
extern int sysctl_sched_rt_runtime;
/*
@@ -157,12 +160,21 @@ static inline bool rcu_seq_done(unsigned long *sp, unsigned long s)
* Given a snapshot from rcu_seq_snap(), determine whether or not a
* full update-side operation has occurred, but do not allow the
* (ULONG_MAX / 2) safety-factor/guard-band.
+ *
+ * The token returned by get_state_synchronize_rcu_full() is based on
+ * rcu_state.gp_seq but it is tested in poll_state_synchronize_rcu_full()
+ * against the root rnp->gp_seq. Since rcu_seq_start() is first called
+ * on rcu_state.gp_seq and only later reflected on the root rnp->gp_seq,
+ * it is possible that rcu_seq_snap(rcu_state.gp_seq) returns 2 full grace
+ * periods ahead of the root rnp->gp_seq. To prevent false-positives with the
+ * full polling API that a wrap around instantly completed the GP, when nothing
+ * like that happened, adjust for the 2 GPs in the ULONG_CMP_LT().
*/
static inline bool rcu_seq_done_exact(unsigned long *sp, unsigned long s)
{
unsigned long cur_s = READ_ONCE(*sp);
- return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (3 * RCU_SEQ_STATE_MASK + 1));
+ return ULONG_CMP_GE(cur_s, s) || ULONG_CMP_LT(cur_s, s - (2 * RCU_SEQ_GP));
}
/*
@@ -572,6 +584,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
unsigned long c_old,
unsigned long c);
void rcu_gp_set_torture_wait(int duration);
+void rcu_set_gpwrap_lag(unsigned long lag);
+int rcu_get_gpwrap_count(int cpu);
#else
static inline void rcutorture_get_gp_data(int *flags, unsigned long *gp_seq)
{
@@ -589,6 +603,8 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
do { } while (0)
#endif
static inline void rcu_gp_set_torture_wait(int duration) { }
+static inline void rcu_set_gpwrap_lag(unsigned long lag) { }
+static inline int rcu_get_gpwrap_count(int cpu) { return 0; }
#endif
unsigned long long rcutorture_gather_gp_seqs(void);
void rcutorture_format_gp_seqs(unsigned long long seqs, char *cp, size_t len);
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index 0f3059b1b80d..b521d0455992 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -762,7 +762,7 @@ kfree_scale_thread(void *arg)
}
for (i = 0; i < kfree_alloc_num; i++) {
- alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
+ alloc_ptr = kcalloc(kfree_mult, sizeof(struct kfree_obj), GFP_KERNEL);
if (!alloc_ptr)
return -ENOMEM;
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 4fa7772be183..70ec0f21abc3 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -115,6 +115,10 @@ torture_param(int, nreaders, -1, "Number of RCU reader threads");
torture_param(int, object_debug, 0, "Enable debug-object double call_rcu() testing");
torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (jiffies), 0=disable");
+torture_param(bool, gpwrap_lag, true, "Enable grace-period wrap lag testing");
+torture_param(int, gpwrap_lag_gps, 8, "Value to set for set_gpwrap_lag during an active testing period.");
+torture_param(int, gpwrap_lag_cycle_mins, 30, "Total cycle duration for gpwrap lag testing (in minutes)");
+torture_param(int, gpwrap_lag_active_mins, 5, "Duration for which gpwrap lag is active within each cycle (in minutes)");
torture_param(int, nocbs_nthreads, 0, "Number of NOCB toggle threads, 0 to disable");
torture_param(int, nocbs_toggle, 1000, "Time between toggling nocb state (ms)");
torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disable");
@@ -413,6 +417,8 @@ struct rcu_torture_ops {
bool (*reader_blocked)(void);
unsigned long long (*gather_gp_seqs)(void);
void (*format_gp_seqs)(unsigned long long seqs, char *cp, size_t len);
+ void (*set_gpwrap_lag)(unsigned long lag);
+ int (*get_gpwrap_count)(int cpu);
long cbflood_max;
int irq_capable;
int can_boost;
@@ -619,6 +625,8 @@ static struct rcu_torture_ops rcu_ops = {
: NULL,
.gather_gp_seqs = rcutorture_gather_gp_seqs,
.format_gp_seqs = rcutorture_format_gp_seqs,
+ .set_gpwrap_lag = rcu_set_gpwrap_lag,
+ .get_gpwrap_count = rcu_get_gpwrap_count,
.irq_capable = 1,
.can_boost = IS_ENABLED(CONFIG_RCU_BOOST),
.extendables = RCUTORTURE_MAX_EXTEND,
@@ -2164,53 +2172,70 @@ rcutorture_loop_extend(int *readstate, bool insoftirq, struct torture_random_sta
return &rtrsp[j];
}
-/*
- * Do one read-side critical section, returning false if there was
- * no data to read. Can be invoked both from process context and
- * from a timer handler.
- */
-static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
-{
- bool checkpolling = !(torture_random(trsp) & 0xfff);
+struct rcu_torture_one_read_state {
+ bool checkpolling;
unsigned long cookie;
struct rcu_gp_oldstate cookie_full;
- int i;
unsigned long started;
- unsigned long completed;
- int newstate;
struct rcu_torture *p;
- int pipe_count;
- bool preempted = false;
- int readstate = 0;
- struct rt_read_seg rtseg[RCUTORTURE_RDR_MAX_SEGS] = { { 0 } };
- struct rt_read_seg *rtrsp = &rtseg[0];
- struct rt_read_seg *rtrsp1;
+ int readstate;
+ struct rt_read_seg rtseg[RCUTORTURE_RDR_MAX_SEGS];
+ struct rt_read_seg *rtrsp;
unsigned long long ts;
+};
- WARN_ON_ONCE(!rcu_is_watching());
- newstate = rcutorture_extend_mask(readstate, trsp);
- rcutorture_one_extend(&readstate, newstate, myid < 0, trsp, rtrsp++);
- if (checkpolling) {
+static void init_rcu_torture_one_read_state(struct rcu_torture_one_read_state *rtorsp,
+ struct torture_random_state *trsp)
+{
+ memset(rtorsp, 0, sizeof(*rtorsp));
+ rtorsp->checkpolling = !(torture_random(trsp) & 0xfff);
+ rtorsp->rtrsp = &rtorsp->rtseg[0];
+}
+
+/*
+ * Set up the first segment of a series of overlapping read-side
+ * critical sections. The caller must have actually initiated the
+ * outermost read-side critical section.
+ */
+static bool rcu_torture_one_read_start(struct rcu_torture_one_read_state *rtorsp,
+ struct torture_random_state *trsp, long myid)
+{
+ if (rtorsp->checkpolling) {
if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
- cookie = cur_ops->get_gp_state();
+ rtorsp->cookie = cur_ops->get_gp_state();
if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full)
- cur_ops->get_gp_state_full(&cookie_full);
+ cur_ops->get_gp_state_full(&rtorsp->cookie_full);
}
- started = cur_ops->get_gp_seq();
- ts = rcu_trace_clock_local();
- p = rcu_dereference_check(rcu_torture_current,
+ rtorsp->started = cur_ops->get_gp_seq();
+ rtorsp->ts = rcu_trace_clock_local();
+ rtorsp->p = rcu_dereference_check(rcu_torture_current,
!cur_ops->readlock_held || cur_ops->readlock_held());
- if (p == NULL) {
+ if (rtorsp->p == NULL) {
/* Wait for rcu_torture_writer to get underway */
- rcutorture_one_extend(&readstate, 0, myid < 0, trsp, rtrsp);
+ rcutorture_one_extend(&rtorsp->readstate, 0, myid < 0, trsp, rtorsp->rtrsp);
return false;
}
- if (p->rtort_mbtest == 0)
+ if (rtorsp->p->rtort_mbtest == 0)
atomic_inc(&n_rcu_torture_mberror);
- rcu_torture_reader_do_mbchk(myid, p, trsp);
- rtrsp = rcutorture_loop_extend(&readstate, myid < 0, trsp, rtrsp);
+ rcu_torture_reader_do_mbchk(myid, rtorsp->p, trsp);
+ return true;
+}
+
+/*
+ * Complete the last segment of a series of overlapping read-side
+ * critical sections and check for errors.
+ */
+static void rcu_torture_one_read_end(struct rcu_torture_one_read_state *rtorsp,
+ struct torture_random_state *trsp, long myid)
+{
+ int i;
+ unsigned long completed;
+ int pipe_count;
+ bool preempted = false;
+ struct rt_read_seg *rtrsp1;
+
preempt_disable();
- pipe_count = READ_ONCE(p->rtort_pipe_count);
+ pipe_count = READ_ONCE(rtorsp->p->rtort_pipe_count);
if (pipe_count > RCU_TORTURE_PIPE_LEN) {
// Should not happen in a correct RCU implementation,
// happens quite often for torture_type=busted.
@@ -2218,28 +2243,28 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
}
completed = cur_ops->get_gp_seq();
if (pipe_count > 1) {
- do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu,
- ts, started, completed);
+ do_trace_rcu_torture_read(cur_ops->name, &rtorsp->p->rtort_rcu,
+ rtorsp->ts, rtorsp->started, completed);
rcu_ftrace_dump(DUMP_ALL);
}
__this_cpu_inc(rcu_torture_count[pipe_count]);
- completed = rcutorture_seq_diff(completed, started);
+ completed = rcutorture_seq_diff(completed, rtorsp->started);
if (completed > RCU_TORTURE_PIPE_LEN) {
/* Should not happen, but... */
completed = RCU_TORTURE_PIPE_LEN;
}
__this_cpu_inc(rcu_torture_batch[completed]);
preempt_enable();
- if (checkpolling) {
+ if (rtorsp->checkpolling) {
if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
- WARN_ONCE(cur_ops->poll_gp_state(cookie),
+ WARN_ONCE(cur_ops->poll_gp_state(rtorsp->cookie),
"%s: Cookie check 2 failed %s(%d) %lu->%lu\n",
__func__,
rcu_torture_writer_state_getname(),
rcu_torture_writer_state,
- cookie, cur_ops->get_gp_state());
+ rtorsp->cookie, cur_ops->get_gp_state());
if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full)
- WARN_ONCE(cur_ops->poll_gp_state_full(&cookie_full),
+ WARN_ONCE(cur_ops->poll_gp_state_full(&rtorsp->cookie_full),
"%s: Cookie check 6 failed %s(%d) online %*pbl\n",
__func__,
rcu_torture_writer_state_getname(),
@@ -2248,21 +2273,42 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
}
if (cur_ops->reader_blocked)
preempted = cur_ops->reader_blocked();
- rcutorture_one_extend(&readstate, 0, myid < 0, trsp, rtrsp);
- WARN_ON_ONCE(readstate);
+ rcutorture_one_extend(&rtorsp->readstate, 0, myid < 0, trsp, rtorsp->rtrsp);
+ WARN_ON_ONCE(rtorsp->readstate);
// This next splat is expected behavior if leakpointer, especially
// for CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels.
- WARN_ON_ONCE(leakpointer && READ_ONCE(p->rtort_pipe_count) > 1);
+ WARN_ON_ONCE(leakpointer && READ_ONCE(rtorsp->p->rtort_pipe_count) > 1);
/* If error or close call, record the sequence of reader protections. */
if ((pipe_count > 1 || completed > 1) && !xchg(&err_segs_recorded, 1)) {
i = 0;
- for (rtrsp1 = &rtseg[0]; rtrsp1 < rtrsp; rtrsp1++)
+ for (rtrsp1 = &rtorsp->rtseg[0]; rtrsp1 < rtorsp->rtrsp; rtrsp1++)
err_segs[i++] = *rtrsp1;
rt_read_nsegs = i;
rt_read_preempted = preempted;
}
+}
+/*
+ * Do one read-side critical section, returning false if there was
+ * no data to read. Can be invoked both from process context and
+ * from a timer handler.
+ */
+static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
+{
+ int newstate;
+ struct rcu_torture_one_read_state rtors;
+
+ WARN_ON_ONCE(!rcu_is_watching());
+ init_rcu_torture_one_read_state(&rtors, trsp);
+ newstate = rcutorture_extend_mask(rtors.readstate, trsp);
+ rcutorture_one_extend(&rtors.readstate, newstate, myid < 0, trsp, rtors.rtrsp++);
+ if (!rcu_torture_one_read_start(&rtors, trsp, myid)) {
+ rcutorture_one_extend(&rtors.readstate, 0, myid < 0, trsp, rtors.rtrsp);
+ return false;
+ }
+ rtors.rtrsp = rcutorture_loop_extend(&rtors.readstate, myid < 0, trsp, rtors.rtrsp);
+ rcu_torture_one_read_end(&rtors, trsp, myid);
return true;
}
@@ -2307,7 +2353,7 @@ rcu_torture_reader(void *arg)
set_user_nice(current, MAX_NICE);
if (irqreader && cur_ops->irq_capable)
timer_setup_on_stack(&t, rcu_torture_timer, 0);
- tick_dep_set_task(current, TICK_DEP_BIT_RCU);
+ tick_dep_set_task(current, TICK_DEP_BIT_RCU); // CPU bound, so need tick.
do {
if (irqreader && cur_ops->irq_capable) {
if (!timer_pending(&t))
@@ -2325,7 +2371,7 @@ rcu_torture_reader(void *arg)
} while (!torture_must_stop());
if (irqreader && cur_ops->irq_capable) {
timer_delete_sync(&t);
- destroy_timer_on_stack(&t);
+ timer_destroy_on_stack(&t);
}
tick_dep_clear_task(current, TICK_DEP_BIT_RCU);
torture_kthread_stopping("rcu_torture_reader");
@@ -2394,6 +2440,7 @@ rcu_torture_stats_print(void)
int i;
long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
+ long n_gpwraps = 0;
struct rcu_torture *rtcp;
static unsigned long rtcv_snap = ULONG_MAX;
static bool splatted;
@@ -2404,6 +2451,8 @@ rcu_torture_stats_print(void)
pipesummary[i] += READ_ONCE(per_cpu(rcu_torture_count, cpu)[i]);
batchsummary[i] += READ_ONCE(per_cpu(rcu_torture_batch, cpu)[i]);
}
+ if (cur_ops->get_gpwrap_count)
+ n_gpwraps += cur_ops->get_gpwrap_count(cpu);
}
for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
if (pipesummary[i] != 0)
@@ -2435,8 +2484,9 @@ rcu_torture_stats_print(void)
data_race(n_barrier_attempts),
data_race(n_rcu_torture_barrier_error));
pr_cont("read-exits: %ld ", data_race(n_read_exits)); // Statistic.
- pr_cont("nocb-toggles: %ld:%ld\n",
+ pr_cont("nocb-toggles: %ld:%ld ",
atomic_long_read(&n_nocb_offload), atomic_long_read(&n_nocb_deoffload));
+ pr_cont("gpwraps: %ld\n", n_gpwraps);
pr_alert("%s%s ", torture_type, TORTURE_FLAG);
if (atomic_read(&n_rcu_torture_mberror) ||
@@ -3036,7 +3086,7 @@ static void rcu_torture_fwd_prog_cr(struct rcu_fwd *rfp)
cver = READ_ONCE(rcu_torture_current_version);
gps = cur_ops->get_gp_seq();
rfp->rcu_launder_gp_seq_start = gps;
- tick_dep_set_task(current, TICK_DEP_BIT_RCU);
+ tick_dep_set_task(current, TICK_DEP_BIT_RCU); // CPU bound, so need tick.
while (time_before(jiffies, stopat) &&
!shutdown_time_arrived() &&
!READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) {
@@ -3607,6 +3657,57 @@ static int rcu_torture_preempt(void *unused)
static enum cpuhp_state rcutor_hp;
+static struct hrtimer gpwrap_lag_timer;
+static bool gpwrap_lag_active;
+
+/* Timer handler for toggling RCU grace-period sequence overflow test lag value */
+static enum hrtimer_restart rcu_gpwrap_lag_timer(struct hrtimer *timer)
+{
+ ktime_t next_delay;
+
+ if (gpwrap_lag_active) {
+ pr_alert("rcu-torture: Disabling gpwrap lag (value=0)\n");
+ cur_ops->set_gpwrap_lag(0);
+ gpwrap_lag_active = false;
+ next_delay = ktime_set((gpwrap_lag_cycle_mins - gpwrap_lag_active_mins) * 60, 0);
+ } else {
+ pr_alert("rcu-torture: Enabling gpwrap lag (value=%d)\n", gpwrap_lag_gps);
+ cur_ops->set_gpwrap_lag(gpwrap_lag_gps);
+ gpwrap_lag_active = true;
+ next_delay = ktime_set(gpwrap_lag_active_mins * 60, 0);
+ }
+
+ if (torture_must_stop_irq())
+ return HRTIMER_NORESTART;
+
+ hrtimer_forward_now(timer, next_delay);
+ return HRTIMER_RESTART;
+}
+
+static int rcu_gpwrap_lag_init(void)
+{
+ if (!gpwrap_lag)
+ return 0;
+
+ if (gpwrap_lag_cycle_mins <= 0 || gpwrap_lag_active_mins <= 0) {
+ pr_alert("rcu-torture: lag timing parameters must be positive\n");
+ return -EINVAL;
+ }
+
+ hrtimer_setup(&gpwrap_lag_timer, rcu_gpwrap_lag_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ gpwrap_lag_active = false;
+ hrtimer_start(&gpwrap_lag_timer,
+ ktime_set((gpwrap_lag_cycle_mins - gpwrap_lag_active_mins) * 60, 0), HRTIMER_MODE_REL);
+
+ return 0;
+}
+
+static void rcu_gpwrap_lag_cleanup(void)
+{
+ hrtimer_cancel(&gpwrap_lag_timer);
+ cur_ops->set_gpwrap_lag(0);
+ gpwrap_lag_active = false;
+}
static void
rcu_torture_cleanup(void)
{
@@ -3776,6 +3877,9 @@ rcu_torture_cleanup(void)
torture_cleanup_end();
if (cur_ops->gp_slow_unregister)
cur_ops->gp_slow_unregister(NULL);
+
+ if (gpwrap_lag && cur_ops->set_gpwrap_lag)
+ rcu_gpwrap_lag_cleanup();
}
static void rcu_torture_leak_cb(struct rcu_head *rhp)
@@ -4272,9 +4376,17 @@ rcu_torture_init(void)
}
if (object_debug)
rcu_test_debug_objects();
- torture_init_end();
+
if (cur_ops->gp_slow_register && !WARN_ON_ONCE(!cur_ops->gp_slow_unregister))
cur_ops->gp_slow_register(&rcu_fwd_cb_nodelay);
+
+ if (gpwrap_lag && cur_ops->set_gpwrap_lag) {
+ firsterr = rcu_gpwrap_lag_init();
+ if (torture_init_error(firsterr))
+ goto unwind;
+ }
+
+ torture_init_end();
return 0;
unwind:
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 9a59b071501b..48047260697e 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -1589,7 +1589,7 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
{
if (cookie != SRCU_GET_STATE_COMPLETED &&
- !rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
+ !rcu_seq_done_exact(&ssp->srcu_sup->srcu_gp_seq, cookie))
return false;
// Ensure that the end of the SRCU grace period happens before
// any subsequent code that the caller might execute.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 659f83e71048..e8a4b720d7d2 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -80,6 +80,15 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
.gpwrap = true,
};
+
+int rcu_get_gpwrap_count(int cpu)
+{
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+ return READ_ONCE(rdp->gpwrap_count);
+}
+EXPORT_SYMBOL_GPL(rcu_get_gpwrap_count);
+
static struct rcu_state rcu_state = {
.level = { &rcu_state.node[0] },
.gp_state = RCU_GP_IDLE,
@@ -757,6 +766,25 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
}
+static unsigned long seq_gpwrap_lag = ULONG_MAX / 4;
+
+/**
+ * rcu_set_gpwrap_lag - Set RCU GP sequence overflow lag value.
+ * @lag_gps: Set overflow lag to this many grace period worth of counters
+ * which is used by rcutorture to quickly force a gpwrap situation.
+ * @lag_gps = 0 means we reset it back to the boot-time value.
+ */
+void rcu_set_gpwrap_lag(unsigned long lag_gps)
+{
+ unsigned long lag_seq_count;
+
+ lag_seq_count = (lag_gps == 0)
+ ? ULONG_MAX / 4
+ : lag_gps << RCU_SEQ_CTR_SHIFT;
+ WRITE_ONCE(seq_gpwrap_lag, lag_seq_count);
+}
+EXPORT_SYMBOL_GPL(rcu_set_gpwrap_lag);
+
/*
* When trying to report a quiescent state on behalf of some other CPU,
* it is our responsibility to check for and handle potential overflow
@@ -767,9 +795,11 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
{
raw_lockdep_assert_held_rcu_node(rnp);
- if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + ULONG_MAX / 4,
- rnp->gp_seq))
+ if (ULONG_CMP_LT(rcu_seq_current(&rdp->gp_seq) + seq_gpwrap_lag,
+ rnp->gp_seq)) {
WRITE_ONCE(rdp->gpwrap, true);
+ WRITE_ONCE(rdp->gpwrap_count, READ_ONCE(rdp->gpwrap_count) + 1);
+ }
if (ULONG_CMP_LT(rdp->rcu_iw_gp_seq + ULONG_MAX / 4, rnp->gp_seq))
rdp->rcu_iw_gp_seq = rnp->gp_seq + ULONG_MAX / 4;
}
@@ -801,6 +831,10 @@ static int rcu_watching_snap_save(struct rcu_data *rdp)
return 0;
}
+#ifndef arch_irq_stat_cpu
+#define arch_irq_stat_cpu(cpu) 0
+#endif
+
/*
* Returns positive if the specified CPU has passed through a quiescent state
* by virtue of being in or having passed through an dynticks idle state since
@@ -936,9 +970,9 @@ static int rcu_watching_snap_recheck(struct rcu_data *rdp)
rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
- rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu);
- rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu);
- rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu);
+ rsrp->nr_hardirqs = kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu);
+ rsrp->nr_softirqs = kstat_cpu_softirqs_sum(cpu);
+ rsrp->nr_csw = nr_context_switches_cpu(cpu);
rsrp->jiffies = jiffies;
rsrp->gp_seq = rdp->gp_seq;
}
@@ -1060,38 +1094,6 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
return needmore;
}
-static void swake_up_one_online_ipi(void *arg)
-{
- struct swait_queue_head *wqh = arg;
-
- swake_up_one(wqh);
-}
-
-static void swake_up_one_online(struct swait_queue_head *wqh)
-{
- int cpu = get_cpu();
-
- /*
- * If called from rcutree_report_cpu_starting(), wake up
- * is dangerous that late in the CPU-down hotplug process. The
- * scheduler might queue an ignored hrtimer. Defer the wake up
- * to an online CPU instead.
- */
- if (unlikely(cpu_is_offline(cpu))) {
- int target;
-
- target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU),
- cpu_online_mask);
-
- smp_call_function_single(target, swake_up_one_online_ipi,
- wqh, 0);
- put_cpu();
- } else {
- put_cpu();
- swake_up_one(wqh);
- }
-}
-
/*
* Awaken the grace-period kthread. Don't do a self-awaken (unless in an
* interrupt or softirq handler, in which case we just might immediately
@@ -1116,7 +1118,7 @@ static void rcu_gp_kthread_wake(void)
return;
WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
- swake_up_one_online(&rcu_state.gp_wq);
+ swake_up_one(&rcu_state.gp_wq);
}
/*
@@ -1798,6 +1800,7 @@ static noinline_for_stack bool rcu_gp_init(void)
struct rcu_data *rdp;
struct rcu_node *rnp = rcu_get_root();
bool start_new_poll;
+ unsigned long old_gp_seq;
WRITE_ONCE(rcu_state.gp_activity, jiffies);
raw_spin_lock_irq_rcu_node(rnp);
@@ -1825,7 +1828,12 @@ static noinline_for_stack bool rcu_gp_init(void)
*/
start_new_poll = rcu_sr_normal_gp_init();
/* Record GP times before starting GP, hence rcu_seq_start(). */
+ old_gp_seq = rcu_state.gp_seq;
rcu_seq_start(&rcu_state.gp_seq);
+ /* Ensure that rcu_seq_done_exact() guardband doesn't give false positives. */
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) &&
+ rcu_seq_done_exact(&old_gp_seq, rcu_seq_snap(&rcu_state.gp_seq)));
+
ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index a9a811d9d7a3..3830c19cf2f6 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -168,7 +168,7 @@ struct rcu_snap_record {
u64 cputime_irq; /* Accumulated cputime of hard irqs */
u64 cputime_softirq;/* Accumulated cputime of soft irqs */
u64 cputime_system; /* Accumulated cputime of kernel tasks */
- unsigned long nr_hardirqs; /* Accumulated number of hard irqs */
+ u64 nr_hardirqs; /* Accumulated number of hard irqs */
unsigned int nr_softirqs; /* Accumulated number of soft irqs */
unsigned long long nr_csw; /* Accumulated number of task switches */
unsigned long jiffies; /* Track jiffies value */
@@ -183,6 +183,7 @@ struct rcu_data {
bool core_needs_qs; /* Core waits for quiescent state. */
bool beenonline; /* CPU online at least once. */
bool gpwrap; /* Possible ->gp_seq wrap. */
+ unsigned int gpwrap_count; /* Count of GP sequence wrap. */
bool cpu_started; /* RCU watching this onlining CPU. */
struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
unsigned long grpmask; /* Mask to apply to leaf qsmask. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 8d4895c854c5..c36c7d5575ca 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -200,7 +200,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp,
if (rnp->parent == NULL) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (wake)
- swake_up_one_online(&rcu_state.expedited_wq);
+ swake_up_one(&rcu_state.expedited_wq);
break;
}
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index fa269d34167a..1596812f7f12 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -216,7 +216,7 @@ static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
if (needwake) {
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
- swake_up_one_online(&rdp_gp->nocb_gp_wq);
+ swake_up_one(&rdp_gp->nocb_gp_wq);
}
return needwake;
@@ -554,19 +554,13 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
TPS("WakeLazy"));
- } else if (!irqs_disabled_flags(flags) && cpu_online(rdp->cpu)) {
+ } else if (!irqs_disabled_flags(flags)) {
/* ... if queue was empty ... */
rcu_nocb_unlock(rdp);
wake_nocb_gp(rdp, false);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
TPS("WakeEmpty"));
} else {
- /*
- * Don't do the wake-up upfront on fragile paths.
- * Also offline CPUs can't call swake_up_one_online() from
- * (soft-)IRQs. Rely on the final deferred wake-up from
- * rcutree_report_cpu_dead()
- */
rcu_nocb_unlock(rdp);
wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
TPS("WakeEmptyIsDeferred"));
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3c0bbbbb686f..0b0f56f6abc8 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -29,7 +29,7 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
(IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
lockdep_is_held(&rdp->nocb_lock) ||
lockdep_is_held(&rcu_state.nocb_mutex) ||
- (!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) &&
+ ((!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) || softirq_count()) &&
rdp == this_cpu_ptr(&rcu_data)) ||
rcu_current_is_nocb_kthread(rdp)),
"Unsafe read of RCU_NOCB offloaded state"
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 925fcdad5dea..56b21219442b 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -435,8 +435,8 @@ static void print_cpu_stat_info(int cpu)
rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
pr_err("\t hardirqs softirqs csw/system\n");
- pr_err("\t number: %8ld %10d %12lld\n",
- kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs,
+ pr_err("\t number: %8lld %10d %12lld\n",
+ kstat_cpu_irqs_sum(cpu) + arch_irq_stat_cpu(cpu) - rsrp->nr_hardirqs,
kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs,
nr_context_switches_cpu(cpu) - rsrp->nr_csw);
pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n",