From 22fc6eccbf4ce4eb6265e6ada7b50a7b9cc57d05 Mon Sep 17 00:00:00 2001
From: Ravikiran G Thirumalai
Date: Sun, 8 Jan 2006 01:01:27 -0800
Subject: [PATCH] Change maxaligned_in_smp alignment macros to
 internodealigned_in_smp macros

____cacheline_maxaligned_in_smp is currently used to align critical
structures and avoid false sharing. It uses per-arch L1_CACHE_SHIFT_MAX,
and people find L1_CACHE_SHIFT_MAX useless.

However, we have been using ____cacheline_maxaligned_in_smp to align
structures on the internode cacheline size. As per Andi's suggestion, the
following patch kills ____cacheline_maxaligned_in_smp and introduces
INTERNODE_CACHE_SHIFT, which defaults to L1_CACHE_SHIFT for all arches.
Arches needing L3/internode cacheline alignment can define
INTERNODE_CACHE_SHIFT in the arch asm/cache.h.

The patch replaces ____cacheline_maxaligned_in_smp with
____cacheline_internodealigned_in_smp. With this patch, L1_CACHE_SHIFT_MAX
can be killed.

Signed-off-by: Ravikiran Thirumalai
Signed-off-by: Shai Fultheim
Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/rcupdate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/rcupdate.c')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 48d3bce465b8..c9afc61240e4 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -61,9 +61,9 @@ struct rcu_state {
 				/* for current batch to proceed.        */
 };
 
-static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
+static struct rcu_state rcu_state ____cacheline_internodealigned_in_smp =
 	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
+static struct rcu_state rcu_bh_state ____cacheline_internodealigned_in_smp =
 	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
-- 
cgit
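The mechanism this patch introduces amounts to a one-line default plus an
arch override hook. A minimal sketch of the resulting include/linux/cache.h
logic follows; this is a paraphrase for readers, not the verbatim header,
and the exact #ifdef nesting may differ:

	/*
	 * Sketch (paraphrase): INTERNODE_CACHE_SHIFT defaults to the L1
	 * shift unless the arch's asm/cache.h overrides it, e.g. on large
	 * NUMA systems whose internode line is bigger than the L1 line.
	 */
	#ifndef INTERNODE_CACHE_SHIFT
	#define INTERNODE_CACHE_SHIFT L1_CACHE_SHIFT
	#endif

	#if defined(CONFIG_SMP)
	#define ____cacheline_internodealigned_in_smp \
		__attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT))))
	#else
	#define ____cacheline_internodealigned_in_smp
	#endif

A structure annotated with the macro thus starts on an internode-cacheline
boundary on SMP builds, and carries no padding cost on UP builds.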
From e56d090310d7625ecb43a1eeebd479f04affb48b Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sun, 8 Jan 2006 01:01:37 -0800
Subject: [PATCH] RCU signal handling

RCU tasklist_lock and RCU signal handling: send signals RCU-read-locked
instead of tasklist_lock read-locked. This is a scalability improvement on
SMP and a preemption-latency improvement under PREEMPT_RCU.

Signed-off-by: Paul E. McKenney
Signed-off-by: Ingo Molnar
Acked-by: William Irwin
Cc: Roland McGrath
Cc: Oleg Nesterov
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/rcupdate.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/rcupdate.c')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c9afc61240e4..0a669bd2f6d1 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -35,6 +35,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
-- 
cgit

From 095975da26dba21698582e91e96be10f7417333f Mon Sep 17 00:00:00 2001
From: Nick Piggin
Date: Sun, 8 Jan 2006 01:02:19 -0800
Subject: [PATCH] rcu file: use atomic primitives

Use atomic_inc_not_zero for rcu files instead of the special-case rcuref.

Signed-off-by: Nick Piggin
Cc: "Paul E. McKenney"
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/rcupdate.c | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'kernel/rcupdate.c')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 0a669bd2f6d1..30b0bba03859 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -46,7 +46,6 @@
 #include
 #include
 #include
-#include <linux/rcuref.h>
 #include
 
 /* Definition for rcupdate control block. */
@@ -74,19 +73,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
 static int maxbatch = 10000;
 
-#ifndef __HAVE_ARCH_CMPXCHG
-/*
- * We use an array of spinlocks for the rcurefs -- similar to ones in sparc
- * 32 bit atomic_t implementations, and a hash function similar to that
- * for our refcounting needs.
- * Can't help multiprocessors which donot have cmpxchg :(
- */
-
-spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = {
-	[0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
-};
-#endif
-
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
-- 
cgit
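The rcuref machinery deleted here existed so that RCU-protected struct file
lookups could take a reference only if the count was still nonzero, even on
architectures without cmpxchg; atomic_inc_not_zero() expresses that
directly. A sketch of the resulting lookup pattern follows — the helper
name get_file_rcu() is illustrative, not a function added by this patch
(the real users are the fd-lookup paths under fs/):

	/*
	 * Illustrative sketch (hypothetical helper): look up an fd under
	 * rcu_read_lock() and take a reference only if the file is live.
	 */
	struct file *get_file_rcu(struct files_struct *files, unsigned int fd)
	{
		struct file *file;

		rcu_read_lock();
		file = fcheck_files(files, fd);	/* RCU-protected lookup */
		if (file && !atomic_inc_not_zero(&file->f_count))
			file = NULL;	/* count hit zero: being freed */
		rcu_read_unlock();

		return file;
	}

If atomic_inc_not_zero() fails, the object is already on its way to being
freed, so the lookup must be treated as a miss.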
From 677517771b7b6efaf8617e70f655b16f3cafcc9b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Sun, 8 Jan 2006 22:19:16 +0300
Subject: [PATCH] rcu: uninline __rcu_pending()

__rcu_pending() is rather fat and called twice from rcu_pending().
rcu_pending() has multiple callers, and is not that small either. This
patch uninlines both of them.

Signed-off-by: Oleg Nesterov
Acked-by: Paul E. McKenney
Signed-off-by: Linus Torvalds
---
 kernel/rcupdate.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'kernel/rcupdate.c')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 30b0bba03859..ccc45d49ce71 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -429,6 +429,36 @@ static void rcu_process_callbacks(unsigned long unused)
 		&__get_cpu_var(rcu_bh_data));
 }
 
+static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+	/* This cpu has pending rcu entries and the grace period
+	 * for them has completed.
+	 */
+	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
+		return 1;
+
+	/* This cpu has no pending entries, but there are new entries */
+	if (!rdp->curlist && rdp->nxtlist)
+		return 1;
+
+	/* This cpu has finished callbacks to invoke */
+	if (rdp->donelist)
+		return 1;
+
+	/* The rcu core waits for a quiescent state from the cpu */
+	if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
+		return 1;
+
+	/* nothing to do */
+	return 0;
+}
+
+int rcu_pending(int cpu)
+{
+	return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
+		__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
+}
+
 void rcu_check_callbacks(int cpu, int user)
 {
 	if (user ||
-- 
cgit

From dbc1651f0c5b7d13acc59d3b805a7224332fb1fb Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Sun, 8 Jan 2006 22:19:33 +0300
Subject: [PATCH] rcu: don't set ->next_pending in rcu_start_batch()

I think it is better to set ->next_pending in the caller, when it is
needed. This saves one parameter, and it matches the behaviour of
cpu_quiet(), which sets ->completed = ->cur itself.

Signed-off-by: Oleg Nesterov
Acked-by: Paul E. McKenney
Signed-off-by: Linus Torvalds
---
 kernel/rcupdate.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'kernel/rcupdate.c')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index ccc45d49ce71..05ee48316f70 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -236,12 +236,8 @@ static void rcu_do_batch(struct rcu_data *rdp)
  * active batch and the batch to be registered has not already occurred.
  * Caller must hold rcu_state.lock.
  */
-static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
-				int next_pending)
+static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
 {
-	if (next_pending)
-		rcp->next_pending = 1;
-
 	if (rcp->next_pending &&
 			rcp->completed == rcp->cur) {
 		rcp->next_pending = 0;
@@ -275,7 +271,7 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
 	if (cpus_empty(rsp->cpumask)) {
 		/* batch completed ! */
 		rcp->completed = rcp->cur;
-		rcu_start_batch(rcp, rsp, 0);
+		rcu_start_batch(rcp, rsp);
 	}
 }
 
@@ -410,7 +406,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
 		if (!rcp->next_pending) {
 			/* and start it/schedule start if it's a new batch */
 			spin_lock(&rsp->lock);
-			rcu_start_batch(rcp, rsp, 1);
+			rcp->next_pending = 1;
+			rcu_start_batch(rcp, rsp);
 			spin_unlock(&rsp->lock);
 		}
 	} else {
-- 
cgit
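The resulting convention is that rcu_start_batch() only consumes
->next_pending, and the call site that actually has new work sets it. A
condensed digest of the two call sites after this patch (restating the diff
above, not new code):

	/* cpu_quiet(): a batch just completed; nothing new is queued here,
	 * so leave ->next_pending alone and just try to start the next batch. */
	rcp->completed = rcp->cur;
	rcu_start_batch(rcp, rsp);

	/* __rcu_process_callbacks(): new callbacks were queued, so announce
	 * the pending batch first, then try to start it, under rsp->lock. */
	spin_lock(&rsp->lock);
	rcp->next_pending = 1;
	rcu_start_batch(rcp, rsp);
	spin_unlock(&rsp->lock);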
From 69a0b3157983925f14fe0bdc49622d5389538d8d Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Tue, 10 Jan 2006 16:48:02 +0300
Subject: [PATCH] rcu: join rcu_ctrlblk and rcu_state

This patch moves rcu_state into the rcu_ctrlblk. I think there is no
reason why we should have two different variables to control rcu state.
Every user of rcu_state also has "rcu_ctrlblk *rcp" in its parameter list.

Signed-off-by: Oleg Nesterov
Acked-by: Paul E. McKenney
Signed-off-by: Linus Torvalds
---
 kernel/rcupdate.c | 82 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 38 insertions(+), 44 deletions(-)

(limited to 'kernel/rcupdate.c')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 05ee48316f70..e18f9190eafa 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -49,22 +49,18 @@
 #include
 
 /* Definition for rcupdate control block. */
-struct rcu_ctrlblk rcu_ctrlblk =
-	{ .cur = -300, .completed = -300 };
-struct rcu_ctrlblk rcu_bh_ctrlblk =
-	{ .cur = -300, .completed = -300 };
-
-/* Bookkeeping of the progress of the grace period */
-struct rcu_state {
-	spinlock_t	lock;	/* Guard this struct and writes to rcu_ctrlblk */
-	cpumask_t	cpumask; /* CPUs that need to switch in order    */
-				 /* for current batch to proceed.        */
+struct rcu_ctrlblk rcu_ctrlblk = {
+	.cur = -300,
+	.completed = -300,
+	.lock = SPIN_LOCK_UNLOCKED,
+	.cpumask = CPU_MASK_NONE,
+};
+struct rcu_ctrlblk rcu_bh_ctrlblk = {
+	.cur = -300,
+	.completed = -300,
+	.lock = SPIN_LOCK_UNLOCKED,
+	.cpumask = CPU_MASK_NONE,
 };
-
-static struct rcu_state rcu_state ____cacheline_internodealigned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-static struct rcu_state rcu_bh_state ____cacheline_internodealigned_in_smp =
-	  {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
@@ -220,13 +216,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
  *   This is done by rcu_start_batch. The start is not broadcasted to
  *   all cpus, they must pick this up by comparing rcp->cur with
  *   rdp->quiescbatch. All cpus are recorded in the
- *   rcu_state.cpumask bitmap.
+ *   rcu_ctrlblk.cpumask bitmap.
  * - All cpus must go through a quiescent state.
  *   Since the start of the grace period is not broadcasted, at least two
  *   calls to rcu_check_quiescent_state are required:
  *   The first call just notices that a new grace period is running. The
  *   following calls check if there was a quiescent state since the beginning
- *   of the grace period. If so, it updates rcu_state.cpumask. If
+ *   of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
 *   the bitmap is empty, then the grace period is completed.
 *   rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
 *   period (if necessary).
 */
@@ -234,9 +230,9 @@ static void rcu_do_batch(struct rcu_data *rdp)
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
- * Caller must hold rcu_state.lock.
+ * Caller must hold rcu_ctrlblk.lock.
  */
-static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 {
 	if (rcp->next_pending &&
 			rcp->completed == rcp->cur) {
@@ -251,11 +247,11 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
 		/*
 		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
 		 * Barrier  Otherwise it can cause tickless idle CPUs to be
-		 * included in rsp->cpumask, which will extend graceperiods
+		 * included in rcp->cpumask, which will extend graceperiods
 		 * unnecessarily.
 		 */
 		smp_mb();
-		cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
+		cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
 	}
 }
 
@@ -265,13 +261,13 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
  * Clear it from the cpu mask and complete the grace period if it was the last
  * cpu. Start another grace period if someone has further entries pending
  */
-static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 {
-	cpu_clear(cpu, rsp->cpumask);
-	if (cpus_empty(rsp->cpumask)) {
+	cpu_clear(cpu, rcp->cpumask);
+	if (cpus_empty(rcp->cpumask)) {
 		/* batch completed ! */
 		rcp->completed = rcp->cur;
-		rcu_start_batch(rcp, rsp);
+		rcu_start_batch(rcp);
 	}
 }
 
@@ -281,7 +277,7 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
  * quiescent cycle, then indicate that it has done so.
  */
 static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
-					struct rcu_state *rsp, struct rcu_data *rdp)
+					struct rcu_data *rdp)
 {
 	if (rdp->quiescbatch != rcp->cur) {
 		/* start new grace period: */
@@ -306,15 +302,15 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
 		return;
 	rdp->qs_pending = 0;
 
-	spin_lock(&rsp->lock);
+	spin_lock(&rcp->lock);
 	/*
 	 * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
 	 * during cpu startup. Ignore the quiescent state.
 	 */
 	if (likely(rdp->quiescbatch == rcp->cur))
-		cpu_quiet(rdp->cpu, rcp, rsp);
+		cpu_quiet(rdp->cpu, rcp);
 
-	spin_unlock(&rsp->lock);
+	spin_unlock(&rcp->lock);
 }
 
@@ -335,16 +331,16 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
 }
 
 static void __rcu_offline_cpu(struct rcu_data *this_rdp,
-			struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp)
+			struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
 	/* if the cpu going offline owns the grace period
 	 * we can block indefinitely waiting for it, so flush
 	 * it here
 	 */
-	spin_lock_bh(&rsp->lock);
+	spin_lock_bh(&rcp->lock);
 	if (rcp->cur != rcp->completed)
-		cpu_quiet(rdp->cpu, rcp, rsp);
-	spin_unlock_bh(&rsp->lock);
+		cpu_quiet(rdp->cpu, rcp);
+	spin_unlock_bh(&rcp->lock);
 	rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
 	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
@@ -354,9 +350,9 @@ static void rcu_offline_cpu(int cpu)
 	struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
 	struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
 
-	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state,
+	__rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
 		&per_cpu(rcu_data, cpu));
-	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state,
+	__rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
 		&per_cpu(rcu_bh_data, cpu));
 	put_cpu_var(rcu_data);
 	put_cpu_var(rcu_bh_data);
@@ -375,7 +371,7 @@ static void rcu_offline_cpu(int cpu)
  * This does the RCU processing work from tasklet context.
  */
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
-					struct rcu_state *rsp, struct rcu_data *rdp)
+					struct rcu_data *rdp)
 {
 	if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
 		*rdp->donetail = rdp->curlist;
@@ -405,25 +401,23 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
 
 		if (!rcp->next_pending) {
 			/* and start it/schedule start if it's a new batch */
-			spin_lock(&rsp->lock);
+			spin_lock(&rcp->lock);
 			rcp->next_pending = 1;
-			rcu_start_batch(rcp, rsp);
-			spin_unlock(&rsp->lock);
+			rcu_start_batch(rcp);
+			spin_unlock(&rcp->lock);
 		}
 	} else {
 		local_irq_enable();
 	}
-	rcu_check_quiescent_state(rcp, rsp, rdp);
+	rcu_check_quiescent_state(rcp, rdp);
 	if (rdp->donelist)
 		rcu_do_batch(rdp);
 }
 
 static void rcu_process_callbacks(unsigned long unused)
 {
-	__rcu_process_callbacks(&rcu_ctrlblk, &rcu_state,
-		&__get_cpu_var(rcu_data));
-	__rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state,
-		&__get_cpu_var(rcu_bh_data));
+	__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
+	__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
 }
 
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
-- 
cgit
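After the merge, one structure carries both the batch numbers and the
grace-period bookkeeping. The rcu_ctrlblk definition in
include/linux/rcupdate.h ends up approximately as follows (a paraphrase;
see the header at this revision for the authoritative layout):

	/* Approximate shape of the merged control block (paraphrase). */
	struct rcu_ctrlblk {
		long	cur;		/* Current batch number.               */
		long	completed;	/* Number of the last completed batch. */
		int	next_pending;	/* Is the next batch already waiting?  */

		spinlock_t	lock	____cacheline_internodealigned_in_smp;
		cpumask_t	cpumask;	/* CPUs that need to switch for  */
						/* the current batch to proceed. */
	} ____cacheline_internodealigned_in_smp;

Keeping the lock and cpumask aligned to an internode cacheline preserves
the false-sharing protection that the separate rcu_state used to provide.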
From a9c828155ae8d1a1576f9648cc4d9677aea53f89 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Tue, 10 Jan 2006 17:24:53 +0300
Subject: [PATCH] rcu: fix hotplug-cpu ->donelist leak

Pointed out by Srivatsa Vaddagiri.

rcu_do_batch() stops after processing maxbatch callbacks on ->donelist,
leaving rcu_tasklet in TASKLET_STATE_SCHED state. If a CPU_DEAD event
happens, the remaining ->donelist entries are lost: rcu_offline_cpu()
kills this tasklet.

With this patch ->donelist migrates along with ->curlist and ->nxtlist to
the current cpu.

Compile tested.

Signed-off-by: Oleg Nesterov
Acked-by: Paul E. McKenney
Cc: Srivatsa Vaddagiri
Cc: Dipankar Sarma
Signed-off-by: Linus Torvalds
---
 kernel/rcupdate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/rcupdate.c')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index e18f9190eafa..0cf8146bd585 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -343,8 +343,9 @@ static void __rcu_offline_cpu(struct rcu_data *this_rdp,
 	spin_unlock_bh(&rcp->lock);
 	rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
 	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
-
+	rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
 }
+
 static void rcu_offline_cpu(int cpu)
 {
 	struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
-- 
cgit
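For context, rcu_move_batch() is what actually migrates a list: it splices
a dying CPU's callbacks onto the surviving CPU's ->nxtlist. A paraphrased
sketch of the function as it exists in this file at this revision:

	/* Paraphrase of rcu_move_batch(): append 'list' (whose end is
	 * tracked by 'tail') to this CPU's ->nxtlist so the callbacks are
	 * eventually invoked on the surviving CPU. */
	static void rcu_move_batch(struct rcu_data *this_rdp,
				   struct rcu_head *list, struct rcu_head **tail)
	{
		local_irq_disable();
		*this_rdp->nxttail = list;	/* splice whole list at the end */
		if (list)
			this_rdp->nxttail = tail;	/* new end-of-list pointer */
		local_irq_enable();
	}

Before this fix, ->curlist and ->nxtlist were moved this way but ->donelist
was not, so callbacks that had already passed their grace period could be
dropped when their CPU went offline with the tasklet still pending.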