Diffstat (limited to 'kernel')
37 files changed, 4269 insertions, 1301 deletions
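The hunks below turn implicit lock-ordering rules into explicit annotations: same-class nesting is marked with spin_lock_nested()/down_write_nested() and SINGLE_DEPTH_NESTING (fork.c, futex.c), and locks that need a class of their own get one via lockdep_set_class() with a static struct lock_class_key (hrtimer.c, irq/handle.c). A minimal sketch of both patterns follows; it is not taken from this patch, and struct my_node, my_node_lock_key and my_link_nodes() are made-up names:

#include <linux/spinlock.h>
#include <linux/lockdep.h>

struct my_node {
	spinlock_t lock;
	int value;
};

/* one static key gives every my_node lock a single, named lock class */
static struct lock_class_key my_node_lock_key;

static void my_node_init(struct my_node *n)
{
	n->value = 0;
	spin_lock_init(&n->lock);
	lockdep_set_class(&n->lock, &my_node_lock_key);
}

/*
 * Taking two locks of the same class is only valid with a fixed order
 * (here: by address) plus a nesting annotation on the second lock,
 * in the style of double_lock_hb() in the futex.c hunk below.
 */
static void my_link_nodes(struct my_node *a, struct my_node *b)
{
	struct my_node *first, *second;

	if (a == b)
		return;
	first  = a < b ? a : b;
	second = a < b ? b : a;

	spin_lock(&first->lock);
	spin_lock_nested(&second->lock, SINGLE_DEPTH_NESTING);

	a->value = b->value;

	spin_unlock(&second->lock);
	spin_unlock(&first->lock);
}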
| diff --git a/kernel/Makefile b/kernel/Makefile index 82fb182f6f61..47dbcd570cd8 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -8,10 +8,15 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \  	    signal.o sys.o kmod.o workqueue.o pid.o \  	    rcupdate.o extable.o params.o posix-timers.o \  	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ -	    hrtimer.o +	    hrtimer.o rwsem.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o  obj-y += time/  obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o +obj-$(CONFIG_LOCKDEP) += lockdep.o +ifeq ($(CONFIG_PROC_FS),y) +obj-$(CONFIG_LOCKDEP) += lockdep_proc.o +endif  obj-$(CONFIG_FUTEX) += futex.o  ifeq ($(CONFIG_COMPAT),y)  obj-$(CONFIG_FUTEX) += futex_compat.o @@ -22,6 +27,7 @@ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o  obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o  obj-$(CONFIG_SMP) += cpu.o spinlock.o  obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o +obj-$(CONFIG_PROVE_LOCKING) += spinlock.o  obj-$(CONFIG_UID16) += uid16.o  obj-$(CONFIG_MODULES) += module.o  obj-$(CONFIG_KALLSYMS) += kallsyms.o diff --git a/kernel/capability.c b/kernel/capability.c index 1a4d8a40d3f9..c7685ad00a97 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -46,7 +46,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)       int ret = 0;       pid_t pid;       __u32 version; -     task_t *target; +     struct task_struct *target;       struct __user_cap_data_struct data;       if (get_user(version, &header->version)) @@ -96,7 +96,7 @@ static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,  			      kernel_cap_t *inheritable,  			      kernel_cap_t *permitted)  { -	task_t *g, *target; +	struct task_struct *g, *target;  	int ret = -EPERM;  	int found = 0; @@ -128,7 +128,7 @@ static inline int cap_set_all(kernel_cap_t *effective,  			       kernel_cap_t *inheritable,  			       kernel_cap_t *permitted)  { -     task_t *g, *target; +     struct task_struct *g, *target;       int ret = -EPERM;       int found = 0; @@ -172,7 +172,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)  {       kernel_cap_t inheritable, permitted, effective;       __u32 version; -     task_t *target; +     struct task_struct *target;       int ret;       pid_t pid; diff --git a/kernel/exit.c b/kernel/exit.c index 7f7ef2258553..6664c084783d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -134,8 +134,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp)  void release_task(struct task_struct * p)  { +	struct task_struct *leader;  	int zap_leader; -	task_t *leader;  repeat:  	atomic_dec(&p->user->processes);  	write_lock_irq(&tasklist_lock); @@ -209,7 +209,7 @@ out:   *   * "I ask you, have you ever known what it is to be an orphan?"   
*/ -static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task) +static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)  {  	struct task_struct *p;  	int ret = 1; @@ -582,7 +582,8 @@ static void exit_mm(struct task_struct * tsk)  	mmput(mm);  } -static inline void choose_new_parent(task_t *p, task_t *reaper) +static inline void +choose_new_parent(struct task_struct *p, struct task_struct *reaper)  {  	/*  	 * Make sure we're not reparenting to ourselves and that @@ -592,7 +593,8 @@ static inline void choose_new_parent(task_t *p, task_t *reaper)  	p->real_parent = reaper;  } -static void reparent_thread(task_t *p, task_t *father, int traced) +static void +reparent_thread(struct task_struct *p, struct task_struct *father, int traced)  {  	/* We don't want people slaying init.  */  	if (p->exit_signal != -1) @@ -656,8 +658,8 @@ static void reparent_thread(task_t *p, task_t *father, int traced)   * group, and if no such member exists, give it to   * the global child reaper process (ie "init")   */ -static void forget_original_parent(struct task_struct * father, -					  struct list_head *to_release) +static void +forget_original_parent(struct task_struct *father, struct list_head *to_release)  {  	struct task_struct *p, *reaper = father;  	struct list_head *_p, *_n; @@ -680,7 +682,7 @@ static void forget_original_parent(struct task_struct * father,  	 */  	list_for_each_safe(_p, _n, &father->children) {  		int ptrace; -		p = list_entry(_p,struct task_struct,sibling); +		p = list_entry(_p, struct task_struct, sibling);  		ptrace = p->ptrace; @@ -709,7 +711,7 @@ static void forget_original_parent(struct task_struct * father,  			list_add(&p->ptrace_list, to_release);  	}  	list_for_each_safe(_p, _n, &father->ptrace_children) { -		p = list_entry(_p,struct task_struct,ptrace_list); +		p = list_entry(_p, struct task_struct, ptrace_list);  		choose_new_parent(p, reaper);  		reparent_thread(p, father, 1);  	} @@ -829,7 +831,7 @@ static void exit_notify(struct task_struct *tsk)  	list_for_each_safe(_p, _n, &ptrace_dead) {  		list_del_init(_p); -		t = list_entry(_p,struct task_struct,ptrace_list); +		t = list_entry(_p, struct task_struct, ptrace_list);  		release_task(t);  	} @@ -933,10 +935,9 @@ fastcall NORET_TYPE void do_exit(long code)  	if (unlikely(current->pi_state_cache))  		kfree(current->pi_state_cache);  	/* -	 * If DEBUG_MUTEXES is on, make sure we are holding no locks: +	 * Make sure we are holding no locks:  	 */ -	mutex_debug_check_no_locks_held(tsk); -	rt_mutex_debug_check_no_locks_held(tsk); +	debug_check_no_locks_held(tsk);  	if (tsk->io_context)  		exit_io_context(); @@ -1011,7 +1012,7 @@ asmlinkage void sys_exit_group(int error_code)  	do_group_exit((error_code & 0xff) << 8);  } -static int eligible_child(pid_t pid, int options, task_t *p) +static int eligible_child(pid_t pid, int options, struct task_struct *p)  {  	if (pid > 0) {  		if (p->pid != pid) @@ -1052,12 +1053,13 @@ static int eligible_child(pid_t pid, int options, task_t *p)  	return 1;  } -static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid, +static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,  			       int why, int status,  			       struct siginfo __user *infop,  			       struct rusage __user *rusagep)  {  	int retval = rusagep ? 
getrusage(p, RUSAGE_BOTH, rusagep) : 0; +  	put_task_struct(p);  	if (!retval)  		retval = put_user(SIGCHLD, &infop->si_signo); @@ -1082,7 +1084,7 @@ static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,   * the lock and this task is uninteresting.  If we return nonzero, we have   * released the lock and the system call should return.   */ -static int wait_task_zombie(task_t *p, int noreap, +static int wait_task_zombie(struct task_struct *p, int noreap,  			    struct siginfo __user *infop,  			    int __user *stat_addr, struct rusage __user *ru)  { @@ -1244,8 +1246,8 @@ static int wait_task_zombie(task_t *p, int noreap,   * the lock and this task is uninteresting.  If we return nonzero, we have   * released the lock and the system call should return.   */ -static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap, -			     struct siginfo __user *infop, +static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, +			     int noreap, struct siginfo __user *infop,  			     int __user *stat_addr, struct rusage __user *ru)  {  	int retval, exit_code; @@ -1359,7 +1361,7 @@ bail_ref:   * the lock and this task is uninteresting.  If we return nonzero, we have   * released the lock and the system call should return.   */ -static int wait_task_continued(task_t *p, int noreap, +static int wait_task_continued(struct task_struct *p, int noreap,  			       struct siginfo __user *infop,  			       int __user *stat_addr, struct rusage __user *ru)  { @@ -1445,7 +1447,7 @@ repeat:  		int ret;  		list_for_each(_p,&tsk->children) { -			p = list_entry(_p,struct task_struct,sibling); +			p = list_entry(_p, struct task_struct, sibling);  			ret = eligible_child(pid, options, p);  			if (!ret) diff --git a/kernel/fork.c b/kernel/fork.c index 9064bf9e131b..56e4e07e45f7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -193,7 +193,10 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)  	down_write(&oldmm->mmap_sem);  	flush_cache_mm(oldmm); -	down_write(&mm->mmap_sem); +	/* +	 * Not linked in yet - no deadlock potential: +	 */ +	down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);  	mm->locked_vm = 0;  	mm->mmap = NULL; @@ -919,10 +922,6 @@ static inline void rt_mutex_init_task(struct task_struct *p)  	spin_lock_init(&p->pi_lock);  	plist_head_init(&p->pi_waiters, &p->pi_lock);  	p->pi_blocked_on = NULL; -# ifdef CONFIG_DEBUG_RT_MUTEXES -	spin_lock_init(&p->held_list_lock); -	INIT_LIST_HEAD(&p->held_list_head); -# endif  #endif  } @@ -934,13 +933,13 @@ static inline void rt_mutex_init_task(struct task_struct *p)   * parts of the process environment (as per the clone   * flags). The actual kick-off is left to the caller.   
*/ -static task_t *copy_process(unsigned long clone_flags, -				 unsigned long stack_start, -				 struct pt_regs *regs, -				 unsigned long stack_size, -				 int __user *parent_tidptr, -				 int __user *child_tidptr, -				 int pid) +static struct task_struct *copy_process(unsigned long clone_flags, +					unsigned long stack_start, +					struct pt_regs *regs, +					unsigned long stack_size, +					int __user *parent_tidptr, +					int __user *child_tidptr, +					int pid)  {  	int retval;  	struct task_struct *p = NULL; @@ -972,6 +971,10 @@ static task_t *copy_process(unsigned long clone_flags,  	if (!p)  		goto fork_out; +#ifdef CONFIG_TRACE_IRQFLAGS +	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); +	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); +#endif  	retval = -EAGAIN;  	if (atomic_read(&p->user->processes) >=  			p->signal->rlim[RLIMIT_NPROC].rlim_cur) { @@ -1046,6 +1049,26 @@ static task_t *copy_process(unsigned long clone_flags,   	}  	mpol_fix_fork_child_flag(p);  #endif +#ifdef CONFIG_TRACE_IRQFLAGS +	p->irq_events = 0; +	p->hardirqs_enabled = 0; +	p->hardirq_enable_ip = 0; +	p->hardirq_enable_event = 0; +	p->hardirq_disable_ip = _THIS_IP_; +	p->hardirq_disable_event = 0; +	p->softirqs_enabled = 1; +	p->softirq_enable_ip = _THIS_IP_; +	p->softirq_enable_event = 0; +	p->softirq_disable_ip = 0; +	p->softirq_disable_event = 0; +	p->hardirq_context = 0; +	p->softirq_context = 0; +#endif +#ifdef CONFIG_LOCKDEP +	p->lockdep_depth = 0; /* no locks held yet */ +	p->curr_chain_key = 0; +	p->lockdep_recursion = 0; +#endif  	rt_mutex_init_task(p); @@ -1271,9 +1294,9 @@ struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)  	return regs;  } -task_t * __devinit fork_idle(int cpu) +struct task_struct * __devinit fork_idle(int cpu)  { -	task_t *task; +	struct task_struct *task;  	struct pt_regs regs;  	task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, NULL, 0); diff --git a/kernel/futex.c b/kernel/futex.c index 15caf93e4a43..1dc98e4dd287 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -607,6 +607,22 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)  }  /* + * Express the locking dependencies for lockdep: + */ +static inline void +double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) +{ +	if (hb1 <= hb2) { +		spin_lock(&hb1->lock); +		if (hb1 < hb2) +			spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); +	} else { /* hb1 > hb2 */ +		spin_lock(&hb2->lock); +		spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING); +	} +} + +/*   * Wake up all waiters hashed on the physical page that is mapped   * to this virtual address:   */ @@ -674,11 +690,7 @@ retryfull:  	hb2 = hash_futex(&key2);  retry: -	if (hb1 < hb2) -		spin_lock(&hb1->lock); -	spin_lock(&hb2->lock); -	if (hb1 > hb2) -		spin_lock(&hb1->lock); +	double_lock_hb(hb1, hb2);  	op_ret = futex_atomic_op_inuser(op, uaddr2);  	if (unlikely(op_ret < 0)) { @@ -787,11 +799,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,  	hb1 = hash_futex(&key1);  	hb2 = hash_futex(&key2); -	if (hb1 < hb2) -		spin_lock(&hb1->lock); -	spin_lock(&hb2->lock); -	if (hb1 > hb2) -		spin_lock(&hb1->lock); +	double_lock_hb(hb1, hb2);  	if (likely(cmpval != NULL)) {  		u32 curval; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 8d3dc29ef41a..d17766d40dab 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -669,7 +669,7 @@ static int hrtimer_wakeup(struct hrtimer *timer)  	return HRTIMER_NORESTART;  } -void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task) +void 
hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)  {  	sl->timer.function = hrtimer_wakeup;  	sl->task = task; @@ -782,8 +782,10 @@ static void __devinit init_hrtimers_cpu(int cpu)  	struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);  	int i; -	for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) +	for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {  		spin_lock_init(&base->lock); +		lockdep_set_class(&base->lock, &base->lock_key); +	}  }  #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 54105bdfe20d..9336f2e89e40 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -261,10 +261,13 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)  	 * keep it masked and get out of here  	 */  	action = desc->action; -	if (unlikely(!action || (desc->status & IRQ_DISABLED))) +	if (unlikely(!action || (desc->status & IRQ_DISABLED))) { +		desc->status |= IRQ_PENDING;  		goto out; +	}  	desc->status |= IRQ_INPROGRESS; +	desc->status &= ~IRQ_PENDING;  	spin_unlock(&desc->lock);  	action_ret = handle_IRQ_event(irq, regs, action); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index aeb6e391276c..fc4e906aedbd 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -132,7 +132,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,  	handle_dynamic_tick(action);  	if (!(action->flags & IRQF_DISABLED)) -		local_irq_enable(); +		local_irq_enable_in_hardirq();  	do {  		ret = action->handler(irq, action->dev_id, regs); @@ -249,3 +249,19 @@ out:  	return 1;  } +#ifdef CONFIG_TRACE_IRQFLAGS + +/* + * lockdep: we want to handle all irq_desc locks as a single lock-class: + */ +static struct lock_class_key irq_desc_lock_class; + +void early_init_irq_lock_class(void) +{ +	int i; + +	for (i = 0; i < NR_IRQS; i++) +		lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class); +} + +#endif diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c911c6ec4dd6..4e461438e48b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -410,6 +410,12 @@ int request_irq(unsigned int irq,  	struct irqaction *action;  	int retval; +#ifdef CONFIG_LOCKDEP +	/* +	 * Lockdep wants atomic interrupt handlers: +	 */ +	irqflags |= SA_INTERRUPT; +#endif  	/*  	 * Sanity-check: shared interrupts must pass in a real dev-ID,  	 * otherwise we'll have trouble later trying to figure out diff --git a/kernel/kmod.c b/kernel/kmod.c index 1b7157af051c..1d32defa38ab 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -233,7 +233,7 @@ static void __call_usermodehelper(void *data)  int call_usermodehelper_keys(char *path, char **argv, char **envp,  			     struct key *session_keyring, int wait)  { -	DECLARE_COMPLETION(done); +	DECLARE_COMPLETION_ONSTACK(done);  	struct subprocess_info sub_info = {  		.complete	= &done,  		.path		= path, diff --git a/kernel/lockdep.c b/kernel/lockdep.c new file mode 100644 index 000000000000..f32ca78c198d --- /dev/null +++ b/kernel/lockdep.c @@ -0,0 +1,2702 @@ +/* + * kernel/lockdep.c + * + * Runtime locking correctness validator + * + * Started by Ingo Molnar: + * + *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * + * this code maps all the lock dependencies as they occur in a live kernel + * and will warn about the following classes of locking bugs: + * + * - lock inversion scenarios + * - circular lock dependencies + * - hardirq/softirq safe/unsafe locking bugs + * + * Bugs are reported even if the current locking scenario does not cause + * any deadlock at this 
point. + * + * I.e. if anytime in the past two locks were taken in a different order, + * even if it happened for another task, even if those were different + * locks (but of the same class as this lock), this code will detect it. + * + * Thanks to Arjan van de Ven for coming up with the initial idea of + * mapping lock dependencies runtime. + */ +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/kallsyms.h> +#include <linux/interrupt.h> +#include <linux/stacktrace.h> +#include <linux/debug_locks.h> +#include <linux/irqflags.h> + +#include <asm/sections.h> + +#include "lockdep_internals.h" + +/* + * hash_lock: protects the lockdep hashes and class/list/hash allocators. + * + * This is one of the rare exceptions where it's justified + * to use a raw spinlock - we really dont want the spinlock + * code to recurse back into the lockdep code. + */ +static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + +static int lockdep_initialized; + +unsigned long nr_list_entries; +static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; + +/* + * Allocate a lockdep entry. (assumes hash_lock held, returns + * with NULL on failure) + */ +static struct lock_list *alloc_list_entry(void) +{ +	if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { +		__raw_spin_unlock(&hash_lock); +		debug_locks_off(); +		printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); +		printk("turning off the locking correctness validator.\n"); +		return NULL; +	} +	return list_entries + nr_list_entries++; +} + +/* + * All data structures here are protected by the global debug_lock. + * + * Mutex key structs only get allocated, once during bootup, and never + * get freed - this significantly simplifies the debugging code. + */ +unsigned long nr_lock_classes; +static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; + +/* + * We keep a global list of all lock classes. The list only grows, + * never shrinks. The list is only accessed with the lockdep + * spinlock lock held. + */ +LIST_HEAD(all_lock_classes); + +/* + * The lockdep classes are in a hash-table as well, for fast lookup: + */ +#define CLASSHASH_BITS		(MAX_LOCKDEP_KEYS_BITS - 1) +#define CLASSHASH_SIZE		(1UL << CLASSHASH_BITS) +#define CLASSHASH_MASK		(CLASSHASH_SIZE - 1) +#define __classhashfn(key)	((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK) +#define classhashentry(key)	(classhash_table + __classhashfn((key))) + +static struct list_head classhash_table[CLASSHASH_SIZE]; + +unsigned long nr_lock_chains; +static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; + +/* + * We put the lock dependency chains into a hash-table as well, to cache + * their existence: + */ +#define CHAINHASH_BITS		(MAX_LOCKDEP_CHAINS_BITS-1) +#define CHAINHASH_SIZE		(1UL << CHAINHASH_BITS) +#define CHAINHASH_MASK		(CHAINHASH_SIZE - 1) +#define __chainhashfn(chain) \ +		(((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK) +#define chainhashentry(chain)	(chainhash_table + __chainhashfn((chain))) + +static struct list_head chainhash_table[CHAINHASH_SIZE]; + +/* + * The hash key of the lock dependency chains is a hash itself too: + * it's a hash of all locks taken up to that lock, including that lock. + * It's a 64-bit hash, because it's important for the keys to be + * unique. 
+ */ +#define iterate_chain_key(key1, key2) \ +	(((key1) << MAX_LOCKDEP_KEYS_BITS/2) ^ \ +	((key1) >> (64-MAX_LOCKDEP_KEYS_BITS/2)) ^ \ +	(key2)) + +void lockdep_off(void) +{ +	current->lockdep_recursion++; +} + +EXPORT_SYMBOL(lockdep_off); + +void lockdep_on(void) +{ +	current->lockdep_recursion--; +} + +EXPORT_SYMBOL(lockdep_on); + +int lockdep_internal(void) +{ +	return current->lockdep_recursion != 0; +} + +EXPORT_SYMBOL(lockdep_internal); + +/* + * Debugging switches: + */ + +#define VERBOSE			0 +#ifdef VERBOSE +# define VERY_VERBOSE		0 +#endif + +#if VERBOSE +# define HARDIRQ_VERBOSE	1 +# define SOFTIRQ_VERBOSE	1 +#else +# define HARDIRQ_VERBOSE	0 +# define SOFTIRQ_VERBOSE	0 +#endif + +#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE +/* + * Quick filtering for interesting events: + */ +static int class_filter(struct lock_class *class) +{ +	if (class->name_version == 1 && +			!strcmp(class->name, "&rl->lock")) +		return 1; +	if (class->name_version == 1 && +			!strcmp(class->name, "&ni->mrec_lock")) +		return 1; +	if (class->name_version == 1 && +			!strcmp(class->name, "mft_ni_runlist_lock")) +		return 1; +	if (class->name_version == 1 && +			!strcmp(class->name, "mft_ni_mrec_lock")) +		return 1; +	if (class->name_version == 1 && +			!strcmp(class->name, "&vol->lcnbmp_lock")) +		return 1; +	return 0; +} +#endif + +static int verbose(struct lock_class *class) +{ +#if VERBOSE +	return class_filter(class); +#endif +	return 0; +} + +#ifdef CONFIG_TRACE_IRQFLAGS + +static int hardirq_verbose(struct lock_class *class) +{ +#if HARDIRQ_VERBOSE +	return class_filter(class); +#endif +	return 0; +} + +static int softirq_verbose(struct lock_class *class) +{ +#if SOFTIRQ_VERBOSE +	return class_filter(class); +#endif +	return 0; +} + +#endif + +/* + * Stack-trace: tightly packed array of stack backtrace + * addresses. Protected by the hash_lock. + */ +unsigned long nr_stack_trace_entries; +static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; + +static int save_trace(struct stack_trace *trace) +{ +	trace->nr_entries = 0; +	trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; +	trace->entries = stack_trace + nr_stack_trace_entries; + +	save_stack_trace(trace, NULL, 0, 3); + +	trace->max_entries = trace->nr_entries; + +	nr_stack_trace_entries += trace->nr_entries; +	if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) +		return 0; + +	if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) { +		__raw_spin_unlock(&hash_lock); +		if (debug_locks_off()) { +			printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n"); +			printk("turning off the locking correctness validator.\n"); +			dump_stack(); +		} +		return 0; +	} + +	return 1; +} + +unsigned int nr_hardirq_chains; +unsigned int nr_softirq_chains; +unsigned int nr_process_chains; +unsigned int max_lockdep_depth; +unsigned int max_recursion_depth; + +#ifdef CONFIG_DEBUG_LOCKDEP +/* + * We cannot printk in early bootup code. Not even early_printk() + * might work. So we mark any initialization errors and printk + * about it later on, in lockdep_info(). 
+ */ +static int lockdep_init_error; + +/* + * Various lockdep statistics: + */ +atomic_t chain_lookup_hits; +atomic_t chain_lookup_misses; +atomic_t hardirqs_on_events; +atomic_t hardirqs_off_events; +atomic_t redundant_hardirqs_on; +atomic_t redundant_hardirqs_off; +atomic_t softirqs_on_events; +atomic_t softirqs_off_events; +atomic_t redundant_softirqs_on; +atomic_t redundant_softirqs_off; +atomic_t nr_unused_locks; +atomic_t nr_cyclic_checks; +atomic_t nr_cyclic_check_recursions; +atomic_t nr_find_usage_forwards_checks; +atomic_t nr_find_usage_forwards_recursions; +atomic_t nr_find_usage_backwards_checks; +atomic_t nr_find_usage_backwards_recursions; +# define debug_atomic_inc(ptr)		atomic_inc(ptr) +# define debug_atomic_dec(ptr)		atomic_dec(ptr) +# define debug_atomic_read(ptr)		atomic_read(ptr) +#else +# define debug_atomic_inc(ptr)		do { } while (0) +# define debug_atomic_dec(ptr)		do { } while (0) +# define debug_atomic_read(ptr)		0 +#endif + +/* + * Locking printouts: + */ + +static const char *usage_str[] = +{ +	[LOCK_USED] =			"initial-use ", +	[LOCK_USED_IN_HARDIRQ] =	"in-hardirq-W", +	[LOCK_USED_IN_SOFTIRQ] =	"in-softirq-W", +	[LOCK_ENABLED_SOFTIRQS] =	"softirq-on-W", +	[LOCK_ENABLED_HARDIRQS] =	"hardirq-on-W", +	[LOCK_USED_IN_HARDIRQ_READ] =	"in-hardirq-R", +	[LOCK_USED_IN_SOFTIRQ_READ] =	"in-softirq-R", +	[LOCK_ENABLED_SOFTIRQS_READ] =	"softirq-on-R", +	[LOCK_ENABLED_HARDIRQS_READ] =	"hardirq-on-R", +}; + +const char * __get_key_name(struct lockdep_subclass_key *key, char *str) +{ +	unsigned long offs, size; +	char *modname; + +	return kallsyms_lookup((unsigned long)key, &size, &offs, &modname, str); +} + +void +get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4) +{ +	*c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.'; + +	if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) +		*c1 = '+'; +	else +		if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) +			*c1 = '-'; + +	if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) +		*c2 = '+'; +	else +		if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS) +			*c2 = '-'; + +	if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) +		*c3 = '-'; +	if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) { +		*c3 = '+'; +		if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) +			*c3 = '?'; +	} + +	if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) +		*c4 = '-'; +	if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) { +		*c4 = '+'; +		if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) +			*c4 = '?'; +	} +} + +static void print_lock_name(struct lock_class *class) +{ +	char str[128], c1, c2, c3, c4; +	const char *name; + +	get_usage_chars(class, &c1, &c2, &c3, &c4); + +	name = class->name; +	if (!name) { +		name = __get_key_name(class->key, str); +		printk(" (%s", name); +	} else { +		printk(" (%s", name); +		if (class->name_version > 1) +			printk("#%d", class->name_version); +		if (class->subclass) +			printk("/%d", class->subclass); +	} +	printk("){%c%c%c%c}", c1, c2, c3, c4); +} + +static void print_lockdep_cache(struct lockdep_map *lock) +{ +	const char *name; +	char str[128]; + +	name = lock->name; +	if (!name) +		name = __get_key_name(lock->key->subkeys, str); + +	printk("%s", name); +} + +static void print_lock(struct held_lock *hlock) +{ +	print_lock_name(hlock->class); +	printk(", at: "); +	print_ip_sym(hlock->acquire_ip); +} + +static void lockdep_print_held_locks(struct task_struct *curr) +{ +	int i, depth = curr->lockdep_depth; + +	if (!depth) { +		printk("no locks held by %s/%d.\n", curr->comm, curr->pid); +		return; +	} +	
printk("%d lock%s held by %s/%d:\n", +		depth, depth > 1 ? "s" : "", curr->comm, curr->pid); + +	for (i = 0; i < depth; i++) { +		printk(" #%d: ", i); +		print_lock(curr->held_locks + i); +	} +} +/* + * Helper to print a nice hierarchy of lock dependencies: + */ +static void print_spaces(int nr) +{ +	int i; + +	for (i = 0; i < nr; i++) +		printk("  "); +} + +static void print_lock_class_header(struct lock_class *class, int depth) +{ +	int bit; + +	print_spaces(depth); +	printk("->"); +	print_lock_name(class); +	printk(" ops: %lu", class->ops); +	printk(" {\n"); + +	for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { +		if (class->usage_mask & (1 << bit)) { +			int len = depth; + +			print_spaces(depth); +			len += printk("   %s", usage_str[bit]); +			len += printk(" at:\n"); +			print_stack_trace(class->usage_traces + bit, len); +		} +	} +	print_spaces(depth); +	printk(" }\n"); + +	print_spaces(depth); +	printk(" ... key      at: "); +	print_ip_sym((unsigned long)class->key); +} + +/* + * printk all lock dependencies starting at <entry>: + */ +static void print_lock_dependencies(struct lock_class *class, int depth) +{ +	struct lock_list *entry; + +	if (DEBUG_LOCKS_WARN_ON(depth >= 20)) +		return; + +	print_lock_class_header(class, depth); + +	list_for_each_entry(entry, &class->locks_after, entry) { +		DEBUG_LOCKS_WARN_ON(!entry->class); +		print_lock_dependencies(entry->class, depth + 1); + +		print_spaces(depth); +		printk(" ... acquired at:\n"); +		print_stack_trace(&entry->trace, 2); +		printk("\n"); +	} +} + +/* + * Add a new dependency to the head of the list: + */ +static int add_lock_to_list(struct lock_class *class, struct lock_class *this, +			    struct list_head *head, unsigned long ip) +{ +	struct lock_list *entry; +	/* +	 * Lock not present yet - get a new dependency struct and +	 * add it to the list: +	 */ +	entry = alloc_list_entry(); +	if (!entry) +		return 0; + +	entry->class = this; +	save_trace(&entry->trace); + +	/* +	 * Since we never remove from the dependency list, the list can +	 * be walked lockless by other CPUs, it's only allocation +	 * that must be protected by the spinlock. But this also means +	 * we must make new entries visible only once writes to the +	 * entry become visible - hence the RCU op: +	 */ +	list_add_tail_rcu(&entry->entry, head); + +	return 1; +} + +/* + * Recursive, forwards-direction lock-dependency checking, used for + * both noncyclic checking and for hardirq-unsafe/softirq-unsafe + * checking. + * + * (to keep the stackframe of the recursive functions small we + *  use these global variables, and we also mark various helper + *  functions as noinline.) 
+ */ +static struct held_lock *check_source, *check_target; + +/* + * Print a dependency chain entry (this is only done when a deadlock + * has been detected): + */ +static noinline int +print_circular_bug_entry(struct lock_list *target, unsigned int depth) +{ +	if (debug_locks_silent) +		return 0; +	printk("\n-> #%u", depth); +	print_lock_name(target->class); +	printk(":\n"); +	print_stack_trace(&target->trace, 6); + +	return 0; +} + +/* + * When a circular dependency is detected, print the + * header first: + */ +static noinline int +print_circular_bug_header(struct lock_list *entry, unsigned int depth) +{ +	struct task_struct *curr = current; + +	__raw_spin_unlock(&hash_lock); +	debug_locks_off(); +	if (debug_locks_silent) +		return 0; + +	printk("\n=======================================================\n"); +	printk(  "[ INFO: possible circular locking dependency detected ]\n"); +	printk(  "-------------------------------------------------------\n"); +	printk("%s/%d is trying to acquire lock:\n", +		curr->comm, curr->pid); +	print_lock(check_source); +	printk("\nbut task is already holding lock:\n"); +	print_lock(check_target); +	printk("\nwhich lock already depends on the new lock.\n\n"); +	printk("\nthe existing dependency chain (in reverse order) is:\n"); + +	print_circular_bug_entry(entry, depth); + +	return 0; +} + +static noinline int print_circular_bug_tail(void) +{ +	struct task_struct *curr = current; +	struct lock_list this; + +	if (debug_locks_silent) +		return 0; + +	this.class = check_source->class; +	save_trace(&this.trace); +	print_circular_bug_entry(&this, 0); + +	printk("\nother info that might help us debug this:\n\n"); +	lockdep_print_held_locks(curr); + +	printk("\nstack backtrace:\n"); +	dump_stack(); + +	return 0; +} + +static int noinline print_infinite_recursion_bug(void) +{ +	__raw_spin_unlock(&hash_lock); +	DEBUG_LOCKS_WARN_ON(1); + +	return 0; +} + +/* + * Prove that the dependency graph starting at <entry> can not + * lead to <target>. Print an error and return 0 if it does. + */ +static noinline int +check_noncircular(struct lock_class *source, unsigned int depth) +{ +	struct lock_list *entry; + +	debug_atomic_inc(&nr_cyclic_check_recursions); +	if (depth > max_recursion_depth) +		max_recursion_depth = depth; +	if (depth >= 20) +		return print_infinite_recursion_bug(); +	/* +	 * Check this lock's dependency list: +	 */ +	list_for_each_entry(entry, &source->locks_after, entry) { +		if (entry->class == check_target->class) +			return print_circular_bug_header(entry, depth+1); +		debug_atomic_inc(&nr_cyclic_checks); +		if (!check_noncircular(entry->class, depth+1)) +			return print_circular_bug_entry(entry, depth+1); +	} +	return 1; +} + +static int very_verbose(struct lock_class *class) +{ +#if VERY_VERBOSE +	return class_filter(class); +#endif +	return 0; +} +#ifdef CONFIG_TRACE_IRQFLAGS + +/* + * Forwards and backwards subgraph searching, for the purposes of + * proving that two subgraphs can be connected by a new dependency + * without creating any illegal irq-safe -> irq-unsafe lock dependency. + */ +static enum lock_usage_bit find_usage_bit; +static struct lock_class *forwards_match, *backwards_match; + +/* + * Find a node in the forwards-direction dependency sub-graph starting + * at <source> that matches <find_usage_bit>. + * + * Return 2 if such a node exists in the subgraph, and put that node + * into <forwards_match>. + * + * Return 1 otherwise and keep <forwards_match> unchanged. + * Return 0 on error. 
+ */ +static noinline int +find_usage_forwards(struct lock_class *source, unsigned int depth) +{ +	struct lock_list *entry; +	int ret; + +	if (depth > max_recursion_depth) +		max_recursion_depth = depth; +	if (depth >= 20) +		return print_infinite_recursion_bug(); + +	debug_atomic_inc(&nr_find_usage_forwards_checks); +	if (source->usage_mask & (1 << find_usage_bit)) { +		forwards_match = source; +		return 2; +	} + +	/* +	 * Check this lock's dependency list: +	 */ +	list_for_each_entry(entry, &source->locks_after, entry) { +		debug_atomic_inc(&nr_find_usage_forwards_recursions); +		ret = find_usage_forwards(entry->class, depth+1); +		if (ret == 2 || ret == 0) +			return ret; +	} +	return 1; +} + +/* + * Find a node in the backwards-direction dependency sub-graph starting + * at <source> that matches <find_usage_bit>. + * + * Return 2 if such a node exists in the subgraph, and put that node + * into <backwards_match>. + * + * Return 1 otherwise and keep <backwards_match> unchanged. + * Return 0 on error. + */ +static noinline int +find_usage_backwards(struct lock_class *source, unsigned int depth) +{ +	struct lock_list *entry; +	int ret; + +	if (depth > max_recursion_depth) +		max_recursion_depth = depth; +	if (depth >= 20) +		return print_infinite_recursion_bug(); + +	debug_atomic_inc(&nr_find_usage_backwards_checks); +	if (source->usage_mask & (1 << find_usage_bit)) { +		backwards_match = source; +		return 2; +	} + +	/* +	 * Check this lock's dependency list: +	 */ +	list_for_each_entry(entry, &source->locks_before, entry) { +		debug_atomic_inc(&nr_find_usage_backwards_recursions); +		ret = find_usage_backwards(entry->class, depth+1); +		if (ret == 2 || ret == 0) +			return ret; +	} +	return 1; +} + +static int +print_bad_irq_dependency(struct task_struct *curr, +			 struct held_lock *prev, +			 struct held_lock *next, +			 enum lock_usage_bit bit1, +			 enum lock_usage_bit bit2, +			 const char *irqclass) +{ +	__raw_spin_unlock(&hash_lock); +	debug_locks_off(); +	if (debug_locks_silent) +		return 0; + +	printk("\n======================================================\n"); +	printk(  "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", +		irqclass, irqclass); +	printk(  "------------------------------------------------------\n"); +	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", +		curr->comm, curr->pid, +		curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, +		curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, +		curr->hardirqs_enabled, +		curr->softirqs_enabled); +	print_lock(next); + +	printk("\nand this task is already holding:\n"); +	print_lock(prev); +	printk("which would create a new lock dependency:\n"); +	print_lock_name(prev->class); +	printk(" ->"); +	print_lock_name(next->class); +	printk("\n"); + +	printk("\nbut this new dependency connects a %s-irq-safe lock:\n", +		irqclass); +	print_lock_name(backwards_match); +	printk("\n... which became %s-irq-safe at:\n", irqclass); + +	print_stack_trace(backwards_match->usage_traces + bit1, 1); + +	printk("\nto a %s-irq-unsafe lock:\n", irqclass); +	print_lock_name(forwards_match); +	printk("\n... 
which became %s-irq-unsafe at:\n", irqclass); +	printk("..."); + +	print_stack_trace(forwards_match->usage_traces + bit2, 1); + +	printk("\nother info that might help us debug this:\n\n"); +	lockdep_print_held_locks(curr); + +	printk("\nthe %s-irq-safe lock's dependencies:\n", irqclass); +	print_lock_dependencies(backwards_match, 0); + +	printk("\nthe %s-irq-unsafe lock's dependencies:\n", irqclass); +	print_lock_dependencies(forwards_match, 0); + +	printk("\nstack backtrace:\n"); +	dump_stack(); + +	return 0; +} + +static int +check_usage(struct task_struct *curr, struct held_lock *prev, +	    struct held_lock *next, enum lock_usage_bit bit_backwards, +	    enum lock_usage_bit bit_forwards, const char *irqclass) +{ +	int ret; + +	find_usage_bit = bit_backwards; +	/* fills in <backwards_match> */ +	ret = find_usage_backwards(prev->class, 0); +	if (!ret || ret == 1) +		return ret; + +	find_usage_bit = bit_forwards; +	ret = find_usage_forwards(next->class, 0); +	if (!ret || ret == 1) +		return ret; +	/* ret == 2 */ +	return print_bad_irq_dependency(curr, prev, next, +			bit_backwards, bit_forwards, irqclass); +} + +#endif + +static int +print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, +		   struct held_lock *next) +{ +	debug_locks_off(); +	__raw_spin_unlock(&hash_lock); +	if (debug_locks_silent) +		return 0; + +	printk("\n=============================================\n"); +	printk(  "[ INFO: possible recursive locking detected ]\n"); +	printk(  "---------------------------------------------\n"); +	printk("%s/%d is trying to acquire lock:\n", +		curr->comm, curr->pid); +	print_lock(next); +	printk("\nbut task is already holding lock:\n"); +	print_lock(prev); + +	printk("\nother info that might help us debug this:\n"); +	lockdep_print_held_locks(curr); + +	printk("\nstack backtrace:\n"); +	dump_stack(); + +	return 0; +} + +/* + * Check whether we are holding such a class already. + * + * (Note that this has to be done separately, because the graph cannot + * detect such classes of deadlocks.) + * + * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read + */ +static int +check_deadlock(struct task_struct *curr, struct held_lock *next, +	       struct lockdep_map *next_instance, int read) +{ +	struct held_lock *prev; +	int i; + +	for (i = 0; i < curr->lockdep_depth; i++) { +		prev = curr->held_locks + i; +		if (prev->class != next->class) +			continue; +		/* +		 * Allow read-after-read recursion of the same +		 * lock class (i.e. read_lock(lock)+read_lock(lock)): +		 */ +		if ((read == 2) && prev->read) +			return 2; +		return print_deadlock_bug(curr, prev, next); +	} +	return 1; +} + +/* + * There was a chain-cache miss, and we are about to add a new dependency + * to a previous lock. We recursively validate the following rules: + * + *  - would the adding of the <prev> -> <next> dependency create a + *    circular dependency in the graph? [== circular deadlock] + * + *  - does the new prev->next dependency connect any hardirq-safe lock + *    (in the full backwards-subgraph starting at <prev>) with any + *    hardirq-unsafe lock (in the full forwards-subgraph starting at + *    <next>)? [== illegal lock inversion with hardirq contexts] + * + *  - does the new prev->next dependency connect any softirq-safe lock + *    (in the full backwards-subgraph starting at <prev>) with any + *    softirq-unsafe lock (in the full forwards-subgraph starting at + *    <next>)? 
[== illegal lock inversion with softirq contexts] + * + * any of these scenarios could lead to a deadlock. + * + * Then if all the validations pass, we add the forwards and backwards + * dependency. + */ +static int +check_prev_add(struct task_struct *curr, struct held_lock *prev, +	       struct held_lock *next) +{ +	struct lock_list *entry; +	int ret; + +	/* +	 * Prove that the new <prev> -> <next> dependency would not +	 * create a circular dependency in the graph. (We do this by +	 * forward-recursing into the graph starting at <next>, and +	 * checking whether we can reach <prev>.) +	 * +	 * We are using global variables to control the recursion, to +	 * keep the stackframe size of the recursive functions low: +	 */ +	check_source = next; +	check_target = prev; +	if (!(check_noncircular(next->class, 0))) +		return print_circular_bug_tail(); + +#ifdef CONFIG_TRACE_IRQFLAGS +	/* +	 * Prove that the new dependency does not connect a hardirq-safe +	 * lock with a hardirq-unsafe lock - to achieve this we search +	 * the backwards-subgraph starting at <prev>, and the +	 * forwards-subgraph starting at <next>: +	 */ +	if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, +					LOCK_ENABLED_HARDIRQS, "hard")) +		return 0; + +	/* +	 * Prove that the new dependency does not connect a hardirq-safe-read +	 * lock with a hardirq-unsafe lock - to achieve this we search +	 * the backwards-subgraph starting at <prev>, and the +	 * forwards-subgraph starting at <next>: +	 */ +	if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, +					LOCK_ENABLED_HARDIRQS, "hard-read")) +		return 0; + +	/* +	 * Prove that the new dependency does not connect a softirq-safe +	 * lock with a softirq-unsafe lock - to achieve this we search +	 * the backwards-subgraph starting at <prev>, and the +	 * forwards-subgraph starting at <next>: +	 */ +	if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, +					LOCK_ENABLED_SOFTIRQS, "soft")) +		return 0; +	/* +	 * Prove that the new dependency does not connect a softirq-safe-read +	 * lock with a softirq-unsafe lock - to achieve this we search +	 * the backwards-subgraph starting at <prev>, and the +	 * forwards-subgraph starting at <next>: +	 */ +	if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, +					LOCK_ENABLED_SOFTIRQS, "soft")) +		return 0; +#endif +	/* +	 * For recursive read-locks we do all the dependency checks, +	 * but we dont store read-triggered dependencies (only +	 * write-triggered dependencies). This ensures that only the +	 * write-side dependencies matter, and that if for example a +	 * write-lock never takes any other locks, then the reads are +	 * equivalent to a NOP. +	 */ +	if (next->read == 2 || prev->read == 2) +		return 1; +	/* +	 * Is the <prev> -> <next> dependency already present? +	 * +	 * (this may occur even though this is a new chain: consider +	 *  e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3 +	 *  chains - the second one will be new, but L1 already has +	 *  L2 added to its dependency list, due to the first chain.) +	 */ +	list_for_each_entry(entry, &prev->class->locks_after, entry) { +		if (entry->class == next->class) +			return 2; +	} + +	/* +	 * Ok, all validations passed, add the new lock +	 * to the previous lock's dependency list: +	 */ +	ret = add_lock_to_list(prev->class, next->class, +			       &prev->class->locks_after, next->acquire_ip); +	if (!ret) +		return 0; +	/* +	 * Return value of 2 signals 'dependency already added', +	 * in that case we dont have to add the backlink either. 
+	 */ +	if (ret == 2) +		return 2; +	ret = add_lock_to_list(next->class, prev->class, +			       &next->class->locks_before, next->acquire_ip); + +	/* +	 * Debugging printouts: +	 */ +	if (verbose(prev->class) || verbose(next->class)) { +		__raw_spin_unlock(&hash_lock); +		printk("\n new dependency: "); +		print_lock_name(prev->class); +		printk(" => "); +		print_lock_name(next->class); +		printk("\n"); +		dump_stack(); +		__raw_spin_lock(&hash_lock); +	} +	return 1; +} + +/* + * Add the dependency to all directly-previous locks that are 'relevant'. + * The ones that are relevant are (in increasing distance from curr): + * all consecutive trylock entries and the final non-trylock entry - or + * the end of this context's lock-chain - whichever comes first. + */ +static int +check_prevs_add(struct task_struct *curr, struct held_lock *next) +{ +	int depth = curr->lockdep_depth; +	struct held_lock *hlock; + +	/* +	 * Debugging checks. +	 * +	 * Depth must not be zero for a non-head lock: +	 */ +	if (!depth) +		goto out_bug; +	/* +	 * At least two relevant locks must exist for this +	 * to be a head: +	 */ +	if (curr->held_locks[depth].irq_context != +			curr->held_locks[depth-1].irq_context) +		goto out_bug; + +	for (;;) { +		hlock = curr->held_locks + depth-1; +		/* +		 * Only non-recursive-read entries get new dependencies +		 * added: +		 */ +		if (hlock->read != 2) { +			check_prev_add(curr, hlock, next); +			/* +			 * Stop after the first non-trylock entry, +			 * as non-trylock entries have added their +			 * own direct dependencies already, so this +			 * lock is connected to them indirectly: +			 */ +			if (!hlock->trylock) +				break; +		} +		depth--; +		/* +		 * End of lock-stack? +		 */ +		if (!depth) +			break; +		/* +		 * Stop the search if we cross into another context: +		 */ +		if (curr->held_locks[depth].irq_context != +				curr->held_locks[depth-1].irq_context) +			break; +	} +	return 1; +out_bug: +	__raw_spin_unlock(&hash_lock); +	DEBUG_LOCKS_WARN_ON(1); + +	return 0; +} + + +/* + * Is this the address of a static object: + */ +static int static_obj(void *obj) +{ +	unsigned long start = (unsigned long) &_stext, +		      end   = (unsigned long) &_end, +		      addr  = (unsigned long) obj; +#ifdef CONFIG_SMP +	int i; +#endif + +	/* +	 * static variable? +	 */ +	if ((addr >= start) && (addr < end)) +		return 1; + +#ifdef CONFIG_SMP +	/* +	 * percpu var? +	 */ +	for_each_possible_cpu(i) { +		start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); +		end   = (unsigned long) &__per_cpu_end   + per_cpu_offset(i); + +		if ((addr >= start) && (addr < end)) +			return 1; +	} +#endif + +	/* +	 * module var? +	 */ +	return is_module_address(addr); +} + +/* + * To make lock name printouts unique, we calculate a unique + * class->name_version generation counter: + */ +static int count_matching_names(struct lock_class *new_class) +{ +	struct lock_class *class; +	int count = 0; + +	if (!new_class->name) +		return 0; + +	list_for_each_entry(class, &all_lock_classes, lock_entry) { +		if (new_class->key - new_class->subclass == class->key) +			return class->name_version; +		if (class->name && !strcmp(class->name, new_class->name)) +			count = max(count, class->name_version); +	} + +	return count + 1; +} + +extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void); + +/* + * Register a lock's class in the hash-table, if the class is not present + * yet. Otherwise we look it up. We cache the result in the lock object + * itself, so actual lookup of the hash should be once per lock object. 
+ */ +static inline struct lock_class * +register_lock_class(struct lockdep_map *lock, unsigned int subclass) +{ +	struct lockdep_subclass_key *key; +	struct list_head *hash_head; +	struct lock_class *class; + +#ifdef CONFIG_DEBUG_LOCKDEP +	/* +	 * If the architecture calls into lockdep before initializing +	 * the hashes then we'll warn about it later. (we cannot printk +	 * right now) +	 */ +	if (unlikely(!lockdep_initialized)) { +		lockdep_init(); +		lockdep_init_error = 1; +	} +#endif + +	/* +	 * Static locks do not have their class-keys yet - for them the key +	 * is the lock object itself: +	 */ +	if (unlikely(!lock->key)) +		lock->key = (void *)lock; + +	/* +	 * NOTE: the class-key must be unique. For dynamic locks, a static +	 * lock_class_key variable is passed in through the mutex_init() +	 * (or spin_lock_init()) call - which acts as the key. For static +	 * locks we use the lock object itself as the key. +	 */ +	if (sizeof(struct lock_class_key) > sizeof(struct lock_class)) +		__error_too_big_MAX_LOCKDEP_SUBCLASSES(); + +	key = lock->key->subkeys + subclass; + +	hash_head = classhashentry(key); + +	/* +	 * We can walk the hash lockfree, because the hash only +	 * grows, and we are careful when adding entries to the end: +	 */ +	list_for_each_entry(class, hash_head, hash_entry) +		if (class->key == key) +			goto out_set; + +	/* +	 * Debug-check: all keys must be persistent! + 	 */ +	if (!static_obj(lock->key)) { +		debug_locks_off(); +		printk("INFO: trying to register non-static key.\n"); +		printk("the code is fine but needs lockdep annotation.\n"); +		printk("turning off the locking correctness validator.\n"); +		dump_stack(); + +		return NULL; +	} + +	__raw_spin_lock(&hash_lock); +	/* +	 * We have to do the hash-walk again, to avoid races +	 * with another CPU: +	 */ +	list_for_each_entry(class, hash_head, hash_entry) +		if (class->key == key) +			goto out_unlock_set; +	/* +	 * Allocate a new key from the static array, and add it to +	 * the hash: +	 */ +	if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { +		__raw_spin_unlock(&hash_lock); +		debug_locks_off(); +		printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); +		printk("turning off the locking correctness validator.\n"); +		return NULL; +	} +	class = lock_classes + nr_lock_classes++; +	debug_atomic_inc(&nr_unused_locks); +	class->key = key; +	class->name = lock->name; +	class->subclass = subclass; +	INIT_LIST_HEAD(&class->lock_entry); +	INIT_LIST_HEAD(&class->locks_before); +	INIT_LIST_HEAD(&class->locks_after); +	class->name_version = count_matching_names(class); +	/* +	 * We use RCU's safe list-add method to make +	 * parallel walking of the hash-list safe: +	 */ +	list_add_tail_rcu(&class->hash_entry, hash_head); + +	if (verbose(class)) { +		__raw_spin_unlock(&hash_lock); +		printk("\nnew class %p: %s", class->key, class->name); +		if (class->name_version > 1) +			printk("#%d", class->name_version); +		printk("\n"); +		dump_stack(); +		__raw_spin_lock(&hash_lock); +	} +out_unlock_set: +	__raw_spin_unlock(&hash_lock); + +out_set: +	lock->class[subclass] = class; + +	DEBUG_LOCKS_WARN_ON(class->subclass != subclass); + +	return class; +} + +/* + * Look up a dependency chain. If the key is not present yet then + * add it and return 0 - in this case the new dependency chain is + * validated. If the key is already hashed, return 1. 
+ */ +static inline int lookup_chain_cache(u64 chain_key) +{ +	struct list_head *hash_head = chainhashentry(chain_key); +	struct lock_chain *chain; + +	DEBUG_LOCKS_WARN_ON(!irqs_disabled()); +	/* +	 * We can walk it lock-free, because entries only get added +	 * to the hash: +	 */ +	list_for_each_entry(chain, hash_head, entry) { +		if (chain->chain_key == chain_key) { +cache_hit: +			debug_atomic_inc(&chain_lookup_hits); +			/* +			 * In the debugging case, force redundant checking +			 * by returning 1: +			 */ +#ifdef CONFIG_DEBUG_LOCKDEP +			__raw_spin_lock(&hash_lock); +			return 1; +#endif +			return 0; +		} +	} +	/* +	 * Allocate a new chain entry from the static array, and add +	 * it to the hash: +	 */ +	__raw_spin_lock(&hash_lock); +	/* +	 * We have to walk the chain again locked - to avoid duplicates: +	 */ +	list_for_each_entry(chain, hash_head, entry) { +		if (chain->chain_key == chain_key) { +			__raw_spin_unlock(&hash_lock); +			goto cache_hit; +		} +	} +	if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { +		__raw_spin_unlock(&hash_lock); +		debug_locks_off(); +		printk("BUG: MAX_LOCKDEP_CHAINS too low!\n"); +		printk("turning off the locking correctness validator.\n"); +		return 0; +	} +	chain = lock_chains + nr_lock_chains++; +	chain->chain_key = chain_key; +	list_add_tail_rcu(&chain->entry, hash_head); +	debug_atomic_inc(&chain_lookup_misses); +#ifdef CONFIG_TRACE_IRQFLAGS +	if (current->hardirq_context) +		nr_hardirq_chains++; +	else { +		if (current->softirq_context) +			nr_softirq_chains++; +		else +			nr_process_chains++; +	} +#else +	nr_process_chains++; +#endif + +	return 1; +} + +/* + * We are building curr_chain_key incrementally, so double-check + * it from scratch, to make sure that it's done correctly: + */ +static void check_chain_key(struct task_struct *curr) +{ +#ifdef CONFIG_DEBUG_LOCKDEP +	struct held_lock *hlock, *prev_hlock = NULL; +	unsigned int i, id; +	u64 chain_key = 0; + +	for (i = 0; i < curr->lockdep_depth; i++) { +		hlock = curr->held_locks + i; +		if (chain_key != hlock->prev_chain_key) { +			debug_locks_off(); +			printk("hm#1, depth: %u [%u], %016Lx != %016Lx\n", +				curr->lockdep_depth, i, +				(unsigned long long)chain_key, +				(unsigned long long)hlock->prev_chain_key); +			WARN_ON(1); +			return; +		} +		id = hlock->class - lock_classes; +		DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS); +		if (prev_hlock && (prev_hlock->irq_context != +							hlock->irq_context)) +			chain_key = 0; +		chain_key = iterate_chain_key(chain_key, id); +		prev_hlock = hlock; +	} +	if (chain_key != curr->curr_chain_key) { +		debug_locks_off(); +		printk("hm#2, depth: %u [%u], %016Lx != %016Lx\n", +			curr->lockdep_depth, i, +			(unsigned long long)chain_key, +			(unsigned long long)curr->curr_chain_key); +		WARN_ON(1); +	} +#endif +} + +#ifdef CONFIG_TRACE_IRQFLAGS + +/* + * print irq inversion bug: + */ +static int +print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other, +			struct held_lock *this, int forwards, +			const char *irqclass) +{ +	__raw_spin_unlock(&hash_lock); +	debug_locks_off(); +	if (debug_locks_silent) +		return 0; + +	printk("\n=========================================================\n"); +	printk(  "[ INFO: possible irq lock inversion dependency detected ]\n"); +	printk(  "---------------------------------------------------------\n"); +	printk("%s/%d just changed the state of lock:\n", +		curr->comm, curr->pid); +	print_lock(this); +	if (forwards) +		printk("but this lock took another, %s-irq-unsafe lock in the past:\n", 
irqclass); +	else +		printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass); +	print_lock_name(other); +	printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); + +	printk("\nother info that might help us debug this:\n"); +	lockdep_print_held_locks(curr); + +	printk("\nthe first lock's dependencies:\n"); +	print_lock_dependencies(this->class, 0); + +	printk("\nthe second lock's dependencies:\n"); +	print_lock_dependencies(other, 0); + +	printk("\nstack backtrace:\n"); +	dump_stack(); + +	return 0; +} + +/* + * Prove that in the forwards-direction subgraph starting at <this> + * there is no lock matching <mask>: + */ +static int +check_usage_forwards(struct task_struct *curr, struct held_lock *this, +		     enum lock_usage_bit bit, const char *irqclass) +{ +	int ret; + +	find_usage_bit = bit; +	/* fills in <forwards_match> */ +	ret = find_usage_forwards(this->class, 0); +	if (!ret || ret == 1) +		return ret; + +	return print_irq_inversion_bug(curr, forwards_match, this, 1, irqclass); +} + +/* + * Prove that in the backwards-direction subgraph starting at <this> + * there is no lock matching <mask>: + */ +static int +check_usage_backwards(struct task_struct *curr, struct held_lock *this, +		      enum lock_usage_bit bit, const char *irqclass) +{ +	int ret; + +	find_usage_bit = bit; +	/* fills in <backwards_match> */ +	ret = find_usage_backwards(this->class, 0); +	if (!ret || ret == 1) +		return ret; + +	return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass); +} + +static inline void print_irqtrace_events(struct task_struct *curr) +{ +	printk("irq event stamp: %u\n", curr->irq_events); +	printk("hardirqs last  enabled at (%u): ", curr->hardirq_enable_event); +	print_ip_sym(curr->hardirq_enable_ip); +	printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event); +	print_ip_sym(curr->hardirq_disable_ip); +	printk("softirqs last  enabled at (%u): ", curr->softirq_enable_event); +	print_ip_sym(curr->softirq_enable_ip); +	printk("softirqs last disabled at (%u): ", curr->softirq_disable_event); +	print_ip_sym(curr->softirq_disable_ip); +} + +#else +static inline void print_irqtrace_events(struct task_struct *curr) +{ +} +#endif + +static int +print_usage_bug(struct task_struct *curr, struct held_lock *this, +		enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) +{ +	__raw_spin_unlock(&hash_lock); +	debug_locks_off(); +	if (debug_locks_silent) +		return 0; + +	printk("\n=================================\n"); +	printk(  "[ INFO: inconsistent lock state ]\n"); +	printk(  "---------------------------------\n"); + +	printk("inconsistent {%s} -> {%s} usage.\n", +		usage_str[prev_bit], usage_str[new_bit]); + +	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", +		curr->comm, curr->pid, +		trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, +		trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, +		trace_hardirqs_enabled(curr), +		trace_softirqs_enabled(curr)); +	print_lock(this); + +	printk("{%s} state was registered at:\n", usage_str[prev_bit]); +	print_stack_trace(this->class->usage_traces + prev_bit, 1); + +	print_irqtrace_events(curr); +	printk("\nother info that might help us debug this:\n"); +	lockdep_print_held_locks(curr); + +	printk("\nstack backtrace:\n"); +	dump_stack(); + +	return 0; +} + +/* + * Print out an error if an invalid bit is set: + */ +static inline int +valid_state(struct task_struct *curr, struct held_lock *this, +	    enum lock_usage_bit new_bit, enum 
lock_usage_bit bad_bit) +{ +	if (unlikely(this->class->usage_mask & (1 << bad_bit))) +		return print_usage_bug(curr, this, bad_bit, new_bit); +	return 1; +} + +#define STRICT_READ_CHECKS	1 + +/* + * Mark a lock with a usage bit, and validate the state transition: + */ +static int mark_lock(struct task_struct *curr, struct held_lock *this, +		     enum lock_usage_bit new_bit, unsigned long ip) +{ +	unsigned int new_mask = 1 << new_bit, ret = 1; + +	/* +	 * If already set then do not dirty the cacheline, +	 * nor do any checks: +	 */ +	if (likely(this->class->usage_mask & new_mask)) +		return 1; + +	__raw_spin_lock(&hash_lock); +	/* +	 * Make sure we didnt race: +	 */ +	if (unlikely(this->class->usage_mask & new_mask)) { +		__raw_spin_unlock(&hash_lock); +		return 1; +	} + +	this->class->usage_mask |= new_mask; + +#ifdef CONFIG_TRACE_IRQFLAGS +	if (new_bit == LOCK_ENABLED_HARDIRQS || +			new_bit == LOCK_ENABLED_HARDIRQS_READ) +		ip = curr->hardirq_enable_ip; +	else if (new_bit == LOCK_ENABLED_SOFTIRQS || +			new_bit == LOCK_ENABLED_SOFTIRQS_READ) +		ip = curr->softirq_enable_ip; +#endif +	if (!save_trace(this->class->usage_traces + new_bit)) +		return 0; + +	switch (new_bit) { +#ifdef CONFIG_TRACE_IRQFLAGS +	case LOCK_USED_IN_HARDIRQ: +		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) +			return 0; +		if (!valid_state(curr, this, new_bit, +				 LOCK_ENABLED_HARDIRQS_READ)) +			return 0; +		/* +		 * just marked it hardirq-safe, check that this lock +		 * took no hardirq-unsafe lock in the past: +		 */ +		if (!check_usage_forwards(curr, this, +					  LOCK_ENABLED_HARDIRQS, "hard")) +			return 0; +#if STRICT_READ_CHECKS +		/* +		 * just marked it hardirq-safe, check that this lock +		 * took no hardirq-unsafe-read lock in the past: +		 */ +		if (!check_usage_forwards(curr, this, +				LOCK_ENABLED_HARDIRQS_READ, "hard-read")) +			return 0; +#endif +		if (hardirq_verbose(this->class)) +			ret = 2; +		break; +	case LOCK_USED_IN_SOFTIRQ: +		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS)) +			return 0; +		if (!valid_state(curr, this, new_bit, +				 LOCK_ENABLED_SOFTIRQS_READ)) +			return 0; +		/* +		 * just marked it softirq-safe, check that this lock +		 * took no softirq-unsafe lock in the past: +		 */ +		if (!check_usage_forwards(curr, this, +					  LOCK_ENABLED_SOFTIRQS, "soft")) +			return 0; +#if STRICT_READ_CHECKS +		/* +		 * just marked it softirq-safe, check that this lock +		 * took no softirq-unsafe-read lock in the past: +		 */ +		if (!check_usage_forwards(curr, this, +				LOCK_ENABLED_SOFTIRQS_READ, "soft-read")) +			return 0; +#endif +		if (softirq_verbose(this->class)) +			ret = 2; +		break; +	case LOCK_USED_IN_HARDIRQ_READ: +		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) +			return 0; +		/* +		 * just marked it hardirq-read-safe, check that this lock +		 * took no hardirq-unsafe lock in the past: +		 */ +		if (!check_usage_forwards(curr, this, +					  LOCK_ENABLED_HARDIRQS, "hard")) +			return 0; +		if (hardirq_verbose(this->class)) +			ret = 2; +		break; +	case LOCK_USED_IN_SOFTIRQ_READ: +		if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS)) +			return 0; +		/* +		 * just marked it softirq-read-safe, check that this lock +		 * took no softirq-unsafe lock in the past: +		 */ +		if (!check_usage_forwards(curr, this, +					  LOCK_ENABLED_SOFTIRQS, "soft")) +			return 0; +		if (softirq_verbose(this->class)) +			ret = 2; +		break; +	case LOCK_ENABLED_HARDIRQS: +		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ)) +			return 
0; +		if (!valid_state(curr, this, new_bit, +				 LOCK_USED_IN_HARDIRQ_READ)) +			return 0; +		/* +		 * just marked it hardirq-unsafe, check that no hardirq-safe +		 * lock in the system ever took it in the past: +		 */ +		if (!check_usage_backwards(curr, this, +					   LOCK_USED_IN_HARDIRQ, "hard")) +			return 0; +#if STRICT_READ_CHECKS +		/* +		 * just marked it hardirq-unsafe, check that no +		 * hardirq-safe-read lock in the system ever took +		 * it in the past: +		 */ +		if (!check_usage_backwards(curr, this, +				   LOCK_USED_IN_HARDIRQ_READ, "hard-read")) +			return 0; +#endif +		if (hardirq_verbose(this->class)) +			ret = 2; +		break; +	case LOCK_ENABLED_SOFTIRQS: +		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ)) +			return 0; +		if (!valid_state(curr, this, new_bit, +				 LOCK_USED_IN_SOFTIRQ_READ)) +			return 0; +		/* +		 * just marked it softirq-unsafe, check that no softirq-safe +		 * lock in the system ever took it in the past: +		 */ +		if (!check_usage_backwards(curr, this, +					   LOCK_USED_IN_SOFTIRQ, "soft")) +			return 0; +#if STRICT_READ_CHECKS +		/* +		 * just marked it softirq-unsafe, check that no +		 * softirq-safe-read lock in the system ever took +		 * it in the past: +		 */ +		if (!check_usage_backwards(curr, this, +				   LOCK_USED_IN_SOFTIRQ_READ, "soft-read")) +			return 0; +#endif +		if (softirq_verbose(this->class)) +			ret = 2; +		break; +	case LOCK_ENABLED_HARDIRQS_READ: +		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ)) +			return 0; +#if STRICT_READ_CHECKS +		/* +		 * just marked it hardirq-read-unsafe, check that no +		 * hardirq-safe lock in the system ever took it in the past: +		 */ +		if (!check_usage_backwards(curr, this, +					   LOCK_USED_IN_HARDIRQ, "hard")) +			return 0; +#endif +		if (hardirq_verbose(this->class)) +			ret = 2; +		break; +	case LOCK_ENABLED_SOFTIRQS_READ: +		if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ)) +			return 0; +#if STRICT_READ_CHECKS +		/* +		 * just marked it softirq-read-unsafe, check that no +		 * softirq-safe lock in the system ever took it in the past: +		 */ +		if (!check_usage_backwards(curr, this, +					   LOCK_USED_IN_SOFTIRQ, "soft")) +			return 0; +#endif +		if (softirq_verbose(this->class)) +			ret = 2; +		break; +#endif +	case LOCK_USED: +		/* +		 * Add it to the global list of classes: +		 */ +		list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); +		debug_atomic_dec(&nr_unused_locks); +		break; +	default: +		debug_locks_off(); +		WARN_ON(1); +		return 0; +	} + +	__raw_spin_unlock(&hash_lock); + +	/* +	 * We must printk outside of the hash_lock: +	 */ +	if (ret == 2) { +		printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); +		print_lock(this); +		print_irqtrace_events(curr); +		dump_stack(); +	} + +	return ret; +} + +#ifdef CONFIG_TRACE_IRQFLAGS +/* + * Mark all held locks with a usage bit: + */ +static int +mark_held_locks(struct task_struct *curr, int hardirq, unsigned long ip) +{ +	enum lock_usage_bit usage_bit; +	struct held_lock *hlock; +	int i; + +	for (i = 0; i < curr->lockdep_depth; i++) { +		hlock = curr->held_locks + i; + +		if (hardirq) { +			if (hlock->read) +				usage_bit = LOCK_ENABLED_HARDIRQS_READ; +			else +				usage_bit = LOCK_ENABLED_HARDIRQS; +		} else { +			if (hlock->read) +				usage_bit = LOCK_ENABLED_SOFTIRQS_READ; +			else +				usage_bit = LOCK_ENABLED_SOFTIRQS; +		} +		if (!mark_lock(curr, hlock, usage_bit, ip)) +			return 0; +	} + +	return 1; +} + +/* + * Debugging helper: via this flag we know that we are in + * 'early 
bootup code', and will warn about any invalid irqs-on event: + */ +static int early_boot_irqs_enabled; + +void early_boot_irqs_off(void) +{ +	early_boot_irqs_enabled = 0; +} + +void early_boot_irqs_on(void) +{ +	early_boot_irqs_enabled = 1; +} + +/* + * Hardirqs will be enabled: + */ +void trace_hardirqs_on(void) +{ +	struct task_struct *curr = current; +	unsigned long ip; + +	if (unlikely(!debug_locks || current->lockdep_recursion)) +		return; + +	if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled))) +		return; + +	if (unlikely(curr->hardirqs_enabled)) { +		debug_atomic_inc(&redundant_hardirqs_on); +		return; +	} +	/* we'll do an OFF -> ON transition: */ +	curr->hardirqs_enabled = 1; +	ip = (unsigned long) __builtin_return_address(0); + +	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) +		return; +	if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) +		return; +	/* +	 * We are going to turn hardirqs on, so set the +	 * usage bit for all held locks: +	 */ +	if (!mark_held_locks(curr, 1, ip)) +		return; +	/* +	 * If we have softirqs enabled, then set the usage +	 * bit for all held locks. (disabled hardirqs prevented +	 * this bit from being set before) +	 */ +	if (curr->softirqs_enabled) +		if (!mark_held_locks(curr, 0, ip)) +			return; + +	curr->hardirq_enable_ip = ip; +	curr->hardirq_enable_event = ++curr->irq_events; +	debug_atomic_inc(&hardirqs_on_events); +} + +EXPORT_SYMBOL(trace_hardirqs_on); + +/* + * Hardirqs were disabled: + */ +void trace_hardirqs_off(void) +{ +	struct task_struct *curr = current; + +	if (unlikely(!debug_locks || current->lockdep_recursion)) +		return; + +	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) +		return; + +	if (curr->hardirqs_enabled) { +		/* +		 * We have done an ON -> OFF transition: +		 */ +		curr->hardirqs_enabled = 0; +		curr->hardirq_disable_ip = _RET_IP_; +		curr->hardirq_disable_event = ++curr->irq_events; +		debug_atomic_inc(&hardirqs_off_events); +	} else +		debug_atomic_inc(&redundant_hardirqs_off); +} + +EXPORT_SYMBOL(trace_hardirqs_off); + +/* + * Softirqs will be enabled: + */ +void trace_softirqs_on(unsigned long ip) +{ +	struct task_struct *curr = current; + +	if (unlikely(!debug_locks)) +		return; + +	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) +		return; + +	if (curr->softirqs_enabled) { +		debug_atomic_inc(&redundant_softirqs_on); +		return; +	} + +	/* +	 * We'll do an OFF -> ON transition: +	 */ +	curr->softirqs_enabled = 1; +	curr->softirq_enable_ip = ip; +	curr->softirq_enable_event = ++curr->irq_events; +	debug_atomic_inc(&softirqs_on_events); +	/* +	 * We are going to turn softirqs on, so set the +	 * usage bit for all held locks, if hardirqs are +	 * enabled too: +	 */ +	if (curr->hardirqs_enabled) +		mark_held_locks(curr, 0, ip); +} + +/* + * Softirqs were disabled: + */ +void trace_softirqs_off(unsigned long ip) +{ +	struct task_struct *curr = current; + +	if (unlikely(!debug_locks)) +		return; + +	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) +		return; + +	if (curr->softirqs_enabled) { +		/* +		 * We have done an ON -> OFF transition: +		 */ +		curr->softirqs_enabled = 0; +		curr->softirq_disable_ip = ip; +		curr->softirq_disable_event = ++curr->irq_events; +		debug_atomic_inc(&softirqs_off_events); +		DEBUG_LOCKS_WARN_ON(!softirq_count()); +	} else +		debug_atomic_inc(&redundant_softirqs_off); +} + +#endif + +/* + * Initialize a lock instance's lock-class mapping info: + */ +void lockdep_init_map(struct lockdep_map *lock, const char *name, +		      struct lock_class_key *key) +{ +	if (unlikely(!debug_locks)) +		return; + +	if 
(DEBUG_LOCKS_WARN_ON(!key)) +		return; +	if (DEBUG_LOCKS_WARN_ON(!name)) +		return; +	/* +	 * Sanity check, the lock-class key must be persistent: +	 */ +	if (!static_obj(key)) { +		printk("BUG: key %p not in .data!\n", key); +		DEBUG_LOCKS_WARN_ON(1); +		return; +	} +	lock->name = name; +	lock->key = key; +	memset(lock->class, 0, sizeof(lock->class[0])*MAX_LOCKDEP_SUBCLASSES); +} + +EXPORT_SYMBOL_GPL(lockdep_init_map); + +/* + * This gets called for every mutex_lock*()/spin_lock*() operation. + * We maintain the dependency maps and validate the locking attempt: + */ +static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, +			  int trylock, int read, int check, int hardirqs_off, +			  unsigned long ip) +{ +	struct task_struct *curr = current; +	struct held_lock *hlock; +	struct lock_class *class; +	unsigned int depth, id; +	int chain_head = 0; +	u64 chain_key; + +	if (unlikely(!debug_locks)) +		return 0; + +	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) +		return 0; + +	if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { +		debug_locks_off(); +		printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n"); +		printk("turning off the locking correctness validator.\n"); +		return 0; +	} + +	class = lock->class[subclass]; +	/* not cached yet? */ +	if (unlikely(!class)) { +		class = register_lock_class(lock, subclass); +		if (!class) +			return 0; +	} +	debug_atomic_inc((atomic_t *)&class->ops); +	if (very_verbose(class)) { +		printk("\nacquire class [%p] %s", class->key, class->name); +		if (class->name_version > 1) +			printk("#%d", class->name_version); +		printk("\n"); +		dump_stack(); +	} + +	/* +	 * Add the lock to the list of currently held locks. +	 * (we dont increase the depth just yet, up until the +	 * dependency checks are done) +	 */ +	depth = curr->lockdep_depth; +	if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) +		return 0; + +	hlock = curr->held_locks + depth; + +	hlock->class = class; +	hlock->acquire_ip = ip; +	hlock->instance = lock; +	hlock->trylock = trylock; +	hlock->read = read; +	hlock->check = check; +	hlock->hardirqs_off = hardirqs_off; + +	if (check != 2) +		goto out_calc_hash; +#ifdef CONFIG_TRACE_IRQFLAGS +	/* +	 * If non-trylock use in a hardirq or softirq context, then +	 * mark the lock as used in these contexts: +	 */ +	if (!trylock) { +		if (read) { +			if (curr->hardirq_context) +				if (!mark_lock(curr, hlock, +						LOCK_USED_IN_HARDIRQ_READ, ip)) +					return 0; +			if (curr->softirq_context) +				if (!mark_lock(curr, hlock, +						LOCK_USED_IN_SOFTIRQ_READ, ip)) +					return 0; +		} else { +			if (curr->hardirq_context) +				if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ, ip)) +					return 0; +			if (curr->softirq_context) +				if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ, ip)) +					return 0; +		} +	} +	if (!hardirqs_off) { +		if (read) { +			if (!mark_lock(curr, hlock, +					LOCK_ENABLED_HARDIRQS_READ, ip)) +				return 0; +			if (curr->softirqs_enabled) +				if (!mark_lock(curr, hlock, +						LOCK_ENABLED_SOFTIRQS_READ, ip)) +					return 0; +		} else { +			if (!mark_lock(curr, hlock, +					LOCK_ENABLED_HARDIRQS, ip)) +				return 0; +			if (curr->softirqs_enabled) +				if (!mark_lock(curr, hlock, +						LOCK_ENABLED_SOFTIRQS, ip)) +					return 0; +		} +	} +#endif +	/* mark it as used: */ +	if (!mark_lock(curr, hlock, LOCK_USED, ip)) +		return 0; +out_calc_hash: +	/* +	 * Calculate the chain hash: it's the combined has of all the +	 * lock keys along the dependency chain. 
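
[Editorial aside] The chain hash described in the comment above is, conceptually, an order-sensitive fold of the class IDs sitting on the held-lock stack: the same set of locks taken in a different order yields a different key, and therefore a different cached chain. Below is a minimal userspace sketch of that idea; the mixing function is invented purely for illustration and is not the kernel's iterate_chain_key().

/* Simplified, order-sensitive "chain key" fold -- illustration only. */
#include <stdint.h>
#include <stdio.h>

static uint64_t fold_chain_key(uint64_t key, unsigned int class_id)
{
	key ^= class_id;
	key *= 0x9e3779b97f4a7c15ULL;	/* 64-bit golden-ratio multiplier */
	return key ^ (key >> 32);
}

int main(void)
{
	/* class 3 taken before class 7 ... */
	uint64_t ab = fold_chain_key(fold_chain_key(0, 3), 7);
	/* ... hashes differently from class 7 taken before class 3 */
	uint64_t ba = fold_chain_key(fold_chain_key(0, 7), 3);

	printf("3->7: %016llx  7->3: %016llx\n",
	       (unsigned long long)ab, (unsigned long long)ba);
	return 0;
}

Because each ordering maps to its own 64-bit value, lookup_chain_cache() further down can decide with one hash lookup whether this exact locking sequence has already been validated.
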
We save the hash value +	 * at every step so that we can get the current hash easily +	 * after unlock. The chain hash is then used to cache dependency +	 * results. +	 * +	 * The 'key ID' is what is the most compact key value to drive +	 * the hash, not class->key. +	 */ +	id = class - lock_classes; +	if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) +		return 0; + +	chain_key = curr->curr_chain_key; +	if (!depth) { +		if (DEBUG_LOCKS_WARN_ON(chain_key != 0)) +			return 0; +		chain_head = 1; +	} + +	hlock->prev_chain_key = chain_key; + +#ifdef CONFIG_TRACE_IRQFLAGS +	/* +	 * Keep track of points where we cross into an interrupt context: +	 */ +	hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + +				curr->softirq_context; +	if (depth) { +		struct held_lock *prev_hlock; + +		prev_hlock = curr->held_locks + depth-1; +		/* +		 * If we cross into another context, reset the +		 * hash key (this also prevents the checking and the +		 * adding of the dependency to 'prev'): +		 */ +		if (prev_hlock->irq_context != hlock->irq_context) { +			chain_key = 0; +			chain_head = 1; +		} +	} +#endif +	chain_key = iterate_chain_key(chain_key, id); +	curr->curr_chain_key = chain_key; + +	/* +	 * Trylock needs to maintain the stack of held locks, but it +	 * does not add new dependencies, because trylock can be done +	 * in any order. +	 * +	 * We look up the chain_key and do the O(N^2) check and update of +	 * the dependencies only if this is a new dependency chain. +	 * (If lookup_chain_cache() returns with 1 it acquires +	 * hash_lock for us) +	 */ +	if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) { +		/* +		 * Check whether last held lock: +		 * +		 * - is irq-safe, if this lock is irq-unsafe +		 * - is softirq-safe, if this lock is hardirq-unsafe +		 * +		 * And check whether the new lock's dependency graph +		 * could lead back to the previous lock. +		 * +		 * any of these scenarios could lead to a deadlock. If +		 * All validations +		 */ +		int ret = check_deadlock(curr, hlock, lock, read); + +		if (!ret) +			return 0; +		/* +		 * Mark recursive read, as we jump over it when +		 * building dependencies (just like we jump over +		 * trylock entries): +		 */ +		if (ret == 2) +			hlock->read = 2; +		/* +		 * Add dependency only if this lock is not the head +		 * of the chain, and if it's not a secondary read-lock: +		 */ +		if (!chain_head && ret != 2) +			if (!check_prevs_add(curr, hlock)) +				return 0; +		__raw_spin_unlock(&hash_lock); +	} +	curr->lockdep_depth++; +	check_chain_key(curr); +	if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { +		debug_locks_off(); +		printk("BUG: MAX_LOCK_DEPTH too low!\n"); +		printk("turning off the locking correctness validator.\n"); +		return 0; +	} +	if (unlikely(curr->lockdep_depth > max_lockdep_depth)) +		max_lockdep_depth = curr->lockdep_depth; + +	return 1; +} + +static int +print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, +			   unsigned long ip) +{ +	if (!debug_locks_off()) +		return 0; +	if (debug_locks_silent) +		return 0; + +	printk("\n=====================================\n"); +	printk(  "[ BUG: bad unlock balance detected! 
]\n"); +	printk(  "-------------------------------------\n"); +	printk("%s/%d is trying to release lock (", +		curr->comm, curr->pid); +	print_lockdep_cache(lock); +	printk(") at:\n"); +	print_ip_sym(ip); +	printk("but there are no more locks to release!\n"); +	printk("\nother info that might help us debug this:\n"); +	lockdep_print_held_locks(curr); + +	printk("\nstack backtrace:\n"); +	dump_stack(); + +	return 0; +} + +/* + * Common debugging checks for both nested and non-nested unlock: + */ +static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, +			unsigned long ip) +{ +	if (unlikely(!debug_locks)) +		return 0; +	if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) +		return 0; + +	if (curr->lockdep_depth <= 0) +		return print_unlock_inbalance_bug(curr, lock, ip); + +	return 1; +} + +/* + * Remove the lock to the list of currently held locks in a + * potentially non-nested (out of order) manner. This is a + * relatively rare operation, as all the unlock APIs default + * to nested mode (which uses lock_release()): + */ +static int +lock_release_non_nested(struct task_struct *curr, +			struct lockdep_map *lock, unsigned long ip) +{ +	struct held_lock *hlock, *prev_hlock; +	unsigned int depth; +	int i; + +	/* +	 * Check whether the lock exists in the current stack +	 * of held locks: +	 */ +	depth = curr->lockdep_depth; +	if (DEBUG_LOCKS_WARN_ON(!depth)) +		return 0; + +	prev_hlock = NULL; +	for (i = depth-1; i >= 0; i--) { +		hlock = curr->held_locks + i; +		/* +		 * We must not cross into another context: +		 */ +		if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) +			break; +		if (hlock->instance == lock) +			goto found_it; +		prev_hlock = hlock; +	} +	return print_unlock_inbalance_bug(curr, lock, ip); + +found_it: +	/* +	 * We have the right lock to unlock, 'hlock' points to it. +	 * Now we remove it from the stack, and add back the other +	 * entries (if any), recalculating the hash along the way: +	 */ +	curr->lockdep_depth = i; +	curr->curr_chain_key = hlock->prev_chain_key; + +	for (i++; i < depth; i++) { +		hlock = curr->held_locks + i; +		if (!__lock_acquire(hlock->instance, +			hlock->class->subclass, hlock->trylock, +				hlock->read, hlock->check, hlock->hardirqs_off, +				hlock->acquire_ip)) +			return 0; +	} + +	if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1)) +		return 0; +	return 1; +} + +/* + * Remove the lock to the list of currently held locks - this gets + * called on mutex_unlock()/spin_unlock*() (or on a failed + * mutex_lock_interruptible()). This is done for unlocks that nest + * perfectly. (i.e. the current top of the lock-stack is unlocked) + */ +static int lock_release_nested(struct task_struct *curr, +			       struct lockdep_map *lock, unsigned long ip) +{ +	struct held_lock *hlock; +	unsigned int depth; + +	/* +	 * Pop off the top of the lock stack: +	 */ +	depth = curr->lockdep_depth - 1; +	hlock = curr->held_locks + depth; + +	/* +	 * Is the unlock non-nested: +	 */ +	if (hlock->instance != lock) +		return lock_release_non_nested(curr, lock, ip); +	curr->lockdep_depth--; + +	if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0))) +		return 0; + +	curr->curr_chain_key = hlock->prev_chain_key; + +#ifdef CONFIG_DEBUG_LOCKDEP +	hlock->prev_chain_key = 0; +	hlock->class = NULL; +	hlock->acquire_ip = 0; +	hlock->irq_context = 0; +#endif +	return 1; +} + +/* + * Remove the lock to the list of currently held locks - this gets + * called on mutex_unlock()/spin_unlock*() (or on a failed + * mutex_lock_interruptible()). 
This is done for unlocks that nest + * perfectly. (i.e. the current top of the lock-stack is unlocked) + */ +static void +__lock_release(struct lockdep_map *lock, int nested, unsigned long ip) +{ +	struct task_struct *curr = current; + +	if (!check_unlock(curr, lock, ip)) +		return; + +	if (nested) { +		if (!lock_release_nested(curr, lock, ip)) +			return; +	} else { +		if (!lock_release_non_nested(curr, lock, ip)) +			return; +	} + +	check_chain_key(curr); +} + +/* + * Check whether we follow the irq-flags state precisely: + */ +static void check_flags(unsigned long flags) +{ +#if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS) +	if (!debug_locks) +		return; + +	if (irqs_disabled_flags(flags)) +		DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled); +	else +		DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled); + +	/* +	 * We dont accurately track softirq state in e.g. +	 * hardirq contexts (such as on 4KSTACKS), so only +	 * check if not in hardirq contexts: +	 */ +	if (!hardirq_count()) { +		if (softirq_count()) +			DEBUG_LOCKS_WARN_ON(current->softirqs_enabled); +		else +			DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); +	} + +	if (!debug_locks) +		print_irqtrace_events(current); +#endif +} + +/* + * We are not always called with irqs disabled - do that here, + * and also avoid lockdep recursion: + */ +void lock_acquire(struct lockdep_map *lock, unsigned int subclass, +		  int trylock, int read, int check, unsigned long ip) +{ +	unsigned long flags; + +	if (unlikely(current->lockdep_recursion)) +		return; + +	raw_local_irq_save(flags); +	check_flags(flags); + +	current->lockdep_recursion = 1; +	__lock_acquire(lock, subclass, trylock, read, check, +		       irqs_disabled_flags(flags), ip); +	current->lockdep_recursion = 0; +	raw_local_irq_restore(flags); +} + +EXPORT_SYMBOL_GPL(lock_acquire); + +void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) +{ +	unsigned long flags; + +	if (unlikely(current->lockdep_recursion)) +		return; + +	raw_local_irq_save(flags); +	check_flags(flags); +	current->lockdep_recursion = 1; +	__lock_release(lock, nested, ip); +	current->lockdep_recursion = 0; +	raw_local_irq_restore(flags); +} + +EXPORT_SYMBOL_GPL(lock_release); + +/* + * Used by the testsuite, sanitize the validator state + * after a simulated failure: + */ + +void lockdep_reset(void) +{ +	unsigned long flags; + +	raw_local_irq_save(flags); +	current->curr_chain_key = 0; +	current->lockdep_depth = 0; +	current->lockdep_recursion = 0; +	memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock)); +	nr_hardirq_chains = 0; +	nr_softirq_chains = 0; +	nr_process_chains = 0; +	debug_locks = 1; +	raw_local_irq_restore(flags); +} + +static void zap_class(struct lock_class *class) +{ +	int i; + +	/* +	 * Remove all dependencies this lock is +	 * involved in: +	 */ +	for (i = 0; i < nr_list_entries; i++) { +		if (list_entries[i].class == class) +			list_del_rcu(&list_entries[i].entry); +	} +	/* +	 * Unhash the class and remove it from the all_lock_classes list: +	 */ +	list_del_rcu(&class->hash_entry); +	list_del_rcu(&class->lock_entry); + +} + +static inline int within(void *addr, void *start, unsigned long size) +{ +	return addr >= start && addr < start + size; +} + +void lockdep_free_key_range(void *start, unsigned long size) +{ +	struct lock_class *class, *next; +	struct list_head *head; +	unsigned long flags; +	int i; + +	raw_local_irq_save(flags); +	__raw_spin_lock(&hash_lock); + +	/* +	 * Unhash all classes that were created by this module: +	 */ +	for (i = 
0; i < CLASSHASH_SIZE; i++) { +		head = classhash_table + i; +		if (list_empty(head)) +			continue; +		list_for_each_entry_safe(class, next, head, hash_entry) +			if (within(class->key, start, size)) +				zap_class(class); +	} + +	__raw_spin_unlock(&hash_lock); +	raw_local_irq_restore(flags); +} + +void lockdep_reset_lock(struct lockdep_map *lock) +{ +	struct lock_class *class, *next, *entry; +	struct list_head *head; +	unsigned long flags; +	int i, j; + +	raw_local_irq_save(flags); +	__raw_spin_lock(&hash_lock); + +	/* +	 * Remove all classes this lock has: +	 */ +	for (i = 0; i < CLASSHASH_SIZE; i++) { +		head = classhash_table + i; +		if (list_empty(head)) +			continue; +		list_for_each_entry_safe(class, next, head, hash_entry) { +			for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { +				entry = lock->class[j]; +				if (class == entry) { +					zap_class(class); +					lock->class[j] = NULL; +					break; +				} +			} +		} +	} + +	/* +	 * Debug check: in the end all mapped classes should +	 * be gone. +	 */ +	for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { +		entry = lock->class[j]; +		if (!entry) +			continue; +		__raw_spin_unlock(&hash_lock); +		DEBUG_LOCKS_WARN_ON(1); +		raw_local_irq_restore(flags); +		return; +	} + +	__raw_spin_unlock(&hash_lock); +	raw_local_irq_restore(flags); +} + +void __init lockdep_init(void) +{ +	int i; + +	/* +	 * Some architectures have their own start_kernel() +	 * code which calls lockdep_init(), while we also +	 * call lockdep_init() from the start_kernel() itself, +	 * and we want to initialize the hashes only once: +	 */ +	if (lockdep_initialized) +		return; + +	for (i = 0; i < CLASSHASH_SIZE; i++) +		INIT_LIST_HEAD(classhash_table + i); + +	for (i = 0; i < CHAINHASH_SIZE; i++) +		INIT_LIST_HEAD(chainhash_table + i); + +	lockdep_initialized = 1; +} + +void __init lockdep_info(void) +{ +	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); + +	printk("... MAX_LOCKDEP_SUBCLASSES:    %lu\n", MAX_LOCKDEP_SUBCLASSES); +	printk("... MAX_LOCK_DEPTH:          %lu\n", MAX_LOCK_DEPTH); +	printk("... MAX_LOCKDEP_KEYS:        %lu\n", MAX_LOCKDEP_KEYS); +	printk("... CLASSHASH_SIZE:           %lu\n", CLASSHASH_SIZE); +	printk("... MAX_LOCKDEP_ENTRIES:     %lu\n", MAX_LOCKDEP_ENTRIES); +	printk("... MAX_LOCKDEP_CHAINS:      %lu\n", MAX_LOCKDEP_CHAINS); +	printk("... CHAINHASH_SIZE:          %lu\n", CHAINHASH_SIZE); + +	printk(" memory used by lock dependency info: %lu kB\n", +		(sizeof(struct lock_class) * MAX_LOCKDEP_KEYS + +		sizeof(struct list_head) * CLASSHASH_SIZE + +		sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES + +		sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS + +		sizeof(struct list_head) * CHAINHASH_SIZE) / 1024); + +	printk(" per task-struct memory footprint: %lu bytes\n", +		sizeof(struct held_lock) * MAX_LOCK_DEPTH); + +#ifdef CONFIG_DEBUG_LOCKDEP +	if (lockdep_init_error) +		printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n"); +#endif +} + +static inline int in_range(const void *start, const void *addr, const void *end) +{ +	return addr >= start && addr <= end; +} + +static void +print_freed_lock_bug(struct task_struct *curr, const void *mem_from, +		     const void *mem_to) +{ +	if (!debug_locks_off()) +		return; +	if (debug_locks_silent) +		return; + +	printk("\n=========================\n"); +	printk(  "[ BUG: held lock freed! 
]\n"); +	printk(  "-------------------------\n"); +	printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", +		curr->comm, curr->pid, mem_from, mem_to-1); +	lockdep_print_held_locks(curr); + +	printk("\nstack backtrace:\n"); +	dump_stack(); +} + +/* + * Called when kernel memory is freed (or unmapped), or if a lock + * is destroyed or reinitialized - this code checks whether there is + * any held lock in the memory range of <from> to <to>: + */ +void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) +{ +	const void *mem_to = mem_from + mem_len, *lock_from, *lock_to; +	struct task_struct *curr = current; +	struct held_lock *hlock; +	unsigned long flags; +	int i; + +	if (unlikely(!debug_locks)) +		return; + +	local_irq_save(flags); +	for (i = 0; i < curr->lockdep_depth; i++) { +		hlock = curr->held_locks + i; + +		lock_from = (void *)hlock->instance; +		lock_to = (void *)(hlock->instance + 1); + +		if (!in_range(mem_from, lock_from, mem_to) && +					!in_range(mem_from, lock_to, mem_to)) +			continue; + +		print_freed_lock_bug(curr, mem_from, mem_to); +		break; +	} +	local_irq_restore(flags); +} + +static void print_held_locks_bug(struct task_struct *curr) +{ +	if (!debug_locks_off()) +		return; +	if (debug_locks_silent) +		return; + +	printk("\n=====================================\n"); +	printk(  "[ BUG: lock held at task exit time! ]\n"); +	printk(  "-------------------------------------\n"); +	printk("%s/%d is exiting with locks still held!\n", +		curr->comm, curr->pid); +	lockdep_print_held_locks(curr); + +	printk("\nstack backtrace:\n"); +	dump_stack(); +} + +void debug_check_no_locks_held(struct task_struct *task) +{ +	if (unlikely(task->lockdep_depth > 0)) +		print_held_locks_bug(task); +} + +void debug_show_all_locks(void) +{ +	struct task_struct *g, *p; +	int count = 10; +	int unlock = 1; + +	printk("\nShowing all locks held in the system:\n"); + +	/* +	 * Here we try to get the tasklist_lock as hard as possible, +	 * if not successful after 2 seconds we ignore it (but keep +	 * trying). This is to enable a debug printout even if a +	 * tasklist_lock-holding task deadlocks or crashes. +	 */ +retry: +	if (!read_trylock(&tasklist_lock)) { +		if (count == 10) +			printk("hm, tasklist_lock locked, retrying... "); +		if (count) { +			count--; +			printk(" #%d", 10-count); +			mdelay(200); +			goto retry; +		} +		printk(" ignoring it.\n"); +		unlock = 0; +	} +	if (count != 10) +		printk(" locked it.\n"); + +	do_each_thread(g, p) { +		if (p->lockdep_depth) +			lockdep_print_held_locks(p); +		if (!unlock) +			if (read_trylock(&tasklist_lock)) +				unlock = 1; +	} while_each_thread(g, p); + +	printk("\n"); +	printk("=============================================\n\n"); + +	if (unlock) +		read_unlock(&tasklist_lock); +} + +EXPORT_SYMBOL_GPL(debug_show_all_locks); + +void debug_show_held_locks(struct task_struct *task) +{ +	lockdep_print_held_locks(task); +} + +EXPORT_SYMBOL_GPL(debug_show_held_locks); + diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h new file mode 100644 index 000000000000..0d355f24fe04 --- /dev/null +++ b/kernel/lockdep_internals.h @@ -0,0 +1,78 @@ +/* + * kernel/lockdep_internals.h + * + * Runtime locking correctness validator + * + * lockdep subsystem internal functions and variables. + */ + +/* + * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies + * we track. 
+ * + * We use the per-lock dependency maps in two ways: we grow it by adding + * every to-be-taken lock to all currently held lock's own dependency + * table (if it's not there yet), and we check it for lock order + * conflicts and deadlocks. + */ +#define MAX_LOCKDEP_ENTRIES	8192UL + +#define MAX_LOCKDEP_KEYS_BITS	11 +#define MAX_LOCKDEP_KEYS	(1UL << MAX_LOCKDEP_KEYS_BITS) + +#define MAX_LOCKDEP_CHAINS_BITS	13 +#define MAX_LOCKDEP_CHAINS	(1UL << MAX_LOCKDEP_CHAINS_BITS) + +/* + * Stack-trace: tightly packed array of stack backtrace + * addresses. Protected by the hash_lock. + */ +#define MAX_STACK_TRACE_ENTRIES	131072UL + +extern struct list_head all_lock_classes; + +extern void +get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4); + +extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); + +extern unsigned long nr_lock_classes; +extern unsigned long nr_list_entries; +extern unsigned long nr_lock_chains; +extern unsigned long nr_stack_trace_entries; + +extern unsigned int nr_hardirq_chains; +extern unsigned int nr_softirq_chains; +extern unsigned int nr_process_chains; +extern unsigned int max_lockdep_depth; +extern unsigned int max_recursion_depth; + +#ifdef CONFIG_DEBUG_LOCKDEP +/* + * Various lockdep statistics: + */ +extern atomic_t chain_lookup_hits; +extern atomic_t chain_lookup_misses; +extern atomic_t hardirqs_on_events; +extern atomic_t hardirqs_off_events; +extern atomic_t redundant_hardirqs_on; +extern atomic_t redundant_hardirqs_off; +extern atomic_t softirqs_on_events; +extern atomic_t softirqs_off_events; +extern atomic_t redundant_softirqs_on; +extern atomic_t redundant_softirqs_off; +extern atomic_t nr_unused_locks; +extern atomic_t nr_cyclic_checks; +extern atomic_t nr_cyclic_check_recursions; +extern atomic_t nr_find_usage_forwards_checks; +extern atomic_t nr_find_usage_forwards_recursions; +extern atomic_t nr_find_usage_backwards_checks; +extern atomic_t nr_find_usage_backwards_recursions; +# define debug_atomic_inc(ptr)		atomic_inc(ptr) +# define debug_atomic_dec(ptr)		atomic_dec(ptr) +# define debug_atomic_read(ptr)		atomic_read(ptr) +#else +# define debug_atomic_inc(ptr)		do { } while (0) +# define debug_atomic_dec(ptr)		do { } while (0) +# define debug_atomic_read(ptr)		0 +#endif diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c new file mode 100644 index 000000000000..f6e72eaab3fa --- /dev/null +++ b/kernel/lockdep_proc.c @@ -0,0 +1,345 @@ +/* + * kernel/lockdep_proc.c + * + * Runtime locking correctness validator + * + * Started by Ingo Molnar: + * + *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + * + * Code for /proc/lockdep and /proc/lockdep_stats: + * + */ +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/kallsyms.h> +#include <linux/debug_locks.h> + +#include "lockdep_internals.h" + +static void *l_next(struct seq_file *m, void *v, loff_t *pos) +{ +	struct lock_class *class = v; + +	(*pos)++; + +	if (class->lock_entry.next != &all_lock_classes) +		class = list_entry(class->lock_entry.next, struct lock_class, +				  lock_entry); +	else +		class = NULL; +	m->private = class; + +	return class; +} + +static void *l_start(struct seq_file *m, loff_t *pos) +{ +	struct lock_class *class = m->private; + +	if (&class->lock_entry == all_lock_classes.next) +		seq_printf(m, "all lock classes:\n"); + +	return class; +} + +static void l_stop(struct seq_file *m, void *v) +{ +} + +static unsigned long 
count_forward_deps(struct lock_class *class) +{ +	struct lock_list *entry; +	unsigned long ret = 1; + +	/* +	 * Recurse this class's dependency list: +	 */ +	list_for_each_entry(entry, &class->locks_after, entry) +		ret += count_forward_deps(entry->class); + +	return ret; +} + +static unsigned long count_backward_deps(struct lock_class *class) +{ +	struct lock_list *entry; +	unsigned long ret = 1; + +	/* +	 * Recurse this class's dependency list: +	 */ +	list_for_each_entry(entry, &class->locks_before, entry) +		ret += count_backward_deps(entry->class); + +	return ret; +} + +static int l_show(struct seq_file *m, void *v) +{ +	unsigned long nr_forward_deps, nr_backward_deps; +	struct lock_class *class = m->private; +	char str[128], c1, c2, c3, c4; +	const char *name; + +	seq_printf(m, "%p", class->key); +#ifdef CONFIG_DEBUG_LOCKDEP +	seq_printf(m, " OPS:%8ld", class->ops); +#endif +	nr_forward_deps = count_forward_deps(class); +	seq_printf(m, " FD:%5ld", nr_forward_deps); + +	nr_backward_deps = count_backward_deps(class); +	seq_printf(m, " BD:%5ld", nr_backward_deps); + +	get_usage_chars(class, &c1, &c2, &c3, &c4); +	seq_printf(m, " %c%c%c%c", c1, c2, c3, c4); + +	name = class->name; +	if (!name) { +		name = __get_key_name(class->key, str); +		seq_printf(m, ": %s", name); +	} else{ +		seq_printf(m, ": %s", name); +		if (class->name_version > 1) +			seq_printf(m, "#%d", class->name_version); +		if (class->subclass) +			seq_printf(m, "/%d", class->subclass); +	} +	seq_puts(m, "\n"); + +	return 0; +} + +static struct seq_operations lockdep_ops = { +	.start	= l_start, +	.next	= l_next, +	.stop	= l_stop, +	.show	= l_show, +}; + +static int lockdep_open(struct inode *inode, struct file *file) +{ +	int res = seq_open(file, &lockdep_ops); +	if (!res) { +		struct seq_file *m = file->private_data; + +		if (!list_empty(&all_lock_classes)) +			m->private = list_entry(all_lock_classes.next, +					struct lock_class, lock_entry); +		else +			m->private = NULL; +	} +	return res; +} + +static struct file_operations proc_lockdep_operations = { +	.open		= lockdep_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; + +static void lockdep_stats_debug_show(struct seq_file *m) +{ +#ifdef CONFIG_DEBUG_LOCKDEP +	unsigned int hi1 = debug_atomic_read(&hardirqs_on_events), +		     hi2 = debug_atomic_read(&hardirqs_off_events), +		     hr1 = debug_atomic_read(&redundant_hardirqs_on), +		     hr2 = debug_atomic_read(&redundant_hardirqs_off), +		     si1 = debug_atomic_read(&softirqs_on_events), +		     si2 = debug_atomic_read(&softirqs_off_events), +		     sr1 = debug_atomic_read(&redundant_softirqs_on), +		     sr2 = debug_atomic_read(&redundant_softirqs_off); + +	seq_printf(m, " chain lookup misses:           %11u\n", +		debug_atomic_read(&chain_lookup_misses)); +	seq_printf(m, " chain lookup hits:             %11u\n", +		debug_atomic_read(&chain_lookup_hits)); +	seq_printf(m, " cyclic checks:                 %11u\n", +		debug_atomic_read(&nr_cyclic_checks)); +	seq_printf(m, " cyclic-check recursions:       %11u\n", +		debug_atomic_read(&nr_cyclic_check_recursions)); +	seq_printf(m, " find-mask forwards checks:     %11u\n", +		debug_atomic_read(&nr_find_usage_forwards_checks)); +	seq_printf(m, " find-mask forwards recursions: %11u\n", +		debug_atomic_read(&nr_find_usage_forwards_recursions)); +	seq_printf(m, " find-mask backwards checks:    %11u\n", +		debug_atomic_read(&nr_find_usage_backwards_checks)); +	seq_printf(m, " find-mask backwards recursions:%11u\n", +		
debug_atomic_read(&nr_find_usage_backwards_recursions)); + +	seq_printf(m, " hardirq on events:             %11u\n", hi1); +	seq_printf(m, " hardirq off events:            %11u\n", hi2); +	seq_printf(m, " redundant hardirq ons:         %11u\n", hr1); +	seq_printf(m, " redundant hardirq offs:        %11u\n", hr2); +	seq_printf(m, " softirq on events:             %11u\n", si1); +	seq_printf(m, " softirq off events:            %11u\n", si2); +	seq_printf(m, " redundant softirq ons:         %11u\n", sr1); +	seq_printf(m, " redundant softirq offs:        %11u\n", sr2); +#endif +} + +static int lockdep_stats_show(struct seq_file *m, void *v) +{ +	struct lock_class *class; +	unsigned long nr_unused = 0, nr_uncategorized = 0, +		      nr_irq_safe = 0, nr_irq_unsafe = 0, +		      nr_softirq_safe = 0, nr_softirq_unsafe = 0, +		      nr_hardirq_safe = 0, nr_hardirq_unsafe = 0, +		      nr_irq_read_safe = 0, nr_irq_read_unsafe = 0, +		      nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0, +		      nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0, +		      sum_forward_deps = 0, factor = 0; + +	list_for_each_entry(class, &all_lock_classes, lock_entry) { + +		if (class->usage_mask == 0) +			nr_unused++; +		if (class->usage_mask == LOCKF_USED) +			nr_uncategorized++; +		if (class->usage_mask & LOCKF_USED_IN_IRQ) +			nr_irq_safe++; +		if (class->usage_mask & LOCKF_ENABLED_IRQS) +			nr_irq_unsafe++; +		if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) +			nr_softirq_safe++; +		if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS) +			nr_softirq_unsafe++; +		if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) +			nr_hardirq_safe++; +		if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) +			nr_hardirq_unsafe++; +		if (class->usage_mask & LOCKF_USED_IN_IRQ_READ) +			nr_irq_read_safe++; +		if (class->usage_mask & LOCKF_ENABLED_IRQS_READ) +			nr_irq_read_unsafe++; +		if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) +			nr_softirq_read_safe++; +		if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) +			nr_softirq_read_unsafe++; +		if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) +			nr_hardirq_read_safe++; +		if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) +			nr_hardirq_read_unsafe++; + +		sum_forward_deps += count_forward_deps(class); +	} +#ifdef CONFIG_LOCKDEP_DEBUG +	DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused); +#endif +	seq_printf(m, " lock-classes:                  %11lu [max: %lu]\n", +			nr_lock_classes, MAX_LOCKDEP_KEYS); +	seq_printf(m, " direct dependencies:           %11lu [max: %lu]\n", +			nr_list_entries, MAX_LOCKDEP_ENTRIES); +	seq_printf(m, " indirect dependencies:         %11lu\n", +			sum_forward_deps); + +	/* +	 * Total number of dependencies: +	 * +	 * All irq-safe locks may nest inside irq-unsafe locks, +	 * plus all the other known dependencies: +	 */ +	seq_printf(m, " all direct dependencies:       %11lu\n", +			nr_irq_unsafe * nr_irq_safe + +			nr_hardirq_unsafe * nr_hardirq_safe + +			nr_list_entries); + +	/* +	 * Estimated factor between direct and indirect +	 * dependencies: +	 */ +	if (nr_list_entries) +		factor = sum_forward_deps / nr_list_entries; + +	seq_printf(m, " dependency chains:             %11lu [max: %lu]\n", +			nr_lock_chains, MAX_LOCKDEP_CHAINS); + +#ifdef CONFIG_TRACE_IRQFLAGS +	seq_printf(m, " in-hardirq chains:             %11u\n", +			nr_hardirq_chains); +	seq_printf(m, " in-softirq chains:             %11u\n", +			nr_softirq_chains); +#endif +	seq_printf(m, " in-process chains:             %11u\n", +			nr_process_chains); +	seq_printf(m, 
" stack-trace entries:           %11lu [max: %lu]\n", +			nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES); +	seq_printf(m, " combined max dependencies:     %11u\n", +			(nr_hardirq_chains + 1) * +			(nr_softirq_chains + 1) * +			(nr_process_chains + 1) +	); +	seq_printf(m, " hardirq-safe locks:            %11lu\n", +			nr_hardirq_safe); +	seq_printf(m, " hardirq-unsafe locks:          %11lu\n", +			nr_hardirq_unsafe); +	seq_printf(m, " softirq-safe locks:            %11lu\n", +			nr_softirq_safe); +	seq_printf(m, " softirq-unsafe locks:          %11lu\n", +			nr_softirq_unsafe); +	seq_printf(m, " irq-safe locks:                %11lu\n", +			nr_irq_safe); +	seq_printf(m, " irq-unsafe locks:              %11lu\n", +			nr_irq_unsafe); + +	seq_printf(m, " hardirq-read-safe locks:       %11lu\n", +			nr_hardirq_read_safe); +	seq_printf(m, " hardirq-read-unsafe locks:     %11lu\n", +			nr_hardirq_read_unsafe); +	seq_printf(m, " softirq-read-safe locks:       %11lu\n", +			nr_softirq_read_safe); +	seq_printf(m, " softirq-read-unsafe locks:     %11lu\n", +			nr_softirq_read_unsafe); +	seq_printf(m, " irq-read-safe locks:           %11lu\n", +			nr_irq_read_safe); +	seq_printf(m, " irq-read-unsafe locks:         %11lu\n", +			nr_irq_read_unsafe); + +	seq_printf(m, " uncategorized locks:           %11lu\n", +			nr_uncategorized); +	seq_printf(m, " unused locks:                  %11lu\n", +			nr_unused); +	seq_printf(m, " max locking depth:             %11u\n", +			max_lockdep_depth); +	seq_printf(m, " max recursion depth:           %11u\n", +			max_recursion_depth); +	lockdep_stats_debug_show(m); +	seq_printf(m, " debug_locks:                   %11u\n", +			debug_locks); + +	return 0; +} + +static int lockdep_stats_open(struct inode *inode, struct file *file) +{ +	return single_open(file, lockdep_stats_show, NULL); +} + +static struct file_operations proc_lockdep_stats_operations = { +	.open		= lockdep_stats_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; + +static int __init lockdep_proc_init(void) +{ +	struct proc_dir_entry *entry; + +	entry = create_proc_entry("lockdep", S_IRUSR, NULL); +	if (entry) +		entry->proc_fops = &proc_lockdep_operations; + +	entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL); +	if (entry) +		entry->proc_fops = &proc_lockdep_stats_operations; + +	return 0; +} + +__initcall(lockdep_proc_init); + diff --git a/kernel/module.c b/kernel/module.c index 281172f01e9a..35e1b1f859d7 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1121,6 +1121,9 @@ static void free_module(struct module *mod)  	if (mod->percpu)  		percpu_modfree(mod->percpu); +	/* Free lock-classes: */ +	lockdep_free_key_range(mod->module_core, mod->core_size); +  	/* Finally, free the core (containing the module structure) */  	module_free(mod, mod->module_core);  } @@ -2159,6 +2162,29 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)  	return e;  } +/* + * Is this a valid module address? + */ +int is_module_address(unsigned long addr) +{ +	unsigned long flags; +	struct module *mod; + +	spin_lock_irqsave(&modlist_lock, flags); + +	list_for_each_entry(mod, &modules, list) { +		if (within(addr, mod->module_core, mod->core_size)) { +			spin_unlock_irqrestore(&modlist_lock, flags); +			return 1; +		} +	} + +	spin_unlock_irqrestore(&modlist_lock, flags); + +	return 0; +} + +  /* Is this a valid kernel address?  We don't grab the lock: we are oopsing. 
*/  struct module *__module_text_address(unsigned long addr)  { diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index e38e4bac97ca..e3203c654dda 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -20,367 +20,19 @@  #include <linux/spinlock.h>  #include <linux/kallsyms.h>  #include <linux/interrupt.h> +#include <linux/debug_locks.h>  #include "mutex-debug.h"  /* - * We need a global lock when we walk through the multi-process - * lock tree. Only used in the deadlock-debugging case. - */ -DEFINE_SPINLOCK(debug_mutex_lock); - -/* - * All locks held by all tasks, in a single global list: - */ -LIST_HEAD(debug_mutex_held_locks); - -/* - * In the debug case we carry the caller's instruction pointer into - * other functions, but we dont want the function argument overhead - * in the nondebug case - hence these macros: - */ -#define __IP_DECL__		, unsigned long ip -#define __IP__			, ip -#define __RET_IP__		, (unsigned long)__builtin_return_address(0) - -/* - * "mutex debugging enabled" flag. We turn it off when we detect - * the first problem because we dont want to recurse back - * into the tracing code when doing error printk or - * executing a BUG(): - */ -int debug_mutex_on = 1; - -static void printk_task(struct task_struct *p) -{ -	if (p) -		printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio); -	else -		printk("<none>"); -} - -static void printk_ti(struct thread_info *ti) -{ -	if (ti) -		printk_task(ti->task); -	else -		printk("<none>"); -} - -static void printk_task_short(struct task_struct *p) -{ -	if (p) -		printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio); -	else -		printk("<none>"); -} - -static void printk_lock(struct mutex *lock, int print_owner) -{ -	printk(" [%p] {%s}\n", lock, lock->name); - -	if (print_owner && lock->owner) { -		printk(".. held by:  "); -		printk_ti(lock->owner); -		printk("\n"); -	} -	if (lock->owner) { -		printk("... acquired at:               "); -		print_symbol("%s\n", lock->acquire_ip); -	} -} - -/* - * printk locks held by a task: - */ -static void show_task_locks(struct task_struct *p) -{ -	switch (p->state) { -	case TASK_RUNNING:		printk("R"); break; -	case TASK_INTERRUPTIBLE:	printk("S"); break; -	case TASK_UNINTERRUPTIBLE:	printk("D"); break; -	case TASK_STOPPED:		printk("T"); break; -	case EXIT_ZOMBIE:		printk("Z"); break; -	case EXIT_DEAD:			printk("X"); break; -	default:			printk("?"); break; -	} -	printk_task(p); -	if (p->blocked_on) { -		struct mutex *lock = p->blocked_on->lock; - -		printk(" blocked on mutex:"); -		printk_lock(lock, 1); -	} else -		printk(" (not blocked on mutex)\n"); -} - -/* - * printk all locks held in the system (if filter == NULL), - * or all locks belonging to a single task (if filter != NULL): - */ -void show_held_locks(struct task_struct *filter) -{ -	struct list_head *curr, *cursor = NULL; -	struct mutex *lock; -	struct thread_info *t; -	unsigned long flags; -	int count = 0; - -	if (filter) { -		printk("------------------------------\n"); -		printk("| showing all locks held by: |  ("); -		printk_task_short(filter); -		printk("):\n"); -		printk("------------------------------\n"); -	} else { -		printk("---------------------------\n"); -		printk("| showing all locks held: |\n"); -		printk("---------------------------\n"); -	} - -	/* -	 * Play safe and acquire the global trace lock. 
We -	 * cannot printk with that lock held so we iterate -	 * very carefully: -	 */ -next: -	debug_spin_lock_save(&debug_mutex_lock, flags); -	list_for_each(curr, &debug_mutex_held_locks) { -		if (cursor && curr != cursor) -			continue; -		lock = list_entry(curr, struct mutex, held_list); -		t = lock->owner; -		if (filter && (t != filter->thread_info)) -			continue; -		count++; -		cursor = curr->next; -		debug_spin_unlock_restore(&debug_mutex_lock, flags); - -		printk("\n#%03d:            ", count); -		printk_lock(lock, filter ? 0 : 1); -		goto next; -	} -	debug_spin_unlock_restore(&debug_mutex_lock, flags); -	printk("\n"); -} - -void mutex_debug_show_all_locks(void) -{ -	struct task_struct *g, *p; -	int count = 10; -	int unlock = 1; - -	printk("\nShowing all blocking locks in the system:\n"); - -	/* -	 * Here we try to get the tasklist_lock as hard as possible, -	 * if not successful after 2 seconds we ignore it (but keep -	 * trying). This is to enable a debug printout even if a -	 * tasklist_lock-holding task deadlocks or crashes. -	 */ -retry: -	if (!read_trylock(&tasklist_lock)) { -		if (count == 10) -			printk("hm, tasklist_lock locked, retrying... "); -		if (count) { -			count--; -			printk(" #%d", 10-count); -			mdelay(200); -			goto retry; -		} -		printk(" ignoring it.\n"); -		unlock = 0; -	} -	if (count != 10) -		printk(" locked it.\n"); - -	do_each_thread(g, p) { -		show_task_locks(p); -		if (!unlock) -			if (read_trylock(&tasklist_lock)) -				unlock = 1; -	} while_each_thread(g, p); - -	printk("\n"); -	show_held_locks(NULL); -	printk("=============================================\n\n"); - -	if (unlock) -		read_unlock(&tasklist_lock); -} - -static void report_deadlock(struct task_struct *task, struct mutex *lock, -			    struct mutex *lockblk, unsigned long ip) -{ -	printk("\n%s/%d is trying to acquire this lock:\n", -		current->comm, current->pid); -	printk_lock(lock, 1); -	printk("... trying at:                 "); -	print_symbol("%s\n", ip); -	show_held_locks(current); - -	if (lockblk) { -		printk("but %s/%d is deadlocking current task %s/%d!\n\n", -			task->comm, task->pid, current->comm, current->pid); -		printk("\n%s/%d is blocked on this lock:\n", -			task->comm, task->pid); -		printk_lock(lockblk, 1); - -		show_held_locks(task); - -		printk("\n%s/%d's [blocked] stackdump:\n\n", -			task->comm, task->pid); -		show_stack(task, NULL); -	} - -	printk("\n%s/%d's [current] stackdump:\n\n", -		current->comm, current->pid); -	dump_stack(); -	mutex_debug_show_all_locks(); -	printk("[ turning off deadlock detection. Please report this. ]\n\n"); -	local_irq_disable(); -} - -/* - * Recursively check for mutex deadlocks: - */ -static int check_deadlock(struct mutex *lock, int depth, -			  struct thread_info *ti, unsigned long ip) -{ -	struct mutex *lockblk; -	struct task_struct *task; - -	if (!debug_mutex_on) -		return 0; - -	ti = lock->owner; -	if (!ti) -		return 0; - -	task = ti->task; -	lockblk = NULL; -	if (task->blocked_on) -		lockblk = task->blocked_on->lock; - -	/* Self-deadlock: */ -	if (current == task) { -		DEBUG_OFF(); -		if (depth) -			return 1; -		printk("\n==========================================\n"); -		printk(  "[ BUG: lock recursion deadlock detected! |\n"); -		printk(  "------------------------------------------\n"); -		report_deadlock(task, lock, NULL, ip); -		return 0; -	} - -	/* Ugh, something corrupted the lock data structure? 
*/ -	if (depth > 20) { -		DEBUG_OFF(); -		printk("\n===========================================\n"); -		printk(  "[ BUG: infinite lock dependency detected!? |\n"); -		printk(  "-------------------------------------------\n"); -		report_deadlock(task, lock, lockblk, ip); -		return 0; -	} - -	/* Recursively check for dependencies: */ -	if (lockblk && check_deadlock(lockblk, depth+1, ti, ip)) { -		printk("\n============================================\n"); -		printk(  "[ BUG: circular locking deadlock detected! ]\n"); -		printk(  "--------------------------------------------\n"); -		report_deadlock(task, lock, lockblk, ip); -		return 0; -	} -	return 0; -} - -/* - * Called when a task exits, this function checks whether the - * task is holding any locks, and reports the first one if so: - */ -void mutex_debug_check_no_locks_held(struct task_struct *task) -{ -	struct list_head *curr, *next; -	struct thread_info *t; -	unsigned long flags; -	struct mutex *lock; - -	if (!debug_mutex_on) -		return; - -	debug_spin_lock_save(&debug_mutex_lock, flags); -	list_for_each_safe(curr, next, &debug_mutex_held_locks) { -		lock = list_entry(curr, struct mutex, held_list); -		t = lock->owner; -		if (t != task->thread_info) -			continue; -		list_del_init(curr); -		DEBUG_OFF(); -		debug_spin_unlock_restore(&debug_mutex_lock, flags); - -		printk("BUG: %s/%d, lock held at task exit time!\n", -			task->comm, task->pid); -		printk_lock(lock, 1); -		if (lock->owner != task->thread_info) -			printk("exiting task is not even the owner??\n"); -		return; -	} -	debug_spin_unlock_restore(&debug_mutex_lock, flags); -} - -/* - * Called when kernel memory is freed (or unmapped), or if a mutex - * is destroyed or reinitialized - this code checks whether there is - * any held lock in the memory range of <from> to <to>: - */ -void mutex_debug_check_no_locks_freed(const void *from, unsigned long len) -{ -	struct list_head *curr, *next; -	const void *to = from + len; -	unsigned long flags; -	struct mutex *lock; -	void *lock_addr; - -	if (!debug_mutex_on) -		return; - -	debug_spin_lock_save(&debug_mutex_lock, flags); -	list_for_each_safe(curr, next, &debug_mutex_held_locks) { -		lock = list_entry(curr, struct mutex, held_list); -		lock_addr = lock; -		if (lock_addr < from || lock_addr >= to) -			continue; -		list_del_init(curr); -		DEBUG_OFF(); -		debug_spin_unlock_restore(&debug_mutex_lock, flags); - -		printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", -			current->comm, current->pid, lock, from, to); -		dump_stack(); -		printk_lock(lock, 1); -		if (lock->owner != current_thread_info()) -			printk("freeing task is not even the owner??\n"); -		return; -	} -	debug_spin_unlock_restore(&debug_mutex_lock, flags); -} - -/*   * Must be called with lock->wait_lock held.   
*/ -void debug_mutex_set_owner(struct mutex *lock, -			   struct thread_info *new_owner __IP_DECL__) +void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)  {  	lock->owner = new_owner; -	DEBUG_WARN_ON(!list_empty(&lock->held_list)); -	if (debug_mutex_on) { -		list_add_tail(&lock->held_list, &debug_mutex_held_locks); -		lock->acquire_ip = ip; -	}  } -void debug_mutex_init_waiter(struct mutex_waiter *waiter) +void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)  {  	memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));  	waiter->magic = waiter; @@ -389,23 +41,23 @@ void debug_mutex_init_waiter(struct mutex_waiter *waiter)  void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)  { -	SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); -	DEBUG_WARN_ON(list_empty(&lock->wait_list)); -	DEBUG_WARN_ON(waiter->magic != waiter); -	DEBUG_WARN_ON(list_empty(&waiter->list)); +	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); +	DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); +	DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); +	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));  }  void debug_mutex_free_waiter(struct mutex_waiter *waiter)  { -	DEBUG_WARN_ON(!list_empty(&waiter->list)); +	DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));  	memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));  }  void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, -			    struct thread_info *ti __IP_DECL__) +			    struct thread_info *ti)  { -	SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock)); -	check_deadlock(lock, 0, ti, ip); +	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); +  	/* Mark the current thread as blocked on the lock: */  	ti->task->blocked_on = waiter;  	waiter->lock = lock; @@ -414,9 +66,9 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,  void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,  			 struct thread_info *ti)  { -	DEBUG_WARN_ON(list_empty(&waiter->list)); -	DEBUG_WARN_ON(waiter->task != ti->task); -	DEBUG_WARN_ON(ti->task->blocked_on != waiter); +	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); +	DEBUG_LOCKS_WARN_ON(waiter->task != ti->task); +	DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);  	ti->task->blocked_on = NULL;  	list_del_init(&waiter->list); @@ -425,24 +77,23 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,  void debug_mutex_unlock(struct mutex *lock)  { -	DEBUG_WARN_ON(lock->magic != lock); -	DEBUG_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); -	DEBUG_WARN_ON(lock->owner != current_thread_info()); -	if (debug_mutex_on) { -		DEBUG_WARN_ON(list_empty(&lock->held_list)); -		list_del_init(&lock->held_list); -	} +	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); +	DEBUG_LOCKS_WARN_ON(lock->magic != lock); +	DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); +	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());  } -void debug_mutex_init(struct mutex *lock, const char *name) +void debug_mutex_init(struct mutex *lock, const char *name, +		      struct lock_class_key *key)  { +#ifdef CONFIG_DEBUG_LOCK_ALLOC  	/*  	 * Make sure we are not reinitializing a held lock:  	 */ -	mutex_debug_check_no_locks_freed((void *)lock, sizeof(*lock)); +	debug_check_no_locks_freed((void *)lock, sizeof(*lock)); +	lockdep_init_map(&lock->dep_map, name, key); +#endif  	lock->owner = NULL; -	INIT_LIST_HEAD(&lock->held_list); -	lock->name = name;  	lock->magic = lock;  } @@ 
-456,7 +107,7 @@ void debug_mutex_init(struct mutex *lock, const char *name)   */  void fastcall mutex_destroy(struct mutex *lock)  { -	DEBUG_WARN_ON(mutex_is_locked(lock)); +	DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));  	lock->magic = NULL;  } diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h index a5196c36a5fd..babfbdfc534b 100644 --- a/kernel/mutex-debug.h +++ b/kernel/mutex-debug.h @@ -10,110 +10,44 @@   * More details are in kernel/mutex-debug.c.   */ -extern spinlock_t debug_mutex_lock; -extern struct list_head debug_mutex_held_locks; -extern int debug_mutex_on; - -/* - * In the debug case we carry the caller's instruction pointer into - * other functions, but we dont want the function argument overhead - * in the nondebug case - hence these macros: - */ -#define __IP_DECL__		, unsigned long ip -#define __IP__			, ip -#define __RET_IP__		, (unsigned long)__builtin_return_address(0) -  /*   * This must be called with lock->wait_lock held.   */ -extern void debug_mutex_set_owner(struct mutex *lock, -				  struct thread_info *new_owner __IP_DECL__); +extern void +debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);  static inline void debug_mutex_clear_owner(struct mutex *lock)  {  	lock->owner = NULL;  } -extern void debug_mutex_init_waiter(struct mutex_waiter *waiter); +extern void debug_mutex_lock_common(struct mutex *lock, +				    struct mutex_waiter *waiter);  extern void debug_mutex_wake_waiter(struct mutex *lock,  				    struct mutex_waiter *waiter);  extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);  extern void debug_mutex_add_waiter(struct mutex *lock,  				   struct mutex_waiter *waiter, -				   struct thread_info *ti __IP_DECL__); +				   struct thread_info *ti);  extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,  				struct thread_info *ti);  extern void debug_mutex_unlock(struct mutex *lock); -extern void debug_mutex_init(struct mutex *lock, const char *name); - -#define debug_spin_lock_save(lock, flags)		\ -	do {						\ -		local_irq_save(flags);			\ -		if (debug_mutex_on)			\ -			spin_lock(lock);		\ -	} while (0) - -#define debug_spin_unlock_restore(lock, flags)		\ -	do {						\ -		if (debug_mutex_on)			\ -			spin_unlock(lock);		\ -		local_irq_restore(flags);		\ -		preempt_check_resched();		\ -	} while (0) +extern void debug_mutex_init(struct mutex *lock, const char *name, +			     struct lock_class_key *key);  #define spin_lock_mutex(lock, flags)			\  	do {						\  		struct mutex *l = container_of(lock, struct mutex, wait_lock); \  							\ -		DEBUG_WARN_ON(in_interrupt());		\ -		debug_spin_lock_save(&debug_mutex_lock, flags); \ -		spin_lock(lock);			\ -		DEBUG_WARN_ON(l->magic != l);		\ +		DEBUG_LOCKS_WARN_ON(in_interrupt());	\ +		local_irq_save(flags);			\ +		__raw_spin_lock(&(lock)->raw_lock);	\ +		DEBUG_LOCKS_WARN_ON(l->magic != l);	\  	} while (0)  #define spin_unlock_mutex(lock, flags)			\  	do {						\ -		spin_unlock(lock);			\ -		debug_spin_unlock_restore(&debug_mutex_lock, flags);	\ +		__raw_spin_unlock(&(lock)->raw_lock);	\ +		local_irq_restore(flags);		\ +		preempt_check_resched();		\  	} while (0) - -#define DEBUG_OFF()					\ -do {							\ -	if (debug_mutex_on) {				\ -		debug_mutex_on = 0;			\ -		console_verbose();			\ -		if (spin_is_locked(&debug_mutex_lock))	\ -			spin_unlock(&debug_mutex_lock);	\ -	}						\ -} while (0) - -#define DEBUG_BUG()					\ -do {							\ -	if (debug_mutex_on) {				\ -		DEBUG_OFF();				\ -		BUG();					\ -	}						\ -} while (0) - -#define DEBUG_WARN_ON(c)			
	\ -do {							\ -	if (unlikely(c && debug_mutex_on)) {		\ -		DEBUG_OFF();				\ -		WARN_ON(1);				\ -	}						\ -} while (0) - -# define DEBUG_BUG_ON(c)				\ -do {							\ -	if (unlikely(c))				\ -		DEBUG_BUG();				\ -} while (0) - -#ifdef CONFIG_SMP -# define SMP_DEBUG_WARN_ON(c)			DEBUG_WARN_ON(c) -# define SMP_DEBUG_BUG_ON(c)			DEBUG_BUG_ON(c) -#else -# define SMP_DEBUG_WARN_ON(c)			do { } while (0) -# define SMP_DEBUG_BUG_ON(c)			do { } while (0) -#endif - diff --git a/kernel/mutex.c b/kernel/mutex.c index 7043db21bbce..8c71cf72a497 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -17,6 +17,7 @@  #include <linux/module.h>  #include <linux/spinlock.h>  #include <linux/interrupt.h> +#include <linux/debug_locks.h>  /*   * In the DEBUG case we are using the "NULL fastpath" for mutexes, @@ -38,13 +39,14 @@   *   * It is not allowed to initialize an already locked mutex.   */ -void fastcall __mutex_init(struct mutex *lock, const char *name) +void +__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)  {  	atomic_set(&lock->count, 1);  	spin_lock_init(&lock->wait_lock);  	INIT_LIST_HEAD(&lock->wait_list); -	debug_mutex_init(lock, name); +	debug_mutex_init(lock, name, key);  }  EXPORT_SYMBOL(__mutex_init); @@ -56,7 +58,7 @@ EXPORT_SYMBOL(__mutex_init);   * branch is predicted by the CPU as default-untaken.   */  static void fastcall noinline __sched -__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__); +__mutex_lock_slowpath(atomic_t *lock_count);  /***   * mutex_lock - acquire the mutex @@ -79,7 +81,7 @@ __mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__);   *   * This function is similar to (but not equivalent to) down().   */ -void fastcall __sched mutex_lock(struct mutex *lock) +void inline fastcall __sched mutex_lock(struct mutex *lock)  {  	might_sleep();  	/* @@ -92,7 +94,7 @@ void fastcall __sched mutex_lock(struct mutex *lock)  EXPORT_SYMBOL(mutex_lock);  static void fastcall noinline __sched -__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__); +__mutex_unlock_slowpath(atomic_t *lock_count);  /***   * mutex_unlock - release the mutex @@ -120,18 +122,18 @@ EXPORT_SYMBOL(mutex_unlock);   * Lock a mutex (possibly interruptible), slowpath:   */  static inline int __sched -__mutex_lock_common(struct mutex *lock, long state __IP_DECL__) +__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)  {  	struct task_struct *task = current;  	struct mutex_waiter waiter;  	unsigned int old_val;  	unsigned long flags; -	debug_mutex_init_waiter(&waiter); -  	spin_lock_mutex(&lock->wait_lock, flags); -	debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip); +	debug_mutex_lock_common(lock, &waiter); +	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); +	debug_mutex_add_waiter(lock, &waiter, task->thread_info);  	/* add waiting tasks to the end of the waitqueue (FIFO): */  	list_add_tail(&waiter.list, &lock->wait_list); @@ -158,6 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)  		if (unlikely(state == TASK_INTERRUPTIBLE &&  						signal_pending(task))) {  			mutex_remove_waiter(lock, &waiter, task->thread_info); +			mutex_release(&lock->dep_map, 1, _RET_IP_);  			spin_unlock_mutex(&lock->wait_lock, flags);  			debug_mutex_free_waiter(&waiter); @@ -173,7 +176,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)  	/* got the lock - rejoice! 
*/  	mutex_remove_waiter(lock, &waiter, task->thread_info); -	debug_mutex_set_owner(lock, task->thread_info __IP__); +	debug_mutex_set_owner(lock, task->thread_info);  	/* set it to 0 if there are no waiters left: */  	if (likely(list_empty(&lock->wait_list))) @@ -183,32 +186,40 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)  	debug_mutex_free_waiter(&waiter); -	DEBUG_WARN_ON(list_empty(&lock->held_list)); -	DEBUG_WARN_ON(lock->owner != task->thread_info); -  	return 0;  }  static void fastcall noinline __sched -__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__) +__mutex_lock_slowpath(atomic_t *lock_count)  {  	struct mutex *lock = container_of(lock_count, struct mutex, count); -	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE __IP__); +	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void __sched +mutex_lock_nested(struct mutex *lock, unsigned int subclass) +{ +	might_sleep(); +	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);  } +EXPORT_SYMBOL_GPL(mutex_lock_nested); +#endif +  /*   * Release the lock, slowpath:   */ -static fastcall noinline void -__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__) +static fastcall inline void +__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)  {  	struct mutex *lock = container_of(lock_count, struct mutex, count);  	unsigned long flags; -	DEBUG_WARN_ON(lock->owner != current_thread_info()); -  	spin_lock_mutex(&lock->wait_lock, flags); +	mutex_release(&lock->dep_map, nested, _RET_IP_); +	debug_mutex_unlock(lock);  	/*  	 * some architectures leave the lock unlocked in the fastpath failure @@ -218,8 +229,6 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)  	if (__mutex_slowpath_needs_to_unlock())  		atomic_set(&lock->count, 1); -	debug_mutex_unlock(lock); -  	if (!list_empty(&lock->wait_list)) {  		/* get the first entry from the wait-list: */  		struct mutex_waiter *waiter = @@ -237,11 +246,20 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)  }  /* + * Release the lock, slowpath: + */ +static fastcall noinline void +__mutex_unlock_slowpath(atomic_t *lock_count) +{ +	__mutex_unlock_common_slowpath(lock_count, 1); +} + +/*   * Here come the less common (and hence less performance-critical) APIs:   * mutex_lock_interruptible() and mutex_trylock().   
*/  static int fastcall noinline __sched -__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__); +__mutex_lock_interruptible_slowpath(atomic_t *lock_count);  /***   * mutex_lock_interruptible - acquire the mutex, interruptable @@ -264,11 +282,11 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock)  EXPORT_SYMBOL(mutex_lock_interruptible);  static int fastcall noinline __sched -__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__) +__mutex_lock_interruptible_slowpath(atomic_t *lock_count)  {  	struct mutex *lock = container_of(lock_count, struct mutex, count); -	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE __IP__); +	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);  }  /* @@ -284,8 +302,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)  	spin_lock_mutex(&lock->wait_lock, flags);  	prev = atomic_xchg(&lock->count, -1); -	if (likely(prev == 1)) -		debug_mutex_set_owner(lock, current_thread_info() __RET_IP__); +	if (likely(prev == 1)) { +		debug_mutex_set_owner(lock, current_thread_info()); +		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); +	}  	/* Set it back to 0 if there are no waiters: */  	if (likely(list_empty(&lock->wait_list)))  		atomic_set(&lock->count, 0); @@ -309,7 +329,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)   * This function must not be used in interrupt context. The   * mutex must be released by the same task that acquired it.   */ -int fastcall mutex_trylock(struct mutex *lock) +int fastcall __sched mutex_trylock(struct mutex *lock)  {  	return __mutex_fastpath_trylock(&lock->count,  					__mutex_trylock_slowpath); diff --git a/kernel/mutex.h b/kernel/mutex.h index 069189947257..a075dafbb290 100644 --- a/kernel/mutex.h +++ b/kernel/mutex.h @@ -16,22 +16,15 @@  #define mutex_remove_waiter(lock, waiter, ti) \  		__list_del((waiter)->list.prev, (waiter)->list.next) -#define DEBUG_WARN_ON(c)				do { } while (0)  #define debug_mutex_set_owner(lock, new_owner)		do { } while (0)  #define debug_mutex_clear_owner(lock)			do { } while (0) -#define debug_mutex_init_waiter(waiter)			do { } while (0)  #define debug_mutex_wake_waiter(lock, waiter)		do { } while (0)  #define debug_mutex_free_waiter(waiter)			do { } while (0) -#define debug_mutex_add_waiter(lock, waiter, ti, ip)	do { } while (0) +#define debug_mutex_add_waiter(lock, waiter, ti)	do { } while (0)  #define debug_mutex_unlock(lock)			do { } while (0) -#define debug_mutex_init(lock, name)			do { } while (0) - -/* - * Return-address parameters/declarations. 
They are very useful for - * debugging, but add overhead in the !DEBUG case - so we go the - * trouble of using this not too elegant but zero-cost solution: - */ -#define __IP_DECL__ -#define __IP__ -#define __RET_IP__ +#define debug_mutex_init(lock, name, key)		do { } while (0) +static inline void +debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) +{ +} diff --git a/kernel/pid.c b/kernel/pid.c index eeb836b65ca4..93e212f20671 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -218,7 +218,7 @@ struct pid * fastcall find_pid(int nr)  	return NULL;  } -int fastcall attach_pid(task_t *task, enum pid_type type, int nr) +int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)  {  	struct pid_link *link;  	struct pid *pid; @@ -233,7 +233,7 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)  	return 0;  } -void fastcall detach_pid(task_t *task, enum pid_type type) +void fastcall detach_pid(struct task_struct *task, enum pid_type type)  {  	struct pid_link *link;  	struct pid *pid; @@ -267,7 +267,7 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)  /*   * Must be called under rcu_read_lock() or with tasklist_lock read-held.   */ -task_t *find_task_by_pid_type(int type, int nr) +struct task_struct *find_task_by_pid_type(int type, int nr)  {  	return pid_task(find_pid(nr), type);  } diff --git a/kernel/printk.c b/kernel/printk.c index 39ae24d2a415..bdba5d80496c 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -518,7 +518,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)  		zap_locks();  	/* This stops the holder of console_sem just where we want him */ -	spin_lock_irqsave(&logbuf_lock, flags); +	local_irq_save(flags); +	lockdep_off(); +	spin_lock(&logbuf_lock);  	printk_cpu = smp_processor_id();  	/* Emit the output into the temporary buffer */ @@ -588,7 +590,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)  		 */  		console_locked = 1;  		printk_cpu = UINT_MAX; -		spin_unlock_irqrestore(&logbuf_lock, flags); +		spin_unlock(&logbuf_lock);  		/*  		 * Console drivers may assume that per-cpu resources have @@ -604,6 +606,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)  			console_locked = 0;  			up(&console_sem);  		} +		lockdep_on(); +		local_irq_restore(flags);  	} else {  		/*  		 * Someone else owns the drivers.  We drop the spinlock, which @@ -611,7 +615,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)  		 * console drivers with the output which we just produced.  		 */  		printk_cpu = UINT_MAX; -		spin_unlock_irqrestore(&logbuf_lock, flags); +		spin_unlock(&logbuf_lock); +		lockdep_on(); +		local_irq_restore(flags);  	}  	preempt_enable(); @@ -809,8 +815,15 @@ void release_console_sem(void)  	console_may_schedule = 0;  	up(&console_sem);  	spin_unlock_irqrestore(&logbuf_lock, flags); -	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) -		wake_up_interruptible(&log_wait); +	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) { +		/* +		 * If we printk from within the lock dependency code, +		 * from within the scheduler code, then do not lock +		 * up due to self-recursion: +		 */ +		if (!lockdep_internal()) +			wake_up_interruptible(&log_wait); +	}  }  EXPORT_SYMBOL(release_console_sem); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 335c5b932e14..9a111f70145c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -28,7 +28,7 @@   *   * Must be called with the tasklist lock write-held.   
*/ -void __ptrace_link(task_t *child, task_t *new_parent) +void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)  {  	BUG_ON(!list_empty(&child->ptrace_list));  	if (child->parent == new_parent) @@ -46,7 +46,7 @@ void __ptrace_link(task_t *child, task_t *new_parent)   * TASK_TRACED, resume it now.   * Requires that irqs be disabled.   */ -void ptrace_untrace(task_t *child) +void ptrace_untrace(struct task_struct *child)  {  	spin_lock(&child->sighand->siglock);  	if (child->state == TASK_TRACED) { @@ -65,7 +65,7 @@ void ptrace_untrace(task_t *child)   *   * Must be called with the tasklist lock write-held.   */ -void __ptrace_unlink(task_t *child) +void __ptrace_unlink(struct task_struct *child)  {  	BUG_ON(!child->ptrace); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index f464f5ae3f11..759805c9859a 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -53,13 +53,13 @@  static struct rcu_ctrlblk rcu_ctrlblk = {  	.cur = -300,  	.completed = -300, -	.lock = SPIN_LOCK_UNLOCKED, +	.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),  	.cpumask = CPU_MASK_NONE,  };  static struct rcu_ctrlblk rcu_bh_ctrlblk = {  	.cur = -300,  	.completed = -300, -	.lock = SPIN_LOCK_UNLOCKED, +	.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),  	.cpumask = CPU_MASK_NONE,  }; diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index 4aa8a2c9f453..0c1faa950af7 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c @@ -26,6 +26,7 @@  #include <linux/interrupt.h>  #include <linux/plist.h>  #include <linux/fs.h> +#include <linux/debug_locks.h>  #include "rtmutex_common.h" @@ -45,8 +46,6 @@ do {								\  		console_verbose();				\  		if (spin_is_locked(¤t->pi_lock))		\  			spin_unlock(¤t->pi_lock);		\ -		if (spin_is_locked(¤t->held_list_lock))	\ -			spin_unlock(¤t->held_list_lock);	\  	}							\  } while (0) @@ -97,7 +96,7 @@ void deadlock_trace_off(void)  	rt_trace_on = 0;  } -static void printk_task(task_t *p) +static void printk_task(struct task_struct *p)  {  	if (p)  		printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio); @@ -105,14 +104,6 @@ static void printk_task(task_t *p)  		printk("<none>");  } -static void printk_task_short(task_t *p) -{ -	if (p) -		printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio); -	else -		printk("<none>"); -} -  static void printk_lock(struct rt_mutex *lock, int print_owner)  {  	if (lock->name) @@ -128,222 +119,6 @@ static void printk_lock(struct rt_mutex *lock, int print_owner)  		printk_task(rt_mutex_owner(lock));  		printk("\n");  	} -	if (rt_mutex_owner(lock)) { -		printk("... 
acquired at:               "); -		print_symbol("%s\n", lock->acquire_ip); -	} -} - -static void printk_waiter(struct rt_mutex_waiter *w) -{ -	printk("-------------------------\n"); -	printk("| waiter struct %p:\n", w); -	printk("| w->list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n", -	       w->list_entry.plist.prio_list.prev, w->list_entry.plist.prio_list.next, -	       w->list_entry.plist.node_list.prev, w->list_entry.plist.node_list.next, -	       w->list_entry.prio); -	printk("| w->pi_list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n", -	       w->pi_list_entry.plist.prio_list.prev, w->pi_list_entry.plist.prio_list.next, -	       w->pi_list_entry.plist.node_list.prev, w->pi_list_entry.plist.node_list.next, -	       w->pi_list_entry.prio); -	printk("\n| lock:\n"); -	printk_lock(w->lock, 1); -	printk("| w->ti->task:\n"); -	printk_task(w->task); -	printk("| blocked at:  "); -	print_symbol("%s\n", w->ip); -	printk("-------------------------\n"); -} - -static void show_task_locks(task_t *p) -{ -	switch (p->state) { -	case TASK_RUNNING:		printk("R"); break; -	case TASK_INTERRUPTIBLE:	printk("S"); break; -	case TASK_UNINTERRUPTIBLE:	printk("D"); break; -	case TASK_STOPPED:		printk("T"); break; -	case EXIT_ZOMBIE:		printk("Z"); break; -	case EXIT_DEAD:			printk("X"); break; -	default:			printk("?"); break; -	} -	printk_task(p); -	if (p->pi_blocked_on) { -		struct rt_mutex *lock = p->pi_blocked_on->lock; - -		printk(" blocked on:"); -		printk_lock(lock, 1); -	} else -		printk(" (not blocked)\n"); -} - -void rt_mutex_show_held_locks(task_t *task, int verbose) -{ -	struct list_head *curr, *cursor = NULL; -	struct rt_mutex *lock; -	task_t *t; -	unsigned long flags; -	int count = 0; - -	if (!rt_trace_on) -		return; - -	if (verbose) { -		printk("------------------------------\n"); -		printk("| showing all locks held by: |  ("); -		printk_task_short(task); -		printk("):\n"); -		printk("------------------------------\n"); -	} - -next: -	spin_lock_irqsave(&task->held_list_lock, flags); -	list_for_each(curr, &task->held_list_head) { -		if (cursor && curr != cursor) -			continue; -		lock = list_entry(curr, struct rt_mutex, held_list_entry); -		t = rt_mutex_owner(lock); -		WARN_ON(t != task); -		count++; -		cursor = curr->next; -		spin_unlock_irqrestore(&task->held_list_lock, flags); - -		printk("\n#%03d:            ", count); -		printk_lock(lock, 0); -		goto next; -	} -	spin_unlock_irqrestore(&task->held_list_lock, flags); - -	printk("\n"); -} - -void rt_mutex_show_all_locks(void) -{ -	task_t *g, *p; -	int count = 10; -	int unlock = 1; - -	printk("\n"); -	printk("----------------------\n"); -	printk("| showing all tasks: |\n"); -	printk("----------------------\n"); - -	/* -	 * Here we try to get the tasklist_lock as hard as possible, -	 * if not successful after 2 seconds we ignore it (but keep -	 * trying). This is to enable a debug printout even if a -	 * tasklist_lock-holding task deadlocks or crashes. -	 */ -retry: -	if (!read_trylock(&tasklist_lock)) { -		if (count == 10) -			printk("hm, tasklist_lock locked, retrying... 
"); -		if (count) { -			count--; -			printk(" #%d", 10-count); -			mdelay(200); -			goto retry; -		} -		printk(" ignoring it.\n"); -		unlock = 0; -	} -	if (count != 10) -		printk(" locked it.\n"); - -	do_each_thread(g, p) { -		show_task_locks(p); -		if (!unlock) -			if (read_trylock(&tasklist_lock)) -				unlock = 1; -	} while_each_thread(g, p); - -	printk("\n"); - -	printk("-----------------------------------------\n"); -	printk("| showing all locks held in the system: |\n"); -	printk("-----------------------------------------\n"); - -	do_each_thread(g, p) { -		rt_mutex_show_held_locks(p, 0); -		if (!unlock) -			if (read_trylock(&tasklist_lock)) -				unlock = 1; -	} while_each_thread(g, p); - - -	printk("=============================================\n\n"); - -	if (unlock) -		read_unlock(&tasklist_lock); -} - -void rt_mutex_debug_check_no_locks_held(task_t *task) -{ -	struct rt_mutex_waiter *w; -	struct list_head *curr; -	struct rt_mutex *lock; - -	if (!rt_trace_on) -		return; -	if (!rt_prio(task->normal_prio) && rt_prio(task->prio)) { -		printk("BUG: PI priority boost leaked!\n"); -		printk_task(task); -		printk("\n"); -	} -	if (list_empty(&task->held_list_head)) -		return; - -	spin_lock(&task->pi_lock); -	plist_for_each_entry(w, &task->pi_waiters, pi_list_entry) { -		TRACE_OFF(); - -		printk("hm, PI interest held at exit time? Task:\n"); -		printk_task(task); -		printk_waiter(w); -		return; -	} -	spin_unlock(&task->pi_lock); - -	list_for_each(curr, &task->held_list_head) { -		lock = list_entry(curr, struct rt_mutex, held_list_entry); - -		printk("BUG: %s/%d, lock held at task exit time!\n", -		       task->comm, task->pid); -		printk_lock(lock, 1); -		if (rt_mutex_owner(lock) != task) -			printk("exiting task is not even the owner??\n"); -	} -} - -int rt_mutex_debug_check_no_locks_freed(const void *from, unsigned long len) -{ -	const void *to = from + len; -	struct list_head *curr; -	struct rt_mutex *lock; -	unsigned long flags; -	void *lock_addr; - -	if (!rt_trace_on) -		return 0; - -	spin_lock_irqsave(¤t->held_list_lock, flags); -	list_for_each(curr, ¤t->held_list_head) { -		lock = list_entry(curr, struct rt_mutex, held_list_entry); -		lock_addr = lock; -		if (lock_addr < from || lock_addr >= to) -			continue; -		TRACE_OFF(); - -		printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n", -			current->comm, current->pid, lock, from, to); -		dump_stack(); -		printk_lock(lock, 1); -		if (rt_mutex_owner(lock) != current) -			printk("freeing task is not even the owner??\n"); -		return 1; -	} -	spin_unlock_irqrestore(¤t->held_list_lock, flags); - -	return 0;  }  void rt_mutex_debug_task_free(struct task_struct *task) @@ -395,85 +170,41 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)  	       current->comm, current->pid);  	printk_lock(waiter->lock, 1); -	printk("... trying at:                 "); -	print_symbol("%s\n", waiter->ip); -  	printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid);  	printk_lock(waiter->deadlock_lock, 1); -	rt_mutex_show_held_locks(current, 1); -	rt_mutex_show_held_locks(task, 1); +	debug_show_held_locks(current); +	debug_show_held_locks(task);  	printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid);  	show_stack(task, NULL);  	printk("\n%s/%d's [current] stackdump:\n\n",  	       current->comm, current->pid);  	dump_stack(); -	rt_mutex_show_all_locks(); +	debug_show_all_locks(); +  	printk("[ turning off deadlock detection."  	       "Please report this trace. 
]\n\n");  	local_irq_disable();  } -void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__) +void debug_rt_mutex_lock(struct rt_mutex *lock)  { -	unsigned long flags; - -	if (rt_trace_on) { -		TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry)); - -		spin_lock_irqsave(¤t->held_list_lock, flags); -		list_add_tail(&lock->held_list_entry, ¤t->held_list_head); -		spin_unlock_irqrestore(¤t->held_list_lock, flags); - -		lock->acquire_ip = ip; -	}  }  void debug_rt_mutex_unlock(struct rt_mutex *lock)  { -	unsigned long flags; - -	if (rt_trace_on) { -		TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current); -		TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry)); - -		spin_lock_irqsave(¤t->held_list_lock, flags); -		list_del_init(&lock->held_list_entry); -		spin_unlock_irqrestore(¤t->held_list_lock, flags); -	} +	TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);  } -void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, -			       struct task_struct *powner __IP_DECL__) +void +debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)  { -	unsigned long flags; - -	if (rt_trace_on) { -		TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry)); - -		spin_lock_irqsave(&powner->held_list_lock, flags); -		list_add_tail(&lock->held_list_entry, &powner->held_list_head); -		spin_unlock_irqrestore(&powner->held_list_lock, flags); - -		lock->acquire_ip = ip; -	}  }  void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)  { -	unsigned long flags; - -	if (rt_trace_on) { -		struct task_struct *owner = rt_mutex_owner(lock); - -		TRACE_WARN_ON_LOCKED(!owner); -		TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry)); - -		spin_lock_irqsave(&owner->held_list_lock, flags); -		list_del_init(&lock->held_list_entry); -		spin_unlock_irqrestore(&owner->held_list_lock, flags); -	} +	TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock));  }  void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) @@ -493,17 +224,15 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)  void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)  { -	void *addr = lock; - -	if (rt_trace_on) { -		rt_mutex_debug_check_no_locks_freed(addr, -						    sizeof(struct rt_mutex)); -		INIT_LIST_HEAD(&lock->held_list_entry); -		lock->name = name; -	} +	/* +	 * Make sure we are not reinitializing a held lock: +	 */ +	debug_check_no_locks_freed((void *)lock, sizeof(*lock)); +	lock->name = name;  } -void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, task_t *task) +void +rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)  {  } diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h index 7612fbc62d70..14193d596d78 100644 --- a/kernel/rtmutex-debug.h +++ b/kernel/rtmutex-debug.h @@ -9,20 +9,16 @@   * This file contains macros used solely by rtmutex.c. Debug version.   
*/ -#define __IP_DECL__		, unsigned long ip -#define __IP__			, ip -#define __RET_IP__		, (unsigned long)__builtin_return_address(0) -  extern void  rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);  extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);  extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);  extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);  extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); -extern void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__); +extern void debug_rt_mutex_lock(struct rt_mutex *lock);  extern void debug_rt_mutex_unlock(struct rt_mutex *lock);  extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, -				      struct task_struct *powner __IP_DECL__); +				      struct task_struct *powner);  extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);  extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter,  				    struct rt_mutex *lock); diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index e82c2f848249..494dac872a13 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -33,7 +33,7 @@ struct test_thread_data {  };  static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; -static task_t *threads[MAX_RT_TEST_THREADS]; +static struct task_struct *threads[MAX_RT_TEST_THREADS];  static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];  enum test_opcodes { @@ -361,8 +361,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,  static ssize_t sysfs_test_status(struct sys_device *dev, char *buf)  {  	struct test_thread_data *td; +	struct task_struct *tsk;  	char *curr = buf; -	task_t *tsk;  	int i;  	td = container_of(dev, struct test_thread_data, sysdev); diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 45d61016da57..d2ef13b485e7 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -157,12 +157,11 @@ int max_lock_depth = 1024;   * Decreases task's usage by one - may thus free the task.   * Returns 0 or -EDEADLK.   */ -static int rt_mutex_adjust_prio_chain(task_t *task, +static int rt_mutex_adjust_prio_chain(struct task_struct *task,  				      int deadlock_detect,  				      struct rt_mutex *orig_lock,  				      struct rt_mutex_waiter *orig_waiter, -				      struct task_struct *top_task -				      __IP_DECL__) +				      struct task_struct *top_task)  {  	struct rt_mutex *lock;  	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; @@ -283,6 +282,7 @@ static int rt_mutex_adjust_prio_chain(task_t *task,  	spin_unlock_irqrestore(&task->pi_lock, flags);   out_put_task:  	put_task_struct(task); +  	return ret;  } @@ -357,7 +357,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)   *   * Must be called with lock->wait_lock held.   */ -static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__) +static int try_to_take_rt_mutex(struct rt_mutex *lock)  {  	/*  	 * We have to be careful here if the atomic speedups are @@ -384,7 +384,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)  		return 0;  	/* We got the lock. 
*/ -	debug_rt_mutex_lock(lock __IP__); +	debug_rt_mutex_lock(lock);  	rt_mutex_set_owner(lock, current, 0); @@ -402,13 +402,12 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)   */  static int task_blocks_on_rt_mutex(struct rt_mutex *lock,  				   struct rt_mutex_waiter *waiter, -				   int detect_deadlock -				   __IP_DECL__) +				   int detect_deadlock)  { +	struct task_struct *owner = rt_mutex_owner(lock);  	struct rt_mutex_waiter *top_waiter = waiter; -	task_t *owner = rt_mutex_owner(lock); -	int boost = 0, res;  	unsigned long flags; +	int boost = 0, res;  	spin_lock_irqsave(¤t->pi_lock, flags);  	__rt_mutex_adjust_prio(current); @@ -454,7 +453,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,  	spin_unlock(&lock->wait_lock);  	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, -					 current __IP__); +					 current);  	spin_lock(&lock->wait_lock); @@ -526,12 +525,12 @@ static void wakeup_next_waiter(struct rt_mutex *lock)   * Must be called with lock->wait_lock held   */  static void remove_waiter(struct rt_mutex *lock, -			  struct rt_mutex_waiter *waiter  __IP_DECL__) +			  struct rt_mutex_waiter *waiter)  {  	int first = (waiter == rt_mutex_top_waiter(lock)); -	int boost = 0; -	task_t *owner = rt_mutex_owner(lock); +	struct task_struct *owner = rt_mutex_owner(lock);  	unsigned long flags; +	int boost = 0;  	spin_lock_irqsave(¤t->pi_lock, flags);  	plist_del(&waiter->list_entry, &lock->wait_list); @@ -568,7 +567,7 @@ static void remove_waiter(struct rt_mutex *lock,  	spin_unlock(&lock->wait_lock); -	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current __IP__); +	rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);  	spin_lock(&lock->wait_lock);  } @@ -595,7 +594,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)  	get_task_struct(task);  	spin_unlock_irqrestore(&task->pi_lock, flags); -	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task __RET_IP__); +	rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);  }  /* @@ -604,7 +603,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)  static int __sched  rt_mutex_slowlock(struct rt_mutex *lock, int state,  		  struct hrtimer_sleeper *timeout, -		  int detect_deadlock __IP_DECL__) +		  int detect_deadlock)  {  	struct rt_mutex_waiter waiter;  	int ret = 0; @@ -615,7 +614,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,  	spin_lock(&lock->wait_lock);  	/* Try to acquire the lock again: */ -	if (try_to_take_rt_mutex(lock __IP__)) { +	if (try_to_take_rt_mutex(lock)) {  		spin_unlock(&lock->wait_lock);  		return 0;  	} @@ -629,7 +628,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,  	for (;;) {  		/* Try to acquire the lock: */ -		if (try_to_take_rt_mutex(lock __IP__)) +		if (try_to_take_rt_mutex(lock))  			break;  		/* @@ -653,7 +652,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,  		 */  		if (!waiter.task) {  			ret = task_blocks_on_rt_mutex(lock, &waiter, -						      detect_deadlock __IP__); +						      detect_deadlock);  			/*  			 * If we got woken up by the owner then start loop  			 * all over without going into schedule to try @@ -680,7 +679,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,  	set_current_state(TASK_RUNNING);  	if (unlikely(waiter.task)) -		remove_waiter(lock, &waiter __IP__); +		remove_waiter(lock, &waiter);  	/*  	 * try_to_take_rt_mutex() sets the waiter bit @@ -711,7 +710,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,   * Slow path try-lock function:   */  static inline int 
-rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__) +rt_mutex_slowtrylock(struct rt_mutex *lock)  {  	int ret = 0; @@ -719,7 +718,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__)  	if (likely(rt_mutex_owner(lock) != current)) { -		ret = try_to_take_rt_mutex(lock __IP__); +		ret = try_to_take_rt_mutex(lock);  		/*  		 * try_to_take_rt_mutex() sets the lock waiters  		 * bit unconditionally. Clean this up. @@ -769,13 +768,13 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,  		  int detect_deadlock,  		  int (*slowfn)(struct rt_mutex *lock, int state,  				struct hrtimer_sleeper *timeout, -				int detect_deadlock __IP_DECL__)) +				int detect_deadlock))  {  	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {  		rt_mutex_deadlock_account_lock(lock, current);  		return 0;  	} else -		return slowfn(lock, state, NULL, detect_deadlock __RET_IP__); +		return slowfn(lock, state, NULL, detect_deadlock);  }  static inline int @@ -783,24 +782,24 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,  			struct hrtimer_sleeper *timeout, int detect_deadlock,  			int (*slowfn)(struct rt_mutex *lock, int state,  				      struct hrtimer_sleeper *timeout, -				      int detect_deadlock __IP_DECL__)) +				      int detect_deadlock))  {  	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {  		rt_mutex_deadlock_account_lock(lock, current);  		return 0;  	} else -		return slowfn(lock, state, timeout, detect_deadlock __RET_IP__); +		return slowfn(lock, state, timeout, detect_deadlock);  }  static inline int  rt_mutex_fasttrylock(struct rt_mutex *lock, -		     int (*slowfn)(struct rt_mutex *lock __IP_DECL__)) +		     int (*slowfn)(struct rt_mutex *lock))  {  	if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {  		rt_mutex_deadlock_account_lock(lock, current);  		return 1;  	} -	return slowfn(lock __RET_IP__); +	return slowfn(lock);  }  static inline void @@ -948,7 +947,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,  				struct task_struct *proxy_owner)  {  	__rt_mutex_init(lock, NULL); -	debug_rt_mutex_proxy_lock(lock, proxy_owner __RET_IP__); +	debug_rt_mutex_proxy_lock(lock, proxy_owner);  	rt_mutex_set_owner(lock, proxy_owner, 0);  	rt_mutex_deadlock_account_lock(lock, proxy_owner);  } diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h index 1e0fca13ff72..a1a1dd06421d 100644 --- a/kernel/rtmutex.h +++ b/kernel/rtmutex.h @@ -10,9 +10,6 @@   * Non-debug version.   */ -#define __IP_DECL__ -#define __IP__ -#define __RET_IP__  #define rt_mutex_deadlock_check(l)			(0)  #define rt_mutex_deadlock_account_lock(m, t)		do { } while (0)  #define rt_mutex_deadlock_account_unlock(l)		do { } while (0) diff --git a/kernel/rwsem.c b/kernel/rwsem.c new file mode 100644 index 000000000000..291ded556aa0 --- /dev/null +++ b/kernel/rwsem.c @@ -0,0 +1,147 @@ +/* kernel/rwsem.c: R/W semaphores, public implementation + * + * Written by David Howells (dhowells@redhat.com). 
+ * Derived from asm-i386/semaphore.h + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/rwsem.h> + +#include <asm/system.h> +#include <asm/atomic.h> + +/* + * lock for reading + */ +void down_read(struct rw_semaphore *sem) +{ +	might_sleep(); +	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); + +	__down_read(sem); +} + +EXPORT_SYMBOL(down_read); + +/* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +int down_read_trylock(struct rw_semaphore *sem) +{ +	int ret = __down_read_trylock(sem); + +	if (ret == 1) +		rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); +	return ret; +} + +EXPORT_SYMBOL(down_read_trylock); + +/* + * lock for writing + */ +void down_write(struct rw_semaphore *sem) +{ +	might_sleep(); +	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); + +	__down_write(sem); +} + +EXPORT_SYMBOL(down_write); + +/* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +int down_write_trylock(struct rw_semaphore *sem) +{ +	int ret = __down_write_trylock(sem); + +	if (ret == 1) +		rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); +	return ret; +} + +EXPORT_SYMBOL(down_write_trylock); + +/* + * release a read lock + */ +void up_read(struct rw_semaphore *sem) +{ +	rwsem_release(&sem->dep_map, 1, _RET_IP_); + +	__up_read(sem); +} + +EXPORT_SYMBOL(up_read); + +/* + * release a write lock + */ +void up_write(struct rw_semaphore *sem) +{ +	rwsem_release(&sem->dep_map, 1, _RET_IP_); + +	__up_write(sem); +} + +EXPORT_SYMBOL(up_write); + +/* + * downgrade write lock to read lock + */ +void downgrade_write(struct rw_semaphore *sem) +{ +	/* +	 * lockdep: a downgraded write will live on as a write +	 * dependency. +	 */ +	__downgrade_write(sem); +} + +EXPORT_SYMBOL(downgrade_write); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +void down_read_nested(struct rw_semaphore *sem, int subclass) +{ +	might_sleep(); +	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); + +	__down_read(sem); +} + +EXPORT_SYMBOL(down_read_nested); + +void down_read_non_owner(struct rw_semaphore *sem) +{ +	might_sleep(); + +	__down_read(sem); +} + +EXPORT_SYMBOL(down_read_non_owner); + +void down_write_nested(struct rw_semaphore *sem, int subclass) +{ +	might_sleep(); +	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); + +	__down_write_nested(sem, subclass); +} + +EXPORT_SYMBOL(down_write_nested); + +void up_read_non_owner(struct rw_semaphore *sem) +{ +	__up_read(sem); +} + +EXPORT_SYMBOL(up_read_non_owner); + +#endif + + diff --git a/kernel/sched.c b/kernel/sched.c index d5e37072ea54..4ee400f9d56b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -30,6 +30,7 @@  #include <linux/capability.h>  #include <linux/completion.h>  #include <linux/kernel_stat.h> +#include <linux/debug_locks.h>  #include <linux/security.h>  #include <linux/notifier.h>  #include <linux/profile.h> @@ -178,20 +179,15 @@ static unsigned int static_prio_timeslice(int static_prio)  		return SCALE_PRIO(DEF_TIMESLICE, static_prio);  } -static inline unsigned int task_timeslice(task_t *p) +static inline unsigned int task_timeslice(struct task_struct *p)  {  	return static_prio_timeslice(p->static_prio);  } -#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)	\ -				< (long long) (sd)->cache_hot_time) -  /*   * These are the runqueue data structures:   */ -typedef struct runqueue runqueue_t; -  struct prio_array {  	unsigned int nr_active;  	DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */ @@ -205,7 +201,7 @@ struct prio_array {   * (such 
as the load balancing or the thread migration code), lock   * acquire operations must be ordered by ascending &runqueue.   */ -struct runqueue { +struct rq {  	spinlock_t lock;  	/* @@ -229,9 +225,9 @@ struct runqueue {  	unsigned long expired_timestamp;  	unsigned long long timestamp_last_tick; -	task_t *curr, *idle; +	struct task_struct *curr, *idle;  	struct mm_struct *prev_mm; -	prio_array_t *active, *expired, arrays[2]; +	struct prio_array *active, *expired, arrays[2];  	int best_expired_prio;  	atomic_t nr_iowait; @@ -242,7 +238,7 @@ struct runqueue {  	int active_balance;  	int push_cpu; -	task_t *migration_thread; +	struct task_struct *migration_thread;  	struct list_head migration_queue;  #endif @@ -265,9 +261,10 @@ struct runqueue {  	unsigned long ttwu_cnt;  	unsigned long ttwu_local;  #endif +	struct lock_class_key rq_lock_key;  }; -static DEFINE_PER_CPU(struct runqueue, runqueues); +static DEFINE_PER_CPU(struct rq, runqueues);  /*   * The domain tree (rq->sd) is protected by RCU's quiescent state transition. @@ -276,8 +273,8 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);   * The domain tree of any CPU may only be accessed from within   * preempt-disabled sections.   */ -#define for_each_domain(cpu, domain) \ -for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent) +#define for_each_domain(cpu, __sd) \ +	for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)  #define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))  #define this_rq()		(&__get_cpu_var(runqueues)) @@ -292,26 +289,33 @@ for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent)  #endif  #ifndef __ARCH_WANT_UNLOCKED_CTXSW -static inline int task_running(runqueue_t *rq, task_t *p) +static inline int task_running(struct rq *rq, struct task_struct *p)  {  	return rq->curr == p;  } -static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) +static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)  {  } -static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) +static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)  {  #ifdef CONFIG_DEBUG_SPINLOCK  	/* this is a valid case when another task releases the spinlock */  	rq->lock.owner = current;  #endif +	/* +	 * If we are tracking spinlock dependencies then we have to +	 * fix up the runqueue lock - which gets 'carried over' from +	 * prev into current: +	 */ +	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); +  	spin_unlock_irq(&rq->lock);  }  #else /* __ARCH_WANT_UNLOCKED_CTXSW */ -static inline int task_running(runqueue_t *rq, task_t *p) +static inline int task_running(struct rq *rq, struct task_struct *p)  {  #ifdef CONFIG_SMP  	return p->oncpu; @@ -320,7 +324,7 @@ static inline int task_running(runqueue_t *rq, task_t *p)  #endif  } -static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) +static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)  {  #ifdef CONFIG_SMP  	/* @@ -337,7 +341,7 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)  #endif  } -static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) +static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)  {  #ifdef CONFIG_SMP  	/* @@ -358,10 +362,10 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)   * __task_rq_lock - lock the runqueue a given task resides on.   * Must be called interrupts disabled.   
*/ -static inline runqueue_t *__task_rq_lock(task_t *p) +static inline struct rq *__task_rq_lock(struct task_struct *p)  	__acquires(rq->lock)  { -	struct runqueue *rq; +	struct rq *rq;  repeat_lock_task:  	rq = task_rq(p); @@ -378,10 +382,10 @@ repeat_lock_task:   * interrupts.  Note the ordering: we can safely lookup the task_rq without   * explicitly disabling preemption.   */ -static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) +static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)  	__acquires(rq->lock)  { -	struct runqueue *rq; +	struct rq *rq;  repeat_lock_task:  	local_irq_save(*flags); @@ -394,13 +398,13 @@ repeat_lock_task:  	return rq;  } -static inline void __task_rq_unlock(runqueue_t *rq) +static inline void __task_rq_unlock(struct rq *rq)  	__releases(rq->lock)  {  	spin_unlock(&rq->lock);  } -static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags) +static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)  	__releases(rq->lock)  {  	spin_unlock_irqrestore(&rq->lock, *flags); @@ -420,7 +424,7 @@ static int show_schedstat(struct seq_file *seq, void *v)  	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);  	seq_printf(seq, "timestamp %lu\n", jiffies);  	for_each_online_cpu(cpu) { -		runqueue_t *rq = cpu_rq(cpu); +		struct rq *rq = cpu_rq(cpu);  #ifdef CONFIG_SMP  		struct sched_domain *sd;  		int dcnt = 0; @@ -507,10 +511,10 @@ struct file_operations proc_schedstat_operations = {  /*   * rq_lock - lock a given runqueue and disable interrupts.   */ -static inline runqueue_t *this_rq_lock(void) +static inline struct rq *this_rq_lock(void)  	__acquires(rq->lock)  { -	runqueue_t *rq; +	struct rq *rq;  	local_irq_disable();  	rq = this_rq(); @@ -535,7 +539,7 @@ static inline runqueue_t *this_rq_lock(void)   * long it was from the *first* time it was queued to the time that it   * finally hit a cpu.   */ -static inline void sched_info_dequeued(task_t *t) +static inline void sched_info_dequeued(struct task_struct *t)  {  	t->sched_info.last_queued = 0;  } @@ -545,10 +549,10 @@ static inline void sched_info_dequeued(task_t *t)   * long it was waiting to run.  We also note when it began so that we   * can keep stats on how long its timeslice is.   */ -static void sched_info_arrive(task_t *t) +static void sched_info_arrive(struct task_struct *t)  {  	unsigned long now = jiffies, diff = 0; -	struct runqueue *rq = task_rq(t); +	struct rq *rq = task_rq(t);  	if (t->sched_info.last_queued)  		diff = now - t->sched_info.last_queued; @@ -579,7 +583,7 @@ static void sched_info_arrive(task_t *t)   * the timestamp if it is already not set.  It's assumed that   * sched_info_dequeued() will clear that stamp when appropriate.   */ -static inline void sched_info_queued(task_t *t) +static inline void sched_info_queued(struct task_struct *t)  {  	if (!t->sched_info.last_queued)  		t->sched_info.last_queued = jiffies; @@ -589,9 +593,9 @@ static inline void sched_info_queued(task_t *t)   * Called when a process ceases being the active-running process, either   * voluntarily or involuntarily.  Now we can calculate how long we ran.   */ -static inline void sched_info_depart(task_t *t) +static inline void sched_info_depart(struct task_struct *t)  { -	struct runqueue *rq = task_rq(t); +	struct rq *rq = task_rq(t);  	unsigned long diff = jiffies - t->sched_info.last_arrival;  	t->sched_info.cpu_time += diff; @@ -605,9 +609,10 @@ static inline void sched_info_depart(task_t *t)   * their time slice.  
(This may also be called when switching to or from   * the idle task.)  We are only called when prev != next.   */ -static inline void sched_info_switch(task_t *prev, task_t *next) +static inline void +sched_info_switch(struct task_struct *prev, struct task_struct *next)  { -	struct runqueue *rq = task_rq(prev); +	struct rq *rq = task_rq(prev);  	/*  	 * prev now departs the cpu.  It's not interesting to record @@ -628,7 +633,7 @@ static inline void sched_info_switch(task_t *prev, task_t *next)  /*   * Adding/removing a task to/from a priority array:   */ -static void dequeue_task(struct task_struct *p, prio_array_t *array) +static void dequeue_task(struct task_struct *p, struct prio_array *array)  {  	array->nr_active--;  	list_del(&p->run_list); @@ -636,7 +641,7 @@ static void dequeue_task(struct task_struct *p, prio_array_t *array)  		__clear_bit(p->prio, array->bitmap);  } -static void enqueue_task(struct task_struct *p, prio_array_t *array) +static void enqueue_task(struct task_struct *p, struct prio_array *array)  {  	sched_info_queued(p);  	list_add_tail(&p->run_list, array->queue + p->prio); @@ -649,12 +654,13 @@ static void enqueue_task(struct task_struct *p, prio_array_t *array)   * Put task to the end of the run list without the overhead of dequeue   * followed by enqueue.   */ -static void requeue_task(struct task_struct *p, prio_array_t *array) +static void requeue_task(struct task_struct *p, struct prio_array *array)  {  	list_move_tail(&p->run_list, array->queue + p->prio);  } -static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) +static inline void +enqueue_task_head(struct task_struct *p, struct prio_array *array)  {  	list_add(&p->run_list, array->queue + p->prio);  	__set_bit(p->prio, array->bitmap); @@ -677,7 +683,7 @@ static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)   * Both properties are important to certain workloads.   */ -static inline int __normal_prio(task_t *p) +static inline int __normal_prio(struct task_struct *p)  {  	int bonus, prio; @@ -713,7 +719,7 @@ static inline int __normal_prio(task_t *p)  #define RTPRIO_TO_LOAD_WEIGHT(rp) \  	(PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp)) -static void set_load_weight(task_t *p) +static void set_load_weight(struct task_struct *p)  {  	if (has_rt_policy(p)) {  #ifdef CONFIG_SMP @@ -731,23 +737,25 @@ static void set_load_weight(task_t *p)  		p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);  } -static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p) +static inline void +inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)  {  	rq->raw_weighted_load += p->load_weight;  } -static inline void dec_raw_weighted_load(runqueue_t *rq, const task_t *p) +static inline void +dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)  {  	rq->raw_weighted_load -= p->load_weight;  } -static inline void inc_nr_running(task_t *p, runqueue_t *rq) +static inline void inc_nr_running(struct task_struct *p, struct rq *rq)  {  	rq->nr_running++;  	inc_raw_weighted_load(rq, p);  } -static inline void dec_nr_running(task_t *p, runqueue_t *rq) +static inline void dec_nr_running(struct task_struct *p, struct rq *rq)  {  	rq->nr_running--;  	dec_raw_weighted_load(rq, p); @@ -760,7 +768,7 @@ static inline void dec_nr_running(task_t *p, runqueue_t *rq)   * setprio syscalls, and whenever the interactivity   * estimator recalculates.   
*/ -static inline int normal_prio(task_t *p) +static inline int normal_prio(struct task_struct *p)  {  	int prio; @@ -778,7 +786,7 @@ static inline int normal_prio(task_t *p)   * interactivity modifiers. Will be RT if the task got   * RT-boosted. If not then it returns p->normal_prio.   */ -static int effective_prio(task_t *p) +static int effective_prio(struct task_struct *p)  {  	p->normal_prio = normal_prio(p);  	/* @@ -794,9 +802,9 @@ static int effective_prio(task_t *p)  /*   * __activate_task - move a task to the runqueue.   */ -static void __activate_task(task_t *p, runqueue_t *rq) +static void __activate_task(struct task_struct *p, struct rq *rq)  { -	prio_array_t *target = rq->active; +	struct prio_array *target = rq->active;  	if (batch_task(p))  		target = rq->expired; @@ -807,7 +815,7 @@ static void __activate_task(task_t *p, runqueue_t *rq)  /*   * __activate_idle_task - move idle task to the _front_ of runqueue.   */ -static inline void __activate_idle_task(task_t *p, runqueue_t *rq) +static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)  {  	enqueue_task_head(p, rq->active);  	inc_nr_running(p, rq); @@ -817,7 +825,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq)   * Recalculate p->normal_prio and p->prio after having slept,   * updating the sleep-average too:   */ -static int recalc_task_prio(task_t *p, unsigned long long now) +static int recalc_task_prio(struct task_struct *p, unsigned long long now)  {  	/* Caller must always ensure 'now >= p->timestamp' */  	unsigned long sleep_time = now - p->timestamp; @@ -889,7 +897,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)   * Update all the scheduling statistics stuff. (sleep average   * calculation, priority modifiers, etc.)   */ -static void activate_task(task_t *p, runqueue_t *rq, int local) +static void activate_task(struct task_struct *p, struct rq *rq, int local)  {  	unsigned long long now; @@ -897,7 +905,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)  #ifdef CONFIG_SMP  	if (!local) {  		/* Compensate for drifting sched_clock */ -		runqueue_t *this_rq = this_rq(); +		struct rq *this_rq = this_rq();  		now = (now - this_rq->timestamp_last_tick)  			+ rq->timestamp_last_tick;  	} @@ -936,7 +944,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)  /*   * deactivate_task - remove a task from the runqueue.   */ -static void deactivate_task(struct task_struct *p, runqueue_t *rq) +static void deactivate_task(struct task_struct *p, struct rq *rq)  {  	dec_nr_running(p, rq);  	dequeue_task(p, p->array); @@ -956,7 +964,7 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)  #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)  #endif -static void resched_task(task_t *p) +static void resched_task(struct task_struct *p)  {  	int cpu; @@ -977,7 +985,7 @@ static void resched_task(task_t *p)  		smp_send_reschedule(cpu);  }  #else -static inline void resched_task(task_t *p) +static inline void resched_task(struct task_struct *p)  {  	assert_spin_locked(&task_rq(p)->lock);  	set_tsk_need_resched(p); @@ -988,7 +996,7 @@ static inline void resched_task(task_t *p)   * task_curr - is this task currently executing on a CPU?   * @p: the task in question.   
*/ -inline int task_curr(const task_t *p) +inline int task_curr(const struct task_struct *p)  {  	return cpu_curr(task_cpu(p)) == p;  } @@ -1000,22 +1008,23 @@ unsigned long weighted_cpuload(const int cpu)  }  #ifdef CONFIG_SMP -typedef struct { +struct migration_req {  	struct list_head list; -	task_t *task; +	struct task_struct *task;  	int dest_cpu;  	struct completion done; -} migration_req_t; +};  /*   * The task's runqueue lock must be held.   * Returns true if you have to wait for migration thread.   */ -static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) +static int +migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)  { -	runqueue_t *rq = task_rq(p); +	struct rq *rq = task_rq(p);  	/*  	 * If the task is not on a runqueue (and not running), then @@ -1030,6 +1039,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)  	req->task = p;  	req->dest_cpu = dest_cpu;  	list_add(&req->list, &rq->migration_queue); +  	return 1;  } @@ -1042,10 +1052,10 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)   * smp_call_function() if an IPI is sent by the same process we are   * waiting to become inactive.   */ -void wait_task_inactive(task_t *p) +void wait_task_inactive(struct task_struct *p)  {  	unsigned long flags; -	runqueue_t *rq; +	struct rq *rq;  	int preempted;  repeat: @@ -1076,7 +1086,7 @@ repeat:   * to another CPU then no harm is done and the purpose has been   * achieved as well.   */ -void kick_process(task_t *p) +void kick_process(struct task_struct *p)  {  	int cpu; @@ -1096,7 +1106,7 @@ void kick_process(task_t *p)   */  static inline unsigned long source_load(int cpu, int type)  { -	runqueue_t *rq = cpu_rq(cpu); +	struct rq *rq = cpu_rq(cpu);  	if (type == 0)  		return rq->raw_weighted_load; @@ -1110,7 +1120,7 @@ static inline unsigned long source_load(int cpu, int type)   */  static inline unsigned long target_load(int cpu, int type)  { -	runqueue_t *rq = cpu_rq(cpu); +	struct rq *rq = cpu_rq(cpu);  	if (type == 0)  		return rq->raw_weighted_load; @@ -1123,10 +1133,10 @@ static inline unsigned long target_load(int cpu, int type)   */  static inline unsigned long cpu_avg_load_per_task(int cpu)  { -	runqueue_t *rq = cpu_rq(cpu); +	struct rq *rq = cpu_rq(cpu);  	unsigned long n = rq->nr_running; -	return n ?  rq->raw_weighted_load / n : SCHED_LOAD_SCALE; +	return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;  }  /* @@ -1279,7 +1289,7 @@ nextlevel:   * Returns the CPU we should wake onto.   */  #if defined(ARCH_HAS_SCHED_WAKE_IDLE) -static int wake_idle(int cpu, task_t *p) +static int wake_idle(int cpu, struct task_struct *p)  {  	cpumask_t tmp;  	struct sched_domain *sd; @@ -1302,7 +1312,7 @@ static int wake_idle(int cpu, task_t *p)  	return cpu;  }  #else -static inline int wake_idle(int cpu, task_t *p) +static inline int wake_idle(int cpu, struct task_struct *p)  {  	return cpu;  } @@ -1322,15 +1332,15 @@ static inline int wake_idle(int cpu, task_t *p)   *   * returns failure only if the task is already active.   
*/ -static int try_to_wake_up(task_t *p, unsigned int state, int sync) +static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)  {  	int cpu, this_cpu, success = 0;  	unsigned long flags;  	long old_state; -	runqueue_t *rq; +	struct rq *rq;  #ifdef CONFIG_SMP -	unsigned long load, this_load;  	struct sched_domain *sd, *this_sd = NULL; +	unsigned long load, this_load;  	int new_cpu;  #endif @@ -1480,15 +1490,14 @@ out:  	return success;  } -int fastcall wake_up_process(task_t *p) +int fastcall wake_up_process(struct task_struct *p)  {  	return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |  				 TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);  } -  EXPORT_SYMBOL(wake_up_process); -int fastcall wake_up_state(task_t *p, unsigned int state) +int fastcall wake_up_state(struct task_struct *p, unsigned int state)  {  	return try_to_wake_up(p, state, 0);  } @@ -1497,7 +1506,7 @@ int fastcall wake_up_state(task_t *p, unsigned int state)   * Perform scheduler related setup for a newly forked process p.   * p is forked by current.   */ -void fastcall sched_fork(task_t *p, int clone_flags) +void fastcall sched_fork(struct task_struct *p, int clone_flags)  {  	int cpu = get_cpu(); @@ -1565,11 +1574,11 @@ void fastcall sched_fork(task_t *p, int clone_flags)   * that must be done for every newly created context, then puts the task   * on the runqueue and wakes it.   */ -void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) +void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)  { +	struct rq *rq, *this_rq;  	unsigned long flags;  	int this_cpu, cpu; -	runqueue_t *rq, *this_rq;  	rq = task_rq_lock(p, &flags);  	BUG_ON(p->state != TASK_RUNNING); @@ -1649,10 +1658,10 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)   * artificially, because any timeslice recovered here   * was given away by the parent in the first place.)   */ -void fastcall sched_exit(task_t *p) +void fastcall sched_exit(struct task_struct *p)  {  	unsigned long flags; -	runqueue_t *rq; +	struct rq *rq;  	/*  	 * If the child was a (relative-) CPU hog then decrease @@ -1683,7 +1692,7 @@ void fastcall sched_exit(task_t *p)   * prepare_task_switch sets up locking and calls architecture specific   * hooks.   */ -static inline void prepare_task_switch(runqueue_t *rq, task_t *next) +static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)  {  	prepare_lock_switch(rq, next);  	prepare_arch_switch(next); @@ -1704,7 +1713,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next)   * with the lock held can cause deadlocks; see schedule() for   * details.)   */ -static inline void finish_task_switch(runqueue_t *rq, task_t *prev) +static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)  	__releases(rq->lock)  {  	struct mm_struct *mm = rq->prev_mm; @@ -1742,10 +1751,11 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)   * schedule_tail - first thing a freshly forked thread must call.   * @prev: the thread we just switched away from.   
*/ -asmlinkage void schedule_tail(task_t *prev) +asmlinkage void schedule_tail(struct task_struct *prev)  	__releases(rq->lock)  { -	runqueue_t *rq = this_rq(); +	struct rq *rq = this_rq(); +  	finish_task_switch(rq, prev);  #ifdef __ARCH_WANT_UNLOCKED_CTXSW  	/* In this case, finish_task_switch does not reenable preemption */ @@ -1759,8 +1769,9 @@ asmlinkage void schedule_tail(task_t *prev)   * context_switch - switch to the new MM and the new   * thread's register state.   */ -static inline -task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) +static inline struct task_struct * +context_switch(struct rq *rq, struct task_struct *prev, +	       struct task_struct *next)  {  	struct mm_struct *mm = next->mm;  	struct mm_struct *oldmm = prev->active_mm; @@ -1777,6 +1788,7 @@ task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)  		WARN_ON(rq->prev_mm);  		rq->prev_mm = oldmm;  	} +	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);  	/* Here we just switch the register state and the stack. */  	switch_to(prev, next, prev); @@ -1857,12 +1869,21 @@ unsigned long nr_active(void)  #ifdef CONFIG_SMP  /* + * Is this task likely cache-hot: + */ +static inline int +task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd) +{ +	return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time; +} + +/*   * double_rq_lock - safely lock two runqueues   *   * Note this does not disable interrupts like task_rq_lock,   * you need to do so manually before calling.   */ -static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) +static void double_rq_lock(struct rq *rq1, struct rq *rq2)  	__acquires(rq1->lock)  	__acquires(rq2->lock)  { @@ -1886,7 +1907,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)   * Note this does not restore interrupts like task_rq_unlock,   * you need to do so manually after calling.   */ -static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) +static void double_rq_unlock(struct rq *rq1, struct rq *rq2)  	__releases(rq1->lock)  	__releases(rq2->lock)  { @@ -1900,7 +1921,7 @@ static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)  /*   * double_lock_balance - lock the busiest runqueue, this_rq is locked already.   */ -static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) +static void double_lock_balance(struct rq *this_rq, struct rq *busiest)  	__releases(this_rq->lock)  	__acquires(busiest->lock)  	__acquires(this_rq->lock) @@ -1921,11 +1942,11 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)   * allow dest_cpu, which will force the cpu onto dest_cpu.  Then   * the cpu_allowed mask is restored.   */ -static void sched_migrate_task(task_t *p, int dest_cpu) +static void sched_migrate_task(struct task_struct *p, int dest_cpu)  { -	migration_req_t req; -	runqueue_t *rq; +	struct migration_req req;  	unsigned long flags; +	struct rq *rq;  	rq = task_rq_lock(p, &flags);  	if (!cpu_isset(dest_cpu, p->cpus_allowed) @@ -1936,11 +1957,13 @@ static void sched_migrate_task(task_t *p, int dest_cpu)  	if (migrate_task(p, dest_cpu, &req)) {  		/* Need to wait for migration thread (might exit: take ref). */  		struct task_struct *mt = rq->migration_thread; +  		get_task_struct(mt);  		task_rq_unlock(rq, &flags);  		wake_up_process(mt);  		put_task_struct(mt);  		wait_for_completion(&req.done); +  		return;  	}  out: @@ -1964,9 +1987,9 @@ void sched_exec(void)   * pull_task - move a task from a remote runqueue to the local runqueue.   
* Both runqueues must be locked.   */ -static -void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, -	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) +static void pull_task(struct rq *src_rq, struct prio_array *src_array, +		      struct task_struct *p, struct rq *this_rq, +		      struct prio_array *this_array, int this_cpu)  {  	dequeue_task(p, src_array);  	dec_nr_running(p, src_rq); @@ -1987,7 +2010,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,   * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?   */  static -int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, +int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,  		     struct sched_domain *sd, enum idle_type idle,  		     int *all_pinned)  { @@ -2019,6 +2042,7 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,  }  #define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio) +  /*   * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted   * load from busiest to this_rq, as part of a balancing operation within @@ -2026,18 +2050,17 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,   *   * Called with both runqueues locked.   */ -static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, +static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,  		      unsigned long max_nr_move, unsigned long max_load_move,  		      struct sched_domain *sd, enum idle_type idle,  		      int *all_pinned)  { -	prio_array_t *array, *dst_array; +	int idx, pulled = 0, pinned = 0, this_best_prio, best_prio, +	    best_prio_seen, skip_for_load; +	struct prio_array *array, *dst_array;  	struct list_head *head, *curr; -	int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio; -	int busiest_best_prio_seen; -	int skip_for_load; /* skip the task based on weighted load issues */ +	struct task_struct *tmp;  	long rem_load_move; -	task_t *tmp;  	if (max_nr_move == 0 || max_load_move == 0)  		goto out; @@ -2045,15 +2068,15 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,  	rem_load_move = max_load_move;  	pinned = 1;  	this_best_prio = rq_best_prio(this_rq); -	busiest_best_prio = rq_best_prio(busiest); +	best_prio = rq_best_prio(busiest);  	/*  	 * Enable handling of the case where there is more than one task  	 * with the best priority.   If the current running task is one -	 * of those with prio==busiest_best_prio we know it won't be moved +	 * of those with prio==best_prio we know it won't be moved  	 * and therefore it's safe to override the skip (based on load) of  	 * any task we find with that prio.  	 */ -	busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio; +	best_prio_seen = best_prio == busiest->curr->prio;  	/*  	 * We first consider expired tasks. 
Those will likely not be @@ -2089,7 +2112,7 @@ skip_bitmap:  	head = array->queue + idx;  	curr = head->prev;  skip_queue: -	tmp = list_entry(curr, task_t, run_list); +	tmp = list_entry(curr, struct task_struct, run_list);  	curr = curr->prev; @@ -2100,10 +2123,11 @@ skip_queue:  	 */  	skip_for_load = tmp->load_weight > rem_load_move;  	if (skip_for_load && idx < this_best_prio) -		skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio; +		skip_for_load = !best_prio_seen && idx == best_prio;  	if (skip_for_load ||  	    !can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) { -		busiest_best_prio_seen |= idx == busiest_best_prio; + +		best_prio_seen |= idx == best_prio;  		if (curr != head)  			goto skip_queue;  		idx++; @@ -2146,8 +2170,8 @@ out:  /*   * find_busiest_group finds and returns the busiest CPU group within the - * domain. It calculates and returns the amount of weighted load which should be - * moved to restore balance via the imbalance parameter. + * domain. It calculates and returns the amount of weighted load which + * should be moved to restore balance via the imbalance parameter.   */  static struct sched_group *  find_busiest_group(struct sched_domain *sd, int this_cpu, @@ -2188,7 +2212,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,  		sum_weighted_load = sum_nr_running = avg_load = 0;  		for_each_cpu_mask(i, group->cpumask) { -			runqueue_t *rq = cpu_rq(i); +			struct rq *rq = cpu_rq(i);  			if (*sd_idle && !idle_cpu(i))  				*sd_idle = 0; @@ -2269,7 +2293,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,   		 * capacity but still has some space to pick up some load   		 * from other group and save more power   		 */ - 		if (sum_nr_running <= group_capacity - 1) + 		if (sum_nr_running <= group_capacity - 1) {   			if (sum_nr_running > leader_nr_running ||   			    (sum_nr_running == leader_nr_running &&   			     first_cpu(group->cpumask) > @@ -2277,7 +2301,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,   				group_leader = group;   				leader_nr_running = sum_nr_running;   			} - +		}  group_next:  #endif  		group = group->next; @@ -2332,8 +2356,7 @@ group_next:  	 * moved  	 */  	if (*imbalance < busiest_load_per_task) { -		unsigned long pwr_now, pwr_move; -		unsigned long tmp; +		unsigned long tmp, pwr_now, pwr_move;  		unsigned int imbn;  small_imbalance: @@ -2405,22 +2428,23 @@ ret:  /*   * find_busiest_queue - find the busiest runqueue among the cpus in group.   */ -static runqueue_t *find_busiest_queue(struct sched_group *group, -	enum idle_type idle, unsigned long imbalance) +static struct rq * +find_busiest_queue(struct sched_group *group, enum idle_type idle, +		   unsigned long imbalance)  { +	struct rq *busiest = NULL, *rq;  	unsigned long max_load = 0; -	runqueue_t *busiest = NULL, *rqi;  	int i;  	for_each_cpu_mask(i, group->cpumask) { -		rqi = cpu_rq(i); +		rq = cpu_rq(i); -		if (rqi->nr_running == 1 && rqi->raw_weighted_load > imbalance) +		if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)  			continue; -		if (rqi->raw_weighted_load > max_load) { -			max_load = rqi->raw_weighted_load; -			busiest = rqi; +		if (rq->raw_weighted_load > max_load) { +			max_load = rq->raw_weighted_load; +			busiest = rq;  		}  	} @@ -2433,22 +2457,24 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,   */  #define MAX_PINNED_INTERVAL	512 -#define minus_1_or_zero(n) ((n) > 0 ? (n) - 1 : 0) +static inline unsigned long minus_1_or_zero(unsigned long n) +{ +	return n > 0 ? 
n - 1 : 0; +} +  /*   * Check this_cpu to ensure it is balanced within domain. Attempt to move   * tasks if there is an imbalance.   *   * Called with this_rq unlocked.   */ -static int load_balance(int this_cpu, runqueue_t *this_rq, +static int load_balance(int this_cpu, struct rq *this_rq,  			struct sched_domain *sd, enum idle_type idle)  { +	int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;  	struct sched_group *group; -	runqueue_t *busiest;  	unsigned long imbalance; -	int nr_moved, all_pinned = 0; -	int active_balance = 0; -	int sd_idle = 0; +	struct rq *busiest;  	if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&  	    !sched_smt_power_savings) @@ -2482,8 +2508,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,  		 */  		double_rq_lock(this_rq, busiest);  		nr_moved = move_tasks(this_rq, this_cpu, busiest, -					minus_1_or_zero(busiest->nr_running), -					imbalance, sd, idle, &all_pinned); +				      minus_1_or_zero(busiest->nr_running), +				      imbalance, sd, idle, &all_pinned);  		double_rq_unlock(this_rq, busiest);  		/* All tasks on this runqueue were pinned by CPU affinity */ @@ -2556,7 +2582,8 @@ out_one_pinned:  			(sd->balance_interval < sd->max_interval))  		sd->balance_interval *= 2; -	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) +	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && +			!sched_smt_power_savings)  		return -1;  	return 0;  } @@ -2568,11 +2595,11 @@ out_one_pinned:   * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).   * this_rq is locked.   */ -static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, -				struct sched_domain *sd) +static int +load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)  {  	struct sched_group *group; -	runqueue_t *busiest = NULL; +	struct rq *busiest = NULL;  	unsigned long imbalance;  	int nr_moved = 0;  	int sd_idle = 0; @@ -2618,9 +2645,11 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,  out_balanced:  	schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); -	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) +	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && +					!sched_smt_power_savings)  		return -1;  	sd->nr_balance_failed = 0; +  	return 0;  } @@ -2628,16 +2657,15 @@ out_balanced:   * idle_balance is called by schedule() if this_cpu is about to become   * idle. Attempts to pull tasks from other CPUs.   */ -static void idle_balance(int this_cpu, runqueue_t *this_rq) +static void idle_balance(int this_cpu, struct rq *this_rq)  {  	struct sched_domain *sd;  	for_each_domain(this_cpu, sd) {  		if (sd->flags & SD_BALANCE_NEWIDLE) { -			if (load_balance_newidle(this_cpu, this_rq, sd)) { -				/* We've pulled tasks over so stop searching */ +			/* If we've pulled tasks over stop searching: */ +			if (load_balance_newidle(this_cpu, this_rq, sd))  				break; -			}  		}  	}  } @@ -2650,14 +2678,14 @@ static void idle_balance(int this_cpu, runqueue_t *this_rq)   *   * Called with busiest_rq locked.   */ -static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu) +static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)  { -	struct sched_domain *sd; -	runqueue_t *target_rq;  	int target_cpu = busiest_rq->push_cpu; +	struct sched_domain *sd; +	struct rq *target_rq; +	/* Is there any task to move? 
*/  	if (busiest_rq->nr_running <= 1) -		/* no task to move */  		return;  	target_rq = cpu_rq(target_cpu); @@ -2675,21 +2703,20 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)  	/* Search for an sd spanning us and the target CPU. */  	for_each_domain(target_cpu, sd) {  		if ((sd->flags & SD_LOAD_BALANCE) && -			cpu_isset(busiest_cpu, sd->span)) +		    cpu_isset(busiest_cpu, sd->span))  				break;  	} -	if (unlikely(sd == NULL)) -		goto out; - -	schedstat_inc(sd, alb_cnt); +	if (likely(sd)) { +		schedstat_inc(sd, alb_cnt); -	if (move_tasks(target_rq, target_cpu, busiest_rq, 1, -			RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, NULL)) -		schedstat_inc(sd, alb_pushed); -	else -		schedstat_inc(sd, alb_failed); -out: +		if (move_tasks(target_rq, target_cpu, busiest_rq, 1, +			       RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, +			       NULL)) +			schedstat_inc(sd, alb_pushed); +		else +			schedstat_inc(sd, alb_failed); +	}  	spin_unlock(&target_rq->lock);  } @@ -2702,23 +2729,27 @@ out:   * Balancing parameters are set up in arch_init_sched_domains.   */ -/* Don't have all balancing operations going off at once */ -#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) +/* Don't have all balancing operations going off at once: */ +static inline unsigned long cpu_offset(int cpu) +{ +	return jiffies + cpu * HZ / NR_CPUS; +} -static void rebalance_tick(int this_cpu, runqueue_t *this_rq, -			   enum idle_type idle) +static void +rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)  { -	unsigned long old_load, this_load; -	unsigned long j = jiffies + CPU_OFFSET(this_cpu); +	unsigned long this_load, interval, j = cpu_offset(this_cpu);  	struct sched_domain *sd; -	int i; +	int i, scale;  	this_load = this_rq->raw_weighted_load; -	/* Update our load */ -	for (i = 0; i < 3; i++) { -		unsigned long new_load = this_load; -		int scale = 1 << i; + +	/* Update our load: */ +	for (i = 0, scale = 1; i < 3; i++, scale <<= 1) { +		unsigned long old_load, new_load; +  		old_load = this_rq->cpu_load[i]; +		new_load = this_load;  		/*  		 * Round up the averaging division if load is increasing. This  		 * prevents us from getting stuck on 9 if the load is 10, for @@ -2730,8 +2761,6 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,  	}  	for_each_domain(this_cpu, sd) { -		unsigned long interval; -  		if (!(sd->flags & SD_LOAD_BALANCE))  			continue; @@ -2761,17 +2790,18 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,  /*   * on UP we do not need to balance between CPUs:   */ -static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) +static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle)  {  } -static inline void idle_balance(int cpu, runqueue_t *rq) +static inline void idle_balance(int cpu, struct rq *rq)  {  }  #endif -static inline int wake_priority_sleeper(runqueue_t *rq) +static inline int wake_priority_sleeper(struct rq *rq)  {  	int ret = 0; +  #ifdef CONFIG_SCHED_SMT  	spin_lock(&rq->lock);  	/* @@ -2795,25 +2825,26 @@ EXPORT_PER_CPU_SYMBOL(kstat);   * This is called on clock ticks and on context switches.   * Bank in p->sched_time the ns elapsed since the last tick or switch.   
*/ -static inline void update_cpu_clock(task_t *p, runqueue_t *rq, -				    unsigned long long now) +static inline void +update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)  { -	unsigned long long last = max(p->timestamp, rq->timestamp_last_tick); -	p->sched_time += now - last; +	p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);  }  /*   * Return current->sched_time plus any more ns on the sched_clock   * that have not yet been banked.   */ -unsigned long long current_sched_time(const task_t *tsk) +unsigned long long current_sched_time(const struct task_struct *p)  {  	unsigned long long ns;  	unsigned long flags; +  	local_irq_save(flags); -	ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick); -	ns = tsk->sched_time + (sched_clock() - ns); +	ns = max(p->timestamp, task_rq(p)->timestamp_last_tick); +	ns = p->sched_time + sched_clock() - ns;  	local_irq_restore(flags); +  	return ns;  } @@ -2827,11 +2858,16 @@ unsigned long long current_sched_time(const task_t *tsk)   * increasing number of running tasks. We also ignore the interactivity   * if a better static_prio task has expired:   */ -#define EXPIRED_STARVING(rq) \ -	((STARVATION_LIMIT && ((rq)->expired_timestamp && \ -		(jiffies - (rq)->expired_timestamp >= \ -			STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \ -			((rq)->curr->static_prio > (rq)->best_expired_prio)) +static inline int expired_starving(struct rq *rq) +{ +	if (rq->curr->static_prio > rq->best_expired_prio) +		return 1; +	if (!STARVATION_LIMIT || !rq->expired_timestamp) +		return 0; +	if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running) +		return 1; +	return 0; +}  /*   * Account user cpu time to a process. @@ -2864,7 +2900,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,  			 cputime_t cputime)  {  	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; -	runqueue_t *rq = this_rq(); +	struct rq *rq = this_rq();  	cputime64_t tmp;  	p->stime = cputime_add(p->stime, cputime); @@ -2894,7 +2930,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)  {  	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;  	cputime64_t tmp = cputime_to_cputime64(steal); -	runqueue_t *rq = this_rq(); +	struct rq *rq = this_rq();  	if (p == rq->idle) {  		p->stime = cputime_add(p->stime, steal); @@ -2915,10 +2951,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)   */  void scheduler_tick(void)  { -	int cpu = smp_processor_id(); -	runqueue_t *rq = this_rq(); -	task_t *p = current;  	unsigned long long now = sched_clock(); +	struct task_struct *p = current; +	int cpu = smp_processor_id(); +	struct rq *rq = cpu_rq(cpu);  	update_cpu_clock(p, rq, now); @@ -2968,7 +3004,7 @@ void scheduler_tick(void)  		if (!rq->expired_timestamp)  			rq->expired_timestamp = jiffies; -		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) { +		if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {  			enqueue_task(p, rq->expired);  			if (p->static_prio < rq->best_expired_prio)  				rq->best_expired_prio = p->static_prio; @@ -3007,7 +3043,7 @@ out:  }  #ifdef CONFIG_SCHED_SMT -static inline void wakeup_busy_runqueue(runqueue_t *rq) +static inline void wakeup_busy_runqueue(struct rq *rq)  {  	/* If an SMT runqueue is sleeping due to priority reasons wake it up */  	if (rq->curr == rq->idle && rq->nr_running) @@ -3033,7 +3069,7 @@ static void wake_sleeping_dependent(int this_cpu)  		return;  	for_each_cpu_mask(i, sd->span) { -		runqueue_t *smt_rq = cpu_rq(i); +		struct rq *smt_rq = 
cpu_rq(i);  		if (i == this_cpu)  			continue; @@ -3050,7 +3086,8 @@ static void wake_sleeping_dependent(int this_cpu)   * utilize, if another task runs on a sibling. This models the   * slowdown effect of other tasks running on siblings:   */ -static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) +static inline unsigned long +smt_slice(struct task_struct *p, struct sched_domain *sd)  {  	return p->time_slice * (100 - sd->per_cpu_gain) / 100;  } @@ -3061,7 +3098,8 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)   * acquire their lock. As we only trylock the normal locking order does not   * need to be obeyed.   */ -static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) +static int +dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)  {  	struct sched_domain *tmp, *sd = NULL;  	int ret = 0, i; @@ -3081,8 +3119,8 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)  		return 0;  	for_each_cpu_mask(i, sd->span) { -		runqueue_t *smt_rq; -		task_t *smt_curr; +		struct task_struct *smt_curr; +		struct rq *smt_rq;  		if (i == this_cpu)  			continue; @@ -3127,9 +3165,8 @@ unlock:  static inline void wake_sleeping_dependent(int this_cpu)  {  } - -static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq, -					task_t *p) +static inline int +dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)  {  	return 0;  } @@ -3142,12 +3179,13 @@ void fastcall add_preempt_count(int val)  	/*  	 * Underflow?  	 */ -	BUG_ON((preempt_count() < 0)); +	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) +		return;  	preempt_count() += val;  	/*  	 * Spinlock count overflowing soon?  	 */ -	BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10); +	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);  }  EXPORT_SYMBOL(add_preempt_count); @@ -3156,11 +3194,15 @@ void fastcall sub_preempt_count(int val)  	/*  	 * Underflow?  	 */ -	BUG_ON(val > preempt_count()); +	if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) +		return;  	/*  	 * Is the spinlock portion underflowing?  	 */ -	BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK)); +	if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && +			!(preempt_count() & PREEMPT_MASK))) +		return; +  	preempt_count() -= val;  }  EXPORT_SYMBOL(sub_preempt_count); @@ -3178,14 +3220,14 @@ static inline int interactive_sleep(enum sleep_type sleep_type)   */  asmlinkage void __sched schedule(void)  { -	long *switch_count; -	task_t *prev, *next; -	runqueue_t *rq; -	prio_array_t *array; +	struct task_struct *prev, *next; +	struct prio_array *array;  	struct list_head *queue;  	unsigned long long now;  	unsigned long run_time;  	int cpu, idx, new_prio; +	long *switch_count; +	struct rq *rq;  	/*  	 * Test if we are atomic.  
Since do_exit() needs to call into @@ -3275,7 +3317,7 @@ need_resched_nonpreemptible:  	idx = sched_find_first_bit(array->bitmap);  	queue = array->queue + idx; -	next = list_entry(queue->next, task_t, run_list); +	next = list_entry(queue->next, struct task_struct, run_list);  	if (!rt_task(next) && interactive_sleep(next->sleep_type)) {  		unsigned long long delta = now - next->timestamp; @@ -3338,7 +3380,6 @@ switch_tasks:  	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))  		goto need_resched;  } -  EXPORT_SYMBOL(schedule);  #ifdef CONFIG_PREEMPT @@ -3383,7 +3424,6 @@ need_resched:  	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))  		goto need_resched;  } -  EXPORT_SYMBOL(preempt_schedule);  /* @@ -3432,10 +3472,8 @@ need_resched:  int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,  			  void *key)  { -	task_t *p = curr->private; -	return try_to_wake_up(p, mode, sync); +	return try_to_wake_up(curr->private, mode, sync);  } -  EXPORT_SYMBOL(default_wake_function);  /* @@ -3453,13 +3491,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,  	struct list_head *tmp, *next;  	list_for_each_safe(tmp, next, &q->task_list) { -		wait_queue_t *curr; -		unsigned flags; -		curr = list_entry(tmp, wait_queue_t, task_list); -		flags = curr->flags; +		wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); +		unsigned flags = curr->flags; +  		if (curr->func(curr, mode, sync, key) && -		    (flags & WQ_FLAG_EXCLUSIVE) && -		    !--nr_exclusive) +				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)  			break;  	}  } @@ -3480,7 +3516,6 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,  	__wake_up_common(q, mode, nr_exclusive, 0, key);  	spin_unlock_irqrestore(&q->lock, flags);  } -  EXPORT_SYMBOL(__wake_up);  /* @@ -3549,6 +3584,7 @@ EXPORT_SYMBOL(complete_all);  void fastcall __sched wait_for_completion(struct completion *x)  {  	might_sleep(); +  	spin_lock_irq(&x->wait.lock);  	if (!x->done) {  		DECLARE_WAITQUEUE(wait, current); @@ -3693,7 +3729,6 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)  	schedule();  	SLEEP_ON_TAIL  } -  EXPORT_SYMBOL(interruptible_sleep_on);  long fastcall __sched @@ -3709,7 +3744,6 @@ interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)  	return timeout;  } -  EXPORT_SYMBOL(interruptible_sleep_on_timeout);  void fastcall __sched sleep_on(wait_queue_head_t *q) @@ -3722,7 +3756,6 @@ void fastcall __sched sleep_on(wait_queue_head_t *q)  	schedule();  	SLEEP_ON_TAIL  } -  EXPORT_SYMBOL(sleep_on);  long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout) @@ -3752,11 +3785,11 @@ EXPORT_SYMBOL(sleep_on_timeout);   *   * Used by the rt_mutex code to implement priority inheritance logic.   
*/ -void rt_mutex_setprio(task_t *p, int prio) +void rt_mutex_setprio(struct task_struct *p, int prio)  { +	struct prio_array *array;  	unsigned long flags; -	prio_array_t *array; -	runqueue_t *rq; +	struct rq *rq;  	int oldprio;  	BUG_ON(prio < 0 || prio > MAX_PRIO); @@ -3793,12 +3826,12 @@ void rt_mutex_setprio(task_t *p, int prio)  #endif -void set_user_nice(task_t *p, long nice) +void set_user_nice(struct task_struct *p, long nice)  { -	unsigned long flags; -	prio_array_t *array; -	runqueue_t *rq; +	struct prio_array *array;  	int old_prio, delta; +	unsigned long flags; +	struct rq *rq;  	if (TASK_NICE(p) == nice || nice < -20 || nice > 19)  		return; @@ -3849,10 +3882,11 @@ EXPORT_SYMBOL(set_user_nice);   * @p: task   * @nice: nice value   */ -int can_nice(const task_t *p, const int nice) +int can_nice(const struct task_struct *p, const int nice)  {  	/* convert nice value [19,-20] to rlimit style value [1,40] */  	int nice_rlim = 20 - nice; +  	return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||  		capable(CAP_SYS_NICE));  } @@ -3868,8 +3902,7 @@ int can_nice(const task_t *p, const int nice)   */  asmlinkage long sys_nice(int increment)  { -	int retval; -	long nice; +	long nice, retval;  	/*  	 * Setpriority might change our priority at the same moment. @@ -3908,7 +3941,7 @@ asmlinkage long sys_nice(int increment)   * RT tasks are offset by -200. Normal tasks are centered   * around 0, value goes from -16 to +15.   */ -int task_prio(const task_t *p) +int task_prio(const struct task_struct *p)  {  	return p->prio - MAX_RT_PRIO;  } @@ -3917,7 +3950,7 @@ int task_prio(const task_t *p)   * task_nice - return the nice value of a given task.   * @p: the task in question.   */ -int task_nice(const task_t *p) +int task_nice(const struct task_struct *p)  {  	return TASK_NICE(p);  } @@ -3936,7 +3969,7 @@ int idle_cpu(int cpu)   * idle_task - return the idle task for a given cpu.   * @cpu: the processor in question.   */ -task_t *idle_task(int cpu) +struct task_struct *idle_task(int cpu)  {  	return cpu_rq(cpu)->idle;  } @@ -3945,7 +3978,7 @@ task_t *idle_task(int cpu)   * find_process_by_pid - find a process with a matching PID value.   * @pid: the pid in question.   */ -static inline task_t *find_process_by_pid(pid_t pid) +static inline struct task_struct *find_process_by_pid(pid_t pid)  {  	return pid ? 
find_task_by_pid(pid) : current;  } @@ -3954,6 +3987,7 @@ static inline task_t *find_process_by_pid(pid_t pid)  static void __setscheduler(struct task_struct *p, int policy, int prio)  {  	BUG_ON(p->array); +  	p->policy = policy;  	p->rt_priority = prio;  	p->normal_prio = normal_prio(p); @@ -3977,11 +4011,10 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)  int sched_setscheduler(struct task_struct *p, int policy,  		       struct sched_param *param)  { -	int retval; -	int oldprio, oldpolicy = -1; -	prio_array_t *array; +	int retval, oldprio, oldpolicy = -1; +	struct prio_array *array;  	unsigned long flags; -	runqueue_t *rq; +	struct rq *rq;  	/* may grab non-irq protected spin_locks */  	BUG_ON(in_interrupt()); @@ -4079,9 +4112,9 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);  static int  do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)  { -	int retval;  	struct sched_param lparam;  	struct task_struct *p; +	int retval;  	if (!param || pid < 0)  		return -EINVAL; @@ -4097,6 +4130,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)  	read_unlock_irq(&tasklist_lock);  	retval = sched_setscheduler(p, policy, &lparam);  	put_task_struct(p); +  	return retval;  } @@ -4132,8 +4166,8 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)   */  asmlinkage long sys_sched_getscheduler(pid_t pid)  { +	struct task_struct *p;  	int retval = -EINVAL; -	task_t *p;  	if (pid < 0)  		goto out_nounlock; @@ -4160,8 +4194,8 @@ out_nounlock:  asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)  {  	struct sched_param lp; +	struct task_struct *p;  	int retval = -EINVAL; -	task_t *p;  	if (!param || pid < 0)  		goto out_nounlock; @@ -4194,9 +4228,9 @@ out_unlock:  long sched_setaffinity(pid_t pid, cpumask_t new_mask)  { -	task_t *p; -	int retval;  	cpumask_t cpus_allowed; +	struct task_struct *p; +	int retval;  	lock_cpu_hotplug();  	read_lock(&tasklist_lock); @@ -4282,8 +4316,8 @@ cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;  long sched_getaffinity(pid_t pid, cpumask_t *mask)  { +	struct task_struct *p;  	int retval; -	task_t *p;  	lock_cpu_hotplug();  	read_lock(&tasklist_lock); @@ -4342,9 +4376,8 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,   */  asmlinkage long sys_sched_yield(void)  { -	runqueue_t *rq = this_rq_lock(); -	prio_array_t *array = current->array; -	prio_array_t *target = rq->expired; +	struct rq *rq = this_rq_lock(); +	struct prio_array *array = current->array, *target = rq->expired;  	schedstat_inc(rq, yld_cnt);  	/* @@ -4378,6 +4411,7 @@ asmlinkage long sys_sched_yield(void)  	 * no need to preempt or enable interrupts:  	 */  	__release(rq->lock); +	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);  	_raw_spin_unlock(&rq->lock);  	preempt_enable_no_resched(); @@ -4441,6 +4475,7 @@ int cond_resched_lock(spinlock_t *lock)  		spin_lock(lock);  	}  	if (need_resched() && __resched_legal()) { +		spin_release(&lock->dep_map, 1, _THIS_IP_);  		_raw_spin_unlock(lock);  		preempt_enable_no_resched();  		__cond_resched(); @@ -4456,7 +4491,9 @@ int __sched cond_resched_softirq(void)  	BUG_ON(!in_softirq());  	if (need_resched() && __resched_legal()) { -		__local_bh_enable(); +		raw_local_irq_disable(); +		_local_bh_enable(); +		raw_local_irq_enable();  		__cond_resched();  		local_bh_disable();  		return 1; @@ -4476,7 +4513,6 @@ void __sched yield(void)  	set_current_state(TASK_RUNNING);  	sys_sched_yield();  } -  
EXPORT_SYMBOL(yield);  /* @@ -4488,18 +4524,17 @@ EXPORT_SYMBOL(yield);   */  void __sched io_schedule(void)  { -	struct runqueue *rq = &__raw_get_cpu_var(runqueues); +	struct rq *rq = &__raw_get_cpu_var(runqueues);  	atomic_inc(&rq->nr_iowait);  	schedule();  	atomic_dec(&rq->nr_iowait);  } -  EXPORT_SYMBOL(io_schedule);  long __sched io_schedule_timeout(long timeout)  { -	struct runqueue *rq = &__raw_get_cpu_var(runqueues); +	struct rq *rq = &__raw_get_cpu_var(runqueues);  	long ret;  	atomic_inc(&rq->nr_iowait); @@ -4566,9 +4601,9 @@ asmlinkage long sys_sched_get_priority_min(int policy)  asmlinkage  long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)  { +	struct task_struct *p;  	int retval = -EINVAL;  	struct timespec t; -	task_t *p;  	if (pid < 0)  		goto out_nounlock; @@ -4596,28 +4631,32 @@ out_unlock:  static inline struct task_struct *eldest_child(struct task_struct *p)  { -	if (list_empty(&p->children)) return NULL; +	if (list_empty(&p->children)) +		return NULL;  	return list_entry(p->children.next,struct task_struct,sibling);  }  static inline struct task_struct *older_sibling(struct task_struct *p)  { -	if (p->sibling.prev==&p->parent->children) return NULL; +	if (p->sibling.prev==&p->parent->children) +		return NULL;  	return list_entry(p->sibling.prev,struct task_struct,sibling);  }  static inline struct task_struct *younger_sibling(struct task_struct *p)  { -	if (p->sibling.next==&p->parent->children) return NULL; +	if (p->sibling.next==&p->parent->children) +		return NULL;  	return list_entry(p->sibling.next,struct task_struct,sibling);  } -static void show_task(task_t *p) +static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; + +static void show_task(struct task_struct *p)  { -	task_t *relative; -	unsigned state; +	struct task_struct *relative;  	unsigned long free = 0; -	static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; +	unsigned state;  	printk("%-13.13s ", p->comm);  	state = p->state ? __ffs(p->state) + 1 : 0; @@ -4668,7 +4707,7 @@ static void show_task(task_t *p)  void show_state(void)  { -	task_t *g, *p; +	struct task_struct *g, *p;  #if (BITS_PER_LONG == 32)  	printk("\n" @@ -4690,7 +4729,7 @@ void show_state(void)  	} while_each_thread(g, p);  	read_unlock(&tasklist_lock); -	mutex_debug_show_all_locks(); +	debug_show_all_locks();  }  /** @@ -4701,9 +4740,9 @@ void show_state(void)   * NOTE: this function does not set the idle thread's NEED_RESCHED   * flag, to make booting more robust.   */ -void __devinit init_idle(task_t *idle, int cpu) +void __devinit init_idle(struct task_struct *idle, int cpu)  { -	runqueue_t *rq = cpu_rq(cpu); +	struct rq *rq = cpu_rq(cpu);  	unsigned long flags;  	idle->timestamp = sched_clock(); @@ -4742,7 +4781,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;  /*   * This is how migration works:   * - * 1) we queue a migration_req_t structure in the source CPU's + * 1) we queue a struct migration_req structure in the source CPU's   *    runqueue and wake up that CPU's migration thread.   * 2) we down() the locked semaphore => thread blocks.   * 3) migration thread wakes up (implicitly it forces the migrated @@ -4764,12 +4803,12 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;   * task must not exit() & deallocate itself prematurely.  The   * call is not atomic; no spinlocks may be held.   
*/ -int set_cpus_allowed(task_t *p, cpumask_t new_mask) +int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)  { +	struct migration_req req;  	unsigned long flags; +	struct rq *rq;  	int ret = 0; -	migration_req_t req; -	runqueue_t *rq;  	rq = task_rq_lock(p, &flags);  	if (!cpus_intersects(new_mask, cpu_online_map)) { @@ -4792,9 +4831,9 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask)  	}  out:  	task_rq_unlock(rq, &flags); +  	return ret;  } -  EXPORT_SYMBOL_GPL(set_cpus_allowed);  /* @@ -4810,7 +4849,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);   */  static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)  { -	runqueue_t *rq_dest, *rq_src; +	struct rq *rq_dest, *rq_src;  	int ret = 0;  	if (unlikely(cpu_is_offline(dest_cpu))) @@ -4855,16 +4894,16 @@ out:   */  static int migration_thread(void *data)  { -	runqueue_t *rq;  	int cpu = (long)data; +	struct rq *rq;  	rq = cpu_rq(cpu);  	BUG_ON(rq->migration_thread != current);  	set_current_state(TASK_INTERRUPTIBLE);  	while (!kthread_should_stop()) { +		struct migration_req *req;  		struct list_head *head; -		migration_req_t *req;  		try_to_freeze(); @@ -4888,7 +4927,7 @@ static int migration_thread(void *data)  			set_current_state(TASK_INTERRUPTIBLE);  			continue;  		} -		req = list_entry(head->next, migration_req_t, list); +		req = list_entry(head->next, struct migration_req, list);  		list_del_init(head->next);  		spin_unlock(&rq->lock); @@ -4913,28 +4952,28 @@ wait_to_die:  #ifdef CONFIG_HOTPLUG_CPU  /* Figure out where task on dead CPU should go, use force if neccessary. */ -static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk) +static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)  { -	runqueue_t *rq;  	unsigned long flags; -	int dest_cpu;  	cpumask_t mask; +	struct rq *rq; +	int dest_cpu;  restart:  	/* On same node? */  	mask = node_to_cpumask(cpu_to_node(dead_cpu)); -	cpus_and(mask, mask, tsk->cpus_allowed); +	cpus_and(mask, mask, p->cpus_allowed);  	dest_cpu = any_online_cpu(mask);  	/* On any allowed CPU? */  	if (dest_cpu == NR_CPUS) -		dest_cpu = any_online_cpu(tsk->cpus_allowed); +		dest_cpu = any_online_cpu(p->cpus_allowed);  	/* No more Mr. Nice Guy. */  	if (dest_cpu == NR_CPUS) { -		rq = task_rq_lock(tsk, &flags); -		cpus_setall(tsk->cpus_allowed); -		dest_cpu = any_online_cpu(tsk->cpus_allowed); +		rq = task_rq_lock(p, &flags); +		cpus_setall(p->cpus_allowed); +		dest_cpu = any_online_cpu(p->cpus_allowed);  		task_rq_unlock(rq, &flags);  		/* @@ -4942,12 +4981,12 @@ restart:  		 * kernel threads (both mm NULL), since they never  		 * leave kernel.  		 */ -		if (tsk->mm && printk_ratelimit()) +		if (p->mm && printk_ratelimit())  			printk(KERN_INFO "process %d (%s) no "  			       "longer affine to cpu%d\n", -			       tsk->pid, tsk->comm, dead_cpu); +			       p->pid, p->comm, dead_cpu);  	} -	if (!__migrate_task(tsk, dead_cpu, dest_cpu)) +	if (!__migrate_task(p, dead_cpu, dest_cpu))  		goto restart;  } @@ -4958,9 +4997,9 @@ restart:   * their home CPUs. 
So we just add the counter to another CPU's counter,   * to keep the global sum constant after CPU-down:   */ -static void migrate_nr_uninterruptible(runqueue_t *rq_src) +static void migrate_nr_uninterruptible(struct rq *rq_src)  { -	runqueue_t *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL)); +	struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL));  	unsigned long flags;  	local_irq_save(flags); @@ -4974,48 +5013,51 @@ static void migrate_nr_uninterruptible(runqueue_t *rq_src)  /* Run through task list and migrate tasks from the dead cpu. */  static void migrate_live_tasks(int src_cpu)  { -	struct task_struct *tsk, *t; +	struct task_struct *p, *t;  	write_lock_irq(&tasklist_lock); -	do_each_thread(t, tsk) { -		if (tsk == current) +	do_each_thread(t, p) { +		if (p == current)  			continue; -		if (task_cpu(tsk) == src_cpu) -			move_task_off_dead_cpu(src_cpu, tsk); -	} while_each_thread(t, tsk); +		if (task_cpu(p) == src_cpu) +			move_task_off_dead_cpu(src_cpu, p); +	} while_each_thread(t, p);  	write_unlock_irq(&tasklist_lock);  }  /* Schedules idle task to be the next runnable task on current CPU.   * It does so by boosting its priority to highest possible and adding it to - * the _front_ of runqueue. Used by CPU offline code. + * the _front_ of the runqueue. Used by CPU offline code.   */  void sched_idle_next(void)  { -	int cpu = smp_processor_id(); -	runqueue_t *rq = this_rq(); +	int this_cpu = smp_processor_id(); +	struct rq *rq = cpu_rq(this_cpu);  	struct task_struct *p = rq->idle;  	unsigned long flags;  	/* cpu has to be offline */ -	BUG_ON(cpu_online(cpu)); +	BUG_ON(cpu_online(this_cpu)); -	/* Strictly not necessary since rest of the CPUs are stopped by now -	 * and interrupts disabled on current cpu. +	/* +	 * Strictly not necessary since rest of the CPUs are stopped by now +	 * and interrupts disabled on the current cpu.  	 */  	spin_lock_irqsave(&rq->lock, flags);  	__setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1); -	/* Add idle task to _front_ of it's priority queue */ + +	/* Add idle task to the _front_ of its priority queue: */  	__activate_idle_task(p, rq);  	spin_unlock_irqrestore(&rq->lock, flags);  } -/* Ensures that the idle task is using init_mm right before its cpu goes +/* + * Ensures that the idle task is using init_mm right before its cpu goes   * offline.   */  void idle_task_exit(void) @@ -5029,17 +5071,17 @@ void idle_task_exit(void)  	mmdrop(mm);  } -static void migrate_dead(unsigned int dead_cpu, task_t *tsk) +static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)  { -	struct runqueue *rq = cpu_rq(dead_cpu); +	struct rq *rq = cpu_rq(dead_cpu);  	/* Must be exiting, otherwise would be on tasklist. */ -	BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD); +	BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);  	/* Cannot have done final schedule yet: would have vanished. */ -	BUG_ON(tsk->flags & PF_DEAD); +	BUG_ON(p->flags & PF_DEAD); -	get_task_struct(tsk); +	get_task_struct(p);  	/*  	 * Drop lock around migration; if someone else moves it, @@ -5047,25 +5089,25 @@ static void migrate_dead(unsigned int dead_cpu, task_t *tsk)  	 * fine.  	 */  	spin_unlock_irq(&rq->lock); -	move_task_off_dead_cpu(dead_cpu, tsk); +	move_task_off_dead_cpu(dead_cpu, p);  	spin_lock_irq(&rq->lock); -	put_task_struct(tsk); +	put_task_struct(p);  }  /* release_task() removes task from tasklist, so we won't find dead tasks. 
*/  static void migrate_dead_tasks(unsigned int dead_cpu)  { -	unsigned arr, i; -	struct runqueue *rq = cpu_rq(dead_cpu); +	struct rq *rq = cpu_rq(dead_cpu); +	unsigned int arr, i;  	for (arr = 0; arr < 2; arr++) {  		for (i = 0; i < MAX_PRIO; i++) {  			struct list_head *list = &rq->arrays[arr].queue[i]; +  			while (!list_empty(list)) -				migrate_dead(dead_cpu, -					     list_entry(list->next, task_t, -							run_list)); +				migrate_dead(dead_cpu, list_entry(list->next, +					     struct task_struct, run_list));  		}  	}  } @@ -5075,14 +5117,13 @@ static void migrate_dead_tasks(unsigned int dead_cpu)   * migration_call - callback that gets triggered when a CPU is added.   * Here we can start up the necessary migration thread for the new CPU.   */ -static int __cpuinit migration_call(struct notifier_block *nfb, -			unsigned long action, -			void *hcpu) +static int __cpuinit +migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)  { -	int cpu = (long)hcpu;  	struct task_struct *p; -	struct runqueue *rq; +	int cpu = (long)hcpu;  	unsigned long flags; +	struct rq *rq;  	switch (action) {  	case CPU_UP_PREPARE: @@ -5097,10 +5138,12 @@ static int __cpuinit migration_call(struct notifier_block *nfb,  		task_rq_unlock(rq, &flags);  		cpu_rq(cpu)->migration_thread = p;  		break; +  	case CPU_ONLINE:  		/* Strictly unneccessary, as first user will wake it. */  		wake_up_process(cpu_rq(cpu)->migration_thread);  		break; +  #ifdef CONFIG_HOTPLUG_CPU  	case CPU_UP_CANCELED:  		if (!cpu_rq(cpu)->migration_thread) @@ -5111,6 +5154,7 @@ static int __cpuinit migration_call(struct notifier_block *nfb,  		kthread_stop(cpu_rq(cpu)->migration_thread);  		cpu_rq(cpu)->migration_thread = NULL;  		break; +  	case CPU_DEAD:  		migrate_live_tasks(cpu);  		rq = cpu_rq(cpu); @@ -5131,9 +5175,10 @@ static int __cpuinit migration_call(struct notifier_block *nfb,  		 * the requestors. */  		spin_lock_irq(&rq->lock);  		while (!list_empty(&rq->migration_queue)) { -			migration_req_t *req; +			struct migration_req *req; +  			req = list_entry(rq->migration_queue.next, -					 migration_req_t, list); +					 struct migration_req, list);  			list_del_init(&req->list);  			complete(&req->done);  		} @@ -5155,10 +5200,12 @@ static struct notifier_block __cpuinitdata migration_notifier = {  int __init migration_init(void)  {  	void *cpu = (void *)(long)smp_processor_id(); -	/* Start one for boot CPU. 
*/ + +	/* Start one for the boot CPU: */  	migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);  	migration_call(&migration_notifier, CPU_ONLINE, cpu);  	register_cpu_notifier(&migration_notifier); +  	return 0;  }  #endif @@ -5254,7 +5301,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)  	} while (sd);  }  #else -#define sched_domain_debug(sd, cpu) {} +# define sched_domain_debug(sd, cpu) do { } while (0)  #endif  static int sd_degenerate(struct sched_domain *sd) @@ -5280,8 +5327,8 @@ static int sd_degenerate(struct sched_domain *sd)  	return 1;  } -static int sd_parent_degenerate(struct sched_domain *sd, -						struct sched_domain *parent) +static int +sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)  {  	unsigned long cflags = sd->flags, pflags = parent->flags; @@ -5314,7 +5361,7 @@ static int sd_parent_degenerate(struct sched_domain *sd,   */  static void cpu_attach_domain(struct sched_domain *sd, int cpu)  { -	runqueue_t *rq = cpu_rq(cpu); +	struct rq *rq = cpu_rq(cpu);  	struct sched_domain *tmp;  	/* Remove the sched domains which do not contribute to scheduling. */ @@ -5576,8 +5623,8 @@ static void touch_cache(void *__cache, unsigned long __size)  /*   * Measure the cache-cost of one task migration. Returns in units of nsec.   */ -static unsigned long long measure_one(void *cache, unsigned long size, -				      int source, int target) +static unsigned long long +measure_one(void *cache, unsigned long size, int source, int target)  {  	cpumask_t mask, saved_mask;  	unsigned long long t0, t1, t2, t3, cost; @@ -5927,9 +5974,9 @@ static int find_next_best_node(int node, unsigned long *used_nodes)   */  static cpumask_t sched_domain_node_span(int node)  { -	int i; -	cpumask_t span, nodemask;  	DECLARE_BITMAP(used_nodes, MAX_NUMNODES); +	cpumask_t span, nodemask; +	int i;  	cpus_clear(span);  	bitmap_zero(used_nodes, MAX_NUMNODES); @@ -5940,6 +5987,7 @@ static cpumask_t sched_domain_node_span(int node)  	for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {  		int next_node = find_next_best_node(node, used_nodes); +  		nodemask = node_to_cpumask(next_node);  		cpus_or(span, span, nodemask);  	} @@ -5949,19 +5997,23 @@ static cpumask_t sched_domain_node_span(int node)  #endif  int sched_smt_power_savings = 0, sched_mc_power_savings = 0; +  /* - * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we - * can switch it on easily if needed. 
+ * SMT sched-domains:   */  #ifdef CONFIG_SCHED_SMT  static DEFINE_PER_CPU(struct sched_domain, cpu_domains);  static struct sched_group sched_group_cpus[NR_CPUS]; +  static int cpu_to_cpu_group(int cpu)  {  	return cpu;  }  #endif +/* + * multi-core sched-domains: + */  #ifdef CONFIG_SCHED_MC  static DEFINE_PER_CPU(struct sched_domain, core_domains);  static struct sched_group *sched_group_core_bycpu[NR_CPUS]; @@ -5981,9 +6033,10 @@ static int cpu_to_core_group(int cpu)  static DEFINE_PER_CPU(struct sched_domain, phys_domains);  static struct sched_group *sched_group_phys_bycpu[NR_CPUS]; +  static int cpu_to_phys_group(int cpu)  { -#if defined(CONFIG_SCHED_MC) +#ifdef CONFIG_SCHED_MC  	cpumask_t mask = cpu_coregroup_map(cpu);  	return first_cpu(mask);  #elif defined(CONFIG_SCHED_SMT) @@ -6529,6 +6582,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)  int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)  {  	int err = 0; +  #ifdef CONFIG_SCHED_SMT  	if (smt_capable())  		err = sysfs_create_file(&cls->kset.kobj, @@ -6548,7 +6602,8 @@ static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)  {  	return sprintf(page, "%u\n", sched_mc_power_savings);  } -static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count) +static ssize_t sched_mc_power_savings_store(struct sys_device *dev, +					    const char *buf, size_t count)  {  	return sched_power_savings_store(buf, count, 0);  } @@ -6561,7 +6616,8 @@ static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)  {  	return sprintf(page, "%u\n", sched_smt_power_savings);  } -static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count) +static ssize_t sched_smt_power_savings_store(struct sys_device *dev, +					     const char *buf, size_t count)  {  	return sched_power_savings_store(buf, count, 1);  } @@ -6623,6 +6679,7 @@ int in_sched_functions(unsigned long addr)  {  	/* Linker adds these: start and end of __sched functions */  	extern char __sched_text_start[], __sched_text_end[]; +  	return in_lock_functions(addr) ||  		(addr >= (unsigned long)__sched_text_start  		&& addr < (unsigned long)__sched_text_end); @@ -6630,14 +6687,15 @@ int in_sched_functions(unsigned long addr)  void __init sched_init(void)  { -	runqueue_t *rq;  	int i, j, k;  	for_each_possible_cpu(i) { -		prio_array_t *array; +		struct prio_array *array; +		struct rq *rq;  		rq = cpu_rq(i);  		spin_lock_init(&rq->lock); +		lockdep_set_class(&rq->lock, &rq->rq_lock_key);  		rq->nr_running = 0;  		rq->active = rq->arrays;  		rq->expired = rq->arrays + 1; @@ -6684,7 +6742,7 @@ void __init sched_init(void)  #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP  void __might_sleep(char *file, int line)  { -#if defined(in_atomic) +#ifdef in_atomic  	static unsigned long prev_jiffy;	/* ratelimiting */  	if ((in_atomic() || irqs_disabled()) && @@ -6706,10 +6764,10 @@ EXPORT_SYMBOL(__might_sleep);  #ifdef CONFIG_MAGIC_SYSRQ  void normalize_rt_tasks(void)  { +	struct prio_array *array;  	struct task_struct *p; -	prio_array_t *array;  	unsigned long flags; -	runqueue_t *rq; +	struct rq *rq;  	read_lock_irq(&tasklist_lock);  	for_each_process(p) { @@ -6753,7 +6811,7 @@ void normalize_rt_tasks(void)   *   * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!   
*/ -task_t *curr_task(int cpu) +struct task_struct *curr_task(int cpu)  {  	return cpu_curr(cpu);  } @@ -6773,7 +6831,7 @@ task_t *curr_task(int cpu)   *   * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!   */ -void set_curr_task(int cpu, task_t *p) +void set_curr_task(int cpu, struct task_struct *p)  {  	cpu_curr(cpu) = p;  } diff --git a/kernel/softirq.c b/kernel/softirq.c index 8f03e3b89b55..215541e26c1a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -62,6 +62,119 @@ static inline void wakeup_softirqd(void)  }  /* + * This one is for softirq.c-internal use, + * where hardirqs are disabled legitimately: + */ +static void __local_bh_disable(unsigned long ip) +{ +	unsigned long flags; + +	WARN_ON_ONCE(in_irq()); + +	raw_local_irq_save(flags); +	add_preempt_count(SOFTIRQ_OFFSET); +	/* +	 * Were softirqs turned off above: +	 */ +	if (softirq_count() == SOFTIRQ_OFFSET) +		trace_softirqs_off(ip); +	raw_local_irq_restore(flags); +} + +void local_bh_disable(void) +{ +	__local_bh_disable((unsigned long)__builtin_return_address(0)); +} + +EXPORT_SYMBOL(local_bh_disable); + +void __local_bh_enable(void) +{ +	WARN_ON_ONCE(in_irq()); + +	/* +	 * softirqs should never be enabled by __local_bh_enable(), +	 * it always nests inside local_bh_enable() sections: +	 */ +	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET); + +	sub_preempt_count(SOFTIRQ_OFFSET); +} +EXPORT_SYMBOL_GPL(__local_bh_enable); + +/* + * Special-case - softirqs can safely be enabled in + * cond_resched_softirq(), or by __do_softirq(), + * without processing still-pending softirqs: + */ +void _local_bh_enable(void) +{ +	WARN_ON_ONCE(in_irq()); +	WARN_ON_ONCE(!irqs_disabled()); + +	if (softirq_count() == SOFTIRQ_OFFSET) +		trace_softirqs_on((unsigned long)__builtin_return_address(0)); +	sub_preempt_count(SOFTIRQ_OFFSET); +} + +EXPORT_SYMBOL(_local_bh_enable); + +void local_bh_enable(void) +{ +	unsigned long flags; + +	WARN_ON_ONCE(in_irq()); +	WARN_ON_ONCE(irqs_disabled()); + +	local_irq_save(flags); +	/* +	 * Are softirqs going to be turned on now: +	 */ +	if (softirq_count() == SOFTIRQ_OFFSET) +		trace_softirqs_on((unsigned long)__builtin_return_address(0)); +	/* +	 * Keep preemption disabled until we are done with +	 * softirq processing: + 	 */ + 	sub_preempt_count(SOFTIRQ_OFFSET - 1); + +	if (unlikely(!in_interrupt() && local_softirq_pending())) +		do_softirq(); + +	dec_preempt_count(); +	local_irq_restore(flags); +	preempt_check_resched(); +} +EXPORT_SYMBOL(local_bh_enable); + +void local_bh_enable_ip(unsigned long ip) +{ +	unsigned long flags; + +	WARN_ON_ONCE(in_irq()); + +	local_irq_save(flags); +	/* +	 * Are softirqs going to be turned on now: +	 */ +	if (softirq_count() == SOFTIRQ_OFFSET) +		trace_softirqs_on(ip); +	/* +	 * Keep preemption disabled until we are done with +	 * softirq processing: + 	 */ + 	sub_preempt_count(SOFTIRQ_OFFSET - 1); + +	if (unlikely(!in_interrupt() && local_softirq_pending())) +		do_softirq(); + +	dec_preempt_count(); +	local_irq_restore(flags); +	preempt_check_resched(); +} +EXPORT_SYMBOL(local_bh_enable_ip); + +/*   * We restart softirq processing MAX_SOFTIRQ_RESTART times,   * and we fall back to softirqd after that.   
* @@ -80,8 +193,11 @@ asmlinkage void __do_softirq(void)  	int cpu;  	pending = local_softirq_pending(); +	account_system_vtime(current); + +	__local_bh_disable((unsigned long)__builtin_return_address(0)); +	trace_softirq_enter(); -	local_bh_disable();  	cpu = smp_processor_id();  restart:  	/* Reset the pending bitmask before enabling irqs */ @@ -109,7 +225,10 @@ restart:  	if (pending)  		wakeup_softirqd(); -	__local_bh_enable(); +	trace_softirq_exit(); + +	account_system_vtime(current); +	_local_bh_enable();  }  #ifndef __ARCH_HAS_DO_SOFTIRQ @@ -136,23 +255,6 @@ EXPORT_SYMBOL(do_softirq);  #endif -void local_bh_enable(void) -{ -	WARN_ON(irqs_disabled()); -	/* -	 * Keep preemption disabled until we are done with -	 * softirq processing: - 	 */ - 	sub_preempt_count(SOFTIRQ_OFFSET - 1); - -	if (unlikely(!in_interrupt() && local_softirq_pending())) -		do_softirq(); - -	dec_preempt_count(); -	preempt_check_resched(); -} -EXPORT_SYMBOL(local_bh_enable); -  #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED  # define invoke_softirq()	__do_softirq()  #else @@ -165,6 +267,7 @@ EXPORT_SYMBOL(local_bh_enable);  void irq_exit(void)  {  	account_system_vtime(current); +	trace_hardirq_exit();  	sub_preempt_count(IRQ_EXIT_OFFSET);  	if (!in_interrupt() && local_softirq_pending())  		invoke_softirq(); diff --git a/kernel/spinlock.c b/kernel/spinlock.c index b31e54eadf56..bfd6ad9c0330 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -13,6 +13,7 @@  #include <linux/preempt.h>  #include <linux/spinlock.h>  #include <linux/interrupt.h> +#include <linux/debug_locks.h>  #include <linux/module.h>  /* @@ -29,8 +30,10 @@ EXPORT_SYMBOL(generic__raw_read_trylock);  int __lockfunc _spin_trylock(spinlock_t *lock)  {  	preempt_disable(); -	if (_raw_spin_trylock(lock)) +	if (_raw_spin_trylock(lock)) { +		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);  		return 1; +	}  	preempt_enable();  	return 0; @@ -40,8 +43,10 @@ EXPORT_SYMBOL(_spin_trylock);  int __lockfunc _read_trylock(rwlock_t *lock)  {  	preempt_disable(); -	if (_raw_read_trylock(lock)) +	if (_raw_read_trylock(lock)) { +		rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);  		return 1; +	}  	preempt_enable();  	return 0; @@ -51,19 +56,28 @@ EXPORT_SYMBOL(_read_trylock);  int __lockfunc _write_trylock(rwlock_t *lock)  {  	preempt_disable(); -	if (_raw_write_trylock(lock)) +	if (_raw_write_trylock(lock)) { +		rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);  		return 1; +	}  	preempt_enable();  	return 0;  }  EXPORT_SYMBOL(_write_trylock); -#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) +/* + * If lockdep is enabled then we use the non-preemption spin-ops + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \ +	defined(CONFIG_PROVE_LOCKING)  void __lockfunc _read_lock(rwlock_t *lock)  {  	preempt_disable(); +	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);  	_raw_read_lock(lock);  }  EXPORT_SYMBOL(_read_lock); @@ -74,7 +88,17 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)  	local_irq_save(flags);  	preempt_disable(); +	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); +	/* +	 * On lockdep we dont want the hand-coded irq-enable of +	 * _raw_spin_lock_flags() code, because lockdep assumes +	 * that interrupts are not re-enabled during lock-acquire: +	 */ +#ifdef CONFIG_PROVE_LOCKING +	_raw_spin_lock(lock); +#else  	_raw_spin_lock_flags(lock, &flags); +#endif  	return flags;  }  
EXPORT_SYMBOL(_spin_lock_irqsave); @@ -83,6 +107,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock)  {  	local_irq_disable();  	preempt_disable(); +	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);  	_raw_spin_lock(lock);  }  EXPORT_SYMBOL(_spin_lock_irq); @@ -91,6 +116,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock)  {  	local_bh_disable();  	preempt_disable(); +	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);  	_raw_spin_lock(lock);  }  EXPORT_SYMBOL(_spin_lock_bh); @@ -101,6 +127,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)  	local_irq_save(flags);  	preempt_disable(); +	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);  	_raw_read_lock(lock);  	return flags;  } @@ -110,6 +137,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock)  {  	local_irq_disable();  	preempt_disable(); +	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);  	_raw_read_lock(lock);  }  EXPORT_SYMBOL(_read_lock_irq); @@ -118,6 +146,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock)  {  	local_bh_disable();  	preempt_disable(); +	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);  	_raw_read_lock(lock);  }  EXPORT_SYMBOL(_read_lock_bh); @@ -128,6 +157,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)  	local_irq_save(flags);  	preempt_disable(); +	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);  	_raw_write_lock(lock);  	return flags;  } @@ -137,6 +167,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock)  {  	local_irq_disable();  	preempt_disable(); +	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);  	_raw_write_lock(lock);  }  EXPORT_SYMBOL(_write_lock_irq); @@ -145,6 +176,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)  {  	local_bh_disable();  	preempt_disable(); +	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);  	_raw_write_lock(lock);  }  EXPORT_SYMBOL(_write_lock_bh); @@ -152,6 +184,7 @@ EXPORT_SYMBOL(_write_lock_bh);  void __lockfunc _spin_lock(spinlock_t *lock)  {  	preempt_disable(); +	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);  	_raw_spin_lock(lock);  } @@ -160,6 +193,7 @@ EXPORT_SYMBOL(_spin_lock);  void __lockfunc _write_lock(rwlock_t *lock)  {  	preempt_disable(); +	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);  	_raw_write_lock(lock);  } @@ -255,8 +289,22 @@ BUILD_LOCK_OPS(write, rwlock);  #endif /* CONFIG_PREEMPT */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) +{ +	preempt_disable(); +	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); +	_raw_spin_lock(lock); +} + +EXPORT_SYMBOL(_spin_lock_nested); + +#endif +  void __lockfunc _spin_unlock(spinlock_t *lock)  { +	spin_release(&lock->dep_map, 1, _RET_IP_);  	_raw_spin_unlock(lock);  	preempt_enable();  } @@ -264,6 +312,7 @@ EXPORT_SYMBOL(_spin_unlock);  void __lockfunc _write_unlock(rwlock_t *lock)  { +	rwlock_release(&lock->dep_map, 1, _RET_IP_);  	_raw_write_unlock(lock);  	preempt_enable();  } @@ -271,6 +320,7 @@ EXPORT_SYMBOL(_write_unlock);  void __lockfunc _read_unlock(rwlock_t *lock)  { +	rwlock_release(&lock->dep_map, 1, _RET_IP_);  	_raw_read_unlock(lock);  	preempt_enable();  } @@ -278,6 +328,7 @@ EXPORT_SYMBOL(_read_unlock);  void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)  { +	spin_release(&lock->dep_map, 1, _RET_IP_);  	_raw_spin_unlock(lock);  	local_irq_restore(flags);  	preempt_enable(); @@ -286,6 +337,7 @@ EXPORT_SYMBOL(_spin_unlock_irqrestore);  void __lockfunc _spin_unlock_irq(spinlock_t *lock)  { +	spin_release(&lock->dep_map, 1, _RET_IP_);  	_raw_spin_unlock(lock);  	local_irq_enable();  	
preempt_enable(); @@ -294,14 +346,16 @@ EXPORT_SYMBOL(_spin_unlock_irq);  void __lockfunc _spin_unlock_bh(spinlock_t *lock)  { +	spin_release(&lock->dep_map, 1, _RET_IP_);  	_raw_spin_unlock(lock);  	preempt_enable_no_resched(); -	local_bh_enable(); +	local_bh_enable_ip((unsigned long)__builtin_return_address(0));  }  EXPORT_SYMBOL(_spin_unlock_bh);  void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)  { +	rwlock_release(&lock->dep_map, 1, _RET_IP_);  	_raw_read_unlock(lock);  	local_irq_restore(flags);  	preempt_enable(); @@ -310,6 +364,7 @@ EXPORT_SYMBOL(_read_unlock_irqrestore);  void __lockfunc _read_unlock_irq(rwlock_t *lock)  { +	rwlock_release(&lock->dep_map, 1, _RET_IP_);  	_raw_read_unlock(lock);  	local_irq_enable();  	preempt_enable(); @@ -318,14 +373,16 @@ EXPORT_SYMBOL(_read_unlock_irq);  void __lockfunc _read_unlock_bh(rwlock_t *lock)  { +	rwlock_release(&lock->dep_map, 1, _RET_IP_);  	_raw_read_unlock(lock);  	preempt_enable_no_resched(); -	local_bh_enable(); +	local_bh_enable_ip((unsigned long)__builtin_return_address(0));  }  EXPORT_SYMBOL(_read_unlock_bh);  void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)  { +	rwlock_release(&lock->dep_map, 1, _RET_IP_);  	_raw_write_unlock(lock);  	local_irq_restore(flags);  	preempt_enable(); @@ -334,6 +391,7 @@ EXPORT_SYMBOL(_write_unlock_irqrestore);  void __lockfunc _write_unlock_irq(rwlock_t *lock)  { +	rwlock_release(&lock->dep_map, 1, _RET_IP_);  	_raw_write_unlock(lock);  	local_irq_enable();  	preempt_enable(); @@ -342,9 +400,10 @@ EXPORT_SYMBOL(_write_unlock_irq);  void __lockfunc _write_unlock_bh(rwlock_t *lock)  { +	rwlock_release(&lock->dep_map, 1, _RET_IP_);  	_raw_write_unlock(lock);  	preempt_enable_no_resched(); -	local_bh_enable(); +	local_bh_enable_ip((unsigned long)__builtin_return_address(0));  }  EXPORT_SYMBOL(_write_unlock_bh); @@ -352,11 +411,13 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)  {  	local_bh_disable();  	preempt_disable(); -	if (_raw_spin_trylock(lock)) +	if (_raw_spin_trylock(lock)) { +		spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);  		return 1; +	}  	preempt_enable_no_resched(); -	local_bh_enable(); +	local_bh_enable_ip((unsigned long)__builtin_return_address(0));  	return 0;  }  EXPORT_SYMBOL(_spin_trylock_bh); diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c new file mode 100644 index 000000000000..b71816e47a30 --- /dev/null +++ b/kernel/stacktrace.c @@ -0,0 +1,24 @@ +/* + * kernel/stacktrace.c + * + * Stack trace management functions + * + *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ +#include <linux/sched.h> +#include <linux/kallsyms.h> +#include <linux/stacktrace.h> + +void print_stack_trace(struct stack_trace *trace, int spaces) +{ +	int i, j; + +	for (i = 0; i < trace->nr_entries; i++) { +		unsigned long ip = trace->entries[i]; + +		for (j = 0; j < spaces + 1; j++) +			printk(" "); +		print_ip_sym(ip); +	} +} + diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2c0aacc37c55..dcfb5d731466 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -4,7 +4,6 @@  #include <linux/cpu.h>  #include <linux/err.h>  #include <linux/syscalls.h> -#include <linux/kthread.h>  #include <asm/atomic.h>  #include <asm/semaphore.h>  #include <asm/uaccess.h> @@ -26,11 +25,13 @@ static unsigned int stopmachine_num_threads;  static atomic_t stopmachine_thread_ack;  static DECLARE_MUTEX(stopmachine_mutex); -static int stopmachine(void *unused) +static int stopmachine(void *cpu)  {  	int irqs_disabled 
= 0;  	int prepared = 0; +	set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu)); +  	/* Ack: we are alive */  	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */  	atomic_inc(&stopmachine_thread_ack); @@ -84,8 +85,7 @@ static void stopmachine_set_state(enum stopmachine_state state)  static int stop_machine(void)  { -	int ret = 0; -	unsigned int i; +	int i, ret = 0;  	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };  	/* One high-prio thread per cpu.  We'll do this one. */ @@ -96,16 +96,11 @@ static int stop_machine(void)  	stopmachine_state = STOPMACHINE_WAIT;  	for_each_online_cpu(i) { -		struct task_struct *tsk;  		if (i == raw_smp_processor_id())  			continue; -		tsk = kthread_create(stopmachine, NULL, "stopmachine"); -		if (IS_ERR(tsk)) { -			ret = PTR_ERR(tsk); +		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL); +		if (ret < 0)  			break; -		} -		kthread_bind(tsk, i); -		wake_up_process(tsk);  		stopmachine_num_threads++;  	} diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 99a58f279077..362a0cc37138 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -932,6 +932,17 @@ static ctl_table vm_table[] = {  		.strategy	= &sysctl_intvec,  		.extra1		= &zero,  	}, +	{ +		.ctl_name	= VM_MIN_UNMAPPED, +		.procname	= "min_unmapped_ratio", +		.data		= &sysctl_min_unmapped_ratio, +		.maxlen		= sizeof(sysctl_min_unmapped_ratio), +		.mode		= 0644, +		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler, +		.strategy	= &sysctl_intvec, +		.extra1		= &zero, +		.extra2		= &one_hundred, +	},  #endif  #ifdef CONFIG_X86_32  	{ diff --git a/kernel/timer.c b/kernel/timer.c index 5a8960253063..396a3c024c2c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1208,7 +1208,7 @@ unsigned long wall_jiffies = INITIAL_JIFFIES;   * playing with xtime and avenrun.   
*/  #ifndef ARCH_HAVE_XTIME_LOCK -seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; +__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);  EXPORT_SYMBOL(xtime_lock);  #endif @@ -1368,7 +1368,7 @@ asmlinkage long sys_getegid(void)  static void process_timeout(unsigned long __data)  { -	wake_up_process((task_t *)__data); +	wake_up_process((struct task_struct *)__data);  }  /** @@ -1559,6 +1559,13 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)  	return 0;  } +/* + * lockdep: we want to track each per-CPU base as a separate lock-class, + * but timer-bases are kmalloc()-ed, so we need to attach separate + * keys to them: + */ +static struct lock_class_key base_lock_keys[NR_CPUS]; +  static int __devinit init_timers_cpu(int cpu)  {  	int j; @@ -1594,6 +1601,8 @@ static int __devinit init_timers_cpu(int cpu)  	}  	spin_lock_init(&base->lock); +	lockdep_set_class(&base->lock, base_lock_keys + cpu); +  	for (j = 0; j < TVN_SIZE; j++) {  		INIT_LIST_HEAD(base->tv5.vec + j);  		INIT_LIST_HEAD(base->tv4.vec + j); diff --git a/kernel/wait.c b/kernel/wait.c index 5985d866531f..a1d57aeb7f75 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -10,6 +10,10 @@  #include <linux/wait.h>  #include <linux/hash.h> +struct lock_class_key waitqueue_lock_key; + +EXPORT_SYMBOL(waitqueue_lock_key); +  void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)  {  	unsigned long flags; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 59f0b42bd89e..eebb1d839235 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -51,7 +51,7 @@ struct cpu_workqueue_struct {  	wait_queue_head_t work_done;  	struct workqueue_struct *wq; -	task_t *thread; +	struct task_struct *thread;  	int run_depth;		/* Detect run_workqueue() recursion depth */  } ____cacheline_aligned; @@ -114,6 +114,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)  	put_cpu();  	return ret;  } +EXPORT_SYMBOL_GPL(queue_work);  static void delayed_work_timer_fn(unsigned long __data)  { @@ -147,6 +148,29 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,  	}  	return ret;  } +EXPORT_SYMBOL_GPL(queue_delayed_work); + +int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, +			struct work_struct *work, unsigned long delay) +{ +	int ret = 0; +	struct timer_list *timer = &work->timer; + +	if (!test_and_set_bit(0, &work->pending)) { +		BUG_ON(timer_pending(timer)); +		BUG_ON(!list_empty(&work->entry)); + +		/* This stores wq for the moment, for the timer_fn */ +		work->wq_data = wq; +		timer->expires = jiffies + delay; +		timer->data = (unsigned long)work; +		timer->function = delayed_work_timer_fn; +		add_timer_on(timer, cpu); +		ret = 1; +	} +	return ret; +} +EXPORT_SYMBOL_GPL(queue_delayed_work_on);  static void run_workqueue(struct cpu_workqueue_struct *cwq)  { @@ -281,6 +305,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)  		unlock_cpu_hotplug();  	}  } +EXPORT_SYMBOL_GPL(flush_workqueue);  static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,  						   int cpu) @@ -358,6 +383,7 @@ struct workqueue_struct *__create_workqueue(const char *name,  	}  	return wq;  } +EXPORT_SYMBOL_GPL(__create_workqueue);  static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)  { @@ -395,6 +421,7 @@ void destroy_workqueue(struct workqueue_struct *wq)  	free_percpu(wq->cpu_wq);  	kfree(wq);  } +EXPORT_SYMBOL_GPL(destroy_workqueue);  static struct workqueue_struct *keventd_wq; @@ -402,31 +429,20 @@ int fastcall 
schedule_work(struct work_struct *work)  {  	return queue_work(keventd_wq, work);  } +EXPORT_SYMBOL(schedule_work);  int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)  {  	return queue_delayed_work(keventd_wq, work, delay);  } +EXPORT_SYMBOL(schedule_delayed_work);  int schedule_delayed_work_on(int cpu,  			struct work_struct *work, unsigned long delay)  { -	int ret = 0; -	struct timer_list *timer = &work->timer; - -	if (!test_and_set_bit(0, &work->pending)) { -		BUG_ON(timer_pending(timer)); -		BUG_ON(!list_empty(&work->entry)); -		/* This stores keventd_wq for the moment, for the timer_fn */ -		work->wq_data = keventd_wq; -		timer->expires = jiffies + delay; -		timer->data = (unsigned long)work; -		timer->function = delayed_work_timer_fn; -		add_timer_on(timer, cpu); -		ret = 1; -	} -	return ret; +	return queue_delayed_work_on(cpu, keventd_wq, work, delay);  } +EXPORT_SYMBOL(schedule_delayed_work_on);  /**   * schedule_on_each_cpu - call a function on each online CPU from keventd @@ -463,6 +479,7 @@ void flush_scheduled_work(void)  {  	flush_workqueue(keventd_wq);  } +EXPORT_SYMBOL(flush_scheduled_work);  /**   * cancel_rearming_delayed_workqueue - reliably kill off a delayed @@ -619,13 +636,3 @@ void init_workqueues(void)  	BUG_ON(!keventd_wq);  } -EXPORT_SYMBOL_GPL(__create_workqueue); -EXPORT_SYMBOL_GPL(queue_work); -EXPORT_SYMBOL_GPL(queue_delayed_work); -EXPORT_SYMBOL_GPL(flush_workqueue); -EXPORT_SYMBOL_GPL(destroy_workqueue); - -EXPORT_SYMBOL(schedule_work); -EXPORT_SYMBOL(schedule_delayed_work); -EXPORT_SYMBOL(schedule_delayed_work_on); -EXPORT_SYMBOL(flush_scheduled_work); | 
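
The hunks above introduce several new caller-visible interfaces; the sketches that follow are minimal illustrations of how they are intended to be used, written under stated assumptions rather than taken from the patch itself. First, the new _spin_lock_nested() entry point (reached through the spin_lock_nested() wrapper when CONFIG_DEBUG_LOCK_ALLOC is set) lets a caller pass a lock subclass so that taking two locks of the same lock class in a fixed order is not flagged by lockdep as a potential self-deadlock. The my_queue type and the double_lock()/double_unlock() helpers below are invented for illustration; only spin_lock_nested() and the SINGLE_DEPTH_NESTING subclass are assumed to come from the kernel headers added by this series.

#include <linux/spinlock.h>

struct my_queue {
	spinlock_t lock;
	/* ... payload ... */
};

/*
 * Take two locks of the same class in a stable (address) order and
 * tell lockdep that the second, inner acquisition is an intentional
 * same-class nesting by passing a non-zero subclass.
 */
static void double_lock(struct my_queue *a, struct my_queue *b)
{
	if (a < b) {
		spin_lock(&a->lock);
		spin_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&b->lock);
		spin_lock_nested(&a->lock, SINGLE_DEPTH_NESTING);
	}
}

static void double_unlock(struct my_queue *a, struct my_queue *b)
{
	spin_unlock(&a->lock);
	spin_unlock(&b->lock);
}

Without the subclass, lockdep would see "lock(my_queue) while already holding lock(my_queue)" and report it; the subclass tells it the ordering is deliberate and externally guaranteed (here, by address order).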
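
The timer.c change shows a second general lockdep idiom: locks living in kmalloc()-ed memory are all keyed on their spin_lock_init() call site, so the patch attaches a static struct lock_class_key per CPU to give each timer base its own class. Below is a sketch of the same pattern for a hypothetical per-CPU structure; my_pcpu_base, my_base_lock_keys and my_base_init() are invented names, while lockdep_set_class() and struct lock_class_key are the interfaces this series adds.

#include <linux/spinlock.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/threads.h>	/* NR_CPUS */

struct my_pcpu_base {
	spinlock_t lock;
	/* ... per-CPU state ... */
};

/*
 * One key per CPU: without this, every dynamically allocated base
 * would fall into the single lock class keyed on the spin_lock_init()
 * call site below, and lockdep could not tell the per-CPU locks apart.
 */
static struct lock_class_key my_base_lock_keys[NR_CPUS];

static struct my_pcpu_base *my_base_init(int cpu)
{
	struct my_pcpu_base *base;

	base = kzalloc(sizeof(*base), GFP_KERNEL);
	if (!base)
		return NULL;

	spin_lock_init(&base->lock);
	/* re-key the lock into a per-CPU class */
	lockdep_set_class(&base->lock, my_base_lock_keys + cpu);
	return base;
}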
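
The workqueue hunk promotes the CPU-targeted delayed queueing from a keventd-only helper to a general queue_delayed_work_on(cpu, wq, work, delay) API, and reimplements schedule_delayed_work_on() on top of it. A usage sketch, assuming the three-argument DECLARE_WORK() form of this kernel's work_struct API (handler plus data cookie); the my_poll/my_wq names and the one-second delay are illustrative only.

#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

/* hypothetical poll routine; the old-style handler takes a void * cookie */
static void my_poll(void *data)
{
	/* ... per-CPU polling work ... */
}

static DECLARE_WORK(my_poll_work, my_poll, NULL);
static struct workqueue_struct *my_wq;

static int my_start_poll_on(int cpu)
{
	my_wq = create_workqueue("my_poll");
	if (!my_wq)
		return -ENOMEM;

	/*
	 * Queue the work on one specific CPU, HZ ticks (one second)
	 * from now.  Returns 1 if queued, 0 if it was already pending.
	 */
	return queue_delayed_work_on(cpu, my_wq, &my_poll_work, HZ);
}

Callers that are happy with keventd can keep using schedule_delayed_work_on(), which now simply forwards to queue_delayed_work_on() with keventd_wq.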
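
Finally, the new kernel/stacktrace.c provides print_stack_trace(), which walks trace->entries[0..nr_entries) and prints each address through print_ip_sym(), indented by the requested number of spaces. How the entries get filled is architecture code behind CONFIG_STACKTRACE and is not part of this hunk, so the sketch below exercises only the printing side with a hand-built, single-entry trace; the my_dump_caller() name and the use of __builtin_return_address(0) as the recorded address are illustrative assumptions.

#include <linux/kernel.h>
#include <linux/stacktrace.h>

/* dump the immediate caller of this function through the new helper */
static void my_dump_caller(void)
{
	unsigned long addr = (unsigned long)__builtin_return_address(0);
	struct stack_trace trace = {
		.nr_entries	= 1,
		.entries	= &addr,
	};

	/* prints "  caller+offset/size" (two-space indent for spaces == 1) */
	print_stack_trace(&trace, 1);
}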
