diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/binfmt_elf.c | 15 | ||||
| -rw-r--r-- | fs/block_dev.c | 102 | ||||
| -rw-r--r-- | fs/dcache.c | 6 | ||||
| -rw-r--r-- | fs/direct-io.c | 6 | ||||
| -rw-r--r-- | fs/eventpoll.c | 4 | ||||
| -rw-r--r-- | fs/ext2/super.c | 2 | ||||
| -rw-r--r-- | fs/ext3/super.c | 2 | ||||
| -rw-r--r-- | fs/jffs2/acl.c | 4 | ||||
| -rw-r--r-- | fs/jffs2/acl.h | 4 | ||||
| -rw-r--r-- | fs/jffs2/malloc.c | 2 | ||||
| -rw-r--r-- | fs/jffs2/nodelist.h | 2 | ||||
| -rw-r--r-- | fs/jffs2/readinode.c | 1 | ||||
| -rw-r--r-- | fs/jffs2/scan.c | 4 | ||||
| -rw-r--r-- | fs/jffs2/xattr.c | 45 | ||||
| -rw-r--r-- | fs/namei.c | 20 | ||||
| -rw-r--r-- | fs/ntfs/inode.c | 33 | ||||
| -rw-r--r-- | fs/ntfs/super.c | 31 | ||||
| -rw-r--r-- | fs/proc/task_nommu.c | 2 | ||||
| -rw-r--r-- | fs/reiserfs/super.c | 2 | ||||
| -rw-r--r-- | fs/super.c | 11 | ||||
| -rw-r--r-- | fs/ufs/super.c | 2 | 
21 files changed, 220 insertions, 80 deletions
| diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d0434406eaeb..f42e64210ee5 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -84,7 +84,7 @@ static struct linux_binfmt elf_format = {  		.min_coredump	= ELF_EXEC_PAGESIZE  }; -#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE) +#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)  static int set_brk(unsigned long start, unsigned long end)  { @@ -394,7 +394,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,  			 * <= p_memsize so it's only necessary to check p_memsz.  			 */  			k = load_addr + eppnt->p_vaddr; -			if (k > TASK_SIZE || +			if (BAD_ADDR(k) ||  			    eppnt->p_filesz > eppnt->p_memsz ||  			    eppnt->p_memsz > TASK_SIZE ||  			    TASK_SIZE - eppnt->p_memsz < k) { @@ -887,7 +887,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)  		 * allowed task size. Note that p_filesz must always be  		 * <= p_memsz so it is only necessary to check p_memsz.  		 */ -		if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz || +		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||  		    elf_ppnt->p_memsz > TASK_SIZE ||  		    TASK_SIZE - elf_ppnt->p_memsz < k) {  			/* set_brk can never work. Avoid overflows. */ @@ -941,10 +941,9 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)  						    interpreter,  						    &interp_load_addr);  		if (BAD_ADDR(elf_entry)) { -			printk(KERN_ERR "Unable to load interpreter %.128s\n", -				elf_interpreter);  			force_sig(SIGSEGV, current); -			retval = -ENOEXEC; /* Nobody gets to see this, but.. */ +			retval = IS_ERR((void *)elf_entry) ? +					(int)elf_entry : -EINVAL;  			goto out_free_dentry;  		}  		reloc_func_desc = interp_load_addr; @@ -955,8 +954,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)  	} else {  		elf_entry = loc->elf_ex.e_entry;  		if (BAD_ADDR(elf_entry)) { -			send_sig(SIGSEGV, current, 0); -			retval = -ENOEXEC; /* Nobody gets to see this, but.. */ +			force_sig(SIGSEGV, current); +			retval = -EINVAL;  			goto out_free_dentry;  		}  	} diff --git a/fs/block_dev.c b/fs/block_dev.c index 9633a490dab0..37534573960b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -739,7 +739,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,  	if (!bo)  		return -ENOMEM; -	mutex_lock(&bdev->bd_mutex); +	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);  	res = bd_claim(bdev, holder);  	if (res || !add_bd_holder(bdev, bo))  		free_bd_holder(bo); @@ -764,7 +764,7 @@ static void bd_release_from_kobject(struct block_device *bdev,  	if (!kobj)  		return; -	mutex_lock(&bdev->bd_mutex); +	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);  	bd_release(bdev);  	if ((bo = del_bd_holder(bdev, kobj)))  		free_bd_holder(bo); @@ -822,6 +822,22 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode)  EXPORT_SYMBOL(open_by_devnum); +static int +blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags); + +struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode) +{ +	struct block_device *bdev = bdget(dev); +	int err = -ENOMEM; +	int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; +	if (bdev) +		err = blkdev_get_partition(bdev, mode, flags); +	return err ? ERR_PTR(err) : bdev; +} + +EXPORT_SYMBOL(open_partition_by_devnum); + +  /*   * This routine checks whether a removable media has been changed,   * and invalidates all buffer-cache-entries in that case. This @@ -868,7 +884,11 @@ void bd_set_size(struct block_device *bdev, loff_t size)  }  EXPORT_SYMBOL(bd_set_size); -static int do_open(struct block_device *bdev, struct file *file) +static int +blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); + +static int +do_open(struct block_device *bdev, struct file *file, unsigned int subclass)  {  	struct module *owner = NULL;  	struct gendisk *disk; @@ -885,7 +905,8 @@ static int do_open(struct block_device *bdev, struct file *file)  	}  	owner = disk->fops->owner; -	mutex_lock(&bdev->bd_mutex); +	mutex_lock_nested(&bdev->bd_mutex, subclass); +  	if (!bdev->bd_openers) {  		bdev->bd_disk = disk;  		bdev->bd_contains = bdev; @@ -912,11 +933,11 @@ static int do_open(struct block_device *bdev, struct file *file)  			ret = -ENOMEM;  			if (!whole)  				goto out_first; -			ret = blkdev_get(whole, file->f_mode, file->f_flags); +			ret = blkdev_get_whole(whole, file->f_mode, file->f_flags);  			if (ret)  				goto out_first;  			bdev->bd_contains = whole; -			mutex_lock(&whole->bd_mutex); +			mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE);  			whole->bd_part_count++;  			p = disk->part[part - 1];  			bdev->bd_inode->i_data.backing_dev_info = @@ -944,7 +965,8 @@ static int do_open(struct block_device *bdev, struct file *file)  			if (bdev->bd_invalidated)  				rescan_partitions(bdev->bd_disk, bdev);  		} else { -			mutex_lock(&bdev->bd_contains->bd_mutex); +			mutex_lock_nested(&bdev->bd_contains->bd_mutex, +					  BD_MUTEX_PARTITION);  			bdev->bd_contains->bd_part_count++;  			mutex_unlock(&bdev->bd_contains->bd_mutex);  		} @@ -985,11 +1007,49 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)  	fake_file.f_dentry = &fake_dentry;  	fake_dentry.d_inode = bdev->bd_inode; -	return do_open(bdev, &fake_file); +	return do_open(bdev, &fake_file, BD_MUTEX_NORMAL);  }  EXPORT_SYMBOL(blkdev_get); +static int +blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags) +{ +	/* +	 * This crockload is due to bad choice of ->open() type. +	 * It will go away. +	 * For now, block device ->open() routine must _not_ +	 * examine anything in 'inode' argument except ->i_rdev. +	 */ +	struct file fake_file = {}; +	struct dentry fake_dentry = {}; +	fake_file.f_mode = mode; +	fake_file.f_flags = flags; +	fake_file.f_dentry = &fake_dentry; +	fake_dentry.d_inode = bdev->bd_inode; + +	return do_open(bdev, &fake_file, BD_MUTEX_WHOLE); +} + +static int +blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags) +{ +	/* +	 * This crockload is due to bad choice of ->open() type. +	 * It will go away. +	 * For now, block device ->open() routine must _not_ +	 * examine anything in 'inode' argument except ->i_rdev. +	 */ +	struct file fake_file = {}; +	struct dentry fake_dentry = {}; +	fake_file.f_mode = mode; +	fake_file.f_flags = flags; +	fake_file.f_dentry = &fake_dentry; +	fake_dentry.d_inode = bdev->bd_inode; + +	return do_open(bdev, &fake_file, BD_MUTEX_PARTITION); +} +  static int blkdev_open(struct inode * inode, struct file * filp)  {  	struct block_device *bdev; @@ -1005,7 +1065,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)  	bdev = bd_acquire(inode); -	res = do_open(bdev, filp); +	res = do_open(bdev, filp, BD_MUTEX_NORMAL);  	if (res)  		return res; @@ -1019,13 +1079,13 @@ static int blkdev_open(struct inode * inode, struct file * filp)  	return res;  } -int blkdev_put(struct block_device *bdev) +static int __blkdev_put(struct block_device *bdev, unsigned int subclass)  {  	int ret = 0;  	struct inode *bd_inode = bdev->bd_inode;  	struct gendisk *disk = bdev->bd_disk; -	mutex_lock(&bdev->bd_mutex); +	mutex_lock_nested(&bdev->bd_mutex, subclass);  	lock_kernel();  	if (!--bdev->bd_openers) {  		sync_blockdev(bdev); @@ -1035,7 +1095,8 @@ int blkdev_put(struct block_device *bdev)  		if (disk->fops->release)  			ret = disk->fops->release(bd_inode, NULL);  	} else { -		mutex_lock(&bdev->bd_contains->bd_mutex); +		mutex_lock_nested(&bdev->bd_contains->bd_mutex, +				  subclass + 1);  		bdev->bd_contains->bd_part_count--;  		mutex_unlock(&bdev->bd_contains->bd_mutex);  	} @@ -1051,9 +1112,8 @@ int blkdev_put(struct block_device *bdev)  		}  		bdev->bd_disk = NULL;  		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; -		if (bdev != bdev->bd_contains) { -			blkdev_put(bdev->bd_contains); -		} +		if (bdev != bdev->bd_contains) +			__blkdev_put(bdev->bd_contains, subclass + 1);  		bdev->bd_contains = NULL;  	}  	unlock_kernel(); @@ -1062,8 +1122,20 @@ int blkdev_put(struct block_device *bdev)  	return ret;  } +int blkdev_put(struct block_device *bdev) +{ +	return __blkdev_put(bdev, BD_MUTEX_NORMAL); +} +  EXPORT_SYMBOL(blkdev_put); +int blkdev_put_partition(struct block_device *bdev) +{ +	return __blkdev_put(bdev, BD_MUTEX_PARTITION); +} + +EXPORT_SYMBOL(blkdev_put_partition); +  static int blkdev_close(struct inode * inode, struct file * filp)  {  	struct block_device *bdev = I_BDEV(filp->f_mapping->host); diff --git a/fs/dcache.c b/fs/dcache.c index c6e3535be192..1b4a3a34ec57 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -38,7 +38,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100;  EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);   __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); -static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; +static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);  EXPORT_SYMBOL(dcache_lock); @@ -1339,10 +1339,10 @@ void d_move(struct dentry * dentry, struct dentry * target)  	 */  	if (target < dentry) {  		spin_lock(&target->d_lock); -		spin_lock(&dentry->d_lock); +		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);  	} else {  		spin_lock(&dentry->d_lock); -		spin_lock(&target->d_lock); +		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);  	}  	/* Move the dentry to the target hash queue, if on different bucket */ diff --git a/fs/direct-io.c b/fs/direct-io.c index 538fb0418fba..5981e17f46f0 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -220,7 +220,8 @@ static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)  	if (dio->end_io && dio->result)  		dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);  	if (dio->lock_type == DIO_LOCKING) -		up_read(&dio->inode->i_alloc_sem); +		/* lockdep: non-owner release */ +		up_read_non_owner(&dio->inode->i_alloc_sem);  }  /* @@ -1261,7 +1262,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,  		}  		if (dio_lock_type == DIO_LOCKING) -			down_read(&inode->i_alloc_sem); +			/* lockdep: not the owner will release it */ +			down_read_non_owner(&inode->i_alloc_sem);  	}  	/* diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9c677bbd0b08..19ffb043abbc 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -120,7 +120,7 @@ struct epoll_filefd {   */  struct wake_task_node {  	struct list_head llink; -	task_t *task; +	struct task_struct *task;  	wait_queue_head_t *wq;  }; @@ -413,7 +413,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)  {  	int wake_nests = 0;  	unsigned long flags; -	task_t *this_task = current; +	struct task_struct *this_task = current;  	struct list_head *lsthead = &psw->wake_task_list, *lnk;  	struct wake_task_node *tncur;  	struct wake_task_node tnode; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 9f43879d6d68..f2702cda9779 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1157,7 +1157,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,  	struct buffer_head tmp_bh;  	struct buffer_head *bh; -	mutex_lock(&inode->i_mutex); +	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);  	while (towrite > 0) {  		tocopy = sb->s_blocksize - offset < towrite ?  				sb->s_blocksize - offset : towrite; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f2dd71336612..813d589cc6c0 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2614,7 +2614,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,  	struct buffer_head *bh;  	handle_t *handle = journal_current_handle(); -	mutex_lock(&inode->i_mutex); +	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);  	while (towrite > 0) {  		tocopy = sb->s_blocksize - offset < towrite ?  				sb->s_blocksize - offset : towrite; diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 9c2077e7e081..0ae3cd10702c 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -345,10 +345,8 @@ int jffs2_init_acl(struct inode *inode, struct inode *dir)  	return rc;  } -void jffs2_clear_acl(struct inode *inode) +void jffs2_clear_acl(struct jffs2_inode_info *f)  { -	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); -  	if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) {  		posix_acl_release(f->i_acl_access);  		f->i_acl_access = JFFS2_ACL_NOT_CACHED; diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 8893bd1a6ba7..fa327dbd3171 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -30,7 +30,7 @@ struct jffs2_acl_header {  extern int jffs2_permission(struct inode *, int, struct nameidata *);  extern int jffs2_acl_chmod(struct inode *);  extern int jffs2_init_acl(struct inode *, struct inode *); -extern void jffs2_clear_acl(struct inode *); +extern void jffs2_clear_acl(struct jffs2_inode_info *);  extern struct xattr_handler jffs2_acl_access_xattr_handler;  extern struct xattr_handler jffs2_acl_default_xattr_handler; @@ -40,6 +40,6 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;  #define jffs2_permission NULL  #define jffs2_acl_chmod(inode)		(0)  #define jffs2_init_acl(inode,dir)	(0) -#define jffs2_clear_acl(inode) +#define jffs2_clear_acl(f)  #endif	/* CONFIG_JFFS2_FS_POSIX_ACL */ diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index 8310c95478e9..33f291005012 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c @@ -190,7 +190,7 @@ void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *x)  	kmem_cache_free(tmp_dnode_info_slab, x);  } -struct jffs2_raw_node_ref *jffs2_alloc_refblock(void) +static struct jffs2_raw_node_ref *jffs2_alloc_refblock(void)  {  	struct jffs2_raw_node_ref *ret; diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index f752baa8d399..cae92c14116d 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -426,8 +426,6 @@ char *jffs2_getlink(struct jffs2_sb_info *c, struct jffs2_inode_info *f);  /* scan.c */  int jffs2_scan_medium(struct jffs2_sb_info *c);  void jffs2_rotate_lists(struct jffs2_sb_info *c); -int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf, -				uint32_t ofs, uint32_t len);  struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uint32_t ino);  int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);  int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t size); diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index cc1899268c43..266423b2709d 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -968,6 +968,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)  	struct jffs2_full_dirent *fd, *fds;  	int deleted; +	jffs2_clear_acl(f);  	jffs2_xattr_delete_inode(c, f->inocache);  	down(&f->sem);  	deleted = f->inocache && !f->inocache->nlink; diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 2bfdc33752d3..e2413466ddd5 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -274,8 +274,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)  	return ret;  } -int jffs2_fill_scan_buf (struct jffs2_sb_info *c, void *buf, -				uint32_t ofs, uint32_t len) +static int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf, +			       uint32_t ofs, uint32_t len)  {  	int ret;  	size_t retlen; diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 18e66dbf23b4..25bc1ae08648 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -50,9 +50,10 @@   *   is used to write xdatum to medium. xd->version will be incremented.   * create_xattr_datum(c, xprefix, xname, xvalue, xsize)   *   is used to create new xdatum and write to medium. - * delete_xattr_datum(c, xd) - *   is used to delete a xdatum. It marks xd JFFS2_XFLAGS_DEAD, and allows - *   GC to reclaim those physical nodes. + * unrefer_xattr_datum(c, xd) + *   is used to delete a xdatum. When nobody refers this xdatum, JFFS2_XFLAGS_DEAD + *   is set on xd->flags and chained xattr_dead_list or release it immediately. + *   In the first case, the garbage collector release it later.   * -------------------------------------------------- */  static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize)  { @@ -394,22 +395,24 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,  	return xd;  } -static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +static void unrefer_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)  {  	/* must be called under down_write(xattr_sem) */ -	BUG_ON(atomic_read(&xd->refcnt)); +	if (atomic_dec_and_lock(&xd->refcnt, &c->erase_completion_lock)) { +		uint32_t xid = xd->xid, version = xd->version; -	unload_xattr_datum(c, xd); -	xd->flags |= JFFS2_XFLAGS_DEAD; -	spin_lock(&c->erase_completion_lock); -	if (xd->node == (void *)xd) { -		BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID)); -		jffs2_free_xattr_datum(xd); -	} else { -		list_add(&xd->xindex, &c->xattr_dead_list); +		unload_xattr_datum(c, xd); +		xd->flags |= JFFS2_XFLAGS_DEAD; +		if (xd->node == (void *)xd) { +			BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID)); +			jffs2_free_xattr_datum(xd); +		} else { +			list_add(&xd->xindex, &c->xattr_dead_list); +		} +		spin_unlock(&c->erase_completion_lock); + +		dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xid, version);  	} -	spin_unlock(&c->erase_completion_lock); -	dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xd->xid, xd->version);  }  /* -------- xref related functions ------------------ @@ -580,8 +583,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re  	dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) was removed.\n",  		  ref->ino, ref->xid, ref->xseqno); -	if (atomic_dec_and_test(&xd->refcnt)) -		delete_xattr_datum(c, xd); +	unrefer_xattr_datum(c, xd);  }  void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) @@ -1119,8 +1121,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,  					ref->next = c->xref_dead_list;  					c->xref_dead_list = ref;  					spin_unlock(&c->erase_completion_lock); -					if (atomic_dec_and_test(&xd->refcnt)) -						delete_xattr_datum(c, xd); +					unrefer_xattr_datum(c, xd);  				} else {  					ref->ic = ic;  					ref->xd = xd; @@ -1156,8 +1157,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,  	down_write(&c->xattr_sem);  	if (rc) {  		JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request); -		if (atomic_dec_and_test(&xd->refcnt)) -			delete_xattr_datum(c, xd); +		unrefer_xattr_datum(c, xd);  		up_write(&c->xattr_sem);  		return rc;  	} @@ -1170,8 +1170,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,  			ic->xref = ref;  		}  		rc = PTR_ERR(newref); -		if (atomic_dec_and_test(&xd->refcnt)) -			delete_xattr_datum(c, xd); +		unrefer_xattr_datum(c, xd);  	} else if (ref) {  		delete_xattr_ref(c, ref);  	} diff --git a/fs/namei.c b/fs/namei.c index c784e8bb57a3..c9750d755aff 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1423,7 +1423,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)  	struct dentry *p;  	if (p1 == p2) { -		mutex_lock(&p1->d_inode->i_mutex); +		mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);  		return NULL;  	} @@ -1431,22 +1431,22 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)  	for (p = p1; p->d_parent != p; p = p->d_parent) {  		if (p->d_parent == p2) { -			mutex_lock(&p2->d_inode->i_mutex); -			mutex_lock(&p1->d_inode->i_mutex); +			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); +			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);  			return p;  		}  	}  	for (p = p2; p->d_parent != p; p = p->d_parent) {  		if (p->d_parent == p1) { -			mutex_lock(&p1->d_inode->i_mutex); -			mutex_lock(&p2->d_inode->i_mutex); +			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); +			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);  			return p;  		}  	} -	mutex_lock(&p1->d_inode->i_mutex); -	mutex_lock(&p2->d_inode->i_mutex); +	mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); +	mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);  	return NULL;  } @@ -1751,7 +1751,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)  {  	struct dentry *dentry = ERR_PTR(-EEXIST); -	mutex_lock(&nd->dentry->d_inode->i_mutex); +	mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT);  	/*  	 * Yucky last component or no last component at all?  	 * (foo/., foo/.., /////) @@ -2008,7 +2008,7 @@ static long do_rmdir(int dfd, const char __user *pathname)  			error = -EBUSY;  			goto exit1;  	} -	mutex_lock(&nd.dentry->d_inode->i_mutex); +	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);  	dentry = lookup_hash(&nd);  	error = PTR_ERR(dentry);  	if (!IS_ERR(dentry)) { @@ -2082,7 +2082,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)  	error = -EISDIR;  	if (nd.last_type != LAST_NORM)  		goto exit1; -	mutex_lock(&nd.dentry->d_inode->i_mutex); +	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);  	dentry = lookup_hash(&nd);  	error = PTR_ERR(dentry);  	if (!IS_ERR(dentry)) { diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 4c86b7e1d1eb..d313f356e66a 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -367,6 +367,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni)  	kmem_cache_free(ntfs_inode_cache, ni);  } +/* + * The attribute runlist lock has separate locking rules from the + * normal runlist lock, so split the two lock-classes: + */ +static struct lock_class_key attr_list_rl_lock_class; +  /**   * __ntfs_init_inode - initialize ntfs specific part of an inode   * @sb:		super block of mounted volume @@ -394,6 +400,8 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)  	ni->attr_list_size = 0;  	ni->attr_list = NULL;  	ntfs_init_runlist(&ni->attr_list_rl); +	lockdep_set_class(&ni->attr_list_rl.lock, +				&attr_list_rl_lock_class);  	ni->itype.index.bmp_ino = NULL;  	ni->itype.index.block_size = 0;  	ni->itype.index.vcn_size = 0; @@ -405,6 +413,13 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)  	ni->ext.base_ntfs_ino = NULL;  } +/* + * Extent inodes get MFT-mapped in a nested way, while the base inode + * is still mapped. Teach this nesting to the lock validator by creating + * a separate class for nested inode's mrec_lock's: + */ +static struct lock_class_key extent_inode_mrec_lock_key; +  inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,  		unsigned long mft_no)  { @@ -413,6 +428,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,  	ntfs_debug("Entering.");  	if (likely(ni != NULL)) {  		__ntfs_init_inode(sb, ni); +		lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);  		ni->mft_no = mft_no;  		ni->type = AT_UNUSED;  		ni->name = NULL; @@ -1722,6 +1738,15 @@ err_out:  	return err;  } +/* + * The MFT inode has special locking, so teach the lock validator + * about this by splitting off the locking rules of the MFT from + * the locking rules of other inodes. The MFT inode can never be + * accessed from the VFS side (or even internally), only by the + * map_mft functions. + */ +static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key; +  /**   * ntfs_read_inode_mount - special read_inode for mount time use only   * @vi:		inode to read @@ -2148,6 +2173,14 @@ int ntfs_read_inode_mount(struct inode *vi)  	ntfs_attr_put_search_ctx(ctx);  	ntfs_debug("Done.");  	ntfs_free(m); + +	/* +	 * Split the locking rules of the MFT inode from the +	 * locking rules of other inodes: +	 */ +	lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key); +	lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key); +  	return 0;  em_put_err_out: diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 0e14acea3f8b..74e0ee8fce72 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1724,6 +1724,14 @@ upcase_failed:  	return FALSE;  } +/* + * The lcn and mft bitmap inodes are NTFS-internal inodes with + * their own special locking rules: + */ +static struct lock_class_key +	lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key, +	mftbmp_runlist_lock_key, mftbmp_mrec_lock_key; +  /**   * load_system_files - open the system files using normal functions   * @vol:	ntfs super block describing device whose system files to load @@ -1780,6 +1788,10 @@ static BOOL load_system_files(ntfs_volume *vol)  		ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute.");  		goto iput_mirr_err_out;  	} +	lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock, +			   &mftbmp_runlist_lock_key); +	lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock, +			   &mftbmp_mrec_lock_key);  	/* Read upcase table and setup @vol->upcase and @vol->upcase_len. */  	if (!load_and_init_upcase(vol))  		goto iput_mftbmp_err_out; @@ -1802,6 +1814,11 @@ static BOOL load_system_files(ntfs_volume *vol)  			iput(vol->lcnbmp_ino);  		goto bitmap_failed;  	} +	lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock, +			   &lcnbmp_runlist_lock_key); +	lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock, +			   &lcnbmp_mrec_lock_key); +  	NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));  	if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {  		iput(vol->lcnbmp_ino); @@ -2743,6 +2760,17 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)  	struct inode *tmp_ino;  	int blocksize, result; +	/* +	 * We do a pretty difficult piece of bootstrap by reading the +	 * MFT (and other metadata) from disk into memory. We'll only +	 * release this metadata during umount, so the locking patterns +	 * observed during bootstrap do not count. So turn off the +	 * observation of locking patterns (strictly for this context +	 * only) while mounting NTFS. [The validator is still active +	 * otherwise, even for this context: it will for example record +	 * lock class registrations.] +	 */ +	lockdep_off();  	ntfs_debug("Entering.");  #ifndef NTFS_RW  	sb->s_flags |= MS_RDONLY; @@ -2754,6 +2782,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)  		if (!silent)  			ntfs_error(sb, "Allocation of NTFS volume structure "  					"failed. Aborting mount..."); +		lockdep_on();  		return -ENOMEM;  	}  	/* Initialize ntfs_volume structure. */ @@ -2940,6 +2969,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)  		mutex_unlock(&ntfs_lock);  		sb->s_export_op = &ntfs_export_ops;  		lock_kernel(); +		lockdep_on();  		return 0;  	}  	ntfs_error(sb, "Failed to allocate root directory."); @@ -3059,6 +3089,7 @@ err_out_now:  	sb->s_fs_info = NULL;  	kfree(vol);  	ntfs_debug("Failed, returning -EINVAL."); +	lockdep_on();  	return -EINVAL;  } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index af69f28277b6..4616ed50ffcd 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -107,7 +107,7 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *  {  	struct vm_list_struct *vml;  	struct vm_area_struct *vma; -	struct task_struct *task = proc_task(inode); +	struct task_struct *task = get_proc_task(inode);  	struct mm_struct *mm = get_task_mm(task);  	int result = -ENOENT; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 28eb3c886034..5567328f1041 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2203,7 +2203,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,  	size_t towrite = len;  	struct buffer_head tmp_bh, *bh; -	mutex_lock(&inode->i_mutex); +	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);  	while (towrite > 0) {  		tocopy = sb->s_blocksize - offset < towrite ?  		    sb->s_blocksize - offset : towrite; diff --git a/fs/super.c b/fs/super.c index 9b780c42d845..6d4e8174b6db 100644 --- a/fs/super.c +++ b/fs/super.c @@ -53,7 +53,7 @@ DEFINE_SPINLOCK(sb_lock);   *	Allocates and initializes a new &struct super_block.  alloc_super()   *	returns a pointer new superblock or %NULL if allocation had failed.   */ -static struct super_block *alloc_super(void) +static struct super_block *alloc_super(struct file_system_type *type)  {  	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);  	static struct super_operations default_op; @@ -72,6 +72,13 @@ static struct super_block *alloc_super(void)  		INIT_LIST_HEAD(&s->s_inodes);  		init_rwsem(&s->s_umount);  		mutex_init(&s->s_lock); +		lockdep_set_class(&s->s_umount, &type->s_umount_key); +		/* +		 * The locking rules for s_lock are up to the +		 * filesystem. For example ext3fs has different +		 * lock ordering than usbfs: +		 */ +		lockdep_set_class(&s->s_lock, &type->s_lock_key);  		down_write(&s->s_umount);  		s->s_count = S_BIAS;  		atomic_set(&s->s_active, 1); @@ -295,7 +302,7 @@ retry:  	}  	if (!s) {  		spin_unlock(&sb_lock); -		s = alloc_super(); +		s = alloc_super(type);  		if (!s)  			return ERR_PTR(-ENOMEM);  		goto retry; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 19a99726e58d..992ee0b87cc3 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1326,7 +1326,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,  	size_t towrite = len;  	struct buffer_head *bh; -	mutex_lock(&inode->i_mutex); +	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);  	while (towrite > 0) {  		tocopy = sb->s_blocksize - offset < towrite ?  				sb->s_blocksize - offset : towrite; | 
