From 7eaceaccab5f40bbfda044629a6298616aeaed50 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Mar 2011 08:52:07 +0100 Subject: block: remove per-queue plugging Code has been converted over to the new explicit on-stack plugging, and delay users have been converted to use the new API for that. So lets kill off the old plugging along with aops->sync_page(). Signed-off-by: Jens Axboe --- fs/btrfs/inode.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fb9bd7832b6d..462e08e724b0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7218,7 +7218,6 @@ static const struct address_space_operations btrfs_aops = { .writepage = btrfs_writepage, .writepages = btrfs_writepages, .readpages = btrfs_readpages, - .sync_page = block_sync_page, .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, -- cgit From 57a45ced94fe48a701361d64230fc16eefa189dd Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 25 Jan 2011 16:30:38 -0500 Subject: Btrfs: change reserved_extents to an atomic_t We track delayed allocation per inodes via 2 counters, one is outstanding_extents and reserved_extents. Outstanding_extents is already an atomic_t, but reserved_extents is not and is protected by a spinlock. So convert this to an atomic_t and instead of using a spinlock, use atomic_cmpxchg when releasing delalloc bytes. This makes our inode 72 bytes smaller, and reduces locking overhead (albiet it was minimal to begin with). Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9007bbd01dbf..d97b69afbbfb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6632,9 +6632,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->index_cnt = (u64)-1; ei->last_unlink_trans = 0; - spin_lock_init(&ei->accounting_lock); atomic_set(&ei->outstanding_extents, 0); - ei->reserved_extents = 0; + atomic_set(&ei->reserved_extents, 0); ei->ordered_data_close = 0; ei->orphan_meta_reserved = 0; @@ -6670,7 +6669,7 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); - WARN_ON(BTRFS_I(inode)->reserved_extents); + WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); /* * This can happen where we create an inode, but somebody else also -- cgit From dc89e9824464e91fa0b06267864ceabe3186fd8b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Jan 2011 17:05:48 -0500 Subject: Btrfs: use a slab for the free space entries Since we alloc/free free space entries a whole lot, lets use a slab to keep track of them. This makes some of my tests slightly faster. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d97b69afbbfb..2d2e079713d7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -50,6 +50,7 @@ #include "tree-log.h" #include "compression.h" #include "locking.h" +#include "free-space-cache.h" struct btrfs_iget_args { u64 ino; @@ -70,6 +71,7 @@ static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; struct kmem_cache *btrfs_transaction_cachep; struct kmem_cache *btrfs_path_cachep; +struct kmem_cache *btrfs_free_space_cachep; #define S_SHIFT 12 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { @@ -6761,6 +6763,8 @@ void btrfs_destroy_cachep(void) kmem_cache_destroy(btrfs_transaction_cachep); if (btrfs_path_cachep) kmem_cache_destroy(btrfs_path_cachep); + if (btrfs_free_space_cachep) + kmem_cache_destroy(btrfs_free_space_cachep); } int btrfs_init_cachep(void) @@ -6789,6 +6793,12 @@ int btrfs_init_cachep(void) if (!btrfs_path_cachep) goto fail; + btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache", + sizeof(struct btrfs_free_space), 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); + if (!btrfs_free_space_cachep) + goto fail; + return 0; fail: btrfs_destroy_cachep(); -- cgit From a41ad394a03b802497958d7c98a9dcf607266645 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 31 Jan 2011 15:30:16 -0500 Subject: Btrfs: convert to the new truncate sequence ->truncate() is going away, instead all of the work needs to be done in ->setattr(). So this converts us over to do this. It's fairly straightforward, just get rid of our .truncate inode operation and call btrfs_truncate() directly from btrfs_setsize. This works out better for us since truncate can technically return ENOSPC, and before we had no way of letting anybody know. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 80 +++++++++++++++++++++++++------------------------------- 1 file changed, 36 insertions(+), 44 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2d2e079713d7..3662ffec17d9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -84,7 +84,8 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; -static void btrfs_truncate(struct inode *inode); +static int btrfs_setsize(struct inode *inode, loff_t newsize); +static int btrfs_truncate(struct inode *inode); static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); static noinline int cow_file_range(struct inode *inode, struct page *locked_page, @@ -2371,6 +2372,11 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) /* if we have links, this was a truncate, lets do that */ if (inode->i_nlink) { + if (!S_ISREG(inode->i_mode)) { + WARN_ON(1); + iput(inode); + continue; + } nr_truncate++; btrfs_truncate(inode); } else { @@ -3538,7 +3544,7 @@ out: return ret; } -int btrfs_cont_expand(struct inode *inode, loff_t size) +int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -3546,7 +3552,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) struct extent_map *em = NULL; struct extent_state *cached_state = NULL; u64 mask = root->sectorsize - 1; - u64 hole_start = (inode->i_size + mask) & ~mask; + u64 hole_start = (oldsize + mask) & ~mask; u64 block_end = (size + mask) & ~mask; u64 last_byte; u64 cur_offset; @@ -3617,27 +3623,17 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) return err; } -static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) +static int btrfs_setsize(struct inode *inode, loff_t newsize) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; + loff_t oldsize = i_size_read(inode); unsigned long nr; int ret; - if (attr->ia_size == inode->i_size) + if (newsize == oldsize) return 0; - if (attr->ia_size > inode->i_size) { - unsigned long limit; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (attr->ia_size > inode->i_sb->s_maxbytes) - return -EFBIG; - if (limit != RLIM_INFINITY && attr->ia_size > limit) { - send_sig(SIGXFSZ, current, 0); - return -EFBIG; - } - } - trans = btrfs_start_transaction(root, 5); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -3651,16 +3647,16 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); - if (attr->ia_size > inode->i_size) { - ret = btrfs_cont_expand(inode, attr->ia_size); + if (newsize > oldsize) { + i_size_write(inode, newsize); + btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); + truncate_pagecache(inode, oldsize, newsize); + ret = btrfs_cont_expand(inode, oldsize, newsize); if (ret) { - btrfs_truncate(inode); + btrfs_setsize(inode, oldsize); return ret; } - i_size_write(inode, attr->ia_size); - btrfs_ordered_update_i_size(inode, inode->i_size, NULL); - trans = btrfs_start_transaction(root, 0); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); @@ -3676,22 +3672,22 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); - return 0; - } + } else { - /* - * We're truncating a file that used to have good data down to - * zero. Make sure it gets into the ordered flush list so that - * any new writes get down to disk quickly. - */ - if (attr->ia_size == 0) - BTRFS_I(inode)->ordered_data_close = 1; + /* + * We're truncating a file that used to have good data down to + * zero. Make sure it gets into the ordered flush list so that + * any new writes get down to disk quickly. + */ + if (newsize == 0) + BTRFS_I(inode)->ordered_data_close = 1; - /* we don't support swapfiles, so vmtruncate shouldn't fail */ - ret = vmtruncate(inode, attr->ia_size); - BUG_ON(ret); + /* we don't support swapfiles, so vmtruncate shouldn't fail */ + truncate_setsize(inode, newsize); + ret = btrfs_truncate(inode); + } - return 0; + return ret; } static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) @@ -3708,7 +3704,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { - err = btrfs_setattr_size(inode, attr); + err = btrfs_setsize(inode, attr->ia_size); if (err) return err; } @@ -6478,7 +6474,7 @@ out: return ret; } -static void btrfs_truncate(struct inode *inode) +static int btrfs_truncate(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -6486,14 +6482,9 @@ static void btrfs_truncate(struct inode *inode) unsigned long nr; u64 mask = root->sectorsize - 1; - if (!S_ISREG(inode->i_mode)) { - WARN_ON(1); - return; - } - ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); if (ret) - return; + return ret; btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_ordered_update_i_size(inode, inode->i_size, NULL); @@ -6568,6 +6559,8 @@ static void btrfs_truncate(struct inode *inode) ret = btrfs_end_transaction_throttle(trans, root); BUG_ON(ret); btrfs_btree_balance_dirty(root, nr); + + return ret; } /* @@ -7367,7 +7360,6 @@ static const struct address_space_operations btrfs_symlink_aops = { }; static const struct inode_operations btrfs_file_inode_operations = { - .truncate = btrfs_truncate, .getattr = btrfs_getattr, .setattr = btrfs_setattr, .setxattr = btrfs_setxattr, -- cgit From 3893e33b0bebee2f67d96b6c15259dc884523c20 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 31 Jan 2011 16:03:11 -0500 Subject: Btrfs: cleanup error handling in the truncate path Now that we can handle having errors in the truncate path lets make sure we return errors instead of doing BUG_ON() and such. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 64 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3662ffec17d9..d83025063ee7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3597,13 +3597,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) err = btrfs_drop_extents(trans, inode, cur_offset, cur_offset + hole_size, &hint_byte, 1); - BUG_ON(err); + if (err) + break; err = btrfs_insert_file_extent(trans, root, inode->i_ino, cur_offset, 0, 0, hole_size, 0, hole_size, 0, 0, 0); - BUG_ON(err); + if (err) + break; btrfs_drop_extent_cache(inode, hole_start, last_byte - 1, 0); @@ -3641,7 +3643,10 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) btrfs_set_trans_block_group(trans, inode); ret = btrfs_orphan_add(trans, inode); - BUG_ON(ret); + if (ret) { + btrfs_end_transaction(trans, root); + return ret; + } nr = trans->blocks_used; btrfs_end_transaction(trans, root); @@ -3658,17 +3663,24 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) } trans = btrfs_start_transaction(root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) + return PTR_ERR(trans); + btrfs_set_trans_block_group(trans, inode); trans->block_rsv = root->orphan_block_rsv; BUG_ON(!trans->block_rsv); + /* + * If this fails just leave the orphan item so that it can get + * cleaned up next time we mount. + */ ret = btrfs_update_inode(trans, root, inode); - BUG_ON(ret); - if (inode->i_nlink > 0) { - ret = btrfs_orphan_del(trans, inode); - BUG_ON(ret); + if (ret) { + btrfs_end_transaction(trans, root); + return ret; } + if (inode->i_nlink > 0) + ret = btrfs_orphan_del(trans, inode); nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); @@ -6478,6 +6490,7 @@ static int btrfs_truncate(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; + int err = 0; struct btrfs_trans_handle *trans; unsigned long nr; u64 mask = root->sectorsize - 1; @@ -6490,7 +6503,8 @@ static int btrfs_truncate(struct inode *inode) btrfs_ordered_update_i_size(inode, inode->i_size, NULL); trans = btrfs_start_transaction(root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) + return PTR_ERR(trans); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = root->orphan_block_rsv; @@ -6517,29 +6531,38 @@ static int btrfs_truncate(struct inode *inode) while (1) { if (!trans) { trans = btrfs_start_transaction(root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) + return PTR_ERR(trans); btrfs_set_trans_block_group(trans, inode); trans->block_rsv = root->orphan_block_rsv; } ret = btrfs_block_rsv_check(trans, root, root->orphan_block_rsv, 0, 5); - if (ret) { - BUG_ON(ret != -EAGAIN); + if (ret == -EAGAIN) { ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); + if (ret) + return ret; trans = NULL; continue; + } else if (ret) { + err = ret; + break; } ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, BTRFS_EXTENT_DATA_KEY); - if (ret != -EAGAIN) + if (ret != -EAGAIN) { + err = ret; break; + } ret = btrfs_update_inode(trans, root, inode); - BUG_ON(ret); + if (ret) { + err = ret; + break; + } nr = trans->blocks_used; btrfs_end_transaction(trans, root); @@ -6549,18 +6572,21 @@ static int btrfs_truncate(struct inode *inode) if (ret == 0 && inode->i_nlink > 0) { ret = btrfs_orphan_del(trans, inode); - BUG_ON(ret); + if (ret) + err = ret; } ret = btrfs_update_inode(trans, root, inode); - BUG_ON(ret); + if (ret && !err) + err = ret; nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); - BUG_ON(ret); + if (ret && !err) + err = ret; btrfs_btree_balance_dirty(root, nr); - return ret; + return err; } /* -- cgit From 66b4ffd110f9b48b8d8c1319ee446b53b8d073bf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 31 Jan 2011 16:22:42 -0500 Subject: Btrfs: handle errors in btrfs_orphan_cleanup If we cannot truncate an inode for some reason we will never delete the orphan item associated with that inode, which means that we will loop forever in btrfs_orphan_cleanup. Instead of doing this just return error so we fail to mount. It sucks, but hey it's better than hanging. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d83025063ee7..0600265cb9b0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2284,7 +2284,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) * this cleans up any orphans that may be left on the list from the last use * of this root. */ -void btrfs_orphan_cleanup(struct btrfs_root *root) +int btrfs_orphan_cleanup(struct btrfs_root *root) { struct btrfs_path *path; struct extent_buffer *leaf; @@ -2294,10 +2294,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) int ret = 0, nr_unlink = 0, nr_truncate = 0; if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) - return; + return 0; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + ret = -ENOMEM; + goto out; + } path->reada = -1; key.objectid = BTRFS_ORPHAN_OBJECTID; @@ -2306,11 +2309,8 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - printk(KERN_ERR "Error searching slot for orphan: %d" - "\n", ret); - break; - } + if (ret < 0) + goto out; /* * if ret == 0 means we found what we were searching for, which @@ -2318,6 +2318,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) * find the key and see if we have stuff that matches */ if (ret > 0) { + ret = 0; if (path->slots[0] == 0) break; path->slots[0]--; @@ -2345,7 +2346,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); - BUG_ON(IS_ERR(inode)); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } /* * add this inode to the orphan list so btrfs_orphan_del does @@ -2363,7 +2367,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) */ if (is_bad_inode(inode)) { trans = btrfs_start_transaction(root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } btrfs_orphan_del(trans, inode); btrfs_end_transaction(trans, root); iput(inode); @@ -2378,16 +2385,16 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) continue; } nr_truncate++; - btrfs_truncate(inode); + ret = btrfs_truncate(inode); } else { nr_unlink++; } /* this will do delete_inode and everything for us */ iput(inode); + if (ret) + goto out; } - btrfs_free_path(path); - root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; if (root->orphan_block_rsv) @@ -2396,14 +2403,20 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) if (root->orphan_block_rsv || root->orphan_item_inserted) { trans = btrfs_join_transaction(root, 1); - BUG_ON(IS_ERR(trans)); - btrfs_end_transaction(trans, root); + if (!IS_ERR(trans)) + btrfs_end_transaction(trans, root); } if (nr_unlink) printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); if (nr_truncate) printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); + +out: + if (ret) + printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret); + btrfs_free_path(path); + return ret; } /* @@ -4156,8 +4169,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (!IS_ERR(inode) && root != sub_root) { down_read(&root->fs_info->cleanup_work_sem); if (!(inode->i_sb->s_flags & MS_RDONLY)) - btrfs_orphan_cleanup(sub_root); + ret = btrfs_orphan_cleanup(sub_root); up_read(&root->fs_info->cleanup_work_sem); + if (ret) + inode = ERR_PTR(ret); } return inode; -- cgit From ded5db9de78f963979e1605f859de67626f54693 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 4 Mar 2011 14:09:46 -0500 Subject: Btrfs: make sure to remove the orphan item from the in-memory list This fixes a problem where if truncate fails the inode will still be on the in memory orphan list. This is will make us complain when the inode gets destroyed because it's still on the orphan list. So if we fail just remove us from the in memory list and carry on. Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0600265cb9b0..3bd0ff63bf30 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6589,6 +6589,12 @@ static int btrfs_truncate(struct inode *inode) ret = btrfs_orphan_del(trans, inode); if (ret) err = ret; + } else if (ret && inode->i_nlink > 0) { + /* + * Failed to do the truncate, remove us from the in memory + * orphan list. + */ + ret = btrfs_orphan_del(NULL, inode); } ret = btrfs_update_inode(trans, root, inode); -- cgit From f0cd846e9221811d87047f1428cf5226e7236efe Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 4 Mar 2011 14:37:08 -0500 Subject: Btrfs: only add orphan items when truncating We don't need an orphan item when expanding files, we just need them for truncating them, so only add the orphan item in btrfs_truncate instead of in btrfs_setsize. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 45 ++++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3bd0ff63bf30..206b60362cec 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3649,22 +3649,6 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) if (newsize == oldsize) return 0; - trans = btrfs_start_transaction(root, 5); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - btrfs_set_trans_block_group(trans, inode); - - ret = btrfs_orphan_add(trans, inode); - if (ret) { - btrfs_end_transaction(trans, root); - return ret; - } - - nr = trans->blocks_used; - btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root, nr); - if (newsize > oldsize) { i_size_write(inode, newsize); btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); @@ -3675,25 +3659,15 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) return ret; } - trans = btrfs_start_transaction(root, 0); + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) return PTR_ERR(trans); - btrfs_set_trans_block_group(trans, inode); - trans->block_rsv = root->orphan_block_rsv; - BUG_ON(!trans->block_rsv); - - /* - * If this fails just leave the orphan item so that it can get - * cleaned up next time we mount. - */ ret = btrfs_update_inode(trans, root, inode); if (ret) { btrfs_end_transaction(trans, root); return ret; } - if (inode->i_nlink > 0) - ret = btrfs_orphan_del(trans, inode); nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); @@ -6517,6 +6491,23 @@ static int btrfs_truncate(struct inode *inode) btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_ordered_update_i_size(inode, inode->i_size, NULL); + trans = btrfs_start_transaction(root, 5); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_orphan_add(trans, inode); + if (ret) { + btrfs_end_transaction(trans, root); + return ret; + } + + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root, nr); + + /* Now start a transaction for the truncate */ trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) return PTR_ERR(trans); -- cgit From 930f028abe39dfd0849b53131d19c4b67aacbe67 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 4 Mar 2011 14:41:41 -0500 Subject: Btrfs: use mark_inode_dirty when expanding the file Mark_inode_dirty will call btrfs_dirty_inode which will take care of updating the inode. This makes setsize a little cleaner since we don't have to start a transaction and update the inode in there, we can just call mark_inode_dirty. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 206b60362cec..64d57e032b4e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3640,10 +3640,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) static int btrfs_setsize(struct inode *inode, loff_t newsize) { - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; loff_t oldsize = i_size_read(inode); - unsigned long nr; int ret; if (newsize == oldsize) @@ -3659,18 +3656,7 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) return ret; } - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - ret = btrfs_update_inode(trans, root, inode); - if (ret) { - btrfs_end_transaction(trans, root); - return ret; - } - nr = trans->blocks_used; - btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root, nr); + mark_inode_dirty(inode); } else { /* -- cgit From 695a0d0da09e75c4475bbb303def159023ef72ca Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 4 Mar 2011 15:46:53 -0500 Subject: Btrfs: add a comment explaining what btrfs_cont_expand does Everytime I have to deal with btrfs_cont_expand I stare at it for 20 minutes trying to remember what exactly it does and why the hell we need it. So add a comment to save future-Josef some time. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 64d57e032b4e..888dbdb3b128 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3557,6 +3557,12 @@ out: return ret; } +/* + * This function puts in dummy file extents for the area we're creating a hole + * for. So if we are truncating this file to a larger size we need to insert + * these file extents so that btrfs_get_extent will return a EXTENT_MAP_HOLE for + * the range between oldsize and size + */ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) { struct btrfs_trans_handle *trans; -- cgit From 22a94d44bd6876a90630338229da6c0436d46593 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 16 Mar 2011 16:47:17 -0400 Subject: Btrfs: add checks to verify dir items are correct We need to make sure the dir items we get are valid dir items. So any time we try and read one check it with verify_dir_item, which will do various sanity checks to make sure it looks sane. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 888dbdb3b128..e010000d4bc9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4272,6 +4272,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, while (di_cur < di_total) { struct btrfs_key location; + if (verify_dir_item(root, leaf, di)) + break; + name_len = btrfs_dir_name_len(leaf, di); if (name_len <= sizeof(tmp_name)) { name_ptr = tmp_name; -- cgit From 98bc3149fad639c8f50c7110b961a2a2fe085eed Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 22 Mar 2011 11:00:46 -0400 Subject: Btrfs: don't allocate dip->csums when doing writes When doing direct writes we store the checksums in the ordered sum stuff in the ordered extent for writing them when the write completes, so we don't even use the dip->csums array. So if we're writing, don't bother allocating dip->csums since we won't use it anyway. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e010000d4bc9..570cd44fe91b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5944,6 +5944,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, int nr_pages = 0; u32 *csums = dip->csums; int ret = 0; + int write = rw & REQ_WRITE; bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS); if (!bio) @@ -5980,7 +5981,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, goto out_err; } - if (!skip_sum) + /* Write's use the ordered csums */ + if (!write && !skip_sum) csums = csums + nr_pages; start_sector += submit_len >> 9; file_offset += submit_len; @@ -6048,7 +6050,8 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, } dip->csums = NULL; - if (!skip_sum) { + /* Write's use the ordered csum stuff, so we don't need dip->csums */ + if (!write && !skip_sum) { dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); if (!dip->csums) { kfree(dip); -- cgit From c0da7aa1a2d8fcafe271a7077599253c8ed94bb2 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 22 Mar 2011 11:05:07 -0400 Subject: Btrfs: mark the bio with an error if we have a failure in dio I noticed that dio_end_io calls the appropriate endio function with an error, but the endio functions don't actually do anything with that error, they assume that if there was an error then the bio will not be uptodate. So if we had checksum failures we would never pass back EIO. So if there is an error in our endio functions make sure to clear the uptodate flag on the bio. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 570cd44fe91b..e9813bd7d556 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5744,6 +5744,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) kfree(dip->csums); kfree(dip); + + /* If we had a csum failure make sure to clear the uptodate flag */ + if (err) + clear_bit(BIO_UPTODATE, &bio->bi_flags); dio_end_io(bio, err); } @@ -5845,6 +5849,10 @@ out_done: kfree(dip->csums); kfree(dip); + + /* If we had an error make sure to clear the uptodate flag */ + if (err) + clear_bit(BIO_UPTODATE, &bio->bi_flags); dio_end_io(bio, err); } -- cgit From 1abe9b8a138c9988ba8f7bfded6453649a31541f Mon Sep 17 00:00:00 2001 From: liubo Date: Thu, 24 Mar 2011 11:18:59 +0000 Subject: Btrfs: add initial tracepoint support for btrfs Tracepoints can provide insight into why btrfs hits bugs and be greatly helpful for debugging, e.g dd-7822 [000] 2121.641088: btrfs_inode_request: root = 5(FS_TREE), gen = 4, ino = 256, blocks = 8, disk_i_size = 0, last_trans = 8, logged_trans = 0 dd-7822 [000] 2121.641100: btrfs_inode_new: root = 5(FS_TREE), gen = 8, ino = 257, blocks = 0, disk_i_size = 0, last_trans = 0, logged_trans = 0 btrfs-transacti-7804 [001] 2146.935420: btrfs_cow_block: root = 2(EXTENT_TREE), refs = 2, orig_buf = 29368320 (orig_level = 0), cow_buf = 29388800 (cow_level = 0) btrfs-transacti-7804 [001] 2146.935473: btrfs_cow_block: root = 1(ROOT_TREE), refs = 2, orig_buf = 29364224 (orig_level = 0), cow_buf = 29392896 (cow_level = 0) btrfs-transacti-7804 [001] 2146.972221: btrfs_transaction_commit: root = 1(ROOT_TREE), gen = 8 flush-btrfs-2-7821 [001] 2155.824210: btrfs_chunk_alloc: root = 3(CHUNK_TREE), offset = 1103101952, size = 1073741824, num_stripes = 1, sub_stripes = 0, type = DATA flush-btrfs-2-7821 [001] 2155.824241: btrfs_cow_block: root = 2(EXTENT_TREE), refs = 2, orig_buf = 29388800 (orig_level = 0), cow_buf = 29396992 (cow_level = 0) flush-btrfs-2-7821 [001] 2155.824255: btrfs_cow_block: root = 4(DEV_TREE), refs = 2, orig_buf = 29372416 (orig_level = 0), cow_buf = 29401088 (cow_level = 0) flush-btrfs-2-7821 [000] 2155.824329: btrfs_cow_block: root = 3(CHUNK_TREE), refs = 2, orig_buf = 20971520 (orig_level = 0), cow_buf = 20975616 (cow_level = 0) btrfs-endio-wri-7800 [001] 2155.898019: btrfs_cow_block: root = 5(FS_TREE), refs = 2, orig_buf = 29384704 (orig_level = 0), cow_buf = 29405184 (cow_level = 0) btrfs-endio-wri-7800 [001] 2155.898043: btrfs_cow_block: root = 7(CSUM_TREE), refs = 2, orig_buf = 29376512 (orig_level = 0), cow_buf = 29409280 (cow_level = 0) Here is what I have added: 1) ordere_extent: btrfs_ordered_extent_add btrfs_ordered_extent_remove btrfs_ordered_extent_start btrfs_ordered_extent_put These provide critical information to understand how ordered_extents are updated. 2) extent_map: btrfs_get_extent extent_map is used in both read and write cases, and it is useful for tracking how btrfs specific IO is running. 3) writepage: __extent_writepage btrfs_writepage_end_io_hook Pages are cirtical resourses and produce a lot of corner cases during writeback, so it is valuable to know how page is written to disk. 4) inode: btrfs_inode_new btrfs_inode_request btrfs_inode_evict These can show where and when a inode is created, when a inode is evicted. 5) sync: btrfs_sync_file btrfs_sync_fs These show sync arguments. 6) transaction: btrfs_transaction_commit In transaction based filesystem, it will be useful to know the generation and who does commit. 7) back reference and cow: btrfs_delayed_tree_ref btrfs_delayed_data_ref btrfs_delayed_ref_head btrfs_cow_block Btrfs natively supports back references, these tracepoints are helpful on understanding btrfs's COW mechanism. 8) chunk: btrfs_chunk_alloc btrfs_chunk_free Chunk is a link between physical offset and logical offset, and stands for space infomation in btrfs, and these are helpful on tracing space things. 9) reserved_extent: btrfs_reserved_extent_alloc btrfs_reserved_extent_free These can show how btrfs uses its space. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e9813bd7d556..eaa271484199 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1787,6 +1787,8 @@ out: static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { + trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); + ClearPagePrivate2(page); return btrfs_finish_ordered_io(page->mapping->host, start, end); } @@ -3718,6 +3720,8 @@ void btrfs_evict_inode(struct inode *inode) unsigned long nr; int ret; + trace_btrfs_inode_evict(inode); + truncate_inode_pages(&inode->i_data, 0); if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || root == root->fs_info->tree_root)) @@ -4510,6 +4514,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); if (dir) { + trace_btrfs_inode_request(dir); + ret = btrfs_set_inode_index(dir, index); if (ret) { iput(inode); @@ -4584,6 +4590,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, insert_inode_hash(inode); inode_tree_add(inode); + + trace_btrfs_inode_new(inode); + return inode; fail: if (dir) @@ -5261,6 +5270,9 @@ insert: } write_unlock(&em_tree->lock); out: + + trace_btrfs_get_extent(root, em); + if (path) btrfs_free_path(path); if (trans) { -- cgit From 75e7cb7fe0c391561bd3af36515be3f3c64a04c6 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 22 Mar 2011 10:12:20 +0000 Subject: Btrfs: Per file/directory controls for COW and compression Data compression and data cow are controlled across the entire FS by mount options right now. ioctls are needed to set this on a per file or per directory basis. This has been proposed previously, but VFS developers wanted us to use generic ioctls rather than btrfs-specific ones. According to Chris's comment, there should be just one true compression method(probably LZO) stored in the super. However, before this, we would wait for that one method is stable enough to be adopted into the super. So I list it as a long term goal, and just store it in ram today. After applying this patch, we can use the generic "FS_IOC_SETFLAGS" ioctl to control file and directory's datacow and compression attribute. NOTE: - The compression type is selected by such rules: If we mount btrfs with compress options, ie, zlib/lzo, the type is it. Otherwise, we'll use the default compress type (zlib today). v1->v2: - rebase to the latest btrfs. v2->v3: - fix a problem, i.e. when a file is set NOCOW via mount option, then this NOCOW will be screwed by inheritance from parent directory. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eaa271484199..7a7a202b82ab 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -384,7 +384,8 @@ again: */ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && (btrfs_test_opt(root, COMPRESS) || - (BTRFS_I(inode)->force_compress))) { + (BTRFS_I(inode)->force_compress) || + (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); @@ -1256,7 +1257,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); else if (!btrfs_test_opt(root, COMPRESS) && - !(BTRFS_I(inode)->force_compress)) + !(BTRFS_I(inode)->force_compress) && + !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) ret = cow_file_range(inode, locked_page, start, end, page_started, nr_written, 1); else @@ -4584,7 +4586,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if ((mode & S_IFREG)) { if (btrfs_test_opt(root, NODATASUM)) BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; - if (btrfs_test_opt(root, NODATACOW)) + if (btrfs_test_opt(root, NODATACOW) || + (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW)) BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; } @@ -6866,6 +6869,26 @@ static int btrfs_getattr(struct vfsmount *mnt, return 0; } +/* + * If a file is moved, it will inherit the cow and compression flags of the new + * directory. + */ +static void fixup_inode_flags(struct inode *dir, struct inode *inode) +{ + struct btrfs_inode *b_dir = BTRFS_I(dir); + struct btrfs_inode *b_inode = BTRFS_I(inode); + + if (b_dir->flags & BTRFS_INODE_NODATACOW) + b_inode->flags |= BTRFS_INODE_NODATACOW; + else + b_inode->flags &= ~BTRFS_INODE_NODATACOW; + + if (b_dir->flags & BTRFS_INODE_COMPRESS) + b_inode->flags |= BTRFS_INODE_COMPRESS; + else + b_inode->flags &= ~BTRFS_INODE_COMPRESS; +} + static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { @@ -6999,6 +7022,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } + fixup_inode_flags(new_dir, old_inode); + ret = btrfs_add_link(trans, new_dir, old_inode, new_dentry->d_name.name, new_dentry->d_name.len, 0, index); -- cgit From 3ab3564f018b9b265d0258e4a231794bacd5ad85 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 22 Mar 2011 17:20:26 +0000 Subject: btrfs: return EXDEV when linking from different subvolumes btrfs_link returns EPERM if a cross-subvolume link is attempted. However, in this case I believe EXDEV to be the more appropriate value. >From the link(2) man page: EXDEV oldpath and newpath are not on the same mounted file system. (Linux permits a file system to be mounted at multiple points, but link() does not work across different mount points, even if the same file system is mounted on both.) This matters because an application may have different behaviors based on return codes. Signed-off-by: Mark Fasheh Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7a7a202b82ab..67fd6e9552d3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4817,7 +4817,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, /* do not allow sys_link's with other subvols of the same device */ if (root->objectid != BTRFS_I(inode)->root->objectid) - return -EPERM; + return -EXDEV; btrfs_inc_nlink(inode); inode->i_ctime = CURRENT_TIME; -- cgit From dac97e516c617f9c797f64b0224050b70aea30c7 Mon Sep 17 00:00:00 2001 From: Yoshinori Sano Date: Tue, 15 Feb 2011 12:01:42 +0000 Subject: Btrfs: fix uncheck memory allocations To make Btrfs code more robust, several return value checks where memory allocation can fail are introduced. I use BUG_ON where I don't know how to handle the error properly, which increases the number of using the notorious BUG_ON, though. Signed-off-by: Yoshinori Sano Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 67fd6e9552d3..f739b256967e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -290,6 +290,7 @@ static noinline int add_async_extent(struct async_cow *cow, struct async_extent *async_extent; async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS); + BUG_ON(!async_extent); async_extent->start = start; async_extent->ram_size = ram_size; async_extent->compressed_size = compressed_size; @@ -388,6 +389,7 @@ again: (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); + BUG_ON(!pages); if (BTRFS_I(inode)->force_compress) compress_type = BTRFS_I(inode)->force_compress; -- cgit From c2db1073fdf9757e6fd8b4a59d15b6ecc7a2af8a Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Tue, 1 Mar 2011 06:48:31 +0000 Subject: Btrfs: check return value of btrfs_alloc_path() Adding the check on the return value of btrfs_alloc_path() to several places. And, some of callers are modified by this change. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f739b256967e..04e9fffee8cc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1467,8 +1467,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, if (bio_flags & EXTENT_BIO_COMPRESSED) { return btrfs_submit_compressed_read(inode, bio, mirror_num, bio_flags); - } else if (!skip_sum) - btrfs_lookup_bio_sums(root, inode, bio, NULL); + } else if (!skip_sum) { + ret = btrfs_lookup_bio_sums(root, inode, bio, NULL); + if (ret) + return ret; + } goto mapit; } else if (!skip_sum) { /* csum items have already been cloned */ @@ -1903,10 +1906,10 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, else rw = READ; - BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, + ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, failrec->last_mirror, failrec->bio_flags, 0); - return 0; + return ret; } /* @@ -5943,9 +5946,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, __btrfs_submit_bio_start_direct_io, __btrfs_submit_bio_done); goto err; - } else if (!skip_sum) - btrfs_lookup_bio_sums_dio(root, inode, bio, + } else if (!skip_sum) { + ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset, csums); + if (ret) + goto err; + } ret = btrfs_map_bio(root, rw, bio, 0, 1); err: -- cgit From 92986796d84ef939e304099dece32572a755b280 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Mar 2011 17:14:37 +0000 Subject: btrfs: don't mess with i_nlink of unlocked inode in rename() old_inode is not locked; it's not safe to play with its link count. Instead of bumping it and calling btrfs_unlink_inode(), add a variant of the latter that does not do btrfs_drop_nlink()/ btrfs_update_inode(), call it instead of btrfs_inc_nlink()/ btrfs_unlink_inode() and do btrfs_update_inode() ourselves. Signed-off-by: Al Viro Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 04e9fffee8cc..4822b3132784 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2664,10 +2664,10 @@ failed: * recovery code. It remove a link in a directory with a given name, and * also drops the back refs in the inode to the directory */ -int btrfs_unlink_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *dir, struct inode *inode, - const char *name, int name_len) +static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, struct inode *inode, + const char *name, int name_len) { struct btrfs_path *path; int ret = 0; @@ -2739,12 +2739,25 @@ err: btrfs_i_size_write(dir, dir->i_size - name_len * 2); inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME; btrfs_update_inode(trans, root, dir); - btrfs_drop_nlink(inode); - ret = btrfs_update_inode(trans, root, inode); out: return ret; } +int btrfs_unlink_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, struct inode *inode, + const char *name, int name_len) +{ + int ret; + ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len); + if (!ret) { + btrfs_drop_nlink(inode); + ret = btrfs_update_inode(trans, root, inode); + } + return ret; +} + + /* helper to check if there is any shared block in the path */ static int check_path_shared(struct btrfs_root *root, struct btrfs_path *path) @@ -6999,11 +7012,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dentry->d_name.name, old_dentry->d_name.len); } else { - btrfs_inc_nlink(old_dentry->d_inode); - ret = btrfs_unlink_inode(trans, root, old_dir, - old_dentry->d_inode, - old_dentry->d_name.name, - old_dentry->d_name.len); + ret = __btrfs_unlink_inode(trans, root, old_dir, + old_dentry->d_inode, + old_dentry->d_name.name, + old_dentry->d_name.len); + if (!ret) + ret = btrfs_update_inode(trans, root, old_inode); } BUG_ON(ret); -- cgit From c055e99eea6e4f614267632fac546e7896c0227b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Mar 2011 17:15:18 +0000 Subject: btrfs: check link counter overflow in link(2) Signed-off-by: Al Viro Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4822b3132784..04babaf31a33 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4837,6 +4837,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (root->objectid != BTRFS_I(inode)->root->objectid) return -EXDEV; + if (inode->i_nlink == ~0U) + return -EMLINK; + btrfs_inc_nlink(inode); inode->i_ctime = CURRENT_TIME; -- cgit From 1561deda687eef0e95065f1268d680ddc5976ee7 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Sun, 27 Mar 2011 16:07:36 +0800 Subject: btrfs: fix possible deadlock by clearing __GFP_FS flag Using the GFP_HIGHUSER_MOVABLE flag to allocate the metadata's page may cause deadlock. Task1 open() ... btrfs_search_slot() ... btrfs_cow_block() ... alloc_page() wait for reclaiming shrink_slab() ... shrink_icache_memory() ... btrfs_evict_inode() ... btrfs_search_slot() If the path is locked by task1, the deadlock happens. So the btree's page cache is different with the file's page cache, it can not allocate pages by GFP_HIGHUSER_MOVABLE flag, we must clear __GFP_FS flag in GFP_HIGHUSER_MOVABLE flag. Reported-by: Itaru Kitayama Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 04babaf31a33..06274186b290 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2536,6 +2536,8 @@ static void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); alloc_group_block = btrfs_inode_block_group(leaf, inode_item); + if (location.objectid == BTRFS_FREE_SPACE_OBJECTID) + inode->i_mapping->flags &= ~__GFP_FS; /* * try to precache a NULL acl entry for files that don't have @@ -4084,7 +4086,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, BTRFS_I(inode)->root = root; memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); btrfs_read_locked_inode(inode); - inode_tree_add(inode); unlock_new_inode(inode); if (new) -- cgit