summaryrefslogtreecommitdiff
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c199
1 files changed, 62 insertions, 137 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index aa58e0663a5d..1beb9458f622 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -193,10 +193,11 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
u64 end = min_t(u64, eb->start + eb->len,
folio_pos(folio) + eb->folio_size);
u32 len = end - start;
+ phys_addr_t paddr = PFN_PHYS(folio_pfn(folio)) +
+ offset_in_folio(folio, start);
- ret = btrfs_repair_io_failure(fs_info, 0, start, len,
- start, folio, offset_in_folio(folio, start),
- mirror_num);
+ ret = btrfs_repair_io_failure(fs_info, 0, start, len, start,
+ paddr, mirror_num);
if (ret)
break;
}
@@ -224,7 +225,6 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
ASSERT(check);
while (1) {
- clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = read_extent_buffer_pages(eb, mirror_num, check);
if (!ret)
break;
@@ -256,7 +256,7 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
/*
* Checksum a dirty tree block before IO.
*/
-blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
+int btree_csum_one_bio(struct btrfs_bio *bbio)
{
struct extent_buffer *eb = bbio->private;
struct btrfs_fs_info *fs_info = eb->fs_info;
@@ -267,9 +267,9 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
/* Btree blocks are always contiguous on disk. */
if (WARN_ON_ONCE(bbio->file_offset != eb->start))
- return BLK_STS_IOERR;
+ return -EIO;
if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len))
- return BLK_STS_IOERR;
+ return -EIO;
/*
* If an extent_buffer is marked as EXTENT_BUFFER_ZONED_ZEROOUT, don't
@@ -278,13 +278,13 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
*/
if (test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags)) {
memzero_extent_buffer(eb, 0, eb->len);
- return BLK_STS_OK;
+ return 0;
}
if (WARN_ON_ONCE(found_start != eb->start))
- return BLK_STS_IOERR;
+ return -EIO;
if (WARN_ON(!btrfs_meta_folio_test_uptodate(eb->folios[0], eb)))
- return BLK_STS_IOERR;
+ return -EIO;
ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
offsetof(struct btrfs_header, fsid),
@@ -312,7 +312,7 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
goto error;
}
write_extent_buffer(eb, result, 0, fs_info->csum_size);
- return BLK_STS_OK;
+ return 0;
error:
btrfs_print_tree(eb, 0);
@@ -326,7 +326,7 @@ error:
*/
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
- return errno_to_blk_status(ret);
+ return ret;
}
static bool check_tree_block_fsid(struct extent_buffer *eb)
@@ -452,15 +452,9 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
goto out;
}
- /*
- * If this is a leaf block and it is corrupt, set the corrupt bit so
- * that we don't try and read the other copies of this block, just
- * return -EIO.
- */
- if (found_level == 0 && btrfs_check_leaf(eb)) {
- set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
+ /* If this is a leaf block and it is corrupt, just return -EIO. */
+ if (found_level == 0 && btrfs_check_leaf(eb))
ret = -EIO;
- }
if (found_level > 0 && btrfs_check_node(eb))
ret = -EIO;
@@ -641,11 +635,16 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
}
-static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
- u64 objectid)
+static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
+ u64 objectid, gfp_t flags)
{
+ struct btrfs_root *root;
bool dummy = btrfs_is_testing(fs_info);
+ root = kzalloc(sizeof(*root), flags);
+ if (!root)
+ return NULL;
+
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
@@ -698,10 +697,10 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
btrfs_set_root_last_log_commit(root, 0);
root->anon_dev = 0;
if (!dummy) {
- extent_io_tree_init(fs_info, &root->dirty_log_pages,
- IO_TREE_ROOT_DIRTY_LOG_PAGES);
- extent_io_tree_init(fs_info, &root->log_csum_range,
- IO_TREE_LOG_CSUM_RANGE);
+ btrfs_extent_io_tree_init(fs_info, &root->dirty_log_pages,
+ IO_TREE_ROOT_DIRTY_LOG_PAGES);
+ btrfs_extent_io_tree_init(fs_info, &root->log_csum_range,
+ IO_TREE_LOG_CSUM_RANGE);
}
spin_lock_init(&root->root_item_lock);
@@ -712,14 +711,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
list_add_tail(&root->leak_list, &fs_info->allocated_roots);
spin_unlock(&fs_info->fs_roots_radix_lock);
#endif
-}
-static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
- u64 objectid, gfp_t flags)
-{
- struct btrfs_root *root = kzalloc(sizeof(*root), flags);
- if (root)
- __setup_root(root, fs_info, objectid);
return root;
}
@@ -1863,8 +1855,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
int i;
while (!list_empty(&fs_info->dead_roots)) {
- gang[0] = list_entry(fs_info->dead_roots.next,
- struct btrfs_root, root_list);
+ gang[0] = list_first_entry(&fs_info->dead_roots,
+ struct btrfs_root, root_list);
list_del(&gang[0]->root_list);
if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
@@ -1927,9 +1919,9 @@ static int btrfs_init_btree_inode(struct super_block *sb)
inode->i_mapping->a_ops = &btree_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
- extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
- IO_TREE_BTREE_INODE_IO);
- extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
+ btrfs_extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
+ IO_TREE_BTREE_INODE_IO);
+ btrfs_extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
@@ -2002,7 +1994,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
btrfs_alloc_ordered_workqueue(fs_info, "qgroup-rescan",
ordered_flags);
fs_info->discard_ctl.discard_workers =
- alloc_ordered_workqueue("btrfs_discard", WQ_FREEZABLE);
+ alloc_ordered_workqueue("btrfs-discard", WQ_FREEZABLE);
if (!(fs_info->workers &&
fs_info->delalloc_workers && fs_info->flush_workers &&
@@ -2769,10 +2761,21 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
return ret;
}
+/*
+ * Lockdep gets confused between our buffer_tree which requires IRQ locking because
+ * we modify marks in the IRQ context, and our delayed inode xarray which doesn't
+ * have these requirements. Use a class key so lockdep doesn't get them mixed up.
+ */
+static struct lock_class_key buffer_xa_class;
+
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
{
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
- INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
+
+ /* Use the same flags as mapping->i_pages. */
+ xa_init_flags(&fs_info->buffer_tree, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
+ lockdep_set_class(&fs_info->buffer_tree.xa_lock, &buffer_xa_class);
+
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -2784,7 +2787,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->super_lock);
- spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
spin_lock_init(&fs_info->treelog_bg_lock);
spin_lock_init(&fs_info->zone_active_bgs_lock);
@@ -2829,6 +2831,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
BTRFS_BLOCK_RSV_GLOBAL);
btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
+ btrfs_init_block_rsv(&fs_info->treelog_rsv, BTRFS_BLOCK_RSV_TREELOG);
btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
BTRFS_BLOCK_RSV_DELOPS);
@@ -2862,8 +2865,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
rwlock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT_CACHED;
- extent_io_tree_init(fs_info, &fs_info->excluded_extents,
- IO_TREE_FS_EXCLUDED_EXTENTS);
+ btrfs_extent_io_tree_init(fs_info, &fs_info->excluded_extents,
+ IO_TREE_FS_EXCLUDED_EXTENTS);
mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->tree_log_mutex);
@@ -3315,7 +3318,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
/*
* Read super block and check the signature bytes only
*/
- disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev);
+ disk_super = btrfs_read_disk_super(fs_devices->latest_dev->bdev, 0, false);
if (IS_ERR(disk_super)) {
ret = PTR_ERR(disk_super);
goto fail_alloc;
@@ -3710,85 +3713,6 @@ static void btrfs_end_super_write(struct bio *bio)
bio_put(bio);
}
-struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
- int copy_num, bool drop_cache)
-{
- struct btrfs_super_block *super;
- struct page *page;
- u64 bytenr, bytenr_orig;
- struct address_space *mapping = bdev->bd_mapping;
- int ret;
-
- bytenr_orig = btrfs_sb_offset(copy_num);
- ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr);
- if (ret == -ENOENT)
- return ERR_PTR(-EINVAL);
- else if (ret)
- return ERR_PTR(ret);
-
- if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
- return ERR_PTR(-EINVAL);
-
- if (drop_cache) {
- /* This should only be called with the primary sb. */
- ASSERT(copy_num == 0);
-
- /*
- * Drop the page of the primary superblock, so later read will
- * always read from the device.
- */
- invalidate_inode_pages2_range(mapping,
- bytenr >> PAGE_SHIFT,
- (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
- }
-
- page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
- if (IS_ERR(page))
- return ERR_CAST(page);
-
- super = page_address(page);
- if (btrfs_super_magic(super) != BTRFS_MAGIC) {
- btrfs_release_disk_super(super);
- return ERR_PTR(-ENODATA);
- }
-
- if (btrfs_super_bytenr(super) != bytenr_orig) {
- btrfs_release_disk_super(super);
- return ERR_PTR(-EINVAL);
- }
-
- return super;
-}
-
-
-struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
-{
- struct btrfs_super_block *super, *latest = NULL;
- int i;
- u64 transid = 0;
-
- /* we would like to check all the supers, but that would make
- * a btrfs mount succeed after a mkfs from a different FS.
- * So, we need to add a special mount option to scan for
- * later supers, using BTRFS_SUPER_MIRROR_MAX instead
- */
- for (i = 0; i < 1; i++) {
- super = btrfs_read_dev_one_super(bdev, i, false);
- if (IS_ERR(super))
- continue;
-
- if (!latest || btrfs_super_generation(super) > transid) {
- if (latest)
- btrfs_release_disk_super(super);
-
- latest = super;
- transid = btrfs_super_generation(super);
- }
- }
-
- return super;
-}
-
/*
* Write superblock @sb to the @device. Do not wait for completion, all the
* folios we use for writing are locked.
@@ -3828,8 +3752,8 @@ static int write_dev_supers(struct btrfs_device *device,
continue;
} else if (ret < 0) {
btrfs_err(device->fs_info,
- "couldn't get super block location for mirror %d",
- i);
+ "couldn't get super block location for mirror %d error %d",
+ i, ret);
atomic_inc(&device->sb_write_errors);
continue;
}
@@ -3848,8 +3772,8 @@ static int write_dev_supers(struct btrfs_device *device,
GFP_NOFS);
if (IS_ERR(folio)) {
btrfs_err(device->fs_info,
- "couldn't get super block page for bytenr %llu",
- bytenr);
+ "couldn't get super block page for bytenr %llu error %ld",
+ bytenr, PTR_ERR(folio));
atomic_inc(&device->sb_write_errors);
continue;
}
@@ -4244,8 +4168,9 @@ static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
u64 found_end;
found = true;
- while (find_first_extent_bit(&trans->dirty_pages, cur,
- &found_start, &found_end, EXTENT_DIRTY, &cached)) {
+ while (btrfs_find_first_extent_bit(&trans->dirty_pages, cur,
+ &found_start, &found_end,
+ EXTENT_DIRTY, &cached)) {
dirty_bytes += found_end + 1 - found_start;
cur = found_end + 1;
}
@@ -4441,7 +4366,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
if (btrfs_check_quota_leak(fs_info)) {
- WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+ DEBUG_WARN("qgroup reserved space leaked");
btrfs_err(fs_info, "qgroup reserved space leaked");
}
@@ -4698,9 +4623,9 @@ static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
u64 start = 0;
u64 end;
- while (find_first_extent_bit(dirty_pages, start, &start, &end,
- mark, NULL)) {
- clear_extent_bits(dirty_pages, start, end, mark);
+ while (btrfs_find_first_extent_bit(dirty_pages, start, &start, &end,
+ mark, NULL)) {
+ btrfs_clear_extent_bits(dirty_pages, start, end, mark);
while (start <= end) {
eb = find_extent_buffer(fs_info, start);
start += fs_info->nodesize;
@@ -4733,14 +4658,14 @@ static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
* the same extent range.
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
- if (!find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY, &cached_state)) {
+ if (!btrfs_find_first_extent_bit(unpin, 0, &start, &end,
+ EXTENT_DIRTY, &cached_state)) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
}
- clear_extent_dirty(unpin, start, end, &cached_state);
- free_extent_state(cached_state);
+ btrfs_clear_extent_dirty(unpin, start, end, &cached_state);
+ btrfs_free_extent_state(cached_state);
btrfs_error_unpin_extent_range(fs_info, start, end);
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
cond_resched();