Diffstat (limited to 'fs/btrfs/block-group.c')
-rw-r--r-- | fs/btrfs/block-group.c | 351
1 file changed, 211 insertions, 140 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c0a8f7d92acc..5b0cb04b2b93 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -191,21 +191,21 @@ static int btrfs_bg_start_cmp(const struct rb_node *new,
 /*
  * This adds the block group to the fs_info rb tree for the block group cache
  */
-static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
-				       struct btrfs_block_group *block_group)
+static int btrfs_add_block_group_cache(struct btrfs_block_group *block_group)
 {
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	struct rb_node *exist;
 	int ret = 0;

 	ASSERT(block_group->length != 0);

-	write_lock(&info->block_group_cache_lock);
+	write_lock(&fs_info->block_group_cache_lock);

 	exist = rb_find_add_cached(&block_group->cache_node,
-			&info->block_group_cache_tree, btrfs_bg_start_cmp);
+			&fs_info->block_group_cache_tree, btrfs_bg_start_cmp);
 	if (exist)
 		ret = -EEXIST;
-	write_unlock(&info->block_group_cache_lock);
+	write_unlock(&fs_info->block_group_cache_lock);

 	return ret;
 }
@@ -525,10 +525,9 @@ int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start,
 		*total_added_ret = 0;

 	while (start < end) {
-		if (!find_first_extent_bit(&info->excluded_extents, start,
-					   &extent_start, &extent_end,
-					   EXTENT_DIRTY | EXTENT_UPTODATE,
-					   NULL))
+		if (!btrfs_find_first_extent_bit(&info->excluded_extents, start,
+						 &extent_start, &extent_end,
+						 EXTENT_DIRTY, NULL))
 			break;

 		if (extent_start <= start) {
@@ -584,7 +583,7 @@ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_
 	struct btrfs_root *extent_root;
 	u64 search_offset;
 	u64 search_end = block_group->start + block_group->length;
-	struct btrfs_path *path;
+	BTRFS_PATH_AUTO_FREE(path);
 	struct btrfs_key search_key;
 	int ret = 0;

@@ -626,7 +625,6 @@ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_
 	lockdep_assert_held(&caching_ctl->mutex);
 	lockdep_assert_held_read(&fs_info->commit_root_sem);

-	btrfs_free_path(path);
 	return ret;
 }

@@ -702,7 +700,7 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
 	struct btrfs_block_group *block_group = caching_ctl->block_group;
 	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	struct btrfs_root *extent_root;
-	struct btrfs_path *path;
+	BTRFS_PATH_AUTO_FREE(path);
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
 	u64 total_found = 0;
@@ -738,8 +736,8 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
 	path->reada = READA_FORWARD;

 	key.objectid = last;
-	key.offset = 0;
 	key.type = BTRFS_EXTENT_ITEM_KEY;
+	key.offset = 0;

 next:
 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
@@ -785,8 +783,8 @@ next:
 		if (key.objectid < last) {
 			key.objectid = last;
-			key.offset = 0;
 			key.type = BTRFS_EXTENT_ITEM_KEY;
+			key.offset = 0;
 			btrfs_release_path(path);
 			goto next;
 		}
@@ -829,14 +827,13 @@ next:
 						       block_group->start + block_group->length,
 						       NULL);
 out:
-	btrfs_free_path(path);
 	return ret;
 }

 static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg)
 {
-	clear_extent_bits(&bg->fs_info->excluded_extents, bg->start,
-			  bg->start + bg->length - 1, EXTENT_UPTODATE);
+	btrfs_clear_extent_bits(&bg->fs_info->excluded_extents, bg->start,
+				bg->start + bg->length - 1, EXTENT_DIRTY);
 }

 static noinline void caching_thread(struct btrfs_work *work)
@@ -1421,9 +1418,8 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
 	int ret;

 	spin_lock(&fs_info->trans_lock);
-	if (trans->transaction->list.prev != &fs_info->trans_list) {
-		prev_trans = list_last_entry(&trans->transaction->list,
-					     struct btrfs_transaction, list);
+	if (!list_is_first(&trans->transaction->list, &fs_info->trans_list)) {
+		prev_trans = list_prev_entry(trans->transaction, list);
 		refcount_inc(&prev_trans->use_count);
 	}
 	spin_unlock(&fs_info->trans_lock);
@@ -1440,14 +1436,14 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
 	 */
 	mutex_lock(&fs_info->unused_bg_unpin_mutex);
 	if (prev_trans) {
-		ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
-					EXTENT_DIRTY);
+		ret = btrfs_clear_extent_bits(&prev_trans->pinned_extents, start, end,
+					      EXTENT_DIRTY);
 		if (ret)
 			goto out;
 	}

-	ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
-				EXTENT_DIRTY);
+	ret = btrfs_clear_extent_bits(&trans->transaction->pinned_extents, start, end,
+				      EXTENT_DIRTY);
 out:
 	mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 	if (prev_trans)
@@ -1457,6 +1453,32 @@ out:
 }

 /*
+ * Link the block_group to a list via bg_list.
+ *
+ * @bg: The block_group to link to the list.
+ * @list: The list to link it to.
+ *
+ * Use this rather than list_add_tail() directly to ensure proper respect
+ * to locking and refcounting.
+ *
+ * Returns: true if the bg was linked with a refcount bump and false otherwise.
+ */
+static bool btrfs_link_bg_list(struct btrfs_block_group *bg, struct list_head *list)
+{
+	struct btrfs_fs_info *fs_info = bg->fs_info;
+	bool added = false;
+
+	spin_lock(&fs_info->unused_bgs_lock);
+	if (list_empty(&bg->bg_list)) {
+		btrfs_get_block_group(bg);
+		list_add_tail(&bg->bg_list, list);
+		added = true;
+	}
+	spin_unlock(&fs_info->unused_bgs_lock);
+	return added;
+}
+
+/*
  * Process the unused_bgs list and remove any that don't have any allocated
  * space inside of them.
@@ -1571,8 +1593,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 			 * drop under the "next" label for the
 			 * fs_info->unused_bgs list.
 			 */
-			btrfs_get_block_group(block_group);
-			list_add_tail(&block_group->bg_list, &retry_list);
+			btrfs_link_bg_list(block_group, &retry_list);

 			trace_btrfs_skip_unused_block_group(block_group);
 			spin_unlock(&block_group->lock);
@@ -1823,7 +1844,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 	list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp);
 	while (!list_empty(&fs_info->reclaim_bgs)) {
 		u64 zone_unusable;
-		u64 reclaimed;
+		u64 used;
+		u64 reserved;
 		int ret = 0;

 		bg = list_first_entry(&fs_info->reclaim_bgs,
@@ -1887,6 +1909,17 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 			up_write(&space_info->groups_sem);
 			goto next;
 		}
+
+		/*
+		 * Cache the zone_unusable value before turning the block group
+		 * to read only. As soon as the block group is read only it's
+		 * zone_unusable value gets moved to the block group's read-only
+		 * bytes and isn't available for calculations anymore. We also
+		 * cache it before unlocking the block group, to prevent races
+		 * (reports from KCSAN and such tools) with tasks updating it.
+		 */
+		zone_unusable = bg->zone_unusable;
+
 		spin_unlock(&bg->lock);
 		spin_unlock(&space_info->lock);
@@ -1903,31 +1936,47 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 			goto next;
 		}

-		/*
-		 * Cache the zone_unusable value before turning the block group
-		 * to read only. As soon as the blog group is read only it's
-		 * zone_unusable value gets moved to the block group's read-only
-		 * bytes and isn't available for calculations anymore.
-		 */
-		zone_unusable = bg->zone_unusable;
 		ret = inc_block_group_ro(bg, 0);
 		up_write(&space_info->groups_sem);
 		if (ret < 0)
 			goto next;

+		/*
+		 * The amount of bytes reclaimed corresponds to the sum of the
+		 * "used" and "reserved" counters. We have set the block group
+		 * to RO above, which prevents reservations from happening but
+		 * we may have existing reservations for which allocation has
+		 * not yet been done - btrfs_update_block_group() was not yet
+		 * called, which is where we will transfer a reserved extent's
+		 * size from the "reserved" counter to the "used" counter - this
+		 * happens when running delayed references. When we relocate the
+		 * chunk below, relocation first flushes dellaloc, waits for
+		 * ordered extent completion (which is where we create delayed
+		 * references for data extents) and commits the current
+		 * transaction (which runs delayed references), and only after
+		 * it does the actual work to move extents out of the block
+		 * group. So the reported amount of reclaimed bytes is
+		 * effectively the sum of the 'used' and 'reserved' counters.
+		 */
+		spin_lock(&bg->lock);
+		used = bg->used;
+		reserved = bg->reserved;
+		spin_unlock(&bg->lock);
+
 		btrfs_info(fs_info,
-			"reclaiming chunk %llu with %llu%% used %llu%% unusable",
+			"reclaiming chunk %llu with %llu%% used %llu%% reserved %llu%% unusable",
 				bg->start,
-				div64_u64(bg->used * 100, bg->length),
+				div64_u64(used * 100, bg->length),
+				div64_u64(reserved * 100, bg->length),
 				div64_u64(zone_unusable * 100, bg->length));
 		trace_btrfs_reclaim_block_group(bg);
-		reclaimed = bg->used;
 		ret = btrfs_relocate_chunk(fs_info, bg->start);
 		if (ret) {
 			btrfs_dec_block_group_ro(bg);
 			btrfs_err(fs_info, "error relocating chunk %llu",
 				  bg->start);
-			reclaimed = 0;
+			used = 0;
+			reserved = 0;
 			spin_lock(&space_info->lock);
 			space_info->reclaim_errors++;
 			if (READ_ONCE(space_info->periodic_reclaim))
@@ -1936,24 +1985,13 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 		}
 		spin_lock(&space_info->lock);
 		space_info->reclaim_count++;
-		space_info->reclaim_bytes += reclaimed;
+		space_info->reclaim_bytes += used;
+		space_info->reclaim_bytes += reserved;
 		spin_unlock(&space_info->lock);

 next:
-		if (ret && !READ_ONCE(space_info->periodic_reclaim)) {
-			/* Refcount held by the reclaim_bgs list after splice. */
-			spin_lock(&fs_info->unused_bgs_lock);
-			/*
-			 * This block group might be added to the unused list
-			 * during the above process. Move it back to the
-			 * reclaim list otherwise.
-			 */
-			if (list_empty(&bg->bg_list)) {
-				btrfs_get_block_group(bg);
-				list_add_tail(&bg->bg_list, &retry_list);
-			}
-			spin_unlock(&fs_info->unused_bgs_lock);
-		}
+		if (ret && !READ_ONCE(space_info->periodic_reclaim))
+			btrfs_link_bg_list(bg, &retry_list);
 		btrfs_put_block_group(bg);
 		mutex_unlock(&fs_info->reclaim_bgs_lock);
@@ -1993,13 +2031,8 @@ void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg)
 {
 	struct btrfs_fs_info *fs_info = bg->fs_info;

-	spin_lock(&fs_info->unused_bgs_lock);
-	if (list_empty(&bg->bg_list)) {
-		btrfs_get_block_group(bg);
+	if (btrfs_link_bg_list(bg, &fs_info->reclaim_bgs))
 		trace_btrfs_add_reclaim_block_group(bg);
-		list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs);
-	}
-	spin_unlock(&fs_info->unused_bgs_lock);
 }

 static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key *key,
@@ -2182,9 +2215,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
 	if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
 		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
 		cache->bytes_super += stripe_len;
-		ret = set_extent_bit(&fs_info->excluded_extents, cache->start,
-				     cache->start + stripe_len - 1,
-				     EXTENT_UPTODATE, NULL);
+		ret = btrfs_set_extent_bit(&fs_info->excluded_extents, cache->start,
+					   cache->start + stripe_len - 1,
+					   EXTENT_DIRTY, NULL);
 		if (ret)
 			return ret;
 	}
@@ -2210,9 +2243,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
 			      cache->start + cache->length - logical[nr]);

 		cache->bytes_super += len;
-		ret = set_extent_bit(&fs_info->excluded_extents, logical[nr],
-				     logical[nr] + len - 1,
-				     EXTENT_UPTODATE, NULL);
+		ret = btrfs_set_extent_bit(&fs_info->excluded_extents,
+					   logical[nr], logical[nr] + len - 1,
+					   EXTENT_DIRTY, NULL);
 		if (ret) {
 			kfree(logical);
 			return ret;
 		}
@@ -2337,6 +2370,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 	cache->commit_used = cache->used;
 	cache->flags = btrfs_stack_block_group_flags(bgi);
 	cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
+	cache->space_info = btrfs_find_space_info(info, cache->flags);

 	set_free_space_tree_thresholds(cache);
@@ -2410,11 +2444,12 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 		goto error;
 	}

-	ret = btrfs_add_block_group_cache(info, cache);
+	ret = btrfs_add_block_group_cache(cache);
 	if (ret) {
 		btrfs_remove_free_space_cache(cache);
 		goto error;
 	}
+
 	trace_btrfs_add_block_group(info, cache, 0);
 	btrfs_add_bg_to_space_info(info, cache);
@@ -2459,7 +2494,8 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
 		bg->cached = BTRFS_CACHE_FINISHED;
 		bg->used = map->chunk_len;
 		bg->flags = map->type;
-		ret = btrfs_add_block_group_cache(fs_info, bg);
+		bg->space_info = btrfs_find_space_info(fs_info, bg->flags);
+		ret = btrfs_add_block_group_cache(bg);

 		/*
 		 * We may have some valid block group cache added already, in
 		 * that case we skip to the next one.
@@ -2509,8 +2545,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 		return fill_dummy_bgs(info);

 	key.objectid = 0;
-	key.offset = 0;
 	key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	key.offset = 0;
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -2641,7 +2677,7 @@ static int insert_dev_extent(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_fs_info *fs_info = device->fs_info;
 	struct btrfs_root *root = fs_info->dev_root;
-	struct btrfs_path *path;
+	BTRFS_PATH_AUTO_FREE(path);
 	struct btrfs_dev_extent *extent;
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
@@ -2658,7 +2694,7 @@ static int insert_dev_extent(struct btrfs_trans_handle *trans,
 	key.offset = start;
 	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent));
 	if (ret)
-		goto out;
+		return ret;

 	leaf = path->nodes[0];
 	extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent);
@@ -2666,10 +2702,8 @@ static int insert_dev_extent(struct btrfs_trans_handle *trans,
 	btrfs_set_dev_extent_chunk_objectid(leaf, extent,
 					    BTRFS_FIRST_CHUNK_TREE_OBJECTID);
 	btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
-
 	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
-out:
-	btrfs_free_path(path);
+
 	return ret;
 }
@@ -2771,8 +2805,12 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
 		/* Already aborted the transaction if it failed. */
 next:
 		btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info);
+
+		spin_lock(&fs_info->unused_bgs_lock);
 		list_del_init(&block_group->bg_list);
 		clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
+		btrfs_put_block_group(block_group);
+		spin_unlock(&fs_info->unused_bgs_lock);

 		/*
 		 * If the block group is still unused, add it to the list of
@@ -2830,8 +2868,8 @@ static u64 calculate_global_root_id(const struct btrfs_fs_info *fs_info, u64 off
 }

 struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
-						 u64 type,
-						 u64 chunk_offset, u64 size)
+						 struct btrfs_space_info *space_info,
+						 u64 type, u64 chunk_offset, u64 size)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group *cache;
@@ -2885,10 +2923,10 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
 	 * assigned to our block group. We want our bg to be added to the rbtree
 	 * with its ->space_info set.
 	 */
-	cache->space_info = btrfs_find_space_info(fs_info, cache->flags);
+	cache->space_info = space_info;
 	ASSERT(cache->space_info);

-	ret = btrfs_add_block_group_cache(fs_info, cache);
+	ret = btrfs_add_block_group_cache(cache);
 	if (ret) {
 		btrfs_remove_free_space_cache(cache);
 		btrfs_put_block_group(cache);
@@ -2910,7 +2948,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
 	}
 #endif

-	list_add_tail(&cache->bg_list, &trans->new_bgs);
+	btrfs_link_bg_list(cache, &trans->new_bgs);
 	btrfs_inc_delayed_refs_rsv_bg_inserts(fs_info);

 	set_avail_alloc_bits(fs_info, type);
@@ -2930,6 +2968,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
 			     bool do_chunk_alloc)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
+	struct btrfs_space_info *space_info = cache->space_info;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = btrfs_block_group_root(fs_info);
 	u64 alloc_flags;
@@ -2982,7 +3021,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
 		 */
 		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
 		if (alloc_flags != cache->flags) {
-			ret = btrfs_chunk_alloc(trans, alloc_flags,
+			ret = btrfs_chunk_alloc(trans, space_info, alloc_flags,
 						CHUNK_ALLOC_FORCE);
 			/*
 			 * ENOSPC is allowed here, we may have enough space
@@ -3010,15 +3049,15 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
 	    (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
 		goto unlock_out;

-	alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
-	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+	alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags);
+	ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
 	if (ret < 0)
 		goto out;

 	/*
 	 * We have allocated a new chunk. We also need to activate that chunk to
 	 * grant metadata tickets for zoned filesystem.
 	 */
-	ret = btrfs_zoned_activate_one_bg(fs_info, cache->space_info, true);
+	ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
 	if (ret < 0)
 		goto out;
@@ -3306,7 +3345,7 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_block_group *cache, *tmp;
 	struct btrfs_transaction *cur_trans = trans->transaction;
-	struct btrfs_path *path;
+	BTRFS_PATH_AUTO_FREE(path);

 	if (list_empty(&cur_trans->dirty_bgs) ||
 	    !btrfs_test_opt(fs_info, SPACE_CACHE))
@@ -3323,7 +3362,6 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
 		cache_save_setup(cache, trans, path);
 	}

-	btrfs_free_path(path);
 	return 0;
 }
@@ -3346,7 +3384,7 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	int ret = 0;
 	int should_put;
-	struct btrfs_path *path = NULL;
+	BTRFS_PATH_AUTO_FREE(path);
 	LIST_HEAD(dirty);
 	struct list_head *io = &cur_trans->io_bgs;
 	int loops = 0;
@@ -3501,7 +3539,6 @@ out:
 		btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
 	}

-	btrfs_free_path(path);
 	return ret;
 }
@@ -3512,7 +3549,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	int ret = 0;
 	int should_put;
-	struct btrfs_path *path;
+	BTRFS_PATH_AUTO_FREE(path);
 	struct list_head *io = &cur_trans->io_bgs;

 	path = btrfs_alloc_path();
@@ -3624,7 +3661,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
 		btrfs_put_block_group(cache);
 	}

-	btrfs_free_path(path);
 	return ret;
 }
@@ -3703,8 +3739,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 		spin_unlock(&cache->lock);
 		spin_unlock(&space_info->lock);

-		set_extent_bit(&trans->transaction->pinned_extents, bytenr,
-			       bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
+		btrfs_set_extent_bit(&trans->transaction->pinned_extents, bytenr,
+				     bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
 	}

 	spin_lock(&trans->transaction->dirty_bgs_lock);
@@ -3793,17 +3829,17 @@ out:
 /*
  * Update the block_group and space info counters.
  *
- * @cache: The cache we are manipulating
- * @num_bytes: The number of bytes in question
- * @delalloc: The blocks are allocated for the delalloc write
+ * @cache: The cache we are manipulating.
+ * @num_bytes: The number of bytes in question.
+ * @is_delalloc: Whether the blocks are allocated for a delalloc write.
  *
  * This is called by somebody who is freeing space that was never actually used
  * on disk. For example if you reserve some space for a new leaf in transaction
  * A and before transaction A commits you free that leaf, you call this with
  * reserve set to 0 in order to clear the reservation.
  */
-void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
-			       u64 num_bytes, int delalloc)
+void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes,
+			       bool is_delalloc)
 {
 	struct btrfs_space_info *space_info = cache->space_info;
@@ -3817,7 +3853,7 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
 	space_info->bytes_reserved -= num_bytes;
 	space_info->max_extent_size = 0;

-	if (delalloc)
+	if (is_delalloc)
 		cache->delalloc_bytes -= num_bytes;
 	spin_unlock(&cache->lock);
@@ -3836,14 +3872,14 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
 	}
 }

-static int should_alloc_chunk(const struct btrfs_fs_info *fs_info,
-			      const struct btrfs_space_info *sinfo, int force)
+static bool should_alloc_chunk(const struct btrfs_fs_info *fs_info,
+			       const struct btrfs_space_info *sinfo, int force)
 {
 	u64 bytes_used = btrfs_space_info_used(sinfo, false);
 	u64 thresh;

 	if (force == CHUNK_ALLOC_FORCE)
-		return 1;
+		return true;

 	/*
 	 * in limited mode, we want to have some free space up to
@@ -3854,22 +3890,31 @@ static int should_alloc_chunk(const struct btrfs_fs_info *fs_info,
 		thresh = max_t(u64, SZ_64M, mult_perc(thresh, 1));

 		if (sinfo->total_bytes - bytes_used < thresh)
-			return 1;
+			return true;
 	}

 	if (bytes_used + SZ_2M < mult_perc(sinfo->total_bytes, 80))
-		return 0;
-	return 1;
+		return false;
+	return true;
 }

 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
 {
 	u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type);
+	struct btrfs_space_info *space_info;

-	return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+	space_info = btrfs_find_space_info(trans->fs_info, type);
+	if (!space_info) {
+		DEBUG_WARN();
+		return -EINVAL;
+	}
+
+	return btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
 }

-static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
+static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans,
+						struct btrfs_space_info *space_info,
+						u64 flags)
 {
 	struct btrfs_block_group *bg;
 	int ret;
@@ -3882,7 +3927,7 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans
 	 */
 	check_system_chunk(trans, flags);

-	bg = btrfs_create_chunk(trans, flags);
+	bg = btrfs_create_chunk(trans, space_info, flags);
 	if (IS_ERR(bg)) {
 		ret = PTR_ERR(bg);
 		goto out;
 	}
@@ -3930,8 +3975,16 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans
 	if (ret == -ENOSPC) {
 		const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info);
 		struct btrfs_block_group *sys_bg;
+		struct btrfs_space_info *sys_space_info;
+
+		sys_space_info = btrfs_find_space_info(trans->fs_info, sys_flags);
+		if (!sys_space_info) {
+			ret = -EINVAL;
+			btrfs_abort_transaction(trans, ret);
+			goto out;
+		}

-		sys_bg = btrfs_create_chunk(trans, sys_flags);
+		sys_bg = btrfs_create_chunk(trans, sys_space_info, sys_flags);
 		if (IS_ERR(sys_bg)) {
 			ret = PTR_ERR(sys_bg);
 			btrfs_abort_transaction(trans, ret);
@@ -4062,6 +4115,8 @@ out:
  *
  * This function, btrfs_chunk_alloc(), belongs to phase 1.
  *
+ * @space_info: specify which space_info the new chunk should belong to.
+ *
  * If @force is CHUNK_ALLOC_FORCE:
  * - return 1 if it successfully allocates a chunk,
  * - return errors including -ENOSPC otherwise.
@@ -4070,11 +4125,11 @@ out:
  * - return 1 if it successfully allocates a chunk,
  * - return errors including -ENOSPC otherwise.
  */
-int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans,
+		      struct btrfs_space_info *space_info, u64 flags,
 		      enum btrfs_chunk_alloc_enum force)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_space_info *space_info;
 	struct btrfs_block_group *ret_bg;
 	bool wait_for_alloc = false;
 	bool should_alloc = false;
@@ -4113,9 +4168,6 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 	if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
 		return -ENOSPC;

-	space_info = btrfs_find_space_info(fs_info, flags);
-	ASSERT(space_info);
-
 	do {
 		spin_lock(&space_info->lock);
 		if (force < space_info->force_alloc)
@@ -4176,7 +4228,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 			force_metadata_allocation(fs_info);
 	}

-	ret_bg = do_chunk_alloc(trans, flags);
+	ret_bg = do_chunk_alloc(trans, space_info, flags);
 	trans->allocating_chunk = false;

 	if (IS_ERR(ret_bg)) {
@@ -4252,6 +4304,10 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
 	if (left < bytes) {
 		u64 flags = btrfs_system_alloc_profile(fs_info);
 		struct btrfs_block_group *bg;
+		struct btrfs_space_info *space_info;
+
+		space_info = btrfs_find_space_info(fs_info, flags);
+		ASSERT(space_info);

 		/*
 		 * Ignore failure to create system chunk. We might end up not
@@ -4259,7 +4315,7 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
 		 * the paths we visit in the chunk tree (they were already COWed
 		 * or created in the current transaction for example).
 		 */
-		bg = btrfs_create_chunk(trans, flags);
+		bg = btrfs_create_chunk(trans, space_info, flags);
 		if (IS_ERR(bg)) {
 			ret = PTR_ERR(bg);
 		} else {
@@ -4367,6 +4423,43 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
 	}
 }

+static void check_removing_space_info(struct btrfs_space_info *space_info)
+{
+	struct btrfs_fs_info *info = space_info->fs_info;
+
+	if (space_info->subgroup_id == BTRFS_SUB_GROUP_PRIMARY) {
+		/* This is a top space_info, proceed with its children first. */
+		for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++) {
+			if (space_info->sub_group[i]) {
+				check_removing_space_info(space_info->sub_group[i]);
+				kfree(space_info->sub_group[i]);
+				space_info->sub_group[i] = NULL;
+			}
+		}
+	}
+
+	/*
+	 * Do not hide this behind enospc_debug, this is actually important and
+	 * indicates a real bug if this happens.
+	 */
+	if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_may_use > 0))
+		btrfs_dump_space_info(info, space_info, 0, 0);
+
+	/*
+	 * If there was a failure to cleanup a log tree, very likely due to an
+	 * IO failure on a writeback attempt of one or more of its extent
+	 * buffers, we could not do proper (and cheap) unaccounting of their
+	 * reserved space, so don't warn on bytes_reserved > 0 in that case.
+	 */
+	if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+	    !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
+		if (WARN_ON(space_info->bytes_reserved > 0))
+			btrfs_dump_space_info(info, space_info, 0, 0);
+	}
+
+	WARN_ON(space_info->reclaim_size > 0);
+}
+
 /*
  * Must be called only after stopping all workers, since we could have block
  * group caching kthreads running, and therefore they could race with us if we
@@ -4392,8 +4485,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)

 	write_lock(&info->block_group_cache_lock);
 	while (!list_empty(&info->caching_block_groups)) {
-		caching_ctl = list_entry(info->caching_block_groups.next,
-					 struct btrfs_caching_control, list);
+		caching_ctl = list_first_entry(&info->caching_block_groups,
+					       struct btrfs_caching_control, list);
 		list_del(&caching_ctl->list);
 		btrfs_put_caching_control(caching_ctl);
 	}
@@ -4464,32 +4557,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 	btrfs_release_global_block_rsv(info);

 	while (!list_empty(&info->space_info)) {
-		space_info = list_entry(info->space_info.next,
-					struct btrfs_space_info,
-					list);
-
-		/*
-		 * Do not hide this behind enospc_debug, this is actually
-		 * important and indicates a real bug if this happens.
-		 */
-		if (WARN_ON(space_info->bytes_pinned > 0 ||
-			    space_info->bytes_may_use > 0))
-			btrfs_dump_space_info(info, space_info, 0, 0);
-
-		/*
-		 * If there was a failure to cleanup a log tree, very likely due
-		 * to an IO failure on a writeback attempt of one or more of its
-		 * extent buffers, we could not do proper (and cheap) unaccounting
-		 * of their reserved space, so don't warn on bytes_reserved > 0 in
-		 * that case.
-		 */
-		if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
-		    !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
-			if (WARN_ON(space_info->bytes_reserved > 0))
-				btrfs_dump_space_info(info, space_info, 0, 0);
-		}
+		space_info = list_first_entry(&info->space_info,
+					      struct btrfs_space_info, list);

-		WARN_ON(space_info->reclaim_size > 0);
+		check_removing_space_info(space_info);
 		list_del(&space_info->list);
 		btrfs_sysfs_remove_space_info(space_info);
 	}
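Editor's note: several functions above switch from a manually freed struct btrfs_path to BTRFS_PATH_AUTO_FREE(), whose definition lives outside this file (in fs/btrfs/ctree.h) and therefore does not appear in the diff. As a rough sketch only, not the verbatim kernel definition, it presumably builds on the scope-based cleanup helpers from <linux/cleanup.h>, which is why the "out:" labels and explicit btrfs_free_path() calls can be dropped; example_lookup() below is a hypothetical caller invented for illustration:

/*
 * Sketch of the scope-based cleanup pattern behind BTRFS_PATH_AUTO_FREE();
 * the exact definitions live in fs/btrfs/ctree.h and <linux/cleanup.h>.
 */
DEFINE_FREE(btrfs_free_path, struct btrfs_path *, btrfs_free_path(_T))

#define BTRFS_PATH_AUTO_FREE(path_name) \
	struct btrfs_path *path_name __free(btrfs_free_path) = NULL

/* Hypothetical caller showing why the "out:" cleanup labels go away. */
static int example_lookup(struct btrfs_root *root, const struct btrfs_key *key)
{
	BTRFS_PATH_AUTO_FREE(path);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Every return from here on frees the path automatically. */
	return btrfs_search_slot(NULL, root, key, path, 0, 0);
}

Because btrfs_free_path() tolerates a NULL pointer, returning before the path is allocated remains safe as well.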
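A second pattern worth noting: the new btrfs_link_bg_list() helper takes a block group reference only when it actually links the group, so whichever code later drains that list must drop the reference after unlinking, as btrfs_create_pending_block_groups() now does under unused_bgs_lock. A minimal consumer might look like the sketch below (process_bg_list() is a hypothetical name, not a function from the patch):

/* Hypothetical consumer of a bg_list built with btrfs_link_bg_list(). */
static void process_bg_list(struct btrfs_fs_info *fs_info, struct list_head *list)
{
	while (!list_empty(list)) {
		struct btrfs_block_group *bg;

		spin_lock(&fs_info->unused_bgs_lock);
		bg = list_first_entry(list, struct btrfs_block_group, bg_list);
		/* Unlink under the same lock that btrfs_link_bg_list() uses. */
		list_del_init(&bg->bg_list);
		spin_unlock(&fs_info->unused_bgs_lock);

		/* ... do the actual work on 'bg' here ... */

		/* Drop the reference taken when the group was linked. */
		btrfs_put_block_group(bg);
	}
}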