Diffstat (limited to 'fs/btrfs/block-group.c')
 fs/btrfs/block-group.c | 382 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 220 insertions(+), 162 deletions(-)
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index a8129f1ce78c..08b14449fabe 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -34,6 +34,19 @@ int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group
}
#endif
+static inline bool has_unwritten_metadata(struct btrfs_block_group *block_group)
+{
+ /* The meta_write_pointer is available only on the zoned setup. */
+ if (!btrfs_is_zoned(block_group->fs_info))
+ return false;
+
+ if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
+ return false;
+
+ return block_group->start + block_group->alloc_offset >
+ block_group->meta_write_pointer;
+}
+
/*
* Return target flags in extended format or 0 if restripe for this chunk_type
* is not in progress
@@ -525,10 +538,9 @@ int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start,
*total_added_ret = 0;
while (start < end) {
- if (!find_first_extent_bit(&info->excluded_extents, start,
- &extent_start, &extent_end,
- EXTENT_DIRTY | EXTENT_UPTODATE,
- NULL))
+ if (!btrfs_find_first_extent_bit(&info->excluded_extents, start,
+ &extent_start, &extent_end,
+ EXTENT_DIRTY, NULL))
break;
if (extent_start <= start) {
@@ -601,8 +613,8 @@ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_
extent_root = btrfs_extent_root(fs_info, max_t(u64, block_group->start,
BTRFS_SUPER_INFO_OFFSET));
- path->skip_locking = 1;
- path->search_commit_root = 1;
+ path->skip_locking = true;
+ path->search_commit_root = true;
path->reada = READA_FORWARD;
search_offset = index * div_u64(block_group->length, max_index);
@@ -701,7 +713,7 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
struct btrfs_block_group *block_group = caching_ctl->block_group;
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_root *extent_root;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_key key;
u64 total_found = 0;
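The btrfs_path conversion above is what removes the manual btrfs_free_path() calls from the exit paths further down: BTRFS_PATH_AUTO_FREE() declares a pointer that is cleaned up when it goes out of scope. A minimal sketch of the pattern, assuming the usual cleanup.h-style definition in ctree.h (not quoted verbatim from the tree):

	/* Tie btrfs_free_path() to the variable's scope with __free(),
	 * so every return path frees the path automatically. */
	DEFINE_FREE(btrfs_free_path, struct btrfs_path *, btrfs_free_path(_T))

	#define BTRFS_PATH_AUTO_FREE(path_name) \
		struct btrfs_path *path_name __free(btrfs_free_path) = NULL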
@@ -732,8 +744,8 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
* root to add free space. So we skip locking and search the commit
* root, since its read-only
*/
- path->skip_locking = 1;
- path->search_commit_root = 1;
+ path->skip_locking = true;
+ path->search_commit_root = true;
path->reada = READA_FORWARD;
key.objectid = last;
@@ -828,14 +840,13 @@ next:
block_group->start + block_group->length,
NULL);
out:
- btrfs_free_path(path);
return ret;
}
static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg)
{
- clear_extent_bits(&bg->fs_info->excluded_extents, bg->start,
- bg->start + bg->length - 1, EXTENT_UPTODATE);
+ btrfs_clear_extent_bit(&bg->fs_info->excluded_extents, bg->start,
+ bg->start + bg->length - 1, EXTENT_DIRTY, NULL);
}
static noinline void caching_thread(struct btrfs_work *work)
@@ -879,7 +890,7 @@ static noinline void caching_thread(struct btrfs_work *work)
*/
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
!(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags)))
- ret = load_free_space_tree(caching_ctl);
+ ret = btrfs_load_free_space_tree(caching_ctl);
else
ret = load_extent_tree_free(caching_ctl);
done:
@@ -1054,7 +1065,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_chunk_map *map)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_path *path;
+ BTRFS_PATH_AUTO_FREE(path);
struct btrfs_block_group *block_group;
struct btrfs_free_cluster *cluster;
struct inode *inode;
@@ -1237,7 +1248,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* another task to attempt to create another block group with the same
* item key (and failing with -EEXIST and a transaction abort).
*/
- ret = remove_block_group_free_space(trans, block_group);
+ ret = btrfs_remove_block_group_free_space(trans, block_group);
if (ret)
goto out;
@@ -1246,6 +1257,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
goto out;
spin_lock(&block_group->lock);
+ /*
+ * Hitting this WARN means we removed a block group with an unwritten
+ * region. It will cause "unable to find chunk map for logical" errors.
+ */
+ if (WARN_ON(has_unwritten_metadata(block_group)))
+ btrfs_warn(fs_info,
+ "block group %llu is removed before metadata write out",
+ block_group->start);
+
set_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags);
/*
@@ -1285,7 +1305,6 @@ out:
btrfs_put_block_group(block_group);
if (remove_rsv)
btrfs_dec_delayed_refs_rsv_bg_updates(fs_info);
- btrfs_free_path(path);
return ret;
}
@@ -1338,7 +1357,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
* data in this block group. That check should be done by relocation routine,
* not this function.
*/
-static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
+static int inc_block_group_ro(struct btrfs_block_group *cache, bool force)
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
@@ -1383,8 +1402,7 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
* BTRFS_RESERVE_NO_FLUSH to give ourselves the most amount of
* leeway to allow us to mark this block group as read only.
*/
- if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes,
- BTRFS_RESERVE_NO_FLUSH))
+ if (btrfs_can_overcommit(sinfo, num_bytes, BTRFS_RESERVE_NO_FLUSH))
ret = 0;
}
@@ -1405,7 +1423,7 @@ out:
if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
btrfs_info(cache->fs_info,
"unable to make block group %llu ro", cache->start);
- btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
+ btrfs_dump_space_info(cache->space_info, 0, false);
}
return ret;
}
@@ -1420,9 +1438,8 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
int ret;
spin_lock(&fs_info->trans_lock);
- if (trans->transaction->list.prev != &fs_info->trans_list) {
- prev_trans = list_last_entry(&trans->transaction->list,
- struct btrfs_transaction, list);
+ if (!list_is_first(&trans->transaction->list, &fs_info->trans_list)) {
+ prev_trans = list_prev_entry(trans->transaction, list);
refcount_inc(&prev_trans->use_count);
}
spin_unlock(&fs_info->trans_lock);
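For reference, the two list.h helpers used above expand to the open-coded checks they replace, which also makes it clearer that the lookup is relative to trans->transaction rather than to the global list head:

	static inline int list_is_first(const struct list_head *list,
					const struct list_head *head)
	{
		return list->prev == head;
	}

	#define list_prev_entry(pos, member) \
		list_entry((pos)->member.prev, typeof(*(pos)), member)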
@@ -1439,14 +1456,14 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans) {
- ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
- EXTENT_DIRTY);
+ ret = btrfs_clear_extent_bit(&prev_trans->pinned_extents, start, end,
+ EXTENT_DIRTY, NULL);
if (ret)
goto out;
}
- ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
- EXTENT_DIRTY);
+ ret = btrfs_clear_extent_bit(&trans->transaction->pinned_extents, start, end,
+ EXTENT_DIRTY, NULL);
out:
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans)
@@ -1589,8 +1606,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* needing to allocate extents from the block group.
*/
used = btrfs_space_info_used(space_info, true);
- if (space_info->total_bytes - block_group->length < used &&
- block_group->zone_unusable < block_group->length) {
+ if ((space_info->total_bytes - block_group->length < used &&
+ block_group->zone_unusable < block_group->length) ||
+ has_unwritten_metadata(block_group)) {
/*
* Add a reference for the list, compensate for the ref
* drop under the "next" label for the
@@ -1619,8 +1637,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
ret = btrfs_zone_finish(block_group);
if (ret < 0) {
btrfs_dec_block_group_ro(block_group);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
+ btrfs_link_bg_list(block_group, &retry_list);
ret = 0;
+ }
goto next;
}
@@ -1773,7 +1793,14 @@ static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
bg1 = list_entry(a, struct btrfs_block_group, bg_list);
bg2 = list_entry(b, struct btrfs_block_group, bg_list);
- return bg1->used > bg2->used;
+ /*
+ * Some other task may be updating the ->used field concurrently, but it
+ * is not serious if we get a stale value or load/store tearing issues,
+ * as sorting the list of block groups to reclaim is not critical and an
+ * occasional imperfect order is ok. So silence KCSAN and avoid the
+ * overhead of locking or any other synchronization.
+ */
+ return data_race(bg1->used > bg2->used);
}
static inline bool btrfs_should_reclaim(const struct btrfs_fs_info *fs_info)
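data_race() is the KCSAN annotation from include/linux/compiler.h for loads that are racy by design: it evaluates the expression with the race checker's instrumentation suppressed, without adding any ordering or atomicity. Roughly:

	#define data_race(expr)						\
	({								\
		__kcsan_disable_current();				\
		__auto_type __v = (expr);				\
		__kcsan_enable_current();				\
		__v;							\
	})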
@@ -1821,12 +1848,10 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
if (!btrfs_should_reclaim(fs_info))
return;
- sb_start_write(fs_info->sb);
+ guard(super_write)(fs_info->sb);
- if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
- sb_end_write(fs_info->sb);
+ if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
return;
- }
/*
* Long running balances can keep us blocked here for eternity, so
@@ -1834,7 +1859,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
*/
if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
btrfs_exclop_finish(fs_info);
- sb_end_write(fs_info->sb);
return;
}
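The guard(super_write) conversion is what lets both early returns above drop their explicit sb_end_write() calls: the write access is released automatically when the function exits. Assuming the guard follows the standard cleanup.h pattern, its definition is on the order of:

	/* Sketch: pairs sb_start_write()/sb_end_write() with the scope. */
	DEFINE_GUARD(super_write, struct super_block *,
		     sb_start_write(_T), sb_end_write(_T))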
@@ -1846,7 +1870,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
*/
list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp);
while (!list_empty(&fs_info->reclaim_bgs)) {
- u64 zone_unusable;
u64 used;
u64 reserved;
int ret = 0;
@@ -1913,23 +1936,13 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
goto next;
}
- /*
- * Cache the zone_unusable value before turning the block group
- * to read only. As soon as the block group is read only it's
- * zone_unusable value gets moved to the block group's read-only
- * bytes and isn't available for calculations anymore. We also
- * cache it before unlocking the block group, to prevent races
- * (reports from KCSAN and such tools) with tasks updating it.
- */
- zone_unusable = bg->zone_unusable;
-
spin_unlock(&bg->lock);
spin_unlock(&space_info->lock);
/*
* Get out fast, in case we're read-only or unmounting the
* filesystem. It is OK to drop block groups from the list even
- * for the read-only case. As we did sb_start_write(),
+ * for the read-only case. As we did take the super write lock,
* "mount -o remount,ro" won't happen and read-only filesystem
* means it is forced read-only due to a fatal error. So, it
* never gets back to read-write to let us reclaim again.
@@ -1953,7 +1966,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
* called, which is where we will transfer a reserved extent's
* size from the "reserved" counter to the "used" counter - this
* happens when running delayed references. When we relocate the
- * chunk below, relocation first flushes dellaloc, waits for
+ * chunk below, relocation first flushes delalloc, waits for
* ordered extent completion (which is where we create delayed
* references for data extents) and commits the current
* transaction (which runs delayed references), and only after
@@ -1966,14 +1979,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
reserved = bg->reserved;
spin_unlock(&bg->lock);
- btrfs_info(fs_info,
- "reclaiming chunk %llu with %llu%% used %llu%% reserved %llu%% unusable",
- bg->start,
- div64_u64(used * 100, bg->length),
- div64_u64(reserved * 100, bg->length),
- div64_u64(zone_unusable * 100, bg->length));
trace_btrfs_reclaim_block_group(bg);
- ret = btrfs_relocate_chunk(fs_info, bg->start);
+ ret = btrfs_relocate_chunk(fs_info, bg->start, false);
if (ret) {
btrfs_dec_block_group_ro(bg);
btrfs_err(fs_info, "error relocating chunk %llu",
@@ -2018,7 +2025,6 @@ end:
list_splice_tail(&retry_list, &fs_info->reclaim_bgs);
spin_unlock(&fs_info->unused_bgs_lock);
btrfs_exclop_finish(fs_info);
- sb_end_write(fs_info->sb);
}
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
@@ -2026,7 +2032,7 @@ void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
btrfs_reclaim_sweep(fs_info);
spin_lock(&fs_info->unused_bgs_lock);
if (!list_empty(&fs_info->reclaim_bgs))
- queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
+ queue_work(system_dfl_wq, &fs_info->reclaim_bgs_work);
spin_unlock(&fs_info->unused_bgs_lock);
}
@@ -2059,7 +2065,7 @@ static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key
return -ENOENT;
}
- if (map->start != key->objectid || map->chunk_len != key->offset) {
+ if (unlikely(map->start != key->objectid || map->chunk_len != key->offset)) {
btrfs_err(fs_info,
"block group %llu len %llu mismatch with chunk %llu len %llu",
key->objectid, key->offset, map->start, map->chunk_len);
@@ -2072,7 +2078,7 @@ static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key
flags = btrfs_stack_block_group_flags(&bg) &
BTRFS_BLOCK_GROUP_TYPE_MASK;
- if (flags != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ if (unlikely(flags != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK))) {
btrfs_err(fs_info,
"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
key->objectid, key->offset, flags,
@@ -2218,9 +2224,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
cache->bytes_super += stripe_len;
- ret = set_extent_bit(&fs_info->excluded_extents, cache->start,
- cache->start + stripe_len - 1,
- EXTENT_UPTODATE, NULL);
+ ret = btrfs_set_extent_bit(&fs_info->excluded_extents, cache->start,
+ cache->start + stripe_len - 1,
+ EXTENT_DIRTY, NULL);
if (ret)
return ret;
}
@@ -2233,7 +2239,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
return ret;
/* Shouldn't have super stripes in sequential zones */
- if (zoned && nr) {
+ if (unlikely(zoned && nr)) {
kfree(logical);
btrfs_err(fs_info,
"zoned: block group %llu must not contain super block",
@@ -2246,9 +2252,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
cache->start + cache->length - logical[nr]);
cache->bytes_super += len;
- ret = set_extent_bit(&fs_info->excluded_extents, logical[nr],
- logical[nr] + len - 1,
- EXTENT_UPTODATE, NULL);
+ ret = btrfs_set_extent_bit(&fs_info->excluded_extents,
+ logical[nr], logical[nr] + len - 1,
+ EXTENT_DIRTY, NULL);
if (ret) {
kfree(logical);
return ret;
@@ -2324,7 +2330,7 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
break;
bg = btrfs_lookup_block_group(fs_info, map->start);
- if (!bg) {
+ if (unlikely(!bg)) {
btrfs_err(fs_info,
"chunk start=%llu len=%llu doesn't have corresponding block group",
map->start, map->chunk_len);
@@ -2332,9 +2338,9 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
btrfs_free_chunk_map(map);
break;
}
- if (bg->start != map->start || bg->length != map->chunk_len ||
- (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
- (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ if (unlikely(bg->start != map->start || bg->length != map->chunk_len ||
+ (bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
+ (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK))) {
btrfs_err(fs_info,
"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
map->start, map->chunk_len,
@@ -2373,8 +2379,9 @@ static int read_one_block_group(struct btrfs_fs_info *info,
cache->commit_used = cache->used;
cache->flags = btrfs_stack_block_group_flags(bgi);
cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
+ cache->space_info = btrfs_find_space_info(info, cache->flags);
- set_free_space_tree_thresholds(cache);
+ btrfs_set_free_space_tree_thresholds(cache);
if (need_clear) {
/*
@@ -2451,6 +2458,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
btrfs_remove_free_space_cache(cache);
goto error;
}
+
trace_btrfs_add_block_group(info, cache, 0);
btrfs_add_bg_to_space_info(info, cache);
@@ -2495,6 +2503,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
bg->cached = BTRFS_CACHE_FINISHED;
bg->used = map->chunk_len;
bg->flags = map->type;
+ bg->space_info = btrfs_find_space_info(fs_info, bg->flags);
ret = btrfs_add_block_group_cache(bg);
/*
* We may have some valid block group cache added already, in
@@ -2791,7 +2800,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
block_group->length);
if (ret)
btrfs_abort_transaction(trans, ret);
- add_block_group_free_space(trans, block_group);
+ btrfs_add_block_group_free_space(trans, block_group);
/*
* If we restriped during balance, we may have added a new raid
@@ -2824,7 +2833,7 @@ next:
* space or none at all (due to no need to COW, extent buffers
* were already COWed in the current transaction and still
* unwritten, tree heights lower than the maximum possible
- * height, etc). For data we generally reserve the axact amount
+ * height, etc). For data we generally reserve the exact amount
* of space we are going to allocate later, the exception is
* when using compression, as we must reserve space based on the
* uncompressed data size, because the compression is only done
@@ -2868,8 +2877,8 @@ static u64 calculate_global_root_id(const struct btrfs_fs_info *fs_info, u64 off
}
struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
- u64 type,
- u64 chunk_offset, u64 size)
+ struct btrfs_space_info *space_info,
+ u64 type, u64 chunk_offset, u64 size)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_block_group *cache;
@@ -2889,7 +2898,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags);
cache->length = size;
- set_free_space_tree_thresholds(cache);
+ btrfs_set_free_space_tree_thresholds(cache);
cache->flags = type;
cache->cached = BTRFS_CACHE_FINISHED;
cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
@@ -2923,7 +2932,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
* assigned to our block group. We want our bg to be added to the rbtree
* with its ->space_info set.
*/
- cache->space_info = btrfs_find_space_info(fs_info, cache->flags);
+ cache->space_info = space_info;
ASSERT(cache->space_info);
ret = btrfs_add_block_group_cache(cache);
@@ -2968,6 +2977,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
bool do_chunk_alloc)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_space_info *space_info = cache->space_info;
struct btrfs_trans_handle *trans;
struct btrfs_root *root = btrfs_block_group_root(fs_info);
u64 alloc_flags;
@@ -3020,7 +3030,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
*/
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
if (alloc_flags != cache->flags) {
- ret = btrfs_chunk_alloc(trans, alloc_flags,
+ ret = btrfs_chunk_alloc(trans, space_info, alloc_flags,
CHUNK_ALLOC_FORCE);
/*
* ENOSPC is allowed here, we may have enough space
@@ -3048,15 +3058,15 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
(cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
goto unlock_out;
- alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
- ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+ alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags);
+ ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
if (ret < 0)
goto out;
/*
* We have allocated a new chunk. We also need to activate that chunk to
* grant metadata tickets for zoned filesystem.
*/
- ret = btrfs_zoned_activate_one_bg(fs_info, cache->space_info, true);
+ ret = btrfs_zoned_activate_one_bg(space_info, true);
if (ret < 0)
goto out;
@@ -3232,7 +3242,7 @@ again:
*/
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, BTRFS_I(inode));
- if (ret) {
+ if (unlikely(ret)) {
/*
* So theoretically we could recover from this, simply set the
* super cache generation to 0 so we know to invalidate the
@@ -3635,9 +3645,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
ret = update_block_group_item(trans, path, cache);
- }
- if (ret)
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+ } else if (ret) {
btrfs_abort_transaction(trans, ret);
+ }
}
/* If its not on the io list, we need to put the block group */
@@ -3738,8 +3750,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
- set_extent_bit(&trans->transaction->pinned_extents, bytenr,
- bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
+ btrfs_set_extent_bit(&trans->transaction->pinned_extents, bytenr,
+ bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
}
spin_lock(&trans->transaction->dirty_bgs_lock);
@@ -3785,7 +3797,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
* reservation and return -EAGAIN, otherwise this function always succeeds.
*/
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
- u64 ram_bytes, u64 num_bytes, int delalloc,
+ u64 ram_bytes, u64 num_bytes, bool delalloc,
bool force_wrong_size_class)
{
struct btrfs_space_info *space_info = cache->space_info;
@@ -3796,30 +3808,38 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
spin_lock(&cache->lock);
if (cache->ro) {
ret = -EAGAIN;
- goto out;
+ goto out_error;
}
if (btrfs_block_group_should_use_size_class(cache)) {
size_class = btrfs_calc_block_group_size_class(num_bytes);
ret = btrfs_use_block_group_size_class(cache, size_class, force_wrong_size_class);
if (ret)
- goto out;
+ goto out_error;
}
+
cache->reserved += num_bytes;
- space_info->bytes_reserved += num_bytes;
+ if (delalloc)
+ cache->delalloc_bytes += num_bytes;
+
trace_btrfs_space_reservation(cache->fs_info, "space_info",
space_info->flags, num_bytes, 1);
+ spin_unlock(&cache->lock);
+
+ space_info->bytes_reserved += num_bytes;
btrfs_space_info_update_bytes_may_use(space_info, -ram_bytes);
- if (delalloc)
- cache->delalloc_bytes += num_bytes;
/*
* Compression can use less space than we reserved, so wake tickets if
* that happens.
*/
if (num_bytes < ram_bytes)
- btrfs_try_granting_tickets(cache->fs_info, space_info);
-out:
+ btrfs_try_granting_tickets(space_info);
+ spin_unlock(&space_info->lock);
+
+ return 0;
+
+out_error:
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
return ret;
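The reworked reservation path splits what used to be done with both locks held at once: the per-block-group counters (reserved, delalloc_bytes) are updated and published under cache->lock alone, and only then are the space_info counters touched, still under space_info->lock (taken before the hunk starts, as before). The resulting shape, sketched:

	spin_lock(&space_info->lock);
	spin_lock(&cache->lock);
	/* RO and size-class failures bail out via out_error, which drops
	 * both locks. */
	cache->reserved += num_bytes;
	if (delalloc)
		cache->delalloc_bytes += num_bytes;
	spin_unlock(&cache->lock);

	space_info->bytes_reserved += num_bytes;
	btrfs_space_info_update_bytes_may_use(space_info, -ram_bytes);
	if (num_bytes < ram_bytes)
		btrfs_try_granting_tickets(space_info);
	spin_unlock(&space_info->lock);

The same split is applied to btrfs_free_reserved_bytes() in the next hunk.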
@@ -3828,35 +3848,38 @@ out:
/*
* Update the block_group and space info counters.
*
- * @cache: The cache we are manipulating
- * @num_bytes: The number of bytes in question
- * @delalloc: The blocks are allocated for the delalloc write
+ * @cache: The cache we are manipulating.
+ * @num_bytes: The number of bytes in question.
+ * @is_delalloc: Whether the blocks are allocated for a delalloc write.
*
* This is called by somebody who is freeing space that was never actually used
* on disk. For example if you reserve some space for a new leaf in transaction
* A and before transaction A commits you free that leaf, you call this with
* reserve set to 0 in order to clear the reservation.
*/
-void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
- u64 num_bytes, int delalloc)
+void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes,
+ bool is_delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
+ bool bg_ro;
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
- if (cache->ro)
+ bg_ro = cache->ro;
+ cache->reserved -= num_bytes;
+ if (is_delalloc)
+ cache->delalloc_bytes -= num_bytes;
+ spin_unlock(&cache->lock);
+
+ if (bg_ro)
space_info->bytes_readonly += num_bytes;
else if (btrfs_is_zoned(cache->fs_info))
space_info->bytes_zone_unusable += num_bytes;
- cache->reserved -= num_bytes;
+
space_info->bytes_reserved -= num_bytes;
space_info->max_extent_size = 0;
- if (delalloc)
- cache->delalloc_bytes -= num_bytes;
- spin_unlock(&cache->lock);
-
- btrfs_try_granting_tickets(cache->fs_info, space_info);
+ btrfs_try_granting_tickets(space_info);
spin_unlock(&space_info->lock);
}
@@ -3871,14 +3894,14 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
}
}
-static int should_alloc_chunk(const struct btrfs_fs_info *fs_info,
- const struct btrfs_space_info *sinfo, int force)
+static bool should_alloc_chunk(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_space_info *sinfo, int force)
{
u64 bytes_used = btrfs_space_info_used(sinfo, false);
u64 thresh;
if (force == CHUNK_ALLOC_FORCE)
- return 1;
+ return true;
/*
* in limited mode, we want to have some free space up to
@@ -3889,22 +3912,31 @@ static int should_alloc_chunk(const struct btrfs_fs_info *fs_info,
thresh = max_t(u64, SZ_64M, mult_perc(thresh, 1));
if (sinfo->total_bytes - bytes_used < thresh)
- return 1;
+ return true;
}
if (bytes_used + SZ_2M < mult_perc(sinfo->total_bytes, 80))
- return 0;
- return 1;
+ return false;
+ return true;
}
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
{
u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type);
+ struct btrfs_space_info *space_info;
- return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+ space_info = btrfs_find_space_info(trans->fs_info, type);
+ if (!space_info) {
+ DEBUG_WARN();
+ return -EINVAL;
+ }
+
+ return btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
}
-static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
+static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_space_info *space_info,
+ u64 flags)
{
struct btrfs_block_group *bg;
int ret;
@@ -3917,7 +3949,7 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans
*/
check_system_chunk(trans, flags);
- bg = btrfs_create_chunk(trans, flags);
+ bg = btrfs_create_chunk(trans, space_info, flags);
if (IS_ERR(bg)) {
ret = PTR_ERR(bg);
goto out;
@@ -3965,8 +3997,16 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans
if (ret == -ENOSPC) {
const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info);
struct btrfs_block_group *sys_bg;
+ struct btrfs_space_info *sys_space_info;
- sys_bg = btrfs_create_chunk(trans, sys_flags);
+ sys_space_info = btrfs_find_space_info(trans->fs_info, sys_flags);
+ if (unlikely(!sys_space_info)) {
+ ret = -EINVAL;
+ btrfs_abort_transaction(trans, ret);
+ goto out;
+ }
+
+ sys_bg = btrfs_create_chunk(trans, sys_space_info, sys_flags);
if (IS_ERR(sys_bg)) {
ret = PTR_ERR(sys_bg);
btrfs_abort_transaction(trans, ret);
@@ -3974,17 +4014,17 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans
}
ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_chunk_alloc_add_chunk_item(trans, bg);
- if (ret) {
+ if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
- } else if (ret) {
+ } else if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -4097,6 +4137,8 @@ out:
*
* This function, btrfs_chunk_alloc(), belongs to phase 1.
*
+ * @space_info: specify which space_info the new chunk should belong to.
+ *
* If @force is CHUNK_ALLOC_FORCE:
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
@@ -4105,11 +4147,11 @@ out:
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
*/
-int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_space_info *space_info, u64 flags,
enum btrfs_chunk_alloc_enum force)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_space_info *space_info;
struct btrfs_block_group *ret_bg;
bool wait_for_alloc = false;
bool should_alloc = false;
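Since btrfs_chunk_alloc() no longer resolves the space_info from the flags, every caller passes it explicitly; one-off callers look it up first, mirroring the new btrfs_force_chunk_alloc() above. A hedged sketch of the call pattern:

	struct btrfs_space_info *space_info;

	space_info = btrfs_find_space_info(trans->fs_info, flags);
	if (!space_info)
		return -EINVAL;	/* unknown profile, should not happen */

	ret = btrfs_chunk_alloc(trans, space_info, flags, CHUNK_ALLOC_FORCE);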
@@ -4148,9 +4190,6 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
return -ENOSPC;
- space_info = btrfs_find_space_info(fs_info, flags);
- ASSERT(space_info);
-
do {
spin_lock(&space_info->lock);
if (force < space_info->force_alloc)
@@ -4158,11 +4197,11 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
should_alloc = should_alloc_chunk(fs_info, space_info, force);
if (space_info->full) {
/* No more free physical space */
+ spin_unlock(&space_info->lock);
if (should_alloc)
ret = -ENOSPC;
else
ret = 0;
- spin_unlock(&space_info->lock);
return ret;
} else if (!should_alloc) {
spin_unlock(&space_info->lock);
@@ -4174,16 +4213,16 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
* recheck if we should continue with our allocation
* attempt.
*/
+ spin_unlock(&space_info->lock);
wait_for_alloc = true;
force = CHUNK_ALLOC_NO_FORCE;
- spin_unlock(&space_info->lock);
mutex_lock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->chunk_mutex);
} else {
/* Proceed with allocation */
- space_info->chunk_alloc = 1;
- wait_for_alloc = false;
+ space_info->chunk_alloc = true;
spin_unlock(&space_info->lock);
+ wait_for_alloc = false;
}
cond_resched();
@@ -4211,7 +4250,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
force_metadata_allocation(fs_info);
}
- ret_bg = do_chunk_alloc(trans, flags);
+ ret_bg = do_chunk_alloc(trans, space_info, flags);
trans->allocating_chunk = false;
if (IS_ERR(ret_bg)) {
@@ -4230,7 +4269,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
spin_lock(&space_info->lock);
if (ret < 0) {
if (ret == -ENOSPC)
- space_info->full = 1;
+ space_info->full = true;
else
goto out;
} else {
@@ -4240,7 +4279,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
out:
- space_info->chunk_alloc = 0;
+ space_info->chunk_alloc = false;
spin_unlock(&space_info->lock);
mutex_unlock(&fs_info->chunk_mutex);
@@ -4281,12 +4320,16 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
left, bytes, type);
- btrfs_dump_space_info(fs_info, info, 0, 0);
+ btrfs_dump_space_info(info, 0, false);
}
if (left < bytes) {
u64 flags = btrfs_system_alloc_profile(fs_info);
struct btrfs_block_group *bg;
+ struct btrfs_space_info *space_info;
+
+ space_info = btrfs_find_space_info(fs_info, flags);
+ ASSERT(space_info);
/*
* Ignore failure to create system chunk. We might end up not
@@ -4294,7 +4337,7 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
* the paths we visit in the chunk tree (they were already COWed
* or created in the current transaction for example).
*/
- bg = btrfs_create_chunk(trans, flags);
+ bg = btrfs_create_chunk(trans, space_info, flags);
if (IS_ERR(bg)) {
ret = PTR_ERR(bg);
} else {
@@ -4302,7 +4345,7 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
* We have a new chunk. We also need to activate it for
* zoned filesystem.
*/
- ret = btrfs_zoned_activate_one_bg(fs_info, info, true);
+ ret = btrfs_zoned_activate_one_bg(info, true);
if (ret < 0)
return;
@@ -4402,6 +4445,43 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
}
}
+static void check_removing_space_info(struct btrfs_space_info *space_info)
+{
+ struct btrfs_fs_info *info = space_info->fs_info;
+
+ if (space_info->subgroup_id == BTRFS_SUB_GROUP_PRIMARY) {
+ /* This is a top space_info, proceed with its children first. */
+ for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++) {
+ if (space_info->sub_group[i]) {
+ check_removing_space_info(space_info->sub_group[i]);
+ kfree(space_info->sub_group[i]);
+ space_info->sub_group[i] = NULL;
+ }
+ }
+ }
+
+ /*
+ * Do not hide this behind enospc_debug, this is actually important and
+ * indicates a real bug if this happens.
+ */
+ if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_may_use > 0))
+ btrfs_dump_space_info(space_info, 0, false);
+
+ /*
+ * If there was a failure to cleanup a log tree, very likely due to an
+ * IO failure on a writeback attempt of one or more of its extent
+ * buffers, we could not do proper (and cheap) unaccounting of their
+ * reserved space, so don't warn on bytes_reserved > 0 in that case.
+ */
+ if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+ !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
+ if (WARN_ON(space_info->bytes_reserved > 0))
+ btrfs_dump_space_info(space_info, 0, false);
+ }
+
+ WARN_ON(space_info->reclaim_size > 0);
+}
+
/*
* Must be called only after stopping all workers, since we could have block
* group caching kthreads running, and therefore they could race with us if we
@@ -4427,8 +4507,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
write_lock(&info->block_group_cache_lock);
while (!list_empty(&info->caching_block_groups)) {
- caching_ctl = list_entry(info->caching_block_groups.next,
- struct btrfs_caching_control, list);
+ caching_ctl = list_first_entry(&info->caching_block_groups,
+ struct btrfs_caching_control, list);
list_del(&caching_ctl->list);
btrfs_put_caching_control(caching_ctl);
}
@@ -4499,32 +4579,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
btrfs_release_global_block_rsv(info);
while (!list_empty(&info->space_info)) {
- space_info = list_entry(info->space_info.next,
- struct btrfs_space_info,
- list);
-
- /*
- * Do not hide this behind enospc_debug, this is actually
- * important and indicates a real bug if this happens.
- */
- if (WARN_ON(space_info->bytes_pinned > 0 ||
- space_info->bytes_may_use > 0))
- btrfs_dump_space_info(info, space_info, 0, 0);
-
- /*
- * If there was a failure to cleanup a log tree, very likely due
- * to an IO failure on a writeback attempt of one or more of its
- * extent buffers, we could not do proper (and cheap) unaccounting
- * of their reserved space, so don't warn on bytes_reserved > 0 in
- * that case.
- */
- if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
- !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
- if (WARN_ON(space_info->bytes_reserved > 0))
- btrfs_dump_space_info(info, space_info, 0, 0);
- }
+ space_info = list_first_entry(&info->space_info,
+ struct btrfs_space_info, list);
- WARN_ON(space_info->reclaim_size > 0);
+ check_removing_space_info(space_info);
list_del(&space_info->list);
btrfs_sysfs_remove_space_info(space_info);
}