diff options
Diffstat (limited to 'fs/bcachefs/btree_trans_commit.c')
-rw-r--r-- | fs/bcachefs/btree_trans_commit.c | 199 |
1 files changed, 97 insertions, 102 deletions
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index c4f524b2ca9a..1c03c965d836 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -11,6 +11,7 @@ #include "btree_write_buffer.h" #include "buckets.h" #include "disk_accounting.h" +#include "enumerated_ref.h" #include "errcode.h" #include "error.h" #include "journal.h" @@ -20,6 +21,7 @@ #include "snapshot.h" #include <linux/prefetch.h> +#include <linux/string_helpers.h> static const char * const trans_commit_flags_strs[] = { #define x(n, ...) #n, @@ -164,6 +166,7 @@ bool bch2_btree_bset_insert_key(struct btree_trans *trans, EBUG_ON(bpos_gt(insert->k.p, b->data->max_key)); EBUG_ON(insert->k.u64s > bch2_btree_keys_u64s_remaining(b)); EBUG_ON(!b->c.level && !bpos_eq(insert->k.p, path->pos)); + kmsan_check_memory(insert, bkey_bytes(&insert->k)); k = bch2_btree_node_iter_peek_all(node_iter, b); if (k && bkey_cmp_left_packed(b, k, &insert->k.p)) @@ -336,6 +339,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, BUG_ON(i->cached != path->cached); BUG_ON(i->level != path->level); BUG_ON(i->btree_id != path->btree_id); + BUG_ON(i->bkey_type != __btree_node_type(path->level, path->btree_id)); EBUG_ON(!i->level && btree_type_has_snapshots(i->btree_id) && !(i->flags & BTREE_UPDATE_internal_snapshot_node) && @@ -364,7 +368,8 @@ static noinline void journal_transaction_name(struct btree_trans *trans) struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry); - strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64)); + memcpy_and_pad(l->d, JSET_ENTRY_LOG_U64s * sizeof(u64), + trans->fn, strlen(trans->fn), 0); } static inline int btree_key_can_insert(struct btree_trans *trans, @@ -517,69 +522,45 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_ } } -static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id, - unsigned *btree_id_updates_start) +static int bch2_trans_commit_run_triggers(struct btree_trans *trans) { - bool trans_trigger_run; + unsigned sort_id_start = 0; - /* - * Running triggers will append more updates to the list of updates as - * we're walking it: - */ - do { - trans_trigger_run = false; + while (sort_id_start < trans->nr_updates) { + unsigned i, sort_id = trans->updates[sort_id_start].sort_order; + bool trans_trigger_run; - for (unsigned i = *btree_id_updates_start; - i < trans->nr_updates && trans->updates[i].btree_id <= btree_id; - i++) { - if (trans->updates[i].btree_id < btree_id) { - *btree_id_updates_start = i; - continue; + /* + * For a given btree, this algorithm runs insert triggers before + * overwrite triggers: this is so that when extents are being + * moved (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop + * references before they are re-added. + * + * Running triggers will append more updates to the list of + * updates as we're walking it: + */ + do { + trans_trigger_run = false; + + for (i = sort_id_start; + i < trans->nr_updates && trans->updates[i].sort_order <= sort_id; + i++) { + if (trans->updates[i].sort_order < sort_id) { + sort_id_start = i; + continue; + } + + int ret = run_one_trans_trigger(trans, trans->updates + i); + if (ret < 0) + return ret; + if (ret) + trans_trigger_run = true; } + } while (trans_trigger_run); - int ret = run_one_trans_trigger(trans, trans->updates + i); - if (ret < 0) - return ret; - if (ret) - trans_trigger_run = true; - } - } while (trans_trigger_run); - - trans_for_each_update(trans, i) - BUG_ON(!(i->flags & BTREE_TRIGGER_norun) && - i->btree_id == btree_id && - btree_node_type_has_trans_triggers(i->bkey_type) && - (!i->insert_trigger_run || !i->overwrite_trigger_run)); - - return 0; -} - -static int bch2_trans_commit_run_triggers(struct btree_trans *trans) -{ - unsigned btree_id = 0, btree_id_updates_start = 0; - int ret = 0; - - /* - * - * For a given btree, this algorithm runs insert triggers before - * overwrite triggers: this is so that when extents are being moved - * (e.g. by FALLOCATE_FL_INSERT_RANGE), we don't drop references before - * they are re-added. - */ - for (btree_id = 0; btree_id < BTREE_ID_NR; btree_id++) { - if (btree_id == BTREE_ID_alloc) - continue; - - ret = run_btree_triggers(trans, btree_id, &btree_id_updates_start); - if (ret) - return ret; + sort_id_start = i; } - btree_id_updates_start = 0; - ret = run_btree_triggers(trans, BTREE_ID_alloc, &btree_id_updates_start); - if (ret) - return ret; - #ifdef CONFIG_BCACHEFS_DEBUG trans_for_each_update(trans, i) BUG_ON(!(i->flags & BTREE_TRIGGER_norun) && @@ -666,10 +647,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && !(flags & BCH_TRANS_COMMIT_no_journal_res)) { - if (bch2_journal_seq_verify) + if (static_branch_unlikely(&bch2_journal_seq_verify)) trans_for_each_update(trans, i) i->k->k.bversion.lo = trans->journal_res.seq; - else if (bch2_inject_invalid_keys) + else if (static_branch_unlikely(&bch2_inject_invalid_keys)) trans_for_each_update(trans, i) i->k->k.bversion = MAX_VERSION; } @@ -682,18 +663,17 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, h = h->next; } - struct jset_entry *entry = trans->journal_entries; + struct bkey_i *accounting; percpu_down_read(&c->mark_lock); - for (entry = trans->journal_entries; - entry != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); - entry = vstruct_next(entry)) - if (entry->type == BCH_JSET_ENTRY_write_buffer_keys && - entry->start->k.type == KEY_TYPE_accounting) { - ret = bch2_accounting_trans_commit_hook(trans, bkey_i_to_accounting(entry->start), flags); - if (ret) - goto revert_fs_usage; - } + for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); + accounting != btree_trans_subbuf_top(trans, &trans->accounting); + accounting = bkey_next(accounting)) { + ret = bch2_accounting_trans_commit_hook(trans, + bkey_i_to_accounting(accounting), flags); + if (ret) + goto revert_fs_usage; + } percpu_up_read(&c->mark_lock); /* XXX: we only want to run this if deltas are nonzero */ @@ -717,8 +697,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) validate_context.flags = BCH_VALIDATE_write|BCH_VALIDATE_commit; - for (struct jset_entry *i = trans->journal_entries; - i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); + for (struct jset_entry *i = btree_trans_journal_entries_start(trans); + i != btree_trans_journal_entries_top(trans); i = vstruct_next(i)) { ret = bch2_journal_entry_validate(c, NULL, i, bcachefs_metadata_version_current, @@ -773,11 +753,18 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, } memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), - trans->journal_entries, - trans->journal_entries_u64s); + btree_trans_journal_entries_start(trans), + trans->journal_entries.u64s); + + trans->journal_res.offset += trans->journal_entries.u64s; + trans->journal_res.u64s -= trans->journal_entries.u64s; - trans->journal_res.offset += trans->journal_entries_u64s; - trans->journal_res.u64s -= trans->journal_entries_u64s; + memcpy_u64s_small(bch2_journal_add_entry(j, &trans->journal_res, + BCH_JSET_ENTRY_write_buffer_keys, + BTREE_ID_accounting, 0, + trans->accounting.u64s)->_data, + btree_trans_subbuf_base(trans, &trans->accounting), + trans->accounting.u64s); if (trans->journal_seq) *trans->journal_seq = trans->journal_res.seq; @@ -799,13 +786,10 @@ fatal_err: bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret)); percpu_down_read(&c->mark_lock); revert_fs_usage: - for (struct jset_entry *entry2 = trans->journal_entries; - entry2 != entry; - entry2 = vstruct_next(entry2)) - if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys && - entry2->start->k.type == KEY_TYPE_accounting) - bch2_accounting_trans_commit_revert(trans, - bkey_i_to_accounting(entry2->start), flags); + for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); + i != accounting; + i = bkey_next(i)) + bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags); percpu_up_read(&c->mark_lock); return ret; } @@ -903,18 +887,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, struct bch_fs *c = trans->c; enum bch_watermark watermark = flags & BCH_WATERMARK_MASK; - switch (ret) { - case -BCH_ERR_btree_insert_btree_node_full: - ret = bch2_btree_split_leaf(trans, i->path, flags); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - trace_and_count(c, trans_restart_btree_node_split, trans, - trace_ip, trans->paths + i->path); - break; - case -BCH_ERR_btree_insert_need_mark_replicas: - ret = drop_locks_do(trans, - bch2_accounting_update_sb(trans)); - break; - case -BCH_ERR_journal_res_get_blocked: + if (bch2_err_matches(ret, BCH_ERR_journal_res_blocked)) { /* * XXX: this should probably be a separate BTREE_INSERT_NONBLOCK * flag @@ -922,13 +895,26 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, if ((flags & BCH_TRANS_COMMIT_journal_reclaim) && watermark < BCH_WATERMARK_reclaim) { ret = -BCH_ERR_journal_reclaim_would_deadlock; - break; + goto out; } ret = drop_locks_do(trans, bch2_trans_journal_res_get(trans, (flags & BCH_WATERMARK_MASK)| JOURNAL_RES_GET_CHECK)); + goto out; + } + + switch (ret) { + case -BCH_ERR_btree_insert_btree_node_full: + ret = bch2_btree_split_leaf(trans, i->path, flags); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) + trace_and_count(c, trans_restart_btree_node_split, trans, + trace_ip, trans->paths + i->path); + break; + case -BCH_ERR_btree_insert_need_mark_replicas: + ret = drop_locks_do(trans, + bch2_accounting_update_sb(trans)); break; case -BCH_ERR_btree_insert_need_journal_reclaim: bch2_trans_unlock(trans); @@ -950,7 +936,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, BUG_ON(ret >= 0); break; } - +out: BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOSPC) && @@ -978,8 +964,8 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) return ret; } - for (struct jset_entry *i = trans->journal_entries; - i != (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s); + for (struct jset_entry *i = btree_trans_journal_entries_start(trans); + i != btree_trans_journal_entries_top(trans); i = vstruct_next(i)) if (i->type == BCH_JSET_ENTRY_btree_keys || i->type == BCH_JSET_ENTRY_write_buffer_keys) { @@ -988,6 +974,14 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) return ret; } + for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); + i != btree_trans_subbuf_top(trans, &trans->accounting); + i = bkey_next(i)) { + int ret = bch2_journal_key_insert(c, BTREE_ID_accounting, 0, i); + if (ret) + return ret; + } + return 0; } @@ -1004,7 +998,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; if (!trans->nr_updates && - !trans->journal_entries_u64s) + !trans->journal_entries.u64s && + !trans->accounting.u64s) goto out_reset; ret = bch2_trans_commit_run_triggers(trans); @@ -1012,7 +1007,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; if (!(flags & BCH_TRANS_COMMIT_no_check_rw) && - unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_trans))) { + unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))) { if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) ret = do_bch2_trans_commit_to_journal_replay(trans); else @@ -1022,7 +1017,7 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags)); - trans->journal_u64s = trans->journal_entries_u64s; + trans->journal_u64s = trans->journal_entries.u64s + jset_u64s(trans->accounting.u64s); trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names); if (trans->journal_transaction_names) trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s); @@ -1078,7 +1073,7 @@ retry: trace_and_count(c, transaction_commit, trans, _RET_IP_); out: if (likely(!(flags & BCH_TRANS_COMMIT_no_check_rw))) - bch2_write_ref_put(c, BCH_WRITE_REF_trans); + enumerated_ref_put(&c->writes, BCH_WRITE_REF_trans); out_reset: if (!ret) bch2_trans_downgrade(trans); |