summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-31 18:33:51 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-31 18:33:51 -0700
commit98fb679d19a17aec624d53b016953a3fcd272e8d (patch)
tree78cea0d1b5368dd6991869aae78fba81c0aebfd2
parent4080cf02f11e337c5031013f77e0ba1a475985ee (diff)
parent650f5353dcc9b6e690a1c763754fa1e98d217bfc (diff)
Merge tag 'bcachefs-2025-03-31' of git://evilpiepirate.org/bcachefs
Pull more bcachefs updates from Kent Overstreet: "All bugfixes and logging improvements" * tag 'bcachefs-2025-03-31' of git://evilpiepirate.org/bcachefs: (35 commits) bcachefs: fix bch2_write_point_to_text() units bcachefs: Log original key being moved in data updates bcachefs: BCH_JSET_ENTRY_log_bkey bcachefs: Reorder error messages that include journal debug bcachefs: Don't use designated initializers for disk_accounting_pos bcachefs: Silence errors after emergency shutdown bcachefs: fix units in rebalance_status bcachefs: bch2_ioctl_subvolume_destroy() fixes bcachefs: Clear fs_path_parent on subvolume unlink bcachefs: Change btree_insert_node() assertion to error bcachefs: Better printing of inconsistency errors bcachefs: bch2_count_fsck_err() bcachefs: Better helpers for inconsistency errors bcachefs: Consistent indentation of multiline fsck errors bcachefs: Add an "ignore unknown" option to bch2_parse_mount_opts() bcachefs: bch2_time_stats_init_no_pcpu() bcachefs: Fix bch2_fs_get_tree() error path bcachefs: fix logging in journal_entry_err_msg() bcachefs: add missing newline in bch2_trans_updates_to_text() bcachefs: print_string_as_lines: fix extra newline ...
-rw-r--r--fs/bcachefs/alloc_background.c22
-rw-r--r--fs/bcachefs/alloc_foreground.c2
-rw-r--r--fs/bcachefs/backpointers.c43
-rw-r--r--fs/bcachefs/bcachefs_format.h3
-rw-r--r--fs/bcachefs/btree_cache.c2
-rw-r--r--fs/bcachefs/btree_gc.c23
-rw-r--r--fs/bcachefs/btree_io.c63
-rw-r--r--fs/bcachefs/btree_iter.c14
-rw-r--r--fs/bcachefs/btree_iter.h1
-rw-r--r--fs/bcachefs/btree_journal_iter.c2
-rw-r--r--fs/bcachefs/btree_node_scan.c14
-rw-r--r--fs/bcachefs/btree_update.c13
-rw-r--r--fs/bcachefs/btree_update.h2
-rw-r--r--fs/bcachefs/btree_update_interior.c91
-rw-r--r--fs/bcachefs/buckets.c161
-rw-r--r--fs/bcachefs/chardev.c6
-rw-r--r--fs/bcachefs/data_update.c22
-rw-r--r--fs/bcachefs/data_update.h12
-rw-r--r--fs/bcachefs/disk_accounting.c40
-rw-r--r--fs/bcachefs/disk_accounting.h8
-rw-r--r--fs/bcachefs/disk_accounting_format.h80
-rw-r--r--fs/bcachefs/ec.c22
-rw-r--r--fs/bcachefs/errcode.h3
-rw-r--r--fs/bcachefs/error.c226
-rw-r--r--fs/bcachefs/error.h48
-rw-r--r--fs/bcachefs/extents.c7
-rw-r--r--fs/bcachefs/fs-io-buffered.c2
-rw-r--r--fs/bcachefs/fs-io.c31
-rw-r--r--fs/bcachefs/fs-ioctl.c6
-rw-r--r--fs/bcachefs/fs.c9
-rw-r--r--fs/bcachefs/fsck.c22
-rw-r--r--fs/bcachefs/io_read.c4
-rw-r--r--fs/bcachefs/io_read.h6
-rw-r--r--fs/bcachefs/io_write.c44
-rw-r--r--fs/bcachefs/journal.c19
-rw-r--r--fs/bcachefs/journal_io.c38
-rw-r--r--fs/bcachefs/lru.c7
-rw-r--r--fs/bcachefs/move.c37
-rw-r--r--fs/bcachefs/namei.c4
-rw-r--r--fs/bcachefs/opts.c49
-rw-r--r--fs/bcachefs/opts.h3
-rw-r--r--fs/bcachefs/printbuf.c19
-rw-r--r--fs/bcachefs/printbuf.h1
-rw-r--r--fs/bcachefs/progress.c6
-rw-r--r--fs/bcachefs/rebalance.c5
-rw-r--r--fs/bcachefs/recovery_passes.c12
-rw-r--r--fs/bcachefs/reflink.c12
-rw-r--r--fs/bcachefs/sb-errors_format.h6
-rw-r--r--fs/bcachefs/snapshot.c16
-rw-r--r--fs/bcachefs/str_hash.c2
-rw-r--r--fs/bcachefs/subvolume.c1
-rw-r--r--fs/bcachefs/super.c38
-rw-r--r--fs/bcachefs/sysfs.c9
-rw-r--r--fs/bcachefs/time_stats.c20
-rw-r--r--fs/bcachefs/time_stats.h1
-rw-r--r--fs/bcachefs/util.c2
-rw-r--r--fs/bcachefs/util.h1
57 files changed, 856 insertions, 506 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index 5fb396be9127..c12ca7538e4f 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -589,6 +589,8 @@ iter_err:
int bch2_alloc_read(struct bch_fs *c)
{
+ down_read(&c->state_lock);
+
struct btree_trans *trans = bch2_trans_get(c);
struct bch_dev *ca = NULL;
int ret;
@@ -652,6 +654,7 @@ int bch2_alloc_read(struct bch_fs *c)
bch2_dev_put(ca);
bch2_trans_put(trans);
+ up_read(&c->state_lock);
bch_err_fn(c, ret);
return ret;
}
@@ -673,8 +676,7 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans,
bch2_bkey_val_to_text(&buf, c, alloc_k);
int ret = __bch2_fsck_err(NULL, trans, flags, err_id,
- "bucket incorrectly %sset in %s btree\n"
- " %s",
+ "bucket incorrectly %sset in %s btree\n%s",
set ? "" : "un",
bch2_btree_id_str(btree),
buf.buf);
@@ -1027,7 +1029,7 @@ fsck_err:
bch2_dev_put(ca);
return ret;
invalid_bucket:
- bch2_fs_inconsistent(c, "reference to invalid bucket\n %s",
+ bch2_fs_inconsistent(c, "reference to invalid bucket\n%s",
(bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
ret = -BCH_ERR_trigger_alloc;
goto err;
@@ -1201,8 +1203,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
trans, bucket_gens_key_wrong,
- "incorrect gen in bucket_gens btree (got %u should be %u)\n"
- " %s",
+ "incorrect gen in bucket_gens btree (got %u should be %u)\n%s",
alloc_gen(k, gens_offset), a->gen,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
@@ -1260,7 +1261,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
if (fsck_err_on(k.k->type != KEY_TYPE_set,
trans, freespace_hole_missing,
"hole in alloc btree missing in freespace btree\n"
- " device %llu buckets %llu-%llu",
+ "device %llu buckets %llu-%llu",
freespace_iter->pos.inode,
freespace_iter->pos.offset,
end->offset)) {
@@ -1419,7 +1420,7 @@ int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_ite
(state == BCH_DATA_free &&
genbits != alloc_freespace_genbits(*a))) {
if (fsck_err(trans, need_discard_freespace_key_bad,
- "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
+ "%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
(bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
bch2_btree_id_str(iter->btree_id),
iter->pos.inode,
@@ -1500,7 +1501,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode);
if (!ca) {
if (fsck_err(trans, bucket_gens_to_invalid_dev,
- "bucket_gens key for invalid device:\n %s",
+ "bucket_gens key for invalid device:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
ret = bch2_btree_delete_at(trans, iter, 0);
goto out;
@@ -1509,7 +1510,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
if (fsck_err_on(end <= ca->mi.first_bucket ||
start >= ca->mi.nbuckets,
trans, bucket_gens_to_invalid_buckets,
- "bucket_gens key for invalid buckets:\n %s",
+ "bucket_gens key for invalid buckets:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = bch2_btree_delete_at(trans, iter, 0);
goto out;
@@ -1712,8 +1713,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
if (fsck_err_on(!a->io_time[READ],
trans, alloc_key_cached_but_read_time_zero,
- "cached bucket with read_time 0\n"
- " %s",
+ "cached bucket with read_time 0\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
struct bkey_i_alloc_v4 *a_mut =
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 0cac65347a5d..da0d72928b5b 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -1560,7 +1560,7 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
unsigned i;
prt_printf(out, "%lu: ", wp->write_point);
- prt_human_readable_u64(out, wp->sectors_allocated);
+ prt_human_readable_u64(out, wp->sectors_allocated << 9);
prt_printf(out, " last wrote: ");
bch2_pr_time_units(out, sched_clock() - wp->last_used);
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 20c497f0c2cb..21d1d86d5008 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -96,6 +96,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
+ int ret = 0;
if (insert) {
prt_printf(&buf, "existing backpointer found when inserting ");
@@ -125,17 +126,15 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
prt_printf(&buf, "for ");
bch2_bkey_val_to_text(&buf, c, orig_k);
-
- bch_err(c, "%s", buf.buf);
}
- printbuf_exit(&buf);
+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers &&
+ __bch2_inconsistent_error(c, &buf))
+ ret = -BCH_ERR_erofs_unfixed_errors;
- if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
- return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
- } else {
- return 0;
- }
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ return ret;
}
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
@@ -210,11 +209,11 @@ static int backpointer_target_not_found(struct btree_trans *trans,
if (ret)
return ret;
- prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
+ prt_printf(&buf, "backpointer doesn't match %s it points to:\n",
bp.v->level ? "btree node" : "extent");
bch2_bkey_val_to_text(&buf, c, bp.s_c);
- prt_printf(&buf, "\n ");
+ prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, target_k);
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k);
@@ -222,7 +221,7 @@ static int backpointer_target_not_found(struct btree_trans *trans,
struct extent_ptr_decoded p;
bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry)
if (p.ptr.dev == bp.k->p.inode) {
- prt_printf(&buf, "\n ");
+ prt_newline(&buf);
struct bkey_i_backpointer bp2;
bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2);
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i));
@@ -443,12 +442,11 @@ found:
if (ret)
goto err;
- prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
- prt_printf(&buf, "\n ");
+ prt_printf(&buf, "extents pointing to same space, but first extent checksum bad:\n");
bch2_btree_id_to_text(&buf, btree);
prt_str(&buf, " ");
bch2_bkey_val_to_text(&buf, c, extent);
- prt_printf(&buf, "\n ");
+ prt_newline(&buf);
bch2_btree_id_to_text(&buf, o_btree);
prt_str(&buf, " ");
bch2_bkey_val_to_text(&buf, c, extent2);
@@ -539,9 +537,9 @@ check_existing_bp:
if (bch2_extents_match(orig_k, other_extent)) {
printbuf_reset(&buf);
- prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
+ prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n");
bch2_bkey_val_to_text(&buf, c, orig_k);
- prt_str(&buf, "\n ");
+ prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, other_extent);
bch_err(c, "%s", buf.buf);
@@ -580,20 +578,20 @@ check_existing_bp:
}
printbuf_reset(&buf);
- prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bp->k.p.inode);
+ prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n", bp->k.p.inode);
bch2_bkey_val_to_text(&buf, c, orig_k);
- prt_str(&buf, "\n ");
+ prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, other_extent);
bch_err(c, "%s", buf.buf);
ret = -BCH_ERR_fsck_repair_unimplemented;
goto err;
missing:
printbuf_reset(&buf);
- prt_str(&buf, "missing backpointer\n for: ");
+ prt_str(&buf, "missing backpointer\nfor: ");
bch2_bkey_val_to_text(&buf, c, orig_k);
- prt_printf(&buf, "\n want: ");
+ prt_printf(&buf, "\nwant: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i));
- prt_printf(&buf, "\n got: ");
+ prt_printf(&buf, "\ngot: ");
bch2_bkey_val_to_text(&buf, c, bp_k);
if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
@@ -1023,7 +1021,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
* Can't allow devices to come/go/resize while we have bucket bitmaps
* allocated
*/
- lockdep_assert_held(&c->state_lock);
+ down_read(&c->state_lock);
for_each_member_device(c, ca) {
BUG_ON(ca->bucket_backpointer_mismatches);
@@ -1108,6 +1106,7 @@ err_free_bitmaps:
ca->bucket_backpointer_mismatches = NULL;
}
+ up_read(&c->state_lock);
bch_err_fn(c, ret);
return ret;
}
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index e96d87767020..a3db328dee31 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1143,7 +1143,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
x(log, 9) \
x(overwrite, 10) \
x(write_buffer_keys, 11) \
- x(datetime, 12)
+ x(datetime, 12) \
+ x(log_bkey, 13)
enum bch_jset_entry_type {
#define x(f, nr) BCH_JSET_ENTRY_##f = nr,
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 54666027aa85..9b80201c7982 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -1417,7 +1417,7 @@ void __bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, "%u", r->level);
else
prt_printf(out, "(unknown)");
- prt_printf(out, "\n ");
+ prt_newline(out);
bch2_bkey_val_to_text(out, c, k);
}
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index ff681e733598..2025d408979c 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -213,15 +213,15 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *
prt_printf(&buf, " at ");
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
- prt_printf(&buf, ":\n parent: ");
+ prt_printf(&buf, ":\nparent: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
if (prev) {
- prt_printf(&buf, "\n prev: ");
+ prt_printf(&buf, "\nprev: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key));
}
- prt_str(&buf, "\n next: ");
+ prt_str(&buf, "\nnext: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key));
if (bpos_lt(expected_start, cur->data->min_key)) { /* gap */
@@ -280,12 +280,12 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
if (bpos_eq(child->key.k.p, b->key.k.p))
return 0;
- prt_printf(&buf, " at ");
+ prt_printf(&buf, "\nat: ");
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
- prt_printf(&buf, ":\n parent: ");
+ prt_printf(&buf, "\nparent: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- prt_str(&buf, "\n child: ");
+ prt_str(&buf, "\nchild: ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key));
if (mustfix_fsck_err(trans, btree_node_topology_bad_max_key,
@@ -351,8 +351,7 @@ again:
if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO),
trans, btree_node_read_error,
- "Topology repair: unreadable btree node at\n"
- " %s",
+ "Topology repair: unreadable btree node at\n%s",
buf.buf)) {
bch2_btree_node_evict(trans, cur_k.k);
cur = NULL;
@@ -612,7 +611,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
if (fsck_err_on(btree_id != BTREE_ID_accounting &&
k.k->bversion.lo > atomic64_read(&c->key_version),
trans, bkey_version_in_future,
- "key version number higher than recorded %llu\n %s",
+ "key version number higher than recorded %llu\n%s",
atomic64_read(&c->key_version),
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
atomic64_set(&c->key_version, k.k->bversion.lo);
@@ -620,7 +619,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
trans, btree_bitmap_not_marked,
- "btree ptr not marked in member info btree allocated bitmap\n %s",
+ "btree ptr not marked in member info btree allocated bitmap\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
@@ -1021,8 +1020,7 @@ int bch2_check_allocations(struct bch_fs *c)
{
int ret;
- lockdep_assert_held(&c->state_lock);
-
+ down_read(&c->state_lock);
down_write(&c->gc_lock);
bch2_btree_interior_updates_flush(c);
@@ -1060,6 +1058,7 @@ out:
percpu_up_write(&c->mark_lock);
up_write(&c->gc_lock);
+ up_read(&c->state_lock);
/*
* At startup, allocations can happen directly instead of via the
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 2ba33ffc9795..1d94a2bf706d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -525,8 +525,6 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
prt_printf(out, "at btree ");
bch2_btree_pos_to_text(out, c, b);
- printbuf_indent_add(out, 2);
-
prt_printf(out, "\nnode offset %u/%u",
b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)));
if (i)
@@ -550,32 +548,39 @@ static int __btree_err(int ret,
enum bch_sb_error_id err_type,
const char *fmt, ...)
{
- struct printbuf out = PRINTBUF;
bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
- va_list args;
+
+ if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
+ ret = -BCH_ERR_btree_node_read_err_fixable;
+ if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry)
+ ret = -BCH_ERR_btree_node_read_err_bad_node;
+
+ if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable)
+ bch2_sb_error_count(c, err_type);
+
+ struct printbuf out = PRINTBUF;
+ if (write != WRITE && ret != -BCH_ERR_btree_node_read_err_fixable) {
+ printbuf_indent_add_nextline(&out, 2);
+#ifdef BCACHEFS_LOG_PREFIX
+ prt_printf(&out, bch2_log_msg(c, ""));
+#endif
+ }
btree_err_msg(&out, c, ca, b, i, k, b->written, write);
+ va_list args;
va_start(args, fmt);
prt_vprintf(&out, fmt, args);
va_end(args);
if (write == WRITE) {
- bch2_print_string_as_lines(KERN_ERR, out.buf);
- ret = c->opts.errors == BCH_ON_ERROR_continue
- ? 0
- : -BCH_ERR_fsck_errors_not_fixed;
- goto out;
+ prt_str(&out, ", ");
+ ret = __bch2_inconsistent_error(c, &out)
+ ? -BCH_ERR_fsck_errors_not_fixed
+ : 0;
+ silent = false;
}
- if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry)
- ret = -BCH_ERR_btree_node_read_err_fixable;
- if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry)
- ret = -BCH_ERR_btree_node_read_err_bad_node;
-
- if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable)
- bch2_sb_error_count(c, err_type);
-
switch (ret) {
case -BCH_ERR_btree_node_read_err_fixable:
ret = !silent
@@ -585,25 +590,21 @@ static int __btree_err(int ret,
ret != -BCH_ERR_fsck_ignore)
goto fsck_err;
ret = -BCH_ERR_fsck_fix;
- break;
- case -BCH_ERR_btree_node_read_err_want_retry:
- case -BCH_ERR_btree_node_read_err_must_retry:
- if (!silent)
- bch2_print_string_as_lines(KERN_ERR, out.buf);
- break;
+ goto out;
case -BCH_ERR_btree_node_read_err_bad_node:
- if (!silent)
- bch2_print_string_as_lines(KERN_ERR, out.buf);
- ret = bch2_topology_error(c);
+ prt_str(&out, ", ");
+ ret = __bch2_topology_error(c, &out);
+ if (ret)
+ silent = false;
break;
case -BCH_ERR_btree_node_read_err_incompatible:
- if (!silent)
- bch2_print_string_as_lines(KERN_ERR, out.buf);
ret = -BCH_ERR_fsck_errors_not_fixed;
+ silent = false;
break;
- default:
- BUG();
}
+
+ if (!silent)
+ bch2_print_string_as_lines(KERN_ERR, out.buf);
out:
fsck_err:
printbuf_exit(&out);
@@ -817,7 +818,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
-BCH_ERR_btree_node_read_err_bad_node,
c, ca, b, i, NULL,
btree_node_bad_format,
- "invalid bkey format: %s\n %s", buf1.buf,
+ "invalid bkey format: %s\n%s", buf1.buf,
(printbuf_reset(&buf2),
bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf));
printbuf_reset(&buf1);
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 7542c6f9c88e..a9c110b846b5 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1487,22 +1487,14 @@ void bch2_trans_updates_to_text(struct printbuf *buf, struct btree_trans *trans)
for (struct jset_entry *e = trans->journal_entries;
e != btree_trans_journal_entries_top(trans);
- e = vstruct_next(e))
+ e = vstruct_next(e)) {
bch2_journal_entry_to_text(buf, trans->c, e);
+ prt_newline(buf);
+ }
printbuf_indent_sub(buf, 2);
}
-noinline __cold
-void bch2_dump_trans_updates(struct btree_trans *trans)
-{
- struct printbuf buf = PRINTBUF;
-
- bch2_trans_updates_to_text(&buf, trans);
- bch2_print_str(trans->c, buf.buf);
- printbuf_exit(&buf);
-}
-
static void bch2_btree_path_to_text_short(struct printbuf *out, struct btree_trans *trans, btree_path_idx_t path_idx)
{
struct btree_path *path = trans->paths + path_idx;
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 8823eec6b284..e6f51a3b8187 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -9,7 +9,6 @@
void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t);
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
-void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *);
static inline int __bkey_err(const struct bkey *k)
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index 6d25e3f85ce8..d1ad1a7613c9 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -644,6 +644,8 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
*/
static int journal_sort_key_cmp(const void *_l, const void *_r)
{
+ cond_resched();
+
const struct journal_key *l = _l;
const struct journal_key *r = _r;
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 678161321e42..25d54b77cdc2 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -13,6 +13,7 @@
#include <linux/kthread.h>
#include <linux/min_heap.h>
+#include <linux/sched/sysctl.h>
#include <linux/sort.h>
struct find_btree_nodes_worker {
@@ -313,7 +314,8 @@ static int read_btree_nodes(struct find_btree_nodes *f)
wake_up_process(t);
}
err:
- closure_sync(&cl);
+ while (closure_sync_timeout(&cl, sysctl_hung_task_timeout_secs * HZ / 2))
+ ;
return f->ret ?: ret;
}
@@ -577,10 +579,12 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
found_btree_node_to_key(&tmp.k, &n);
- struct printbuf buf = PRINTBUF;
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
- bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
- printbuf_exit(&buf);
+ if (c->opts.verbose) {
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
+ bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k),
(struct bkey_validate_context) {
diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c
index bd2eb42edb24..c05394f56424 100644
--- a/fs/bcachefs/btree_update.c
+++ b/fs/bcachefs/btree_update.c
@@ -846,6 +846,19 @@ int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)
return 0;
}
+int bch2_trans_log_bkey(struct btree_trans *trans, enum btree_id btree,
+ unsigned level, struct bkey_i *k)
+{
+ struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s));
+ int ret = PTR_ERR_OR_ZERO(e);
+ if (ret)
+ return ret;
+
+ journal_entry_init(e, BCH_JSET_ENTRY_log_bkey, btree, level, k->k.u64s);
+ bkey_copy(e->start, k);
+ return 0;
+}
+
__printf(3, 0)
static int
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index d2e1c04353f6..568e56c91190 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -170,6 +170,8 @@ void bch2_trans_commit_hook(struct btree_trans *,
int __bch2_trans_commit(struct btree_trans *, unsigned);
int bch2_trans_log_msg(struct btree_trans *, struct printbuf *);
+int bch2_trans_log_bkey(struct btree_trans *, enum btree_id, unsigned, struct bkey_i *);
+
__printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
__printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 67f1e3202835..bf7e1dac7f46 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -35,6 +35,8 @@ static const char * const bch2_btree_update_modes[] = {
NULL
};
+static void bch2_btree_update_to_text(struct printbuf *, struct btree_update *);
+
static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
btree_path_idx_t, struct btree *, struct keylist *);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
@@ -54,6 +56,8 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
struct bkey_buf prev;
int ret = 0;
+ printbuf_indent_add_nextline(&buf, 2);
+
BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
!bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
b->data->min_key));
@@ -64,19 +68,20 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
if (b == btree_node_root(c, b)) {
if (!bpos_eq(b->data->min_key, POS_MIN)) {
- printbuf_reset(&buf);
+ ret = __bch2_topology_error(c, &buf);
+
bch2_bpos_to_text(&buf, b->data->min_key);
log_fsck_err(trans, btree_root_bad_min_key,
"btree root with incorrect min_key: %s", buf.buf);
- goto topology_repair;
+ goto out;
}
if (!bpos_eq(b->data->max_key, SPOS_MAX)) {
- printbuf_reset(&buf);
+ ret = __bch2_topology_error(c, &buf);
bch2_bpos_to_text(&buf, b->data->max_key);
log_fsck_err(trans, btree_root_bad_max_key,
"btree root with incorrect max_key: %s", buf.buf);
- goto topology_repair;
+ goto out;
}
}
@@ -94,20 +99,19 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
: bpos_successor(prev.k->k.p);
if (!bpos_eq(expected_min, bp.v->min_key)) {
- bch2_topology_error(c);
+ ret = __bch2_topology_error(c, &buf);
- printbuf_reset(&buf);
- prt_str(&buf, "end of prev node doesn't match start of next node\n in ");
+ prt_str(&buf, "end of prev node doesn't match start of next node\nin ");
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
prt_str(&buf, " node ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- prt_str(&buf, "\n prev ");
+ prt_str(&buf, "\nprev ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
- prt_str(&buf, "\n next ");
+ prt_str(&buf, "\nnext ");
bch2_bkey_val_to_text(&buf, c, k);
log_fsck_err(trans, btree_node_topology_bad_min_key, "%s", buf.buf);
- goto topology_repair;
+ goto out;
}
bch2_bkey_buf_reassemble(&prev, c, k);
@@ -115,29 +119,25 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
}
if (bkey_deleted(&prev.k->k)) {
- bch2_topology_error(c);
+ ret = __bch2_topology_error(c, &buf);
- printbuf_reset(&buf);
- prt_str(&buf, "empty interior node\n in ");
+ prt_str(&buf, "empty interior node\nin ");
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
prt_str(&buf, " node ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
log_fsck_err(trans, btree_node_topology_empty_interior_node, "%s", buf.buf);
- goto topology_repair;
} else if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
- bch2_topology_error(c);
+ ret = __bch2_topology_error(c, &buf);
- printbuf_reset(&buf);
- prt_str(&buf, "last child node doesn't end at end of parent node\n in ");
+ prt_str(&buf, "last child node doesn't end at end of parent node\nin ");
bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
prt_str(&buf, " node ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
- prt_str(&buf, "\n last key ");
+ prt_str(&buf, "\nlast key ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
log_fsck_err(trans, btree_node_topology_bad_max_key, "%s", buf.buf);
- goto topology_repair;
}
out:
fsck_err:
@@ -145,9 +145,6 @@ fsck_err:
bch2_bkey_buf_exit(&prev, c);
printbuf_exit(&buf);
return ret;
-topology_repair:
- ret = bch2_topology_error(c);
- goto out;
}
/* Calculate ideal packed bkey format for new btree nodes: */
@@ -1271,7 +1268,8 @@ err:
bch2_btree_update_free(as, trans);
if (!bch2_err_matches(ret, ENOSPC) &&
!bch2_err_matches(ret, EROFS) &&
- ret != -BCH_ERR_journal_reclaim_would_deadlock)
+ ret != -BCH_ERR_journal_reclaim_would_deadlock &&
+ ret != -BCH_ERR_journal_shutdown)
bch_err_fn_ratelimited(c, ret);
return ERR_PTR(ret);
}
@@ -1782,11 +1780,24 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
int ret;
lockdep_assert_held(&c->gc_lock);
- BUG_ON(!btree_node_intent_locked(path, b->c.level));
BUG_ON(!b->c.level);
BUG_ON(!as || as->b);
bch2_verify_keylist_sorted(keys);
+ if (!btree_node_intent_locked(path, b->c.level)) {
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf, "%s(): node not locked at level %u\n",
+ __func__, b->c.level);
+ bch2_btree_update_to_text(&buf, as);
+ bch2_btree_path_to_text(&buf, trans, path_idx);
+
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ bch2_fs_emergency_read_only(c);
+ return -EIO;
+ }
+
ret = bch2_btree_node_lock_write(trans, path, &b->c);
if (ret)
return ret;
@@ -2007,18 +2018,22 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
}
if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) {
- struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-
- bch2_bpos_to_text(&buf1, prev->data->max_key);
- bch2_bpos_to_text(&buf2, next->data->min_key);
- bch_err(c,
- "%s(): btree topology error:\n"
- " prev ends at %s\n"
- " next starts at %s",
- __func__, buf1.buf, buf2.buf);
- printbuf_exit(&buf1);
- printbuf_exit(&buf2);
- ret = bch2_topology_error(c);
+ struct printbuf buf = PRINTBUF;
+
+ printbuf_indent_add_nextline(&buf, 2);
+ prt_printf(&buf, "%s(): ", __func__);
+ ret = __bch2_topology_error(c, &buf);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "prev ends at ");
+ bch2_bpos_to_text(&buf, prev->data->max_key);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "next starts at ");
+ bch2_bpos_to_text(&buf, next->data->min_key);
+
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
goto err;
}
@@ -2288,7 +2303,9 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
int ret = bch2_trans_do(c, bch2_btree_node_rewrite_key(trans,
a->btree_id, a->level, a->key.k, 0));
- if (ret != -ENOENT)
+ if (ret != -ENOENT &&
+ !bch2_err_matches(ret, EROFS) &&
+ ret != -BCH_ERR_journal_shutdown)
bch_err_fn_ratelimited(c, ret);
spin_lock(&c->btree_node_rewrites_lock);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index e56ef623ebc1..0903311cc71e 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -381,6 +381,36 @@ err:
return ret;
}
+static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf,
+ struct bkey_s_c k, bool insert, enum bch_sb_error_id id)
+{
+ struct bch_fs *c = trans->c;
+ bool repeat = false, print = true, suppress = false;
+
+ prt_printf(buf, "\nwhile marking ");
+ bch2_bkey_val_to_text(buf, c, k);
+ prt_newline(buf);
+
+ __bch2_count_fsck_err(c, id, buf->buf, &repeat, &print, &suppress);
+
+ int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
+
+ if (insert) {
+ print = true;
+ suppress = false;
+
+ bch2_trans_updates_to_text(buf, trans);
+ __bch2_inconsistent_error(c, buf);
+ ret = -BCH_ERR_bucket_ref_update;
+ }
+
+ if (suppress)
+ prt_printf(buf, "Ratelimiting new instances of previous error\n");
+ if (print)
+ bch2_print_string_as_lines(KERN_ERR, buf->buf);
+ return ret;
+}
+
int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
struct bkey_s_c k,
const struct bch_extent_ptr *ptr,
@@ -396,32 +426,29 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
BUG_ON(!sectors);
- if (gen_after(ptr->gen, b_gen)) {
- bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
- log_fsck_err(trans, ptr_gen_newer_than_bucket_gen,
- "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n"
- "while marking %s",
+ if (unlikely(gen_after(ptr->gen, b_gen))) {
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf,
+ "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen",
ptr->dev, bucket_nr, b_gen,
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
- ptr->gen,
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
- if (inserting)
- goto err;
+ ptr->gen);
+
+ ret = bucket_ref_update_err(trans, &buf, k, inserting,
+ BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen);
goto out;
}
- if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) {
- bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
- log_fsck_err(trans, ptr_too_stale,
- "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
- "while marking %s",
+ if (unlikely(gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX)) {
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf,
+ "bucket %u:%zu gen %u data type %s: ptr gen %u too stale",
ptr->dev, bucket_nr, b_gen,
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
- ptr->gen,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf));
- if (inserting)
- goto err;
+ ptr->gen);
+
+ ret = bucket_ref_update_err(trans, &buf, k, inserting,
+ BCH_FSCK_ERR_ptr_too_stale);
goto out;
}
@@ -430,62 +457,50 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca,
goto out;
}
- if (b_gen != ptr->gen) {
- bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
- log_fsck_err(trans, stale_dirty_ptr,
- "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n"
- "while marking %s",
+ if (unlikely(b_gen != ptr->gen)) {
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf,
+ "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)",
ptr->dev, bucket_nr, b_gen,
bucket_gen_get(ca, bucket_nr),
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
- ptr->gen,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf));
- if (inserting)
- goto err;
+ ptr->gen);
+
+ ret = bucket_ref_update_err(trans, &buf, k, inserting,
+ BCH_FSCK_ERR_stale_dirty_ptr);
goto out;
}
- if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) {
- bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
- log_fsck_err(trans, ptr_bucket_data_type_mismatch,
- "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n"
- "while marking %s",
- ptr->dev, bucket_nr, b_gen,
- bch2_data_type_str(bucket_data_type),
- bch2_data_type_str(ptr_data_type),
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf));
- if (inserting)
- goto err;
+ if (unlikely(bucket_data_type_mismatch(bucket_data_type, ptr_data_type))) {
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf, "bucket %u:%zu gen %u different types of data in same bucket: %s, %s",
+ ptr->dev, bucket_nr, b_gen,
+ bch2_data_type_str(bucket_data_type),
+ bch2_data_type_str(ptr_data_type));
+
+ ret = bucket_ref_update_err(trans, &buf, k, inserting,
+ BCH_FSCK_ERR_ptr_bucket_data_type_mismatch);
goto out;
}
- if ((u64) *bucket_sectors + sectors > U32_MAX) {
- bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_allocations);
- log_fsck_err(trans, bucket_sector_count_overflow,
- "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n"
- "while marking %s",
+ if (unlikely((u64) *bucket_sectors + sectors > U32_MAX)) {
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf,
+ "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX",
ptr->dev, bucket_nr, b_gen,
bch2_data_type_str(bucket_data_type ?: ptr_data_type),
- *bucket_sectors, sectors,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf));
- if (inserting)
- goto err;
+ *bucket_sectors, sectors);
+
+ ret = bucket_ref_update_err(trans, &buf, k, inserting,
+ BCH_FSCK_ERR_bucket_sector_count_overflow);
sectors = -*bucket_sectors;
+ goto out;
}
*bucket_sectors += sectors;
out:
printbuf_exit(&buf);
return ret;
-err:
-fsck_err:
- bch2_dump_trans_updates(trans);
- bch2_inconsistent_error(c);
- ret = -BCH_ERR_bucket_ref_update;
- goto out;
}
void bch2_trans_account_disk_usage_change(struct btree_trans *trans)
@@ -651,9 +666,9 @@ static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
stripe_blockcount_get(&s->v, p.ec.block) +
sectors);
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_replicas,
- };
+ struct disk_accounting_pos acc;
+ memset(&acc, 0, sizeof(acc));
+ acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
acc.replicas.data_type = data_type;
ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
@@ -677,19 +692,21 @@ err:
if (!m || !m->alive) {
gc_stripe_unlock(m);
struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+ prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ",
+ (u64) p.ec.idx);
bch2_bkey_val_to_text(&buf, c, k);
- bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s",
- (u64) p.ec.idx, buf.buf);
+ __bch2_inconsistent_error(c, &buf);
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_exit(&buf);
- bch2_inconsistent_error(c);
return -BCH_ERR_trigger_stripe_pointer;
}
m->block_sectors[p.ec.block] += sectors;
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_replicas,
- };
+ struct disk_accounting_pos acc;
+ memset(&acc, 0, sizeof(acc));
+ acc.type = BCH_DISK_ACCOUNTING_replicas;
memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e));
gc_stripe_unlock(m);
@@ -717,12 +734,12 @@ static int __trigger_extent(struct btree_trans *trans,
: BCH_DATA_user;
int ret = 0;
- struct disk_accounting_pos acc_replicas_key = {
- .type = BCH_DISK_ACCOUNTING_replicas,
- .replicas.data_type = data_type,
- .replicas.nr_devs = 0,
- .replicas.nr_required = 1,
- };
+ struct disk_accounting_pos acc_replicas_key;
+ memset(&acc_replicas_key, 0, sizeof(acc_replicas_key));
+ acc_replicas_key.type = BCH_DISK_ACCOUNTING_replicas;
+ acc_replicas_key.replicas.data_type = data_type;
+ acc_replicas_key.replicas.nr_devs = 0;
+ acc_replicas_key.replicas.nr_required = 1;
unsigned cur_compression_type = 0;
u64 compression_acct[3] = { 1, 0, 0 };
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 57d55b3ddc71..584f4a3eb670 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -426,10 +426,8 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
arg.replica_entries_bytes = replicas.nr;
for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) {
- struct disk_accounting_pos k = {
- .type = BCH_DISK_ACCOUNTING_persistent_reserved,
- .persistent_reserved.nr_replicas = i,
- };
+ struct disk_accounting_pos k;
+ disk_accounting_key_init(k, persistent_reserved, .nr_replicas = i);
bch2_accounting_mem_read(c,
disk_accounting_pos_to_bpos(&k),
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 0ec273daccb7..fe400dfc5d76 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -22,6 +22,13 @@
#include <linux/ioprio.h>
+static const char * const bch2_data_update_type_strs[] = {
+#define x(t, n, ...) [n] = #t,
+ BCH_DATA_UPDATE_TYPES()
+#undef x
+ NULL
+};
+
static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -181,6 +188,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
container_of(op, struct data_update, op);
struct keylist *keys = &op->insert_keys;
struct bkey_buf _new, _insert;
+ struct printbuf journal_msg = PRINTBUF;
int ret = 0;
bch2_bkey_buf_init(&_new);
@@ -372,7 +380,12 @@ restart_drop_extra_replicas:
printbuf_exit(&buf);
}
- ret = bch2_insert_snapshot_whiteouts(trans, m->btree_id,
+ printbuf_reset(&journal_msg);
+ prt_str(&journal_msg, bch2_data_update_type_strs[m->type]);
+
+ ret = bch2_trans_log_msg(trans, &journal_msg) ?:
+ bch2_trans_log_bkey(trans, m->btree_id, 0, m->k.k) ?:
+ bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, bkey_start_pos(&insert->k)) ?:
bch2_insert_snapshot_whiteouts(trans, m->btree_id,
k.k->p, insert->k.p) ?:
@@ -417,6 +430,7 @@ nowork:
goto next;
}
out:
+ printbuf_exit(&journal_msg);
bch2_trans_iter_exit(trans, &iter);
bch2_bkey_buf_exit(&_insert, c);
bch2_bkey_buf_exit(&_new, c);
@@ -577,6 +591,9 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
{
+ prt_str(out, bch2_data_update_type_strs[m->type]);
+ prt_newline(out);
+
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
prt_newline(out);
@@ -738,6 +755,9 @@ int bch2_data_update_init(struct btree_trans *trans,
bch2_bkey_buf_init(&m->k);
bch2_bkey_buf_reassemble(&m->k, c, k);
+ m->type = data_opts.btree_insert_flags & BCH_WATERMARK_copygc
+ ? BCH_DATA_UPDATE_copygc
+ : BCH_DATA_UPDATE_rebalance;
m->btree_id = btree_id;
m->data_opts = data_opts;
m->ctxt = ctxt;
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index c194cbbf5b51..ed05125867da 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -24,7 +24,19 @@ struct data_update_opts {
void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
struct bch_io_opts *, struct data_update_opts *);
+#define BCH_DATA_UPDATE_TYPES() \
+ x(copygc, 0) \
+ x(rebalance, 1) \
+ x(promote, 2)
+
+enum bch_data_update_types {
+#define x(n, id) BCH_DATA_UPDATE_##n = id,
+ BCH_DATA_UPDATE_TYPES()
+#undef x
+};
+
struct data_update {
+ enum bch_data_update_types type;
/* extent being updated: */
bool read_done;
enum btree_id btree_id;
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index b32e91ba8be8..a59f6c12529b 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -114,10 +114,9 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans,
unsigned dev, s64 sectors,
bool gc)
{
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_replicas,
- };
-
+ struct disk_accounting_pos acc;
+ memset(&acc, 0, sizeof(acc));
+ acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_replicas_entry_cached(&acc.replicas, dev);
return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
@@ -135,6 +134,12 @@ static inline bool is_zero(char *start, char *end)
#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member))
+static const unsigned bch2_accounting_type_nr_counters[] = {
+#define x(f, id, nr) [BCH_DISK_ACCOUNTING_##f] = nr,
+ BCH_DISK_ACCOUNTING_TYPES()
+#undef x
+};
+
int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
struct bkey_validate_context from)
{
@@ -193,6 +198,11 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)),
c, accounting_key_junk_at_end,
"junk at end of accounting key");
+
+ bkey_fsck_err_on(bch2_accounting_counters(k.k) != bch2_accounting_type_nr_counters[acc_k.type],
+ c, accounting_key_nr_counters_wrong,
+ "accounting key with %u counters, should be %u",
+ bch2_accounting_counters(k.k), bch2_accounting_type_nr_counters[acc_k.type]);
fsck_err:
return ret;
}
@@ -635,7 +645,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
trans, accounting_replicas_not_marked,
- "accounting not marked in superblock replicas\n %s",
+ "accounting not marked in superblock replicas\n%s",
(printbuf_reset(&buf),
bch2_accounting_key_to_text(&buf, &acc),
buf.buf))) {
@@ -665,7 +675,7 @@ fsck_err:
return ret;
invalid_device:
if (fsck_err(trans, accounting_to_invalid_device,
- "accounting entry points to invalid device %i\n %s",
+ "accounting entry points to invalid device %i\n%s",
invalid_dev,
(printbuf_reset(&buf),
bch2_accounting_key_to_text(&buf, &acc),
@@ -726,7 +736,9 @@ int bch2_accounting_read(struct bch_fs *c)
break;
if (!bch2_accounting_is_mem(acc_k)) {
- struct disk_accounting_pos next = { .type = acc_k.type + 1 };
+ struct disk_accounting_pos next;
+ memset(&next, 0, sizeof(next));
+ next.type = acc_k.type + 1;
bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
continue;
}
@@ -882,15 +894,13 @@ int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev)
int bch2_dev_usage_init(struct bch_dev *ca, bool gc)
{
struct bch_fs *c = ca->fs;
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_dev_data_type,
- .dev_data_type.dev = ca->dev_idx,
- .dev_data_type.data_type = BCH_DATA_free,
- };
u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 };
int ret = bch2_trans_do(c, ({
- bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc) ?:
+ bch2_disk_accounting_mod2(trans, gc,
+ v, dev_data_type,
+ .dev = ca->dev_idx,
+ .data_type = BCH_DATA_free) ?:
(!gc ? bch2_trans_commit(trans, NULL, NULL, 0) : 0);
}));
bch_err_fn(c, ret);
@@ -917,7 +927,9 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
break;
if (!bch2_accounting_is_mem(acc_k)) {
- struct disk_accounting_pos next = { .type = acc_k.type + 1 };
+ struct disk_accounting_pos next;
+ memset(&next, 0, sizeof(next));
+ next.type = acc_k.type + 1;
bch2_btree_iter_set_pos(&iter, disk_accounting_pos_to_bpos(&next));
continue;
}
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index f9214e2d1346..abb1f6206fe9 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -33,10 +33,12 @@ static inline bool bch2_accounting_key_is_zero(struct bkey_s_c_accounting a)
static inline void bch2_accounting_accumulate(struct bkey_i_accounting *dst,
struct bkey_s_c_accounting src)
{
- EBUG_ON(dst->k.u64s != src.k->u64s);
-
- for (unsigned i = 0; i < bch2_accounting_counters(&dst->k); i++)
+ for (unsigned i = 0;
+ i < min(bch2_accounting_counters(&dst->k),
+ bch2_accounting_counters(src.k));
+ i++)
dst->v.d[i] += src.v->d[i];
+
if (bversion_cmp(dst->k.bversion, src.k->bversion) < 0)
dst->k.bversion = src.k->bversion;
}
diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h
index 15190196485f..8269af1dbe2a 100644
--- a/fs/bcachefs/disk_accounting_format.h
+++ b/fs/bcachefs/disk_accounting_format.h
@@ -95,40 +95,81 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
}
}
+/*
+ * field 1: name
+ * field 2: id
+ * field 3: number of counters (max 3)
+ */
+
#define BCH_DISK_ACCOUNTING_TYPES() \
- x(nr_inodes, 0) \
- x(persistent_reserved, 1) \
- x(replicas, 2) \
- x(dev_data_type, 3) \
- x(compression, 4) \
- x(snapshot, 5) \
- x(btree, 6) \
- x(rebalance_work, 7) \
- x(inum, 8)
+ x(nr_inodes, 0, 1) \
+ x(persistent_reserved, 1, 1) \
+ x(replicas, 2, 1) \
+ x(dev_data_type, 3, 3) \
+ x(compression, 4, 3) \
+ x(snapshot, 5, 1) \
+ x(btree, 6, 1) \
+ x(rebalance_work, 7, 1) \
+ x(inum, 8, 3)
enum disk_accounting_type {
-#define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr,
+#define x(f, nr, ...) BCH_DISK_ACCOUNTING_##f = nr,
BCH_DISK_ACCOUNTING_TYPES()
#undef x
BCH_DISK_ACCOUNTING_TYPE_NR,
};
+/*
+ * No subtypes - number of inodes in the entire filesystem
+ *
+ * XXX: perhaps we could add a per-subvolume counter?
+ */
struct bch_acct_nr_inodes {
};
+/*
+ * Tracks KEY_TYPE_reservation sectors, broken out by number of replicas for the
+ * reservation:
+ */
struct bch_acct_persistent_reserved {
__u8 nr_replicas;
};
+/*
+ * device, data type counter fields:
+ * [
+ * nr_buckets
+ * live sectors (in buckets of that data type)
+ * sectors of internal fragmentation
+ * ]
+ *
+ * XXX: live sectors should've been done differently, you can have multiple data
+ * types in the same bucket (user, stripe, cached) and this collapses them to
+ * the bucket data type, and makes the internal fragmentation counter redundant
+ */
struct bch_acct_dev_data_type {
__u8 dev;
__u8 data_type;
};
+/*
+ * Compression type fields:
+ * [
+ * number of extents
+ * uncompressed size
+ * compressed size
+ * ]
+ *
+ * Compression ratio, average extent size (fragmentation).
+ */
struct bch_acct_compression {
__u8 type;
};
+/*
+ * On disk usage by snapshot id; counts same values as replicas counter, but
+ * aggregated differently
+ */
struct bch_acct_snapshot {
__u32 id;
} __packed;
@@ -137,10 +178,27 @@ struct bch_acct_btree {
__u32 id;
} __packed;
+/*
+ * inum counter fields:
+ * [
+ * number of extents
+ * sum of extent sizes - bkey size
+ * this field is similar to inode.bi_sectors, except here extents in
+ * different snapshots but the same inode number are all collapsed to the
+ * same counter
+ * sum of on disk size - same values tracked by replicas counters
+ * ]
+ *
+ * This tracks on disk fragmentation.
+ */
struct bch_acct_inum {
__u64 inum;
} __packed;
+/*
+ * Simple counter of the amount of data (on disk sectors) rebalance needs to
+ * move, extents counted here are also in the rebalance_work btree.
+ */
struct bch_acct_rebalance_work {
};
@@ -149,7 +207,7 @@ struct disk_accounting_pos {
struct {
__u8 type;
union {
- struct bch_acct_nr_inodes nr_inodes;
+ struct bch_acct_nr_inodes nr_inodes;
struct bch_acct_persistent_reserved persistent_reserved;
struct bch_replicas_entry_v1 replicas;
struct bch_acct_dev_data_type dev_data_type;
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index f2b9225fe0bc..6faeda7ad03d 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -320,7 +320,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
if (flags & BTREE_TRIGGER_gc) {
struct bucket *g = gc_bucket(ca, bucket.offset);
- if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
+ if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n%s",
ptr->dev,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
ret = -BCH_ERR_mark_stripe;
@@ -453,9 +453,9 @@ int bch2_trigger_stripe(struct btree_trans *trans,
if (new_s) {
s64 sectors = (u64) le16_to_cpu(new_s->sectors) * new_s->nr_redundant;
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_replicas,
- };
+ struct disk_accounting_pos acc;
+ memset(&acc, 0, sizeof(acc));
+ acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_bkey_to_replicas(&acc.replicas, new);
int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
if (ret)
@@ -468,9 +468,9 @@ int bch2_trigger_stripe(struct btree_trans *trans,
if (old_s) {
s64 sectors = -((s64) le16_to_cpu(old_s->sectors)) * old_s->nr_redundant;
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_replicas,
- };
+ struct disk_accounting_pos acc;
+ memset(&acc, 0, sizeof(acc));
+ acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_bkey_to_replicas(&acc.replicas, old);
int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
if (ret)
@@ -2110,14 +2110,14 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_
if (ret)
return ret;
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_replicas,
- };
+ struct disk_accounting_pos acc;
s64 sectors = 0;
for (unsigned i = 0; i < s->v.nr_blocks; i++)
sectors -= stripe_blockcount_get(&s->v, i);
+ memset(&acc, 0, sizeof(acc));
+ acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
acc.replicas.data_type = BCH_DATA_user;
ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
@@ -2131,6 +2131,8 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_
sectors = -sectors;
+ memset(&acc, 0, sizeof(acc));
+ acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
acc.replicas.data_type = BCH_DATA_user;
ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 101806d7ebe1..c8696f01eb14 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -5,6 +5,8 @@
#define BCH_ERRCODES() \
x(ERANGE, ERANGE_option_too_small) \
x(ERANGE, ERANGE_option_too_big) \
+ x(EINVAL, injected) \
+ x(BCH_ERR_injected, injected_fs_start) \
x(EINVAL, mount_option) \
x(BCH_ERR_mount_option, option_name) \
x(BCH_ERR_mount_option, option_value) \
@@ -308,6 +310,7 @@
x(BCH_ERR_data_write, data_write_misaligned) \
x(BCH_ERR_decompress, data_read) \
x(BCH_ERR_data_read, no_device_to_read_from) \
+ x(BCH_ERR_data_read, no_devices_valid) \
x(BCH_ERR_data_read, data_read_io_err) \
x(BCH_ERR_data_read, data_read_csum_err) \
x(BCH_ERR_data_read, data_read_retry) \
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index 207f35d3cce2..d4dfd13a8076 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -11,7 +11,16 @@
#define FSCK_ERR_RATELIMIT_NR 10
-bool bch2_inconsistent_error(struct bch_fs *c)
+void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out)
+{
+ printbuf_indent_add_nextline(out, 2);
+
+#ifdef BCACHEFS_LOG_PREFIX
+ prt_printf(out, bch2_log_msg(c, ""));
+#endif
+}
+
+bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out)
{
set_bit(BCH_FS_error, &c->flags);
@@ -21,10 +30,11 @@ bool bch2_inconsistent_error(struct bch_fs *c)
case BCH_ON_ERROR_fix_safe:
case BCH_ON_ERROR_ro:
if (bch2_fs_emergency_read_only(c))
- bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
- journal_cur_seq(&c->journal));
+ prt_printf(out, "inconsistency detected - emergency read only at journal seq %llu\n",
+ journal_cur_seq(&c->journal));
return true;
case BCH_ON_ERROR_panic:
+ bch2_print_string_as_lines(KERN_ERR, out->buf);
panic(bch2_fmt(c, "panic after error"));
return true;
default:
@@ -32,11 +42,63 @@ bool bch2_inconsistent_error(struct bch_fs *c)
}
}
-int bch2_topology_error(struct bch_fs *c)
+bool bch2_inconsistent_error(struct bch_fs *c)
+{
+ struct printbuf buf = PRINTBUF;
+ printbuf_indent_add_nextline(&buf, 2);
+
+ bool ret = __bch2_inconsistent_error(c, &buf);
+ if (ret)
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+__printf(3, 0)
+static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *trans,
+ const char *fmt, va_list args)
+{
+ struct printbuf buf = PRINTBUF;
+
+ bch2_log_msg_start(c, &buf);
+
+ prt_vprintf(&buf, fmt, args);
+ prt_newline(&buf);
+
+ if (trans)
+ bch2_trans_updates_to_text(&buf, trans);
+ bool ret = __bch2_inconsistent_error(c, &buf);
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+
+ printbuf_exit(&buf);
+ return ret;
+}
+
+bool bch2_fs_inconsistent(struct bch_fs *c, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ bool ret = bch2_fs_trans_inconsistent(c, NULL, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+bool bch2_trans_inconsistent(struct btree_trans *trans, const char *fmt, ...)
{
+ va_list args;
+ va_start(args, fmt);
+ bool ret = bch2_fs_trans_inconsistent(trans->c, trans, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+int __bch2_topology_error(struct bch_fs *c, struct printbuf *out)
+{
+ prt_printf(out, "btree topology error: ");
+
set_bit(BCH_FS_topology_error, &c->flags);
if (!test_bit(BCH_FS_recovery_running, &c->flags)) {
- bch2_inconsistent_error(c);
+ __bch2_inconsistent_error(c, out);
return -BCH_ERR_btree_need_topology_repair;
} else {
return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?:
@@ -44,6 +106,24 @@ int bch2_topology_error(struct bch_fs *c)
}
}
+int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...)
+{
+ struct printbuf buf = PRINTBUF;
+
+ bch2_log_msg_start(c, &buf);
+
+ va_list args;
+ va_start(args, fmt);
+ prt_vprintf(&buf, fmt, args);
+ va_end(args);
+
+ int ret = __bch2_topology_error(c, &buf);
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+
+ printbuf_exit(&buf);
+ return ret;
+}
+
void bch2_fatal_error(struct bch_fs *c)
{
if (bch2_fs_emergency_read_only(c))
@@ -184,7 +264,8 @@ static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans)
#endif
-static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
+static struct fsck_err_state *fsck_err_get(struct bch_fs *c,
+ enum bch_sb_error_id id)
{
struct fsck_err_state *s;
@@ -192,7 +273,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
return NULL;
list_for_each_entry(s, &c->fsck_error_msgs, list)
- if (s->fmt == fmt) {
+ if (s->id == id) {
/*
* move it to the head of the list: repeated fsck errors
* are common
@@ -210,7 +291,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
}
INIT_LIST_HEAD(&s->list);
- s->fmt = fmt;
+ s->id = id;
list_add(&s->list, &c->fsck_error_msgs);
return s;
}
@@ -260,15 +341,59 @@ static int do_fsck_ask_yn(struct bch_fs *c,
return ask;
}
+static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c,
+ enum bch_sb_error_id id, const char *msg,
+ bool *repeat, bool *print, bool *suppress)
+{
+ bch2_sb_error_count(c, id);
+
+ struct fsck_err_state *s = fsck_err_get(c, id);
+ if (s) {
+ /*
+ * We may be called multiple times for the same error on
+ * transaction restart - this memoizes instead of asking the user
+ * multiple times for the same error:
+ */
+ if (s->last_msg && !strcmp(msg, s->last_msg)) {
+ *repeat = true;
+ *print = false;
+ return s;
+ }
+
+ kfree(s->last_msg);
+ s->last_msg = kstrdup(msg, GFP_KERNEL);
+
+ if (c->opts.ratelimit_errors &&
+ s->nr >= FSCK_ERR_RATELIMIT_NR) {
+ if (s->nr == FSCK_ERR_RATELIMIT_NR)
+ *suppress = true;
+ else
+ *print = false;
+ }
+
+ s->nr++;
+ }
+ return s;
+}
+
+void __bch2_count_fsck_err(struct bch_fs *c,
+ enum bch_sb_error_id id, const char *msg,
+ bool *repeat, bool *print, bool *suppress)
+{
+ bch2_sb_error_count(c, id);
+
+ mutex_lock(&c->fsck_error_msgs_lock);
+ count_fsck_err_locked(c, id, msg, repeat, print, suppress);
+ mutex_unlock(&c->fsck_error_msgs_lock);
+}
+
int __bch2_fsck_err(struct bch_fs *c,
struct btree_trans *trans,
enum bch_fsck_flags flags,
enum bch_sb_error_id err,
const char *fmt, ...)
{
- struct fsck_err_state *s = NULL;
va_list args;
- bool print = true, suppressing = false, inconsistent = false, exiting = false;
struct printbuf buf = PRINTBUF, *out = &buf;
int ret = -BCH_ERR_fsck_ignore;
const char *action_orig = "fix?", *action = action_orig;
@@ -303,7 +428,12 @@ int __bch2_fsck_err(struct bch_fs *c,
? -BCH_ERR_fsck_fix
: -BCH_ERR_fsck_ignore;
- bch2_sb_error_count(c, err);
+ printbuf_indent_add_nextline(out, 2);
+
+#ifdef BCACHEFS_LOG_PREFIX
+ if (strncmp(fmt, "bcachefs", 8))
+ prt_printf(out, bch2_log_msg(c, ""));
+#endif
va_start(args, fmt);
prt_vprintf(out, fmt, args);
@@ -323,42 +453,15 @@ int __bch2_fsck_err(struct bch_fs *c,
}
mutex_lock(&c->fsck_error_msgs_lock);
- s = fsck_err_get(c, fmt);
- if (s) {
- /*
- * We may be called multiple times for the same error on
- * transaction restart - this memoizes instead of asking the user
- * multiple times for the same error:
- */
- if (s->last_msg && !strcmp(buf.buf, s->last_msg)) {
- ret = s->ret;
- goto err_unlock;
- }
-
- kfree(s->last_msg);
- s->last_msg = kstrdup(buf.buf, GFP_KERNEL);
- if (!s->last_msg) {
- ret = -ENOMEM;
- goto err_unlock;
- }
-
- if (c->opts.ratelimit_errors &&
- !(flags & FSCK_NO_RATELIMIT) &&
- s->nr >= FSCK_ERR_RATELIMIT_NR) {
- if (s->nr == FSCK_ERR_RATELIMIT_NR)
- suppressing = true;
- else
- print = false;
- }
-
- s->nr++;
+ bool repeat = false, print = true, suppress = false;
+ bool inconsistent = false, exiting = false;
+ struct fsck_err_state *s =
+ count_fsck_err_locked(c, err, buf.buf, &repeat, &print, &suppress);
+ if (repeat) {
+ ret = s->ret;
+ goto err_unlock;
}
-#ifdef BCACHEFS_LOG_PREFIX
- if (!strncmp(fmt, "bcachefs:", 9))
- prt_printf(out, bch2_log_msg(c, ""));
-#endif
-
if ((flags & FSCK_AUTOFIX) &&
(c->opts.errors == BCH_ON_ERROR_continue ||
c->opts.errors == BCH_ON_ERROR_fix_safe)) {
@@ -377,6 +480,7 @@ int __bch2_fsck_err(struct bch_fs *c,
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
prt_str(out, ", shutting down");
inconsistent = true;
+ print = true;
ret = -BCH_ERR_fsck_errors_not_fixed;
} else if (flags & FSCK_CAN_FIX) {
prt_str(out, ", ");
@@ -435,24 +539,30 @@ int __bch2_fsck_err(struct bch_fs *c,
print = true;
}
print:
+ prt_newline(out);
+
+ if (inconsistent)
+ __bch2_inconsistent_error(c, out);
+ else if (exiting)
+ prt_printf(out, "Unable to continue, halting\n");
+ else if (suppress)
+ prt_printf(out, "Ratelimiting new instances of previous error\n");
+
if (print) {
+ /* possibly strip an empty line, from printbuf_indent_add */
+ while (out->pos && out->buf[out->pos - 1] == ' ')
+ --out->pos;
+ printbuf_nul_terminate(out);
+
if (bch2_fs_stdio_redirect(c))
- bch2_print(c, "%s\n", out->buf);
+ bch2_print(c, "%s", out->buf);
else
bch2_print_string_as_lines(KERN_ERR, out->buf);
}
- if (exiting)
- bch_err(c, "Unable to continue, halting");
- else if (suppressing)
- bch_err(c, "Ratelimiting new instances of previous error");
-
if (s)
s->ret = ret;
- if (inconsistent)
- bch2_inconsistent_error(c);
-
/*
* We don't yet track whether the filesystem currently has errors, for
* log_fsck_err()s: that would require us to track for every error type
@@ -514,16 +624,14 @@ int __bch2_bkey_fsck_err(struct bch_fs *c,
prt_printf(&buf, " level=%u: ", from.level);
bch2_bkey_val_to_text(&buf, c, k);
- prt_str(&buf, "\n ");
+ prt_newline(&buf);
va_list args;
va_start(args, fmt);
prt_vprintf(&buf, fmt, args);
va_end(args);
- prt_str(&buf, ": delete?");
-
- int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf);
+ int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s, delete?", buf.buf);
printbuf_exit(&buf);
return ret;
}
@@ -536,7 +644,7 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) {
if (s->ratelimited && s->last_msg)
- bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg);
+ bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg);
list_del(&s->list);
kfree(s->last_msg);
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 7d3f0e2a5fd6..d0d024dc714b 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -18,6 +18,8 @@ struct work_struct;
/* Error messages: */
+void bch2_log_msg_start(struct bch_fs *, struct printbuf *);
+
/*
* Inconsistency errors: The on disk data is inconsistent. If these occur during
* initial recovery, they don't indicate a bug in the running code - we walk all
@@ -29,21 +31,10 @@ struct work_struct;
* BCH_ON_ERROR_CONTINUE mode
*/
+bool __bch2_inconsistent_error(struct bch_fs *, struct printbuf *);
bool bch2_inconsistent_error(struct bch_fs *);
-
-int bch2_topology_error(struct bch_fs *);
-
-#define bch2_fs_topology_error(c, ...) \
-({ \
- bch_err(c, "btree topology error: " __VA_ARGS__); \
- bch2_topology_error(c); \
-})
-
-#define bch2_fs_inconsistent(c, ...) \
-({ \
- bch_err(c, __VA_ARGS__); \
- bch2_inconsistent_error(c); \
-})
+__printf(2, 3)
+bool bch2_fs_inconsistent(struct bch_fs *, const char *, ...);
#define bch2_fs_inconsistent_on(cond, ...) \
({ \
@@ -53,26 +44,21 @@ int bch2_topology_error(struct bch_fs *);
_ret; \
})
-/*
- * When a transaction update discovers or is causing a fs inconsistency, it's
- * helpful to also dump the pending updates:
- */
-#define bch2_trans_inconsistent(trans, ...) \
-({ \
- bch_err(trans->c, __VA_ARGS__); \
- bch2_dump_trans_updates(trans); \
- bch2_inconsistent_error(trans->c); \
-})
+__printf(2, 3)
+bool bch2_trans_inconsistent(struct btree_trans *, const char *, ...);
-#define bch2_trans_inconsistent_on(cond, trans, ...) \
+#define bch2_trans_inconsistent_on(cond, ...) \
({ \
bool _ret = unlikely(!!(cond)); \
- \
if (_ret) \
- bch2_trans_inconsistent(trans, __VA_ARGS__); \
+ bch2_trans_inconsistent(__VA_ARGS__); \
_ret; \
})
+int __bch2_topology_error(struct bch_fs *, struct printbuf *);
+__printf(2, 3)
+int bch2_fs_topology_error(struct bch_fs *, const char *, ...);
+
/*
* Fsck errors: inconsistency errors we detect at mount time, and should ideally
* be able to repair:
@@ -80,7 +66,7 @@ int bch2_topology_error(struct bch_fs *);
struct fsck_err_state {
struct list_head list;
- const char *fmt;
+ enum bch_sb_error_id id;
u64 nr;
bool ratelimited;
int ret;
@@ -90,6 +76,12 @@ struct fsck_err_state {
#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)
+void __bch2_count_fsck_err(struct bch_fs *,
+ enum bch_sb_error_id, const char *,
+ bool *, bool *, bool *);
+#define bch2_count_fsck_err(_c, _err, ...) \
+ __bch2_count_fsck_err(_c, BCH_FSCK_ERR_##_err, __VA_ARGS__)
+
__printf(5, 6) __cold
int __bch2_fsck_err(struct bch_fs *, struct btree_trans *,
enum bch_fsck_flags,
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index ae1a1d917805..ae7c7a177e10 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -227,8 +227,11 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
if (have_io_errors)
return -BCH_ERR_data_read_io_err;
- WARN_ONCE(1, "unhandled error case in %s\n", __func__);
- return -EINVAL;
+ /*
+ * If we get here, we have pointers (bkey_ptrs_validate() ensures that),
+ * but they don't point to valid devices:
+ */
+ return -BCH_ERR_no_devices_valid;
}
/* KEY_TYPE_btree_ptr: */
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 5ab1c73c8d4c..a03e2c780cba 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -225,11 +225,11 @@ static void bchfs_read(struct btree_trans *trans,
bch2_read_extent(trans, rbio, iter.pos,
data_btree, k, offset_into_extent, flags);
+ swap(rbio->bio.bi_iter.bi_size, bytes);
if (flags & BCH_READ_last_fragment)
break;
- swap(rbio->bio.bi_iter.bi_size, bytes);
bio_advance(&rbio->bio, bytes);
err:
if (ret &&
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 717e7b94c66f..c80ed3a54e70 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -999,17 +999,28 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
POS(inode->v.i_ino, offset >> 9),
POS(inode->v.i_ino, U64_MAX),
inum.subvol, BTREE_ITER_slots, k, ({
- if (k.k->p.inode != inode->v.i_ino) {
- next_hole = bch2_seek_pagecache_hole(&inode->v,
- offset, MAX_LFS_FILESIZE, 0, false);
- break;
- } else if (!bkey_extent_is_data(k.k)) {
- next_hole = bch2_seek_pagecache_hole(&inode->v,
- max(offset, bkey_start_offset(k.k) << 9),
- k.k->p.offset << 9, 0, false);
-
- if (next_hole < k.k->p.offset << 9)
+ if (k.k->p.inode != inode->v.i_ino ||
+ !bkey_extent_is_data(k.k)) {
+ loff_t start_offset = k.k->p.inode == inode->v.i_ino
+ ? max(offset, bkey_start_offset(k.k) << 9)
+ : offset;
+ loff_t end_offset = k.k->p.inode == inode->v.i_ino
+ ? MAX_LFS_FILESIZE
+ : k.k->p.offset << 9;
+
+ /*
+ * Found a hole in the btree, now make sure it's
+ * a hole in the pagecache. We might have to
+ * keep searching if this hole is entirely dirty
+ * in the page cache:
+ */
+ bch2_trans_unlock(trans);
+ loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v,
+ start_offset, end_offset, 0, false);
+ if (pagecache_hole < end_offset) {
+ next_hole = pagecache_hole;
break;
+ }
} else {
offset = max(offset, bkey_start_offset(k.k) << 9);
}
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index f45054cee746..c1553e44e049 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -537,10 +537,12 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
ret = -EXDEV;
goto err;
}
- ret = __bch2_unlink(dir, victim, true);
+
+ ret = inode_permission(file_mnt_idmap(filp), d_inode(victim), MAY_WRITE) ?:
+ __bch2_unlink(dir, victim, true);
if (!ret) {
fsnotify_rmdir(dir, victim);
- d_delete(victim);
+ d_invalidate(victim);
}
err:
inode_unlock(dir);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index c88c149d5de5..fc834bdf1f52 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -673,7 +673,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
* back to this dirent
*/
bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT),
- c, "dirent to missing inode:\n %s",
+ c, "dirent to missing inode:\n%s",
(bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf));
if (ret)
goto err;
@@ -2179,7 +2179,7 @@ static int bch2_fs_get_tree(struct fs_context *fc)
/* Some options can't be parsed until after the fs is started: */
opts = bch2_opts_empty();
- ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf);
+ ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf, false);
if (ret)
goto err_stop_fs;
@@ -2290,7 +2290,8 @@ err_stop_fs:
goto err;
err_put_super:
- __bch2_fs_stop(c);
+ if (!sb->s_root)
+ __bch2_fs_stop(c);
deactivate_locked_super(sb);
goto err;
}
@@ -2333,6 +2334,8 @@ static int bch2_fs_parse_param(struct fs_context *fc,
int ret = bch2_parse_one_mount_opt(c, &opts->opts,
&opts->parse_later, param->key,
param->string);
+ if (ret)
+ pr_err("Error parsing option %s: %s", param->key, bch2_err_str(ret));
return bch2_err_class(ret);
}
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 091057023fc5..52320295dcf6 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -1421,14 +1421,14 @@ static int check_key_has_inode(struct btree_trans *trans,
if (fsck_err_on(!i,
trans, key_in_missing_inode,
- "key in missing inode:\n %s",
+ "key in missing inode:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
goto delete;
if (fsck_err_on(i && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode),
trans, key_in_wrong_inode_type,
- "key for wrong inode mode %o:\n %s",
+ "key for wrong inode mode %o:\n%s",
i->inode.bi_mode,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
@@ -1571,13 +1571,13 @@ static int overlapping_extents_found(struct btree_trans *trans,
if (ret)
goto err;
- prt_str(&buf, "\n ");
+ prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, k1);
if (!bpos_eq(pos1, k1.k->p)) {
- prt_str(&buf, "\n wanted\n ");
+ prt_str(&buf, "\nwanted\n ");
bch2_bpos_to_text(&buf, pos1);
- prt_str(&buf, "\n ");
+ prt_str(&buf, "\n");
bch2_bkey_to_text(&buf, &pos2);
bch_err(c, "%s: error finding first overlapping extent when repairing, got%s",
@@ -1600,7 +1600,7 @@ static int overlapping_extents_found(struct btree_trans *trans,
break;
}
- prt_str(&buf, "\n ");
+ prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, k2);
if (bpos_gt(k2.k->p, pos2.p) ||
@@ -1611,7 +1611,7 @@ static int overlapping_extents_found(struct btree_trans *trans,
goto err;
}
- prt_printf(&buf, "\n overwriting %s extent",
+ prt_printf(&buf, "\noverwriting %s extent",
pos1.snapshot >= pos2.p.snapshot ? "first" : "second");
if (fsck_err(trans, extent_overlapping,
@@ -1632,6 +1632,8 @@ static int overlapping_extents_found(struct btree_trans *trans,
bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc);
bch2_disk_reservation_put(c, &res);
+ bch_info(c, "repair ret %s", bch2_err_str(ret));
+
if (ret)
goto err;
@@ -1784,7 +1786,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
!bkey_extent_is_reservation(k),
trans, extent_past_end_of_inode,
- "extent type past end of inode %llu:%u, i_size %llu\n %s",
+ "extent type past end of inode %llu:%u, i_size %llu\n%s",
i->inode.bi_inum, i->snapshot, i->inode.bi_size,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
struct btree_iter iter2;
@@ -3021,7 +3023,7 @@ long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
if (arg.opts) {
char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
ret = PTR_ERR_OR_ZERO(optstr) ?:
- bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr);
+ bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr, false);
if (!IS_ERR(optstr))
kfree(optstr);
@@ -3129,7 +3131,7 @@ long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg)
char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
ret = PTR_ERR_OR_ZERO(optstr) ?:
- bch2_parse_mount_opts(c, &thr->opts, NULL, optstr);
+ bch2_parse_mount_opts(c, &thr->opts, NULL, optstr, false);
if (!IS_ERR(optstr))
kfree(optstr);
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index f1503df57dc7..fd01e67b3e84 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -259,6 +259,7 @@ static struct bch_read_bio *__promote_alloc(struct btree_trans *trans,
&orig->opts,
update_opts,
btree_id, k);
+ op->write.type = BCH_DATA_UPDATE_promote;
/*
* possible errors: -BCH_ERR_nocow_lock_blocked,
* -BCH_ERR_ENOSPC_disk_reservation:
@@ -1322,13 +1323,14 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio,
ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
data_btree, k,
offset_into_extent, failed, flags, -1);
+ swap(bvec_iter.bi_size, bytes);
+
if (ret)
goto err;
if (flags & BCH_READ_last_fragment)
break;
- swap(bvec_iter.bi_size, bytes);
bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
err:
if (ret == -BCH_ERR_data_read_retry_csum_err_maybe_userspace)
diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h
index cd21950417f6..c78025d863e0 100644
--- a/fs/bcachefs/io_read.h
+++ b/fs/bcachefs/io_read.h
@@ -137,8 +137,10 @@ static inline void bch2_read_extent(struct btree_trans *trans,
enum btree_id data_btree, struct bkey_s_c k,
unsigned offset_into_extent, unsigned flags)
{
- __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
- data_btree, k, offset_into_extent, NULL, flags, -1);
+ int ret = __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos,
+ data_btree, k, offset_into_extent, NULL, flags, -1);
+ /* __bch2_read_extent only returns errors if BCH_READ_in_retry is set */
+ WARN(ret, "unhandled error from __bch2_read_extent()");
}
int __bch2_read(struct btree_trans *, struct bch_read_bio *, struct bvec_iter,
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index 29671075e3f1..07b55839768e 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -434,12 +434,6 @@ void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, .
printbuf_exit(&buf);
}
-static void bch2_write_csum_err_msg(struct bch_write_op *op)
-{
- bch2_write_op_error(op, op->pos.offset,
- "error verifying existing checksum while rewriting existing data (memory corruption?)");
-}
-
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
enum bch_data_type type,
const struct bkey_i *k,
@@ -839,7 +833,7 @@ static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct
{
struct bch_fs *c = op->c;
struct bio *bio = &op->wbio.bio;
- struct nonce nonce = extent_nonce(op->version, op->crc);
+ struct bch_csum csum;
int ret = 0;
BUG_ON(bio_sectors(bio) != op->crc.compressed_size);
@@ -866,7 +860,8 @@ static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct
*/
if (crc_is_compressed(op->crc)) {
/* Last point we can still verify checksum: */
- struct bch_csum csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, bio);
+ struct nonce nonce = extent_nonce(op->version, op->crc);
+ csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, bio);
if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
goto csum_err;
@@ -905,7 +900,8 @@ static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct
*/
if (bch2_csum_type_is_encryption(op->crc.csum_type) &&
(op->compression_opt || op->crc.csum_type != op->csum_type)) {
- struct bch_csum csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, bio);
+ struct nonce nonce = extent_nonce(op->version, op->crc);
+ csum = bch2_checksum_bio(c, op->crc.csum_type, nonce, bio);
if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
goto csum_err;
@@ -919,7 +915,16 @@ static noinline int bch2_write_prep_encoded_data(struct bch_write_op *op, struct
return 0;
csum_err:
- bch2_write_csum_err_msg(op);
+ bch2_write_op_error(op, op->pos.offset,
+ "error verifying existing checksum while moving existing data (memory corruption?)\n"
+ " expected %0llx:%0llx got %0llx:%0llx type %s",
+ op->crc.csum.hi,
+ op->crc.csum.lo,
+ csum.hi,
+ csum.lo,
+ op->crc.csum_type < BCH_CSUM_NR
+ ? __bch2_csum_types[op->crc.csum_type]
+ : "(unknown)");
return -BCH_ERR_data_write_csum;
}
@@ -935,6 +940,9 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
bool page_alloc_failed = false;
int ret, more = 0;
+ if (op->incompressible)
+ op->compression_opt = 0;
+
BUG_ON(!bio_sectors(src));
ec_buf = bch2_writepoint_ec_buf(c, wp);
@@ -1046,12 +1054,13 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
* data can't be modified (by userspace) while it's in
* flight.
*/
- if (bch2_rechecksum_bio(c, src, version, op->crc,
+ ret = bch2_rechecksum_bio(c, src, version, op->crc,
&crc, &op->crc,
src_len >> 9,
bio_sectors(src) - (src_len >> 9),
- op->csum_type))
- goto csum_err;
+ op->csum_type);
+ if (ret)
+ goto err;
/*
* rchecksum_bio sets compression_type on crc from op->crc,
* this isn't always correct as sometimes we're changing
@@ -1061,12 +1070,12 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
crc.nonce = nonce;
} else {
if ((op->flags & BCH_WRITE_data_encoded) &&
- bch2_rechecksum_bio(c, src, version, op->crc,
+ (ret = bch2_rechecksum_bio(c, src, version, op->crc,
NULL, &op->crc,
src_len >> 9,
bio_sectors(src) - (src_len >> 9),
- op->crc.csum_type))
- goto csum_err;
+ op->crc.csum_type)))
+ goto err;
crc.compressed_size = dst_len >> 9;
crc.uncompressed_size = src_len >> 9;
@@ -1125,9 +1134,6 @@ static int bch2_write_extent(struct bch_write_op *op, struct write_point *wp,
do_write:
*_dst = dst;
return more;
-csum_err:
- bch2_write_csum_err_msg(op);
- ret = -BCH_ERR_data_write_csum;
err:
if (to_wbio(dst)->bounce)
bch2_bio_free_pages_pool(c, dst);
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index bfdaea6569ae..8a36d5536668 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -62,8 +62,7 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6
prt_newline(out);
}
- prt_printf(out, "expires:\t");
- prt_printf(out, "%li jiffies\n", buf->expires - jiffies);
+ prt_printf(out, "expires:\t%li jiffies\n", buf->expires - jiffies);
prt_printf(out, "flags:\t");
if (buf->noflush)
@@ -142,6 +141,8 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
bool stuck = false;
struct printbuf buf = PRINTBUF;
+ buf.atomic++;
+
if (!(error == -BCH_ERR_journal_full ||
error == -BCH_ERR_journal_pin_full) ||
nr_unwritten_journal_entries(j) ||
@@ -167,12 +168,12 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
return stuck;
}
j->err_seq = journal_cur_seq(j);
- spin_unlock(&j->lock);
- bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)",
- bch2_err_str(error));
- bch2_journal_debug_to_text(&buf, j);
- bch_err(c, "%s", buf.buf);
+ __bch2_journal_debug_to_text(&buf, j);
+ spin_unlock(&j->lock);
+ prt_printf(&buf, bch2_fmt(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)"),
+ bch2_err_str(error));
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_reset(&buf);
bch2_journal_pins_to_text(&buf, j);
@@ -728,8 +729,8 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
struct printbuf buf = PRINTBUF;
bch2_journal_debug_to_text(&buf, j);
- bch_err(c, "Journal stuck? Waited for 10 seconds...\n%s",
- buf.buf);
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret));
printbuf_exit(&buf);
closure_wait_event(&j->async_wait,
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 4ed6137f0439..2debc213e47c 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -214,12 +214,12 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
fsck_err_on(same_device,
c, journal_entry_dup_same_device,
- "duplicate journal entry on same device\n %s",
+ "duplicate journal entry on same device\n%s",
buf.buf);
fsck_err_on(not_identical,
c, journal_entry_replicas_data_mismatch,
- "found duplicate but non identical journal entries\n %s",
+ "found duplicate but non identical journal entries\n%s",
buf.buf);
if (entry_ptr.csum_good && !identical)
@@ -308,8 +308,8 @@ static void journal_entry_err_msg(struct printbuf *out,
break; \
case WRITE: \
bch2_sb_error_count(c, BCH_FSCK_ERR_##_err); \
- bch_err(c, "corrupt metadata before write: %s\n", _buf.buf);\
- if (bch2_fs_inconsistent(c)) { \
+ if (bch2_fs_inconsistent(c, \
+ "corrupt metadata before write: %s\n", _buf.buf)) {\
ret = -BCH_ERR_fsck_errors_not_fixed; \
goto fsck_err; \
} \
@@ -764,6 +764,23 @@ static void journal_entry_overwrite_to_text(struct printbuf *out, struct bch_fs
journal_entry_btree_keys_to_text(out, c, entry);
}
+static int journal_entry_log_bkey_validate(struct bch_fs *c,
+ struct jset *jset,
+ struct jset_entry *entry,
+ unsigned version, int big_endian,
+ struct bkey_validate_context from)
+{
+ from.flags = 0;
+ return journal_entry_btree_keys_validate(c, jset, entry,
+ version, big_endian, from);
+}
+
+static void journal_entry_log_bkey_to_text(struct printbuf *out, struct bch_fs *c,
+ struct jset_entry *entry)
+{
+ journal_entry_btree_keys_to_text(out, c, entry);
+}
+
static int journal_entry_write_buffer_keys_validate(struct bch_fs *c,
struct jset *jset,
struct jset_entry *entry,
@@ -1371,8 +1388,8 @@ int bch2_journal_read(struct bch_fs *c,
missing_end = seq - 1;
fsck_err(c, journal_entries_missing,
"journal entries %llu-%llu missing! (replaying %llu-%llu)\n"
- " prev at %s\n"
- " next at %s, continue?",
+ "prev at %s\n"
+ "next at %s, continue?",
missing_start, missing_end,
*last_seq, *blacklist_seq - 1,
buf1.buf, buf2.buf);
@@ -1426,7 +1443,7 @@ int bch2_journal_read(struct bch_fs *c,
!bch2_replicas_marked(c, &replicas.e) &&
(le64_to_cpu(i->j.seq) == *last_seq ||
fsck_err(c, journal_entry_replicas_not_marked,
- "superblock not marked as containing replicas for journal entry %llu\n %s",
+ "superblock not marked as containing replicas for journal entry %llu\n%s",
le64_to_cpu(i->j.seq), buf.buf))) {
ret = bch2_mark_replicas(c, &replicas.e);
if (ret)
@@ -1623,7 +1640,8 @@ static CLOSURE_CALLBACK(journal_write_done)
: j->noflush_write_time, j->write_start_time);
if (!w->devs_written.nr) {
- bch_err(c, "unable to write journal to sufficient devices");
+ if (!bch2_journal_error(j))
+ bch_err(c, "unable to write journal to sufficient devices");
err = -BCH_ERR_journal_write_err;
} else {
bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal,
@@ -2081,12 +2099,12 @@ CLOSURE_CALLBACK(bch2_journal_write)
struct printbuf buf = PRINTBUF;
buf.atomic++;
+ __bch2_journal_debug_to_text(&buf, j);
+ spin_unlock(&j->lock);
prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
le64_to_cpu(w->data->seq),
vstruct_sectors(w->data, c->block_bits),
bch2_err_str(ret));
- __bch2_journal_debug_to_text(&buf, j);
- spin_unlock(&j->lock);
bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_exit(&buf);
}
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index a299d9ec8ee4..2f63fc6d456f 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -101,8 +101,7 @@ int bch2_lru_check_set(struct btree_trans *trans,
goto err;
if (fsck_err(trans, alloc_key_to_missing_lru_entry,
- "missing %s lru entry\n"
- " %s",
+ "missing %s lru entry\n%s",
bch2_lru_types[lru_type(lru_k)],
(bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) {
ret = bch2_lru_set(trans, lru_id, dev_bucket, time);
@@ -190,8 +189,8 @@ static int bch2_check_lru_key(struct btree_trans *trans,
if (fsck_err(trans, lru_entry_bad,
"incorrect lru entry: lru %s time %llu\n"
- " %s\n"
- " for %s",
+ "%s\n"
+ "for %s",
bch2_lru_types[type],
lru_pos_time(lru_k.k->p),
(bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 8fcdc6984f6e..5d41260e10da 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -528,6 +528,37 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
return 0;
}
+/*
+ * Move requires non extents iterators, and there's also no need for it to
+ * signal indirect_extent_missing_error:
+ */
+static struct bkey_s_c bch2_lookup_indirect_extent_for_move(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c_reflink_p p)
+{
+ if (unlikely(REFLINK_P_ERROR(p.v)))
+ return bkey_s_c_null;
+
+ struct bpos reflink_pos = POS(0, REFLINK_P_IDX(p.v));
+
+ bch2_trans_iter_init(trans, iter,
+ BTREE_ID_reflink, reflink_pos,
+ BTREE_ITER_not_extents);
+
+ struct bkey_s_c k = bch2_btree_iter_peek(iter);
+ if (!k.k || bkey_err(k)) {
+ bch2_trans_iter_exit(trans, iter);
+ return k;
+ }
+
+ if (bkey_lt(reflink_pos, bkey_start_pos(k.k))) {
+ bch2_trans_iter_exit(trans, iter);
+ return bkey_s_c_null;
+ }
+
+ return k;
+}
+
static int bch2_move_data_btree(struct moving_context *ctxt,
struct bpos start,
struct bpos end,
@@ -592,17 +623,16 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
k.k->type == KEY_TYPE_reflink_p &&
REFLINK_P_MAY_UPDATE_OPTIONS(bkey_s_c_to_reflink_p(k).v)) {
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
- s64 offset_into_extent = 0;
bch2_trans_iter_exit(trans, &reflink_iter);
- k = bch2_lookup_indirect_extent(trans, &reflink_iter, &offset_into_extent, p, true, 0);
+ k = bch2_lookup_indirect_extent_for_move(trans, &reflink_iter, p);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
- if (bkey_deleted(k.k))
+ if (!k.k)
goto next_nondata;
/*
@@ -611,7 +641,6 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
* pointer - need to fixup iter->k
*/
extent_iter = &reflink_iter;
- offset_into_extent = 0;
}
if (!bkey_extent_is_direct_data(k.k))
diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c
index 93246ad31541..ee7251709fb9 100644
--- a/fs/bcachefs/namei.c
+++ b/fs/bcachefs/namei.c
@@ -700,9 +700,9 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
if (bch2_inode_should_have_single_bp(target) &&
!fsck_err(trans, inode_wrong_backpointer,
- "dirent points to inode that does not point back:\n %s",
+ "dirent points to inode that does not point back:\n%s",
(bch2_bkey_val_to_text(&buf, c, d.s_c),
- prt_printf(&buf, "\n "),
+ prt_newline(&buf),
bch2_inode_unpacked_to_text(&buf, target),
buf.buf)))
goto err;
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index 81fd6b7977d3..af3258814822 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -44,7 +44,7 @@ const char * const __bch2_btree_ids[] = {
NULL
};
-static const char * const __bch2_csum_types[] = {
+const char * const __bch2_csum_types[] = {
BCH_CSUM_TYPES()
NULL
};
@@ -482,14 +482,12 @@ void bch2_opts_to_text(struct printbuf *out,
int bch2_opt_check_may_set(struct bch_fs *c, struct bch_dev *ca, int id, u64 v)
{
- lockdep_assert_held(&c->state_lock);
-
int ret = 0;
switch (id) {
case Opt_state:
if (ca)
- return __bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED);
+ return bch2_dev_set_state(c, ca, v, BCH_FORCE_IF_DEGRADED);
break;
case Opt_compression:
@@ -551,14 +549,15 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts,
goto bad_opt;
ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err);
- if (ret == -BCH_ERR_option_needs_open_fs && parse_later) {
- prt_printf(parse_later, "%s=%s,", name, val);
- if (parse_later->allocation_failure) {
- ret = -ENOMEM;
- goto out;
+ if (ret == -BCH_ERR_option_needs_open_fs) {
+ ret = 0;
+
+ if (parse_later) {
+ prt_printf(parse_later, "%s=%s,", name, val);
+ if (parse_later->allocation_failure)
+ ret = -ENOMEM;
}
- ret = 0;
goto out;
}
@@ -569,28 +568,24 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts,
bch2_opt_set_by_id(opts, id, v);
ret = 0;
- goto out;
-
+out:
+ printbuf_exit(&err);
+ return ret;
bad_opt:
- pr_err("Bad mount option %s", name);
ret = -BCH_ERR_option_name;
goto out;
-
bad_val:
- pr_err("Invalid mount option %s", err.buf);
ret = -BCH_ERR_option_value;
-
-out:
- printbuf_exit(&err);
- return ret;
+ goto out;
}
int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
- struct printbuf *parse_later, char *options)
+ struct printbuf *parse_later, char *options,
+ bool ignore_unknown)
{
char *copied_opts, *copied_opts_start;
char *opt, *name, *val;
- int ret;
+ int ret = 0;
if (!options)
return 0;
@@ -615,14 +610,14 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
val = opt;
ret = bch2_parse_one_mount_opt(c, opts, parse_later, name, val);
- if (ret < 0)
- goto out;
+ if (ret == -BCH_ERR_option_name && ignore_unknown)
+ ret = 0;
+ if (ret) {
+ pr_err("Error parsing option %s: %s", name, bch2_err_str(ret));
+ break;
+ }
}
- ret = 0;
- goto out;
-
-out:
kfree(copied_opts_start);
return ret;
}
diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h
index bb621804d45a..4d06313076ff 100644
--- a/fs/bcachefs/opts.h
+++ b/fs/bcachefs/opts.h
@@ -16,6 +16,7 @@ extern const char * const bch2_version_upgrade_opts[];
extern const char * const bch2_sb_features[];
extern const char * const bch2_sb_compat[];
extern const char * const __bch2_btree_ids[];
+extern const char * const __bch2_csum_types[];
extern const char * const __bch2_csum_opts[];
extern const char * const __bch2_compression_types[];
extern const char * const bch2_compression_opts[];
@@ -635,7 +636,7 @@ int bch2_opts_check_may_set(struct bch_fs *);
int bch2_parse_one_mount_opt(struct bch_fs *, struct bch_opts *,
struct printbuf *, const char *, const char *);
int bch2_parse_mount_opts(struct bch_fs *, struct bch_opts *, struct printbuf *,
- char *);
+ char *, bool);
/* inode opts: */
diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c
index 4cf5a2af1e6f..3302bbc78a09 100644
--- a/fs/bcachefs/printbuf.c
+++ b/fs/bcachefs/printbuf.c
@@ -277,6 +277,25 @@ void bch2_printbuf_indent_add(struct printbuf *buf, unsigned spaces)
}
/**
+ * bch2_printbuf_indent_add_nextline() - add to the current indent level for
+ * subsequent lines
+ *
+ * @buf: printbuf to control
+ * @spaces: number of spaces to add to the current indent level
+ *
+ * Subsequent lines - not the current line - will be indented by @spaces more
+ * spaces.
+ */
+void bch2_printbuf_indent_add_nextline(struct printbuf *buf, unsigned spaces)
+{
+ if (WARN_ON_ONCE(buf->indent + spaces < buf->indent))
+ spaces = 0;
+
+ buf->indent += spaces;
+ buf->has_indent_or_tabstops = true;
+}
+
+/**
* bch2_printbuf_indent_sub() - subtract from the current indent level
*
* @buf: printbuf to control
diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h
index d0dd398baa2b..1ca476adbf6f 100644
--- a/fs/bcachefs/printbuf.h
+++ b/fs/bcachefs/printbuf.h
@@ -112,6 +112,7 @@ void bch2_printbuf_tabstop_pop(struct printbuf *);
int bch2_printbuf_tabstop_push(struct printbuf *, unsigned);
void bch2_printbuf_indent_add(struct printbuf *, unsigned);
+void bch2_printbuf_indent_add_nextline(struct printbuf *, unsigned);
void bch2_printbuf_indent_sub(struct printbuf *, unsigned);
void bch2_prt_newline(struct printbuf *);
diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c
index bafd1c91a802..d09898566abe 100644
--- a/fs/bcachefs/progress.c
+++ b/fs/bcachefs/progress.c
@@ -16,10 +16,8 @@ void bch2_progress_init(struct progress_indicator_state *s,
if (!(btree_id_mask & BIT_ULL(i)))
continue;
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_btree,
- .btree.id = i,
- };
+ struct disk_accounting_pos acc;
+ disk_accounting_key_init(acc, btree, .id = i);
u64 v;
bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 29a569384146..b9bde04b66c0 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -600,12 +600,13 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
struct bch_fs_rebalance *r = &c->rebalance;
/* print pending work */
- struct disk_accounting_pos acc = { .type = BCH_DISK_ACCOUNTING_rebalance_work, };
+ struct disk_accounting_pos acc;
+ disk_accounting_key_init(acc, rebalance_work);
u64 v;
bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
prt_printf(out, "pending work:\t");
- prt_human_readable_u64(out, v);
+ prt_human_readable_u64(out, v << 9);
prt_printf(out, "\n\n");
prt_str(out, bch2_rebalance_state_strs[r->state]);
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index 0b3c951c32da..593ff142530d 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -234,28 +234,22 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
int bch2_run_online_recovery_passes(struct bch_fs *c)
{
- int ret = 0;
-
- down_read(&c->state_lock);
-
for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
struct recovery_pass_fn *p = recovery_pass_fns + i;
if (!(p->when & PASS_ONLINE))
continue;
- ret = bch2_run_recovery_pass(c, i);
+ int ret = bch2_run_recovery_pass(c, i);
if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
i = c->curr_recovery_pass;
continue;
}
if (ret)
- break;
+ return ret;
}
- up_read(&c->state_lock);
-
- return ret;
+ return 0;
}
int bch2_run_recovery_passes(struct bch_fs *c)
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 68172c6eba21..ee23f1f93acc 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -193,10 +193,10 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans,
if (ret)
goto err;
- prt_printf(&buf, "-%llu\n ", (missing_pos.offset + (missing_end - missing_start)) << 9);
+ prt_printf(&buf, "-%llu\n", (missing_pos.offset + (missing_end - missing_start)) << 9);
bch2_bkey_val_to_text(&buf, c, p.s_c);
- prt_printf(&buf, "\n missing reflink btree range %llu-%llu",
+ prt_printf(&buf, "\nmissing reflink btree range %llu-%llu",
missing_start, missing_end);
if (fsck_err(trans, reflink_p_to_missing_reflink_v, "%s", buf.buf)) {
@@ -323,10 +323,10 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans,
__le64 *refcount = bkey_refcount(bkey_i_to_s(new));
if (!*refcount && (flags & BTREE_TRIGGER_overwrite)) {
bch2_bkey_val_to_text(&buf, c, p.s_c);
- prt_printf(&buf, "\n ");
+ prt_newline(&buf);
bch2_bkey_val_to_text(&buf, c, k);
log_fsck_err(trans, reflink_refcount_underflow,
- "indirect extent refcount underflow while marking\n %s",
+ "indirect extent refcount underflow while marking\n%s",
buf.buf);
goto next;
}
@@ -795,8 +795,8 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount),
trans, reflink_v_refcount_wrong,
"reflink key has wrong refcount:\n"
- " %s\n"
- " should be %u",
+ "%s\n"
+ "should be %u",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf),
r->refcount)) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 67455beb8358..5d43e3504386 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -5,8 +5,7 @@
enum bch_fsck_flags {
FSCK_CAN_FIX = 1 << 0,
FSCK_CAN_IGNORE = 1 << 1,
- FSCK_NO_RATELIMIT = 1 << 2,
- FSCK_AUTOFIX = 1 << 3,
+ FSCK_AUTOFIX = 1 << 2,
};
#define BCH_SB_ERRS() \
@@ -311,13 +310,14 @@ enum bch_fsck_flags {
x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \
x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
+ x(accounting_key_nr_counters_wrong, 307, FSCK_AUTOFIX) \
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \
x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \
x(directory_size_mismatch, 303, FSCK_AUTOFIX) \
x(dirent_cf_name_too_big, 304, 0) \
x(dirent_stray_data_after_cf_name, 305, 0) \
- x(MAX, 307, 0)
+ x(MAX, 308, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index e7f197896db1..0c65065b08ec 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -485,7 +485,7 @@ static int check_snapshot_tree(struct btree_trans *trans,
root_id != bch2_snapshot_root(c, root_id) ||
st.k->p.offset != le32_to_cpu(s.tree),
trans, snapshot_tree_to_missing_snapshot,
- "snapshot tree points to missing/incorrect snapshot:\n %s",
+ "snapshot tree points to missing/incorrect snapshot:\n%s",
(bch2_bkey_val_to_text(&buf, c, st.s_c),
prt_newline(&buf),
ret
@@ -505,19 +505,19 @@ static int check_snapshot_tree(struct btree_trans *trans,
if (fsck_err_on(ret,
trans, snapshot_tree_to_missing_subvol,
- "snapshot tree points to missing subvolume:\n %s",
+ "snapshot tree points to missing subvolume:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
fsck_err_on(!bch2_snapshot_is_ancestor(c,
le32_to_cpu(subvol.snapshot),
root_id),
trans, snapshot_tree_to_wrong_subvol,
- "snapshot tree points to subvolume that does not point to snapshot in this tree:\n %s",
+ "snapshot tree points to subvolume that does not point to snapshot in this tree:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf)) ||
fsck_err_on(BCH_SUBVOLUME_SNAP(&subvol),
trans, snapshot_tree_to_snapshot_subvol,
- "snapshot tree points to snapshot subvolume:\n %s",
+ "snapshot tree points to snapshot subvolume:\n%s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, st.s_c), buf.buf))) {
struct bkey_i_snapshot_tree *u;
@@ -756,7 +756,7 @@ static int check_snapshot(struct btree_trans *trans,
} else {
if (fsck_err_on(s.subvol,
trans, snapshot_should_not_have_subvol,
- "snapshot should not point to subvol:\n %s",
+ "snapshot should not point to subvol:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
@@ -774,7 +774,7 @@ static int check_snapshot(struct btree_trans *trans,
if (fsck_err_on(!ret,
trans, snapshot_to_bad_snapshot_tree,
- "snapshot points to missing/incorrect tree:\n %s",
+ "snapshot points to missing/incorrect tree:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
ret = snapshot_tree_ptr_repair(trans, iter, k, &s);
if (ret)
@@ -786,7 +786,7 @@ static int check_snapshot(struct btree_trans *trans,
if (fsck_err_on(le32_to_cpu(s.depth) != real_depth,
trans, snapshot_bad_depth,
- "snapshot with incorrect depth field, should be %u:\n %s",
+ "snapshot with incorrect depth field, should be %u:\n%s",
real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
@@ -803,7 +803,7 @@ static int check_snapshot(struct btree_trans *trans,
if (fsck_err_on(!ret,
trans, snapshot_bad_skiplist,
- "snapshot with bad skiplist field:\n %s",
+ "snapshot with bad skiplist field:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c
index 93e71119e5a4..602afca2f5ef 100644
--- a/fs/bcachefs/str_hash.c
+++ b/fs/bcachefs/str_hash.c
@@ -232,7 +232,7 @@ bad_hash:
goto out;
if (fsck_err(trans, hash_table_key_wrong_offset,
- "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s",
+ "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
bch2_btree_id_str(desc->btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c
index b7b96283c316..cd0d8e5e44e7 100644
--- a/fs/bcachefs/subvolume.c
+++ b/fs/bcachefs/subvolume.c
@@ -561,6 +561,7 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
}
SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
+ n->v.fs_path_parent = 0;
bch2_trans_iter_exit(trans, &iter);
return ret;
}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 99f9a0aaa380..20208f3c5d8b 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -533,9 +533,11 @@ int bch2_fs_read_write(struct bch_fs *c)
int bch2_fs_read_write_early(struct bch_fs *c)
{
- lockdep_assert_held(&c->state_lock);
+ down_write(&c->state_lock);
+ int ret = __bch2_fs_read_write(c, true);
+ up_write(&c->state_lock);
- return __bch2_fs_read_write(c, true);
+ return ret;
}
/* Filesystem startup/shutdown: */
@@ -1019,38 +1021,39 @@ static void print_mount_opts(struct bch_fs *c)
int bch2_fs_start(struct bch_fs *c)
{
time64_t now = ktime_get_real_seconds();
- int ret;
+ int ret = 0;
print_mount_opts(c);
down_write(&c->state_lock);
+ mutex_lock(&c->sb_lock);
BUG_ON(test_bit(BCH_FS_started, &c->flags));
- mutex_lock(&c->sb_lock);
+ if (!bch2_sb_field_get_minsize(&c->disk_sb, ext,
+ sizeof(struct bch_sb_field_ext) / sizeof(u64))) {
+ mutex_unlock(&c->sb_lock);
+ up_write(&c->state_lock);
+ ret = -BCH_ERR_ENOSPC_sb;
+ goto err;
+ }
ret = bch2_sb_members_v2_init(c);
if (ret) {
mutex_unlock(&c->sb_lock);
+ up_write(&c->state_lock);
goto err;
}
for_each_online_member(c, ca)
bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
- struct bch_sb_field_ext *ext =
- bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
mutex_unlock(&c->sb_lock);
- if (!ext) {
- bch_err(c, "insufficient space in superblock for sb_field_ext");
- ret = -BCH_ERR_ENOSPC_sb;
- goto err;
- }
-
for_each_rw_member(c, ca)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
+ up_write(&c->state_lock);
c->recovery_task = current;
ret = BCH_SB_INITIALIZED(c->disk_sb.sb)
@@ -1066,31 +1069,28 @@ int bch2_fs_start(struct bch_fs *c)
goto err;
if (bch2_fs_init_fault("fs_start")) {
- bch_err(c, "fs_start fault injected");
- ret = -EINVAL;
+ ret = -BCH_ERR_injected_fs_start;
goto err;
}
set_bit(BCH_FS_started, &c->flags);
wake_up(&c->ro_ref_wait);
+ down_write(&c->state_lock);
if (c->opts.read_only) {
bch2_fs_read_only(c);
} else {
ret = !test_bit(BCH_FS_rw, &c->flags)
? bch2_fs_read_write(c)
: bch2_fs_read_write_late(c);
- if (ret)
- goto err;
}
+ up_write(&c->state_lock);
- ret = 0;
err:
if (ret)
bch_err_msg(c, ret, "starting filesystem");
else
bch_verbose(c, "done starting filesystem");
- up_write(&c->state_lock);
return ret;
}
@@ -2259,7 +2259,7 @@ BCH_DEBUG_PARAMS()
__maybe_unused
static unsigned bch2_metadata_version = bcachefs_metadata_version_current;
-module_param_named(version, bch2_metadata_version, uint, 0400);
+module_param_named(version, bch2_metadata_version, uint, 0444);
module_exit(bcachefs_exit);
module_init(bcachefs_init);
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 251ba8224c1f..e5f003c29369 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -257,10 +257,8 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
prt_printf(out, "type\tcompressed\runcompressed\raverage extent size\r\n");
for (unsigned i = 1; i < BCH_COMPRESSION_TYPE_NR; i++) {
- struct disk_accounting_pos a = {
- .type = BCH_DISK_ACCOUNTING_compression,
- .compression.type = i,
- };
+ struct disk_accounting_pos a;
+ disk_accounting_key_init(a, compression, .type = i);
struct bpos p = disk_accounting_pos_to_bpos(&a);
u64 v[3];
bch2_accounting_mem_read(c, p, v, ARRAY_SIZE(v));
@@ -631,8 +629,6 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
if (unlikely(!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs)))
return -EROFS;
- down_write(&c->state_lock);
-
char *tmp = kstrdup(buf, GFP_KERNEL);
if (!tmp) {
ret = -ENOMEM;
@@ -675,7 +671,6 @@ static ssize_t sysfs_opt_store(struct bch_fs *c,
ret = size;
err:
- up_write(&c->state_lock);
bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
return ret;
}
diff --git a/fs/bcachefs/time_stats.c b/fs/bcachefs/time_stats.c
index 3fe82757f93a..2c34fe4be912 100644
--- a/fs/bcachefs/time_stats.c
+++ b/fs/bcachefs/time_stats.c
@@ -10,6 +10,9 @@
#include "eytzinger.h"
#include "time_stats.h"
+/* disable automatic switching to percpu mode */
+#define TIME_STATS_NONPCPU ((unsigned long) 1)
+
static const struct time_unit time_units[] = {
{ "ns", 1 },
{ "us", NSEC_PER_USEC },
@@ -123,11 +126,12 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
{
unsigned long flags;
- if (!stats->buffer) {
+ if ((unsigned long) stats->buffer <= TIME_STATS_NONPCPU) {
spin_lock_irqsave(&stats->lock, flags);
time_stats_update_one(stats, start, end);
- if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT) < 32 &&
+ if (!stats->buffer &&
+ mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT) < 32 &&
stats->duration_stats.n > 1024)
stats->buffer =
alloc_percpu_gfp(struct time_stat_buffer,
@@ -157,7 +161,7 @@ void bch2_time_stats_reset(struct bch2_time_stats *stats)
unsigned offset = offsetof(struct bch2_time_stats, min_duration);
memset((void *) stats + offset, 0, sizeof(*stats) - offset);
- if (stats->buffer) {
+ if ((unsigned long) stats->buffer > TIME_STATS_NONPCPU) {
int cpu;
for_each_possible_cpu(cpu)
per_cpu_ptr(stats->buffer, cpu)->nr = 0;
@@ -167,7 +171,9 @@ void bch2_time_stats_reset(struct bch2_time_stats *stats)
void bch2_time_stats_exit(struct bch2_time_stats *stats)
{
- free_percpu(stats->buffer);
+ if ((unsigned long) stats->buffer > TIME_STATS_NONPCPU)
+ free_percpu(stats->buffer);
+ stats->buffer = NULL;
}
void bch2_time_stats_init(struct bch2_time_stats *stats)
@@ -177,3 +183,9 @@ void bch2_time_stats_init(struct bch2_time_stats *stats)
stats->min_freq = U64_MAX;
spin_lock_init(&stats->lock);
}
+
+void bch2_time_stats_init_no_pcpu(struct bch2_time_stats *stats)
+{
+ bch2_time_stats_init(stats);
+ stats->buffer = (struct time_stat_buffer __percpu *) TIME_STATS_NONPCPU;
+}
diff --git a/fs/bcachefs/time_stats.h b/fs/bcachefs/time_stats.h
index dc6493f7bbab..eddb0985bab4 100644
--- a/fs/bcachefs/time_stats.h
+++ b/fs/bcachefs/time_stats.h
@@ -145,6 +145,7 @@ static inline bool track_event_change(struct bch2_time_stats *stats, bool v)
void bch2_time_stats_reset(struct bch2_time_stats *);
void bch2_time_stats_exit(struct bch2_time_stats *);
void bch2_time_stats_init(struct bch2_time_stats *);
+void bch2_time_stats_init_no_pcpu(struct bch2_time_stats *);
static inline void bch2_time_stats_quantiles_exit(struct bch2_time_stats_quantiles *statq)
{
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 553de8d8e3e5..87af551692f4 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -270,7 +270,7 @@ static void __bch2_print_string_as_lines(const char *prefix, const char *lines,
locked = console_trylock();
}
- while (1) {
+ while (*lines) {
p = strchrnul(lines, '\n');
printk("%s%.*s\n", prefix, (int) (p - lines), lines);
if (!*p)
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 7d921fc920a0..1e94f89aabed 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -94,6 +94,7 @@ do { \
#define printbuf_tabstop_push(_buf, _n) bch2_printbuf_tabstop_push(_buf, _n)
#define printbuf_indent_add(_out, _n) bch2_printbuf_indent_add(_out, _n)
+#define printbuf_indent_add_nextline(_out, _n) bch2_printbuf_indent_add_nextline(_out, _n)
#define printbuf_indent_sub(_out, _n) bch2_printbuf_indent_sub(_out, _n)
#define prt_newline(_out) bch2_prt_newline(_out)