summaryrefslogtreecommitdiff
path: root/fs/bcachefs/fsck.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/fsck.c')
-rw-r--r--fs/bcachefs/fsck.c317
1 files changed, 218 insertions, 99 deletions
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 68ed69a255e1..9920f1affc5b 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -327,7 +327,8 @@ static inline bool inode_should_reattach(struct bch_inode_unpacked *inode)
(inode->bi_flags & BCH_INODE_has_child_snapshot))
return false;
- return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked);
+ return !bch2_inode_has_backpointer(inode) &&
+ !(inode->bi_flags & BCH_INODE_unlinked);
}
static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 snapshot)
@@ -372,6 +373,18 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
if (inode->bi_subvol) {
inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;
+ struct btree_iter subvol_iter;
+ struct bkey_i_subvolume *subvol =
+ bch2_bkey_get_mut_typed(trans, &subvol_iter,
+ BTREE_ID_subvolumes, POS(0, inode->bi_subvol),
+ 0, subvolume);
+ ret = PTR_ERR_OR_ZERO(subvol);
+ if (ret)
+ return ret;
+
+ subvol->v.fs_path_parent = BCACHEFS_ROOT_SUBVOL;
+ bch2_trans_iter_exit(trans, &subvol_iter);
+
u64 root_inum;
ret = subvol_lookup(trans, inode->bi_parent_subvol,
&dirent_snapshot, &root_inum);
@@ -387,6 +400,8 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
if (ret)
return ret;
+ bch_verbose(c, "got lostfound inum %llu", lostfound.bi_inum);
+
lostfound.bi_nlink += S_ISDIR(inode->bi_mode);
/* ensure lost+found inode is also present in inode snapshot */
@@ -423,6 +438,16 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
if (ret)
return ret;
+ {
+ CLASS(printbuf, buf)();
+ ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum,
+ inode->bi_snapshot, NULL, &buf);
+ if (ret)
+ return ret;
+
+ bch_info(c, "reattached at %s", buf.buf);
+ }
+
/*
* Fix up inodes in child snapshots: if they should also be reattached
* update the backpointer field, if they should not be we need to emit
@@ -490,13 +515,21 @@ static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
static int remove_backpointer(struct btree_trans *trans,
struct bch_inode_unpacked *inode)
{
- if (!inode->bi_dir)
+ if (!bch2_inode_has_backpointer(inode))
return 0;
+ u32 snapshot = inode->bi_snapshot;
+
+ if (inode->bi_parent_subvol) {
+ int ret = bch2_subvolume_get_snapshot(trans, inode->bi_parent_subvol, &snapshot);
+ if (ret)
+ return ret;
+ }
+
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c_dirent d = dirent_get_by_pos(trans, &iter,
- SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot));
+ SPOS(inode->bi_dir, inode->bi_dir_offset, snapshot));
int ret = bkey_err(d) ?:
dirent_points_to_inode(c, d, inode) ?:
bch2_fsck_remove_dirent(trans, d.k->p);
@@ -695,14 +728,8 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
u32 id, u32 ancestor)
{
- ssize_t i;
-
EBUG_ON(id > ancestor);
- /* @ancestor should be the snapshot most recently added to @seen */
- EBUG_ON(ancestor != seen->pos.snapshot);
- EBUG_ON(ancestor != darray_last(seen->ids));
-
if (id == ancestor)
return true;
@@ -718,11 +745,8 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see
* numerically, since snapshot ID lists are kept sorted, so if we find
* an id that's an ancestor of @id we're done:
*/
-
- for (i = seen->ids.nr - 2;
- i >= 0 && seen->ids.data[i] >= id;
- --i)
- if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i]))
+ darray_for_each_reverse(seen->ids, i)
+ if (*i != ancestor && bch2_snapshot_is_ancestor(c, id, *i))
return false;
return true;
@@ -806,7 +830,7 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w,
if (!n->whiteout) {
return bch2_inode_unpack(inode, &n->inode);
} else {
- n->inode.bi_inum = inode.k->p.inode;
+ n->inode.bi_inum = inode.k->p.offset;
n->inode.bi_snapshot = inode.k->p.snapshot;
return 0;
}
@@ -903,17 +927,15 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str
w->last_pos.inode, k.k->p.snapshot, i->inode.bi_snapshot,
(bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) {
- struct bch_inode_unpacked new = i->inode;
- struct bkey_i whiteout;
-
- new.bi_snapshot = k.k->p.snapshot;
-
if (!i->whiteout) {
+ struct bch_inode_unpacked new = i->inode;
+ new.bi_snapshot = k.k->p.snapshot;
ret = __bch2_fsck_write_inode(trans, &new);
} else {
+ struct bkey_i whiteout;
bkey_init(&whiteout.k);
whiteout.k.type = KEY_TYPE_whiteout;
- whiteout.k.p = SPOS(0, i->inode.bi_inum, i->inode.bi_snapshot);
+ whiteout.k.p = SPOS(0, i->inode.bi_inum, k.k->p.snapshot);
ret = bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
&whiteout,
BTREE_UPDATE_internal_snapshot_node);
@@ -1135,13 +1157,14 @@ static int check_inode(struct btree_trans *trans,
if (ret)
goto err;
- if (u.bi_dir || u.bi_dir_offset) {
+ if (bch2_inode_has_backpointer(&u)) {
ret = check_inode_dirent_inode(trans, &u, &do_update);
if (ret)
goto err;
}
- if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked),
+ if (fsck_err_on(bch2_inode_has_backpointer(&u) &&
+ (u.bi_flags & BCH_INODE_unlinked),
trans, inode_unlinked_but_has_dirent,
"inode unlinked but has dirent\n%s",
(printbuf_reset(&buf),
@@ -1438,6 +1461,7 @@ static int check_key_has_inode(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
+ struct btree_iter iter2 = {};
int ret = PTR_ERR_OR_ZERO(i);
if (ret)
return ret;
@@ -1447,40 +1471,105 @@ static int check_key_has_inode(struct btree_trans *trans,
bool have_inode = i && !i->whiteout;
- if (!have_inode && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
- ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?:
- bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
- if (ret)
- goto err;
+ if (!have_inode && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes)))
+ goto reconstruct;
- inode->last_pos.inode--;
- ret = bch_err_throw(c, transaction_restart_nested);
- goto err;
+ if (have_inode && btree_matches_i_mode(iter->btree_id, i->inode.bi_mode))
+ goto out;
+
+ prt_printf(&buf, ", ");
+
+ bool have_old_inode = false;
+ darray_for_each(inode->inodes, i2)
+ if (!i2->whiteout &&
+ bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i2->inode.bi_snapshot) &&
+ btree_matches_i_mode(iter->btree_id, i2->inode.bi_mode)) {
+ prt_printf(&buf, "but found good inode in older snapshot\n");
+ bch2_inode_unpacked_to_text(&buf, &i2->inode);
+ prt_newline(&buf);
+ have_old_inode = true;
+ break;
+ }
+
+ struct bkey_s_c k2;
+ unsigned nr_keys = 0;
+
+ prt_printf(&buf, "found keys:\n");
+
+ for_each_btree_key_max_norestart(trans, iter2, iter->btree_id,
+ SPOS(k.k->p.inode, 0, k.k->p.snapshot),
+ POS(k.k->p.inode, U64_MAX),
+ 0, k2, ret) {
+ nr_keys++;
+ if (nr_keys <= 10) {
+ bch2_bkey_val_to_text(&buf, c, k2);
+ prt_newline(&buf);
+ }
+ if (nr_keys >= 100)
+ break;
}
- if (fsck_err_on(!have_inode,
- trans, key_in_missing_inode,
- "key in missing inode:\n%s",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
- goto delete;
+ if (ret)
+ goto err;
- if (fsck_err_on(have_inode && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode),
- trans, key_in_wrong_inode_type,
- "key for wrong inode mode %o:\n%s",
- i->inode.bi_mode,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
- goto delete;
+ if (nr_keys > 100)
+ prt_printf(&buf, "found > %u keys for this missing inode\n", nr_keys);
+ else if (nr_keys > 10)
+ prt_printf(&buf, "found %u keys for this missing inode\n", nr_keys);
+
+ if (!have_inode) {
+ if (fsck_err_on(!have_inode,
+ trans, key_in_missing_inode,
+ "key in missing inode%s", buf.buf)) {
+ /*
+ * Maybe a deletion that raced with data move, or something
+ * weird like that? But if we know the inode was deleted, or
+ * it's just a few keys, we can safely delete them.
+ *
+ * If it's many keys, we should probably recreate the inode
+ */
+ if (have_old_inode || nr_keys <= 2)
+ goto delete;
+ else
+ goto reconstruct;
+ }
+ } else {
+ /*
+ * not autofix, this one would be a giant wtf - bit error in the
+ * inode corrupting i_mode?
+ *
+ * may want to try repairing inode instead of deleting
+ */
+ if (fsck_err_on(!btree_matches_i_mode(iter->btree_id, i->inode.bi_mode),
+ trans, key_in_wrong_inode_type,
+ "key for wrong inode mode %o%s",
+ i->inode.bi_mode, buf.buf))
+ goto delete;
+ }
out:
err:
fsck_err:
+ bch2_trans_iter_exit(trans, &iter2);
printbuf_exit(&buf);
bch_err_fn(c, ret);
return ret;
delete:
+ /*
+ * XXX: print out more info
+ * count up extents for this inode, check if we have different inode in
+ * an older snapshot version, perhaps decide if we want to reconstitute
+ */
ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node);
goto out;
+reconstruct:
+ ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+ if (ret)
+ goto err;
+
+ inode->last_pos.inode--;
+ ret = bch_err_throw(c, transaction_restart_nested);
+ goto out;
}
static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w)
@@ -1822,18 +1911,39 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
!key_visible_in_snapshot(c, s, i->inode.bi_snapshot, k.k->p.snapshot))
continue;
- if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
+ u64 last_block = round_up(i->inode.bi_size, block_bytes(c)) >> 9;
+
+ if (fsck_err_on(k.k->p.offset > last_block &&
!bkey_extent_is_reservation(k),
trans, extent_past_end_of_inode,
"extent type past end of inode %llu:%u, i_size %llu\n%s",
i->inode.bi_inum, i->inode.bi_snapshot, i->inode.bi_size,
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- struct btree_iter iter2;
+ struct bkey_i *whiteout = bch2_trans_kmalloc(trans, sizeof(*whiteout));
+ ret = PTR_ERR_OR_ZERO(whiteout);
+ if (ret)
+ goto err;
+
+ bkey_init(&whiteout->k);
+ whiteout->k.p = SPOS(k.k->p.inode,
+ last_block,
+ i->inode.bi_snapshot);
+ bch2_key_resize(&whiteout->k,
+ min(KEY_SIZE_MAX & (~0 << c->block_bits),
+ U64_MAX - whiteout->k.p.offset));
+
- bch2_trans_copy_iter(trans, &iter2, iter);
- bch2_btree_iter_set_snapshot(trans, &iter2, i->inode.bi_snapshot);
+ /*
+ * Need a normal (not BTREE_ITER_all_snapshots)
+ * iterator, if we're deleting in a different
+ * snapshot and need to emit a whiteout
+ */
+ struct btree_iter iter2;
+ bch2_trans_iter_init(trans, &iter2, BTREE_ID_extents,
+ bkey_start_pos(&whiteout->k),
+ BTREE_ITER_intent);
ret = bch2_btree_iter_traverse(trans, &iter2) ?:
- bch2_btree_delete_at(trans, &iter2,
+ bch2_trans_update(trans, &iter2, whiteout,
BTREE_UPDATE_internal_snapshot_node);
bch2_trans_iter_exit(trans, &iter2);
if (ret)
@@ -1949,14 +2059,22 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
continue;
}
- if (fsck_err_on(i->inode.bi_nlink != i->count,
- trans, inode_dir_wrong_nlink,
- "directory %llu:%u with wrong i_nlink: got %u, should be %llu",
- w->last_pos.inode, i->inode.bi_snapshot, i->inode.bi_nlink, i->count)) {
- i->inode.bi_nlink = i->count;
- ret = bch2_fsck_write_inode(trans, &i->inode);
- if (ret)
- break;
+ if (i->inode.bi_nlink != i->count) {
+ CLASS(printbuf, buf)();
+
+ lockrestart_do(trans,
+ bch2_inum_snapshot_to_path(trans, w->last_pos.inode,
+ i->inode.bi_snapshot, NULL, &buf));
+
+ if (fsck_err_on(i->inode.bi_nlink != i->count,
+ trans, inode_dir_wrong_nlink,
+ "directory with wrong i_nlink: got %u, should be %llu\n%s",
+ i->inode.bi_nlink, i->count, buf.buf)) {
+ i->inode.bi_nlink = i->count;
+ ret = bch2_fsck_write_inode(trans, &i->inode);
+ if (ret)
+ break;
+ }
}
}
fsck_err:
@@ -2493,6 +2611,11 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
if (k.k->type != KEY_TYPE_subvolume)
return 0;
+ subvol_inum start = {
+ .subvol = k.k->p.offset,
+ .inum = le64_to_cpu(bkey_s_c_to_subvolume(k).v->inode),
+ };
+
while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) {
ret = darray_push(&subvol_path, k.k->p.offset);
if (ret)
@@ -2511,11 +2634,11 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter,
if (darray_u32_has(&subvol_path, parent)) {
printbuf_reset(&buf);
- prt_printf(&buf, "subvolume loop:\n");
+ prt_printf(&buf, "subvolume loop: ");
- darray_for_each_reverse(subvol_path, i)
- prt_printf(&buf, "%u ", *i);
- prt_printf(&buf, "%u", parent);
+ ret = bch2_inum_to_path(trans, start, &buf);
+ if (ret)
+ goto err;
if (fsck_err(trans, subvol_loop, "%s", buf.buf))
ret = reattach_subvol(trans, s);
@@ -2559,19 +2682,13 @@ int bch2_check_subvolume_structure(struct bch_fs *c)
return ret;
}
-struct pathbuf_entry {
- u64 inum;
- u32 snapshot;
-};
-
-typedef DARRAY(struct pathbuf_entry) pathbuf;
-
-static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p,
+static int bch2_bi_depth_renumber_one(struct btree_trans *trans,
+ u64 inum, u32 snapshot,
u32 new_depth)
{
struct btree_iter iter;
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
- SPOS(0, p->inum, p->snapshot), 0);
+ SPOS(0, inum, snapshot), 0);
struct bch_inode_unpacked inode;
int ret = bkey_err(k) ?:
@@ -2590,14 +2707,15 @@ err:
return ret;
}
-static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth)
+static int bch2_bi_depth_renumber(struct btree_trans *trans, darray_u64 *path,
+ u32 snapshot, u32 new_bi_depth)
{
u32 restart_count = trans->restart_count;
int ret = 0;
darray_for_each_reverse(*path, i) {
ret = nested_lockrestart_do(trans,
- bch2_bi_depth_renumber_one(trans, i, new_bi_depth));
+ bch2_bi_depth_renumber_one(trans, *i, snapshot, new_bi_depth));
bch_err_fn(trans->c, ret);
if (ret)
break;
@@ -2608,37 +2726,36 @@ static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32
return ret ?: trans_was_restarted(trans, restart_count);
}
-static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
-{
- darray_for_each(*p, i)
- if (i->inum == inum &&
- i->snapshot == snapshot)
- return true;
- return false;
-}
-
static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
{
struct bch_fs *c = trans->c;
struct btree_iter inode_iter = {};
- pathbuf path = {};
+ darray_u64 path = {};
struct printbuf buf = PRINTBUF;
u32 snapshot = inode_k.k->p.snapshot;
bool redo_bi_depth = false;
u32 min_bi_depth = U32_MAX;
int ret = 0;
+ struct bpos start = inode_k.k->p;
+
struct bch_inode_unpacked inode;
ret = bch2_inode_unpack(inode_k, &inode);
if (ret)
return ret;
- while (!inode.bi_subvol) {
+ /*
+ * If we're running full fsck, check_dirents() will have already ran,
+ * and we shouldn't see any missing backpointers here - otherwise that's
+ * handled separately, by check_unreachable_inodes
+ */
+ while (!inode.bi_subvol &&
+ bch2_inode_has_backpointer(&inode)) {
struct btree_iter dirent_iter;
struct bkey_s_c_dirent d;
- u32 parent_snapshot = snapshot;
- d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
+ d = dirent_get_by_pos(trans, &dirent_iter,
+ SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot));
ret = bkey_err(d.s_c);
if (ret && !bch2_err_matches(ret, ENOENT))
goto out;
@@ -2656,15 +2773,10 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
bch2_trans_iter_exit(trans, &dirent_iter);
- ret = darray_push(&path, ((struct pathbuf_entry) {
- .inum = inode.bi_inum,
- .snapshot = snapshot,
- }));
+ ret = darray_push(&path, inode.bi_inum);
if (ret)
return ret;
- snapshot = parent_snapshot;
-
bch2_trans_iter_exit(trans, &inode_iter);
inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
SPOS(0, inode.bi_dir, snapshot), 0);
@@ -2686,15 +2798,22 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
break;
inode = parent_inode;
- snapshot = inode_k.k->p.snapshot;
redo_bi_depth = true;
- if (path_is_dup(&path, inode.bi_inum, snapshot)) {
+ if (darray_find(path, inode.bi_inum)) {
printbuf_reset(&buf);
- prt_printf(&buf, "directory structure loop:\n");
- darray_for_each_reverse(path, i)
- prt_printf(&buf, "%llu:%u ", i->inum, i->snapshot);
- prt_printf(&buf, "%llu:%u", inode.bi_inum, snapshot);
+ prt_printf(&buf, "directory structure loop in snapshot %u: ",
+ snapshot);
+
+ ret = bch2_inum_snapshot_to_path(trans, start.offset, start.snapshot, NULL, &buf);
+ if (ret)
+ goto out;
+
+ if (c->opts.verbose) {
+ prt_newline(&buf);
+ darray_for_each(path, i)
+ prt_printf(&buf, "%llu ", *i);
+ }
if (fsck_err(trans, dir_loop, "%s", buf.buf)) {
ret = remove_backpointer(trans, &inode);
@@ -2714,7 +2833,7 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
min_bi_depth = 0;
if (redo_bi_depth)
- ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth);
+ ret = bch2_bi_depth_renumber(trans, &path, snapshot, min_bi_depth);
out:
fsck_err:
bch2_trans_iter_exit(trans, &inode_iter);
@@ -2731,7 +2850,7 @@ fsck_err:
int bch2_check_directory_structure(struct bch_fs *c)
{
int ret = bch2_trans_run(c,
- for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
+ for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
BTREE_ITER_intent|
BTREE_ITER_prefetch|
BTREE_ITER_all_snapshots, k,