summaryrefslogtreecommitdiff
path: root/fs/bcachefs/str_hash.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/str_hash.c')
-rw-r--r--fs/bcachefs/str_hash.c368
1 files changed, 236 insertions, 132 deletions
diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c
index a90bf7b8a2b4..71b735a85026 100644
--- a/fs/bcachefs/str_hash.c
+++ b/fs/bcachefs/str_hash.c
@@ -31,14 +31,15 @@ static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dir
}
}
-static noinline int fsck_rename_dirent(struct btree_trans *trans,
- struct snapshots_seen *s,
- const struct bch_hash_desc desc,
- struct bch_hash_info *hash_info,
- struct bkey_s_c_dirent old)
+static int bch2_fsck_rename_dirent(struct btree_trans *trans,
+ struct snapshots_seen *s,
+ const struct bch_hash_desc desc,
+ struct bch_hash_info *hash_info,
+ struct bkey_s_c_dirent old,
+ bool *updated_before_k_pos)
{
struct qstr old_name = bch2_dirent_get_name(old);
- struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32);
+ struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64));
int ret = PTR_ERR_OR_ZERO(new);
if (ret)
return ret;
@@ -47,28 +48,39 @@ static noinline int fsck_rename_dirent(struct btree_trans *trans,
dirent_copy_target(new, old);
new->k.p = old.k->p;
+ char *renamed_buf = bch2_trans_kmalloc(trans, old_name.len + 20);
+ ret = PTR_ERR_OR_ZERO(renamed_buf);
+ if (ret)
+ return ret;
+
for (unsigned i = 0; i < 1000; i++) {
- unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u",
- old_name.len, old_name.name, i);
- unsigned u64s = BKEY_U64s + dirent_val_u64s(len, 0);
+ new->k.u64s = BKEY_U64s_MAX;
- if (u64s > U8_MAX)
- return -EINVAL;
+ struct qstr renamed_name = (struct qstr) QSTR_INIT(renamed_buf,
+ sprintf(renamed_buf, "%.*s.fsck_renamed-%u",
+ old_name.len, old_name.name, i));
- new->k.u64s = u64s;
+ ret = bch2_dirent_init_name(new, hash_info, &renamed_name, NULL);
+ if (ret)
+ return ret;
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
(subvol_inum) { 0, old.k->p.inode },
old.k->p.snapshot, &new->k_i,
- BTREE_UPDATE_internal_snapshot_node);
- if (!bch2_err_matches(ret, EEXIST))
+ BTREE_UPDATE_internal_snapshot_node|
+ STR_HASH_must_create);
+ if (ret && !bch2_err_matches(ret, EEXIST))
+ break;
+ if (!ret) {
+ if (bpos_lt(new->k.p, old.k->p))
+ *updated_before_k_pos = true;
break;
+ }
}
- if (ret)
- return ret;
-
- return bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
+ ret = ret ?: bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
+ bch_err_fn(trans->c, ret);
+ return ret;
}
static noinline int hash_pick_winner(struct btree_trans *trans,
@@ -101,17 +113,25 @@ static noinline int hash_pick_winner(struct btree_trans *trans,
}
}
-static int repair_inode_hash_info(struct btree_trans *trans,
- struct bch_inode_unpacked *snapshot_root)
+/*
+ * str_hash lookups across snapshots break in wild ways if hash_info in
+ * different snapshot versions doesn't match - so if we find one mismatch, check
+ * them all
+ */
+int bch2_repair_inode_hash_info(struct btree_trans *trans,
+ struct bch_inode_unpacked *snapshot_root)
{
+ struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
+ struct printbuf buf = PRINTBUF;
+ bool need_commit = false;
int ret = 0;
- for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes,
- SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot - 1),
- BTREE_ITER_all_snapshots, k, ret) {
- if (k.k->p.offset != snapshot_root->bi_inum)
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes,
+ POS(0, snapshot_root->bi_inum),
+ BTREE_ITER_all_snapshots, k, ret) {
+ if (bpos_ge(k.k->p, SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot)))
break;
if (!bkey_is_inode(k.k))
continue;
@@ -121,19 +141,72 @@ static int repair_inode_hash_info(struct btree_trans *trans,
if (ret)
break;
- if (fsck_err_on(inode.bi_hash_seed != snapshot_root->bi_hash_seed ||
- INODE_STR_HASH(&inode) != INODE_STR_HASH(snapshot_root),
- trans, inode_snapshot_mismatch,
- "inode hash info in different snapshots don't match")) {
+ if (inode.bi_hash_seed == snapshot_root->bi_hash_seed &&
+ INODE_STR_HASH(&inode) == INODE_STR_HASH(snapshot_root)) {
+#ifdef CONFIG_BCACHEFS_DEBUG
+ struct bch_hash_info hash1 = bch2_hash_info_init(c, snapshot_root);
+ struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode);
+
+ BUG_ON(hash1.type != hash2.type ||
+ memcmp(&hash1.siphash_key,
+ &hash2.siphash_key,
+ sizeof(hash1.siphash_key)));
+#endif
+ continue;
+ }
+
+ printbuf_reset(&buf);
+ prt_printf(&buf, "inode %llu hash info in snapshots %u %u don't match\n",
+ snapshot_root->bi_inum,
+ inode.bi_snapshot,
+ snapshot_root->bi_snapshot);
+
+ bch2_prt_str_hash_type(&buf, INODE_STR_HASH(&inode));
+ prt_printf(&buf, " %llx\n", inode.bi_hash_seed);
+
+ bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root));
+ prt_printf(&buf, " %llx", snapshot_root->bi_hash_seed);
+
+ if (fsck_err(trans, inode_snapshot_mismatch, "%s", buf.buf)) {
inode.bi_hash_seed = snapshot_root->bi_hash_seed;
SET_INODE_STR_HASH(&inode, INODE_STR_HASH(snapshot_root));
- ret = __bch2_fsck_write_inode(trans, &inode) ?:
- bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_nested;
- break;
+
+ ret = __bch2_fsck_write_inode(trans, &inode);
+ if (ret)
+ break;
+ need_commit = true;
}
}
+
+ if (ret)
+ goto err;
+
+ if (!need_commit) {
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n",
+ snapshot_root->bi_inum);
+
+ prt_printf(&buf, "root snapshot %u ", snapshot_root->bi_snapshot);
+ bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root));
+ prt_printf(&buf, " %llx\n", snapshot_root->bi_hash_seed);
+#if 0
+ prt_printf(&buf, "vs snapshot %u ", hash_info->inum_snapshot);
+ bch2_prt_str_hash_type(&buf, hash_info->type);
+ prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1);
+#endif
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ ret = bch_err_throw(c, fsck_repair_unimplemented);
+ goto err;
+ }
+
+ ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
+ -BCH_ERR_transaction_restart_nested;
+err:
fsck_err:
+ printbuf_exit(&buf);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -145,46 +218,121 @@ fsck_err:
static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum,
struct bch_hash_info *hash_info)
{
+ struct bch_inode_unpacked snapshot_root;
+ int ret = bch2_inode_find_snapshot_root(trans, inum, &snapshot_root);
+ if (ret)
+ return ret;
+
+ struct bch_hash_info hash_root = bch2_hash_info_init(trans->c, &snapshot_root);
+ if (hash_info->type != hash_root.type ||
+ memcmp(&hash_info->siphash_key,
+ &hash_root.siphash_key,
+ sizeof(hash_root.siphash_key)))
+ ret = bch2_repair_inode_hash_info(trans, &snapshot_root);
+
+ return ret;
+}
+
+/* Put a str_hash key in its proper location, checking for duplicates */
+int bch2_str_hash_repair_key(struct btree_trans *trans,
+ struct snapshots_seen *s,
+ const struct bch_hash_desc *desc,
+ struct bch_hash_info *hash_info,
+ struct btree_iter *k_iter, struct bkey_s_c k,
+ struct btree_iter *dup_iter, struct bkey_s_c dup_k,
+ bool *updated_before_k_pos)
+{
struct bch_fs *c = trans->c;
- struct btree_iter iter;
- struct bkey_s_c k;
+ struct printbuf buf = PRINTBUF;
+ bool free_snapshots_seen = false;
int ret = 0;
- for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, U32_MAX),
- BTREE_ITER_all_snapshots, k, ret) {
- if (k.k->p.offset != inum)
- break;
- if (bkey_is_inode(k.k))
- goto found;
+ if (!s) {
+ s = bch2_trans_kmalloc(trans, sizeof(*s));
+ ret = PTR_ERR_OR_ZERO(s);
+ if (ret)
+ goto out;
+
+ s->pos = k_iter->pos;
+ darray_init(&s->ids);
+
+ ret = bch2_get_snapshot_overwrites(trans, desc->btree_id, k_iter->pos, &s->ids);
+ if (ret)
+ goto out;
+
+ free_snapshots_seen = true;
}
- bch_err(c, "%s(): inum %llu not found", __func__, inum);
- ret = -BCH_ERR_fsck_repair_unimplemented;
- goto err;
-found:;
- struct bch_inode_unpacked inode;
- ret = bch2_inode_unpack(k, &inode);
- if (ret)
- goto err;
- struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode);
- if (hash_info->type != hash2.type ||
- memcmp(&hash_info->siphash_key, &hash2.siphash_key, sizeof(hash2.siphash_key))) {
- ret = repair_inode_hash_info(trans, &inode);
- if (!ret) {
- bch_err(c, "inode hash info mismatch with root, but mismatch not found\n"
- "%u %llx %llx\n"
- "%u %llx %llx",
- hash_info->type,
- hash_info->siphash_key.k0,
- hash_info->siphash_key.k1,
- hash2.type,
- hash2.siphash_key.k0,
- hash2.siphash_key.k1);
- ret = -BCH_ERR_fsck_repair_unimplemented;
+ if (!dup_k.k) {
+ struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
+ ret = PTR_ERR_OR_ZERO(new);
+ if (ret)
+ goto out;
+
+ dup_k = bch2_hash_set_or_get_in_snapshot(trans, dup_iter, *desc, hash_info,
+ (subvol_inum) { 0, new->k.p.inode },
+ new->k.p.snapshot, new,
+ STR_HASH_must_create|
+ BTREE_ITER_with_updates|
+ BTREE_UPDATE_internal_snapshot_node);
+ ret = bkey_err(dup_k);
+ if (ret)
+ goto out;
+ if (dup_k.k)
+ goto duplicate_entries;
+
+ if (bpos_lt(new->k.p, k.k->p))
+ *updated_before_k_pos = true;
+
+ ret = bch2_insert_snapshot_whiteouts(trans, desc->btree_id,
+ k_iter->pos, new->k.p) ?:
+ bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
+ BTREE_ITER_with_updates|
+ BTREE_UPDATE_internal_snapshot_node) ?:
+ bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
+ -BCH_ERR_transaction_restart_commit;
+ } else {
+duplicate_entries:
+ ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k);
+ if (ret < 0)
+ goto out;
+
+ if (!fsck_err(trans, hash_table_key_duplicate,
+ "duplicate hash table keys%s:\n%s",
+ ret != 2 ? "" : ", both point to valid inodes",
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k),
+ prt_newline(&buf),
+ bch2_bkey_val_to_text(&buf, c, dup_k),
+ buf.buf)))
+ goto out;
+
+ switch (ret) {
+ case 0:
+ ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
+ break;
+ case 1:
+ ret = bch2_hash_delete_at(trans, *desc, hash_info, dup_iter, 0);
+ break;
+ case 2:
+ ret = bch2_fsck_rename_dirent(trans, s, *desc, hash_info,
+ bkey_s_c_to_dirent(k),
+ updated_before_k_pos) ?:
+ bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
+ BTREE_ITER_with_updates);
+ goto out;
}
+
+ ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
+ -BCH_ERR_transaction_restart_commit;
}
-err:
- bch2_trans_iter_exit(trans, &iter);
+out:
+fsck_err:
+ bch2_trans_iter_exit(trans, dup_iter);
+ printbuf_exit(&buf);
+ if (free_snapshots_seen)
+ darray_exit(&s->ids);
return ret;
}
@@ -192,7 +340,8 @@ int __bch2_str_hash_check_key(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc *desc,
struct bch_hash_info *hash_info,
- struct btree_iter *k_iter, struct bkey_s_c hash_k)
+ struct btree_iter *k_iter, struct bkey_s_c hash_k,
+ bool *updated_before_k_pos)
{
struct bch_fs *c = trans->c;
struct btree_iter iter = {};
@@ -206,24 +355,31 @@ int __bch2_str_hash_check_key(struct btree_trans *trans,
for_each_btree_key_norestart(trans, iter, desc->btree_id,
SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
- BTREE_ITER_slots, k, ret) {
+ BTREE_ITER_slots|
+ BTREE_ITER_with_updates, k, ret) {
if (bkey_eq(k.k->p, hash_k.k->p))
break;
if (k.k->type == desc->key_type &&
- !desc->cmp_bkey(k, hash_k))
- goto duplicate_entries;
+ !desc->cmp_bkey(k, hash_k)) {
+ ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode,
+ hash_info) ?:
+ bch2_str_hash_repair_key(trans, s, desc, hash_info,
+ k_iter, hash_k,
+ &iter, k, updated_before_k_pos);
+ break;
+ }
- if (bkey_deleted(k.k)) {
- bch2_trans_iter_exit(trans, &iter);
+ if (bkey_deleted(k.k))
goto bad_hash;
- }
}
-out:
bch2_trans_iter_exit(trans, &iter);
+out:
+fsck_err:
printbuf_exit(&buf);
return ret;
bad_hash:
+ bch2_trans_iter_exit(trans, &iter);
/*
* Before doing any repair, check hash_info itself:
*/
@@ -232,64 +388,12 @@ bad_hash:
goto out;
if (fsck_err(trans, hash_table_key_wrong_offset,
- "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
- bch2_btree_id_str(desc->btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
- struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k);
- if (IS_ERR(new))
- return PTR_ERR(new);
-
- k = bch2_hash_set_or_get_in_snapshot(trans, &iter, *desc, hash_info,
- (subvol_inum) { 0, hash_k.k->p.inode },
- hash_k.k->p.snapshot, new,
- STR_HASH_must_create|
- BTREE_ITER_with_updates|
- BTREE_UPDATE_internal_snapshot_node);
- ret = bkey_err(k);
- if (ret)
- goto out;
- if (k.k)
- goto duplicate_entries;
-
- ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter,
- BTREE_UPDATE_internal_snapshot_node) ?:
- bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?:
- bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
- -BCH_ERR_transaction_restart_nested;
- goto out;
- }
-fsck_err:
- goto out;
-duplicate_entries:
- ret = hash_pick_winner(trans, *desc, hash_info, hash_k, k);
- if (ret < 0)
- goto out;
-
- if (!fsck_err(trans, hash_table_key_duplicate,
- "duplicate hash table keys%s:\n%s",
- ret != 2 ? "" : ", both point to valid inodes",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, hash_k),
- prt_newline(&buf),
- bch2_bkey_val_to_text(&buf, c, k),
- buf.buf)))
- goto out;
-
- switch (ret) {
- case 0:
- ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
- break;
- case 1:
- ret = bch2_hash_delete_at(trans, *desc, hash_info, &iter, 0);
- break;
- case 2:
- ret = fsck_rename_dirent(trans, s, *desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?:
- bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0);
- goto out;
- }
-
- ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
- -BCH_ERR_transaction_restart_nested;
+ "hash table key at wrong offset: should be at %llu\n%s",
+ hash,
+ (bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)))
+ ret = bch2_str_hash_repair_key(trans, s, desc, hash_info,
+ k_iter, hash_k,
+ &iter, bkey_s_c_null,
+ updated_before_k_pos);
goto out;
}