diff options
Diffstat (limited to 'fs/bcachefs/str_hash.c')
-rw-r--r-- | fs/bcachefs/str_hash.c | 368 |
1 files changed, 236 insertions, 132 deletions
diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c index a90bf7b8a2b4..71b735a85026 100644 --- a/fs/bcachefs/str_hash.c +++ b/fs/bcachefs/str_hash.c @@ -31,14 +31,15 @@ static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dir } } -static noinline int fsck_rename_dirent(struct btree_trans *trans, - struct snapshots_seen *s, - const struct bch_hash_desc desc, - struct bch_hash_info *hash_info, - struct bkey_s_c_dirent old) +static int bch2_fsck_rename_dirent(struct btree_trans *trans, + struct snapshots_seen *s, + const struct bch_hash_desc desc, + struct bch_hash_info *hash_info, + struct bkey_s_c_dirent old, + bool *updated_before_k_pos) { struct qstr old_name = bch2_dirent_get_name(old); - struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32); + struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, BKEY_U64s_MAX * sizeof(u64)); int ret = PTR_ERR_OR_ZERO(new); if (ret) return ret; @@ -47,28 +48,39 @@ static noinline int fsck_rename_dirent(struct btree_trans *trans, dirent_copy_target(new, old); new->k.p = old.k->p; + char *renamed_buf = bch2_trans_kmalloc(trans, old_name.len + 20); + ret = PTR_ERR_OR_ZERO(renamed_buf); + if (ret) + return ret; + for (unsigned i = 0; i < 1000; i++) { - unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u", - old_name.len, old_name.name, i); - unsigned u64s = BKEY_U64s + dirent_val_u64s(len, 0); + new->k.u64s = BKEY_U64s_MAX; - if (u64s > U8_MAX) - return -EINVAL; + struct qstr renamed_name = (struct qstr) QSTR_INIT(renamed_buf, + sprintf(renamed_buf, "%.*s.fsck_renamed-%u", + old_name.len, old_name.name, i)); - new->k.u64s = u64s; + ret = bch2_dirent_init_name(new, hash_info, &renamed_name, NULL); + if (ret) + return ret; ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info, (subvol_inum) { 0, old.k->p.inode }, old.k->p.snapshot, &new->k_i, - BTREE_UPDATE_internal_snapshot_node); - if (!bch2_err_matches(ret, EEXIST)) + BTREE_UPDATE_internal_snapshot_node| + STR_HASH_must_create); + if (ret && !bch2_err_matches(ret, EEXIST)) + break; + if (!ret) { + if (bpos_lt(new->k.p, old.k->p)) + *updated_before_k_pos = true; break; + } } - if (ret) - return ret; - - return bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); + ret = ret ?: bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i); + bch_err_fn(trans->c, ret); + return ret; } static noinline int hash_pick_winner(struct btree_trans *trans, @@ -101,17 +113,25 @@ static noinline int hash_pick_winner(struct btree_trans *trans, } } -static int repair_inode_hash_info(struct btree_trans *trans, - struct bch_inode_unpacked *snapshot_root) +/* + * str_hash lookups across snapshots break in wild ways if hash_info in + * different snapshot versions doesn't match - so if we find one mismatch, check + * them all + */ +int bch2_repair_inode_hash_info(struct btree_trans *trans, + struct bch_inode_unpacked *snapshot_root) { + struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; + struct printbuf buf = PRINTBUF; + bool need_commit = false; int ret = 0; - for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, - SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot - 1), - BTREE_ITER_all_snapshots, k, ret) { - if (k.k->p.offset != snapshot_root->bi_inum) + for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, + POS(0, snapshot_root->bi_inum), + BTREE_ITER_all_snapshots, k, ret) { + if (bpos_ge(k.k->p, SPOS(0, snapshot_root->bi_inum, snapshot_root->bi_snapshot))) break; if (!bkey_is_inode(k.k)) continue; @@ -121,19 +141,72 @@ static int repair_inode_hash_info(struct btree_trans *trans, if (ret) break; - if (fsck_err_on(inode.bi_hash_seed != snapshot_root->bi_hash_seed || - INODE_STR_HASH(&inode) != INODE_STR_HASH(snapshot_root), - trans, inode_snapshot_mismatch, - "inode hash info in different snapshots don't match")) { + if (inode.bi_hash_seed == snapshot_root->bi_hash_seed && + INODE_STR_HASH(&inode) == INODE_STR_HASH(snapshot_root)) { +#ifdef CONFIG_BCACHEFS_DEBUG + struct bch_hash_info hash1 = bch2_hash_info_init(c, snapshot_root); + struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode); + + BUG_ON(hash1.type != hash2.type || + memcmp(&hash1.siphash_key, + &hash2.siphash_key, + sizeof(hash1.siphash_key))); +#endif + continue; + } + + printbuf_reset(&buf); + prt_printf(&buf, "inode %llu hash info in snapshots %u %u don't match\n", + snapshot_root->bi_inum, + inode.bi_snapshot, + snapshot_root->bi_snapshot); + + bch2_prt_str_hash_type(&buf, INODE_STR_HASH(&inode)); + prt_printf(&buf, " %llx\n", inode.bi_hash_seed); + + bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root)); + prt_printf(&buf, " %llx", snapshot_root->bi_hash_seed); + + if (fsck_err(trans, inode_snapshot_mismatch, "%s", buf.buf)) { inode.bi_hash_seed = snapshot_root->bi_hash_seed; SET_INODE_STR_HASH(&inode, INODE_STR_HASH(snapshot_root)); - ret = __bch2_fsck_write_inode(trans, &inode) ?: - bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: - -BCH_ERR_transaction_restart_nested; - break; + + ret = __bch2_fsck_write_inode(trans, &inode); + if (ret) + break; + need_commit = true; } } + + if (ret) + goto err; + + if (!need_commit) { + struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + + prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n", + snapshot_root->bi_inum); + + prt_printf(&buf, "root snapshot %u ", snapshot_root->bi_snapshot); + bch2_prt_str_hash_type(&buf, INODE_STR_HASH(snapshot_root)); + prt_printf(&buf, " %llx\n", snapshot_root->bi_hash_seed); +#if 0 + prt_printf(&buf, "vs snapshot %u ", hash_info->inum_snapshot); + bch2_prt_str_hash_type(&buf, hash_info->type); + prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1); +#endif + bch2_print_str(c, KERN_ERR, buf.buf); + printbuf_exit(&buf); + ret = bch_err_throw(c, fsck_repair_unimplemented); + goto err; + } + + ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: + -BCH_ERR_transaction_restart_nested; +err: fsck_err: + printbuf_exit(&buf); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -145,46 +218,121 @@ fsck_err: static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum, struct bch_hash_info *hash_info) { + struct bch_inode_unpacked snapshot_root; + int ret = bch2_inode_find_snapshot_root(trans, inum, &snapshot_root); + if (ret) + return ret; + + struct bch_hash_info hash_root = bch2_hash_info_init(trans->c, &snapshot_root); + if (hash_info->type != hash_root.type || + memcmp(&hash_info->siphash_key, + &hash_root.siphash_key, + sizeof(hash_root.siphash_key))) + ret = bch2_repair_inode_hash_info(trans, &snapshot_root); + + return ret; +} + +/* Put a str_hash key in its proper location, checking for duplicates */ +int bch2_str_hash_repair_key(struct btree_trans *trans, + struct snapshots_seen *s, + const struct bch_hash_desc *desc, + struct bch_hash_info *hash_info, + struct btree_iter *k_iter, struct bkey_s_c k, + struct btree_iter *dup_iter, struct bkey_s_c dup_k, + bool *updated_before_k_pos) +{ struct bch_fs *c = trans->c; - struct btree_iter iter; - struct bkey_s_c k; + struct printbuf buf = PRINTBUF; + bool free_snapshots_seen = false; int ret = 0; - for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, U32_MAX), - BTREE_ITER_all_snapshots, k, ret) { - if (k.k->p.offset != inum) - break; - if (bkey_is_inode(k.k)) - goto found; + if (!s) { + s = bch2_trans_kmalloc(trans, sizeof(*s)); + ret = PTR_ERR_OR_ZERO(s); + if (ret) + goto out; + + s->pos = k_iter->pos; + darray_init(&s->ids); + + ret = bch2_get_snapshot_overwrites(trans, desc->btree_id, k_iter->pos, &s->ids); + if (ret) + goto out; + + free_snapshots_seen = true; } - bch_err(c, "%s(): inum %llu not found", __func__, inum); - ret = -BCH_ERR_fsck_repair_unimplemented; - goto err; -found:; - struct bch_inode_unpacked inode; - ret = bch2_inode_unpack(k, &inode); - if (ret) - goto err; - struct bch_hash_info hash2 = bch2_hash_info_init(c, &inode); - if (hash_info->type != hash2.type || - memcmp(&hash_info->siphash_key, &hash2.siphash_key, sizeof(hash2.siphash_key))) { - ret = repair_inode_hash_info(trans, &inode); - if (!ret) { - bch_err(c, "inode hash info mismatch with root, but mismatch not found\n" - "%u %llx %llx\n" - "%u %llx %llx", - hash_info->type, - hash_info->siphash_key.k0, - hash_info->siphash_key.k1, - hash2.type, - hash2.siphash_key.k0, - hash2.siphash_key.k1); - ret = -BCH_ERR_fsck_repair_unimplemented; + if (!dup_k.k) { + struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); + ret = PTR_ERR_OR_ZERO(new); + if (ret) + goto out; + + dup_k = bch2_hash_set_or_get_in_snapshot(trans, dup_iter, *desc, hash_info, + (subvol_inum) { 0, new->k.p.inode }, + new->k.p.snapshot, new, + STR_HASH_must_create| + BTREE_ITER_with_updates| + BTREE_UPDATE_internal_snapshot_node); + ret = bkey_err(dup_k); + if (ret) + goto out; + if (dup_k.k) + goto duplicate_entries; + + if (bpos_lt(new->k.p, k.k->p)) + *updated_before_k_pos = true; + + ret = bch2_insert_snapshot_whiteouts(trans, desc->btree_id, + k_iter->pos, new->k.p) ?: + bch2_hash_delete_at(trans, *desc, hash_info, k_iter, + BTREE_ITER_with_updates| + BTREE_UPDATE_internal_snapshot_node) ?: + bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?: + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: + -BCH_ERR_transaction_restart_commit; + } else { +duplicate_entries: + ret = hash_pick_winner(trans, *desc, hash_info, k, dup_k); + if (ret < 0) + goto out; + + if (!fsck_err(trans, hash_table_key_duplicate, + "duplicate hash table keys%s:\n%s", + ret != 2 ? "" : ", both point to valid inodes", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, k), + prt_newline(&buf), + bch2_bkey_val_to_text(&buf, c, dup_k), + buf.buf))) + goto out; + + switch (ret) { + case 0: + ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0); + break; + case 1: + ret = bch2_hash_delete_at(trans, *desc, hash_info, dup_iter, 0); + break; + case 2: + ret = bch2_fsck_rename_dirent(trans, s, *desc, hash_info, + bkey_s_c_to_dirent(k), + updated_before_k_pos) ?: + bch2_hash_delete_at(trans, *desc, hash_info, k_iter, + BTREE_ITER_with_updates); + goto out; } + + ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: + -BCH_ERR_transaction_restart_commit; } -err: - bch2_trans_iter_exit(trans, &iter); +out: +fsck_err: + bch2_trans_iter_exit(trans, dup_iter); + printbuf_exit(&buf); + if (free_snapshots_seen) + darray_exit(&s->ids); return ret; } @@ -192,7 +340,8 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, struct snapshots_seen *s, const struct bch_hash_desc *desc, struct bch_hash_info *hash_info, - struct btree_iter *k_iter, struct bkey_s_c hash_k) + struct btree_iter *k_iter, struct bkey_s_c hash_k, + bool *updated_before_k_pos) { struct bch_fs *c = trans->c; struct btree_iter iter = {}; @@ -206,24 +355,31 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, for_each_btree_key_norestart(trans, iter, desc->btree_id, SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), - BTREE_ITER_slots, k, ret) { + BTREE_ITER_slots| + BTREE_ITER_with_updates, k, ret) { if (bkey_eq(k.k->p, hash_k.k->p)) break; if (k.k->type == desc->key_type && - !desc->cmp_bkey(k, hash_k)) - goto duplicate_entries; + !desc->cmp_bkey(k, hash_k)) { + ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, + hash_info) ?: + bch2_str_hash_repair_key(trans, s, desc, hash_info, + k_iter, hash_k, + &iter, k, updated_before_k_pos); + break; + } - if (bkey_deleted(k.k)) { - bch2_trans_iter_exit(trans, &iter); + if (bkey_deleted(k.k)) goto bad_hash; - } } -out: bch2_trans_iter_exit(trans, &iter); +out: +fsck_err: printbuf_exit(&buf); return ret; bad_hash: + bch2_trans_iter_exit(trans, &iter); /* * Before doing any repair, check hash_info itself: */ @@ -232,64 +388,12 @@ bad_hash: goto out; if (fsck_err(trans, hash_table_key_wrong_offset, - "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", - bch2_btree_id_str(desc->btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { - struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k); - if (IS_ERR(new)) - return PTR_ERR(new); - - k = bch2_hash_set_or_get_in_snapshot(trans, &iter, *desc, hash_info, - (subvol_inum) { 0, hash_k.k->p.inode }, - hash_k.k->p.snapshot, new, - STR_HASH_must_create| - BTREE_ITER_with_updates| - BTREE_UPDATE_internal_snapshot_node); - ret = bkey_err(k); - if (ret) - goto out; - if (k.k) - goto duplicate_entries; - - ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, - BTREE_UPDATE_internal_snapshot_node) ?: - bch2_fsck_update_backpointers(trans, s, *desc, hash_info, new) ?: - bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: - -BCH_ERR_transaction_restart_nested; - goto out; - } -fsck_err: - goto out; -duplicate_entries: - ret = hash_pick_winner(trans, *desc, hash_info, hash_k, k); - if (ret < 0) - goto out; - - if (!fsck_err(trans, hash_table_key_duplicate, - "duplicate hash table keys%s:\n%s", - ret != 2 ? "" : ", both point to valid inodes", - (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, hash_k), - prt_newline(&buf), - bch2_bkey_val_to_text(&buf, c, k), - buf.buf))) - goto out; - - switch (ret) { - case 0: - ret = bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0); - break; - case 1: - ret = bch2_hash_delete_at(trans, *desc, hash_info, &iter, 0); - break; - case 2: - ret = fsck_rename_dirent(trans, s, *desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?: - bch2_hash_delete_at(trans, *desc, hash_info, k_iter, 0); - goto out; - } - - ret = bch2_trans_commit(trans, NULL, NULL, 0) ?: - -BCH_ERR_transaction_restart_nested; + "hash table key at wrong offset: should be at %llu\n%s", + hash, + (bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) + ret = bch2_str_hash_repair_key(trans, s, desc, hash_info, + k_iter, hash_k, + &iter, bkey_s_c_null, + updated_before_k_pos); goto out; } |