summaryrefslogtreecommitdiff
path: root/fs/bcachefs/ec.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/ec.c')
-rw-r--r--fs/bcachefs/ec.c326
1 files changed, 189 insertions, 137 deletions
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index a396865e8b17..543dbba9b14f 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -16,6 +16,7 @@
#include "disk_accounting.h"
#include "disk_groups.h"
#include "ec.h"
+#include "enumerated_ref.h"
#include "error.h"
#include "io_read.h"
#include "io_write.h"
@@ -212,7 +213,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
a->dirty_sectors,
a->stripe, s.k->p.offset,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
@@ -223,7 +224,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
a->dirty_sectors,
a->cached_sectors,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
} else {
@@ -233,7 +234,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
bucket.inode, bucket.offset, a->gen,
a->stripe,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
@@ -243,7 +244,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
bch2_data_type_str(a->data_type),
bch2_data_type_str(data_type),
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
@@ -255,7 +256,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
a->dirty_sectors,
a->cached_sectors,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
}
@@ -294,7 +295,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev);
if (unlikely(!ca)) {
if (ptr->dev != BCH_SB_MEMBER_INVALID && !(flags & BTREE_TRIGGER_overwrite))
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
@@ -324,7 +325,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n%s",
ptr->dev,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
- ret = -BCH_ERR_mark_stripe;
+ ret = bch_err_throw(c, mark_stripe);
goto err;
}
@@ -427,7 +428,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
gc = genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL);
if (!gc) {
bch_err(c, "error allocating memory for gc_stripes, idx %llu", idx);
- return -BCH_ERR_ENOMEM_mark_stripe;
+ return bch_err_throw(c, ENOMEM_mark_stripe);
}
/*
@@ -507,20 +508,14 @@ static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s,
static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
{
- switch (k.k->type) {
- case KEY_TYPE_extent: {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const union bch_extent_entry *entry;
-
- extent_for_each_entry(e, entry)
- if (extent_entry_type(entry) ==
- BCH_EXTENT_ENTRY_stripe_ptr &&
- entry->stripe_ptr.idx == idx)
- return true;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
- break;
- }
- }
+ bkey_extent_entry_for_each(ptrs, entry)
+ if (extent_entry_type(entry) ==
+ BCH_EXTENT_ENTRY_stripe_ptr &&
+ entry->stripe_ptr.idx == idx)
+ return true;
return false;
}
@@ -541,7 +536,8 @@ static void ec_stripe_buf_exit(struct ec_stripe_buf *buf)
}
/* XXX: this is a non-mempoolified memory allocation: */
-static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
+static int ec_stripe_buf_init(struct bch_fs *c,
+ struct ec_stripe_buf *buf,
unsigned offset, unsigned size)
{
struct bch_stripe *v = &bkey_i_to_stripe(&buf->key)->v;
@@ -569,7 +565,7 @@ static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
return 0;
err:
ec_stripe_buf_exit(buf);
- return -BCH_ERR_ENOMEM_stripe_buf;
+ return bch_err_throw(c, ENOMEM_stripe_buf);
}
/* Checksumming: */
@@ -706,6 +702,9 @@ static void ec_block_endio(struct bio *bio)
struct bch_dev *ca = ec_bio->ca;
struct closure *cl = bio->bi_private;
int rw = ec_bio->rw;
+ unsigned ref = rw == READ
+ ? BCH_DEV_READ_REF_ec_block
+ : BCH_DEV_WRITE_REF_ec_block;
bch2_account_io_completion(ca, bio_data_dir(bio),
ec_bio->submit_time, !bio->bi_status);
@@ -727,7 +726,7 @@ static void ec_block_endio(struct bio *bio)
}
bio_put(&ec_bio->bio);
- percpu_ref_put(&ca->io_ref[rw]);
+ enumerated_ref_put(&ca->io_ref[rw], ref);
closure_put(cl);
}
@@ -741,8 +740,11 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
? BCH_DATA_user
: BCH_DATA_parity;
int rw = op_is_write(opf);
+ unsigned ref = rw == READ
+ ? BCH_DEV_READ_REF_ec_block
+ : BCH_DEV_WRITE_REF_ec_block;
- struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw, ref);
if (!ca) {
clear_bit(idx, buf->valid);
return;
@@ -788,14 +790,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);
closure_get(cl);
- percpu_ref_get(&ca->io_ref[rw]);
+ enumerated_ref_get(&ca->io_ref[rw], ref);
submit_bio(&ec_bio->bio);
offset += b;
}
- percpu_ref_put(&ca->io_ref[rw]);
+ enumerated_ref_put(&ca->io_ref[rw], ref);
}
static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
@@ -839,7 +841,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
buf = kzalloc(sizeof(*buf), GFP_NOFS);
if (!buf)
- return -BCH_ERR_ENOMEM_ec_read_extent;
+ return bch_err_throw(c, ENOMEM_ec_read_extent);
ret = lockrestart_do(trans, get_stripe_key_trans(trans, rbio->pick.ec.idx, buf));
if (ret) {
@@ -860,7 +862,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio,
goto err;
}
- ret = ec_stripe_buf_init(buf, offset, bio_sectors(&rbio->bio));
+ ret = ec_stripe_buf_init(c, buf, offset, bio_sectors(&rbio->bio));
if (ret) {
msg = "-ENOMEM";
goto err;
@@ -893,7 +895,7 @@ err:
bch_err_ratelimited(c,
"error doing reconstruct read: %s\n %s", msg, msgbuf.buf);
printbuf_exit(&msgbuf);
- ret = -BCH_ERR_stripe_reconstruct;
+ ret = bch_err_throw(c, stripe_reconstruct);
goto out;
}
@@ -903,7 +905,7 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
{
if (c->gc_pos.phase != GC_PHASE_not_running &&
!genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
- return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
+ return bch_err_throw(c, ENOMEM_ec_stripe_mem_alloc);
return 0;
}
@@ -1017,14 +1019,14 @@ static void ec_stripe_delete_work(struct work_struct *work)
BCH_TRANS_COMMIT_no_enospc, ({
ec_stripe_delete(trans, lru_k.k->p.offset);
})));
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete);
}
void bch2_do_stripe_deletes(struct bch_fs *c)
{
- if (bch2_write_ref_tryget(c, BCH_WRITE_REF_stripe_delete) &&
+ if (enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_stripe_delete) &&
!queue_work(c->write_ref_wq, &c->ec_stripe_delete_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_delete);
}
/* stripe creation: */
@@ -1128,7 +1130,7 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
bch2_fs_inconsistent(c, "%s", buf.buf);
printbuf_exit(&buf);
- return -BCH_ERR_erasure_coding_found_btree_node;
+ return bch_err_throw(c, erasure_coding_found_btree_node);
}
k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed);
@@ -1194,7 +1196,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev);
if (!ca)
- return -BCH_ERR_ENOENT_dev_not_found;
+ return bch_err_throw(c, ENOENT_dev_not_found);
struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr);
@@ -1252,9 +1254,10 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
unsigned block,
struct open_bucket *ob)
{
- struct bch_dev *ca = bch2_dev_get_ioref(c, ob->dev, WRITE);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, ob->dev, WRITE,
+ BCH_DEV_WRITE_REF_ec_bucket_zero);
if (!ca) {
- s->err = -BCH_ERR_erofs_no_writes;
+ s->err = bch_err_throw(c, erofs_no_writes);
return;
}
@@ -1268,7 +1271,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
ob->sectors_free,
GFP_KERNEL, 0);
- percpu_ref_put(&ca->io_ref[WRITE]);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_ec_bucket_zero);
if (ret)
s->err = ret;
@@ -1318,7 +1321,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
if (ec_do_recov(c, &s->existing_stripe)) {
bch_err(c, "error creating stripe: error reading existing stripe");
- ret = -BCH_ERR_ec_block_read;
+ ret = bch_err_throw(c, ec_block_read);
goto err;
}
@@ -1344,7 +1347,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
if (ec_nr_failed(&s->new_stripe)) {
bch_err(c, "error creating stripe: error writing redundancy buckets");
- ret = -BCH_ERR_ec_block_write;
+ ret = bch_err_throw(c, ec_block_write);
goto err;
}
@@ -1418,15 +1421,15 @@ static void ec_stripe_create_work(struct work_struct *work)
while ((s = get_pending_stripe(c)))
ec_stripe_create(s);
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create);
}
void bch2_ec_do_stripe_creates(struct bch_fs *c)
{
- bch2_write_ref_get(c, BCH_WRITE_REF_stripe_create);
+ enumerated_ref_get(&c->writes, BCH_WRITE_REF_stripe_create);
if (!queue_work(system_long_wq, &c->ec_stripe_create_work))
- bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
+ enumerated_ref_put(&c->writes, BCH_WRITE_REF_stripe_create);
}
static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
@@ -1576,26 +1579,26 @@ static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_str
static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
{
struct bch_devs_mask devs = h->devs;
+ unsigned nr_devs, nr_devs_with_durability;
- rcu_read_lock();
- h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
- ? group_to_target(h->disk_label - 1)
- : 0);
- unsigned nr_devs = dev_mask_nr(&h->devs);
-
- for_each_member_device_rcu(c, ca, &h->devs)
- if (!ca->mi.durability)
- __clear_bit(ca->dev_idx, h->devs.d);
- unsigned nr_devs_with_durability = dev_mask_nr(&h->devs);
+ scoped_guard(rcu) {
+ h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
+ ? group_to_target(h->disk_label - 1)
+ : 0);
+ nr_devs = dev_mask_nr(&h->devs);
- h->blocksize = pick_blocksize(c, &h->devs);
+ for_each_member_device_rcu(c, ca, &h->devs)
+ if (!ca->mi.durability)
+ __clear_bit(ca->dev_idx, h->devs.d);
+ nr_devs_with_durability = dev_mask_nr(&h->devs);
- h->nr_active_devs = 0;
- for_each_member_device_rcu(c, ca, &h->devs)
- if (ca->mi.bucket_size == h->blocksize)
- h->nr_active_devs++;
+ h->blocksize = pick_blocksize(c, &h->devs);
- rcu_read_unlock();
+ h->nr_active_devs = 0;
+ for_each_member_device_rcu(c, ca, &h->devs)
+ if (ca->mi.bucket_size == h->blocksize)
+ h->nr_active_devs++;
+ }
/*
* If we only have redundancy + 1 devices, we're better off with just
@@ -1716,23 +1719,32 @@ err:
}
static int new_stripe_alloc_buckets(struct btree_trans *trans,
+ struct alloc_request *req,
struct ec_stripe_head *h, struct ec_stripe_new *s,
- enum bch_watermark watermark, struct closure *cl)
+ struct closure *cl)
{
struct bch_fs *c = trans->c;
- struct bch_devs_mask devs = h->devs;
struct open_bucket *ob;
- struct open_buckets buckets;
struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
- bool have_cache = true;
int ret = 0;
+ req->scratch_data_type = req->data_type;
+ req->scratch_ptrs = req->ptrs;
+ req->scratch_nr_replicas = req->nr_replicas;
+ req->scratch_nr_effective = req->nr_effective;
+ req->scratch_have_cache = req->have_cache;
+ req->scratch_devs_may_alloc = req->devs_may_alloc;
+
+ req->devs_may_alloc = h->devs;
+ req->have_cache = true;
+
BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity);
BUG_ON(v->nr_redundant != s->nr_parity);
/* * We bypass the sector allocator which normally does this: */
- bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
+ bitmap_and(req->devs_may_alloc.d, req->devs_may_alloc.d,
+ c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) {
/*
@@ -1742,7 +1754,7 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans,
* block when updating the stripe
*/
if (v->ptrs[i].dev != BCH_SB_MEMBER_INVALID)
- __clear_bit(v->ptrs[i].dev, devs.d);
+ __clear_bit(v->ptrs[i].dev, req->devs_may_alloc.d);
if (i < s->nr_data)
nr_have_data++;
@@ -1753,60 +1765,58 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans,
BUG_ON(nr_have_data > s->nr_data);
BUG_ON(nr_have_parity > s->nr_parity);
- buckets.nr = 0;
+ req->ptrs.nr = 0;
if (nr_have_parity < s->nr_parity) {
- ret = bch2_bucket_alloc_set_trans(trans, &buckets,
- &h->parity_stripe,
- &devs,
- s->nr_parity,
- &nr_have_parity,
- &have_cache, 0,
- BCH_DATA_parity,
- watermark,
- cl);
-
- open_bucket_for_each(c, &buckets, ob, i) {
+ req->nr_replicas = s->nr_parity;
+ req->nr_effective = nr_have_parity;
+ req->data_type = BCH_DATA_parity;
+
+ ret = bch2_bucket_alloc_set_trans(trans, req, &h->parity_stripe, cl);
+
+ open_bucket_for_each(c, &req->ptrs, ob, i) {
j = find_next_zero_bit(s->blocks_gotten,
s->nr_data + s->nr_parity,
s->nr_data);
BUG_ON(j >= s->nr_data + s->nr_parity);
- s->blocks[j] = buckets.v[i];
+ s->blocks[j] = req->ptrs.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
__set_bit(j, s->blocks_gotten);
}
if (ret)
- return ret;
+ goto err;
}
- buckets.nr = 0;
+ req->ptrs.nr = 0;
if (nr_have_data < s->nr_data) {
- ret = bch2_bucket_alloc_set_trans(trans, &buckets,
- &h->block_stripe,
- &devs,
- s->nr_data,
- &nr_have_data,
- &have_cache, 0,
- BCH_DATA_user,
- watermark,
- cl);
-
- open_bucket_for_each(c, &buckets, ob, i) {
+ req->nr_replicas = s->nr_data;
+ req->nr_effective = nr_have_data;
+ req->data_type = BCH_DATA_user;
+
+ ret = bch2_bucket_alloc_set_trans(trans, req, &h->block_stripe, cl);
+
+ open_bucket_for_each(c, &req->ptrs, ob, i) {
j = find_next_zero_bit(s->blocks_gotten,
s->nr_data, 0);
BUG_ON(j >= s->nr_data);
- s->blocks[j] = buckets.v[i];
+ s->blocks[j] = req->ptrs.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
__set_bit(j, s->blocks_gotten);
}
if (ret)
- return ret;
+ goto err;
}
-
- return 0;
+err:
+ req->data_type = req->scratch_data_type;
+ req->ptrs = req->scratch_ptrs;
+ req->nr_replicas = req->scratch_nr_replicas;
+ req->nr_effective = req->scratch_nr_effective;
+ req->have_cache = req->scratch_have_cache;
+ req->devs_may_alloc = req->scratch_devs_may_alloc;
+ return ret;
}
static int __get_existing_stripe(struct btree_trans *trans,
@@ -1856,7 +1866,7 @@ static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new
s->nr_data = existing_v->nr_blocks -
existing_v->nr_redundant;
- int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors));
+ int ret = ec_stripe_buf_init(c, &s->existing_stripe, 0, le16_to_cpu(existing_v->sectors));
if (ret) {
bch2_stripe_close(c, s);
return ret;
@@ -1916,7 +1926,7 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
}
bch2_trans_iter_exit(trans, &lru_iter);
if (!ret)
- ret = -BCH_ERR_stripe_alloc_blocked;
+ ret = bch_err_throw(c, stripe_alloc_blocked);
if (ret == 1)
ret = 0;
if (ret)
@@ -1957,7 +1967,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st
continue;
}
- ret = -BCH_ERR_ENOSPC_stripe_create;
+ ret = bch_err_throw(c, ENOSPC_stripe_create);
break;
}
@@ -1987,17 +1997,15 @@ err:
}
struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
- unsigned target,
+ struct alloc_request *req,
unsigned algo,
- unsigned redundancy,
- enum bch_watermark watermark,
struct closure *cl)
{
struct bch_fs *c = trans->c;
- struct ec_stripe_head *h;
- bool waiting = false;
+ unsigned redundancy = req->nr_replicas - 1;
unsigned disk_label = 0;
- struct target t = target_decode(target);
+ struct target t = target_decode(req->target);
+ bool waiting = false;
int ret;
if (t.type == TARGET_GROUP) {
@@ -2008,14 +2016,16 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
disk_label = t.group + 1; /* 0 == no label */
}
- h = __bch2_ec_stripe_head_get(trans, disk_label, algo, redundancy, watermark);
+ struct ec_stripe_head *h =
+ __bch2_ec_stripe_head_get(trans, disk_label, algo,
+ redundancy, req->watermark);
if (IS_ERR_OR_NULL(h))
return h;
if (!h->s) {
h->s = ec_new_stripe_alloc(c, h);
if (!h->s) {
- ret = -BCH_ERR_ENOMEM_ec_new_stripe_alloc;
+ ret = bch_err_throw(c, ENOMEM_ec_new_stripe_alloc);
bch_err(c, "failed to allocate new stripe");
goto err;
}
@@ -2032,8 +2042,12 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
goto alloc_existing;
/* First, try to allocate a full stripe: */
- ret = new_stripe_alloc_buckets(trans, h, s, BCH_WATERMARK_stripe, NULL) ?:
+ enum bch_watermark saved_watermark = BCH_WATERMARK_stripe;
+ swap(req->watermark, saved_watermark);
+ ret = new_stripe_alloc_buckets(trans, req, h, s, NULL) ?:
__bch2_ec_stripe_head_reserve(trans, h, s);
+ swap(req->watermark, saved_watermark);
+
if (!ret)
goto allocate_buf;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
@@ -2051,8 +2065,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
goto err;
- if (watermark == BCH_WATERMARK_copygc) {
- ret = new_stripe_alloc_buckets(trans, h, s, watermark, NULL) ?:
+ if (req->watermark == BCH_WATERMARK_copygc) {
+ ret = new_stripe_alloc_buckets(trans, req, h, s, NULL) ?:
__bch2_ec_stripe_head_reserve(trans, h, s);
if (ret)
goto err;
@@ -2071,12 +2085,12 @@ alloc_existing:
* Retry allocating buckets, with the watermark for this
* particular write:
*/
- ret = new_stripe_alloc_buckets(trans, h, s, watermark, cl);
+ ret = new_stripe_alloc_buckets(trans, req, h, s, cl);
if (ret)
goto err;
allocate_buf:
- ret = ec_stripe_buf_init(&s->new_stripe, 0, h->blocksize);
+ ret = ec_stripe_buf_init(c, &s->new_stripe, 0, h->blocksize);
if (ret)
goto err;
@@ -2093,23 +2107,18 @@ err:
/* device removal */
-static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_s_c k_a)
+int bch2_invalidate_stripe_to_dev(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ unsigned dev_idx,
+ unsigned flags)
{
- struct bch_alloc_v4 a_convert;
- const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k_a, &a_convert);
-
- if (!a->stripe)
+ if (k.k->type != KEY_TYPE_stripe)
return 0;
- if (a->stripe_sectors) {
- bch_err(trans->c, "trying to invalidate device in stripe when bucket has stripe data");
- return -BCH_ERR_invalidate_stripe_to_dev;
- }
-
- struct btree_iter iter;
+ struct bch_fs *c = trans->c;
struct bkey_i_stripe *s =
- bch2_bkey_get_mut_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe),
- BTREE_ITER_slots, stripe);
+ bch2_bkey_make_mut_typed(trans, iter, &k, 0, stripe);
int ret = PTR_ERR_OR_ZERO(s);
if (ret)
return ret;
@@ -2126,12 +2135,30 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_
acc.replicas.data_type = BCH_DATA_user;
ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
if (ret)
- goto err;
+ return ret;
struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(&s->k_i));
- bkey_for_each_ptr(ptrs, ptr)
- if (ptr->dev == k_a.k->p.inode)
- ptr->dev = BCH_SB_MEMBER_INVALID;
+
+ /* XXX: how much redundancy do we still have? check degraded flags */
+
+ unsigned nr_good = 0;
+
+ scoped_guard(rcu)
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (ptr->dev == dev_idx)
+ ptr->dev = BCH_SB_MEMBER_INVALID;
+
+ struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
+ nr_good += ca && ca->mi.state != BCH_MEMBER_STATE_failed;
+ }
+
+ if (nr_good < s->v.nr_blocks && !(flags & BCH_FORCE_IF_DATA_DEGRADED))
+ return bch_err_throw(c, remove_would_lose_data);
+
+ unsigned nr_data = s->v.nr_blocks - s->v.nr_redundant;
+
+ if (nr_good < nr_data && !(flags & BCH_FORCE_IF_DATA_LOST))
+ return bch_err_throw(c, remove_would_lose_data);
sectors = -sectors;
@@ -2139,23 +2166,48 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_
acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
acc.replicas.data_type = BCH_DATA_user;
- ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
+ return bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
+}
+
+static int bch2_invalidate_stripe_to_dev_from_alloc(struct btree_trans *trans, struct bkey_s_c k_a,
+ unsigned flags)
+{
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k_a, &a_convert);
+
+ if (!a->stripe)
+ return 0;
+
+ if (a->stripe_sectors) {
+ struct bch_fs *c = trans->c;
+ bch_err(c, "trying to invalidate device in stripe when bucket has stripe data");
+ return bch_err_throw(c, invalidate_stripe_to_dev);
+ }
+
+ struct btree_iter iter;
+ struct bkey_s_c_stripe s =
+ bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_stripes, POS(0, a->stripe),
+ BTREE_ITER_slots, stripe);
+ int ret = bkey_err(s);
if (ret)
- goto err;
-err:
+ return ret;
+
+ ret = bch2_invalidate_stripe_to_dev(trans, &iter, s.s_c, k_a.k->p.inode, flags);
bch2_trans_iter_exit(trans, &iter);
return ret;
}
-int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx)
+int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx, unsigned flags)
{
- return bch2_trans_run(c,
+ int ret = bch2_trans_run(c,
for_each_btree_key_max_commit(trans, iter,
BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX),
BTREE_ITER_intent, k,
NULL, NULL, 0, ({
- bch2_invalidate_stripe_to_dev(trans, k);
+ bch2_invalidate_stripe_to_dev_from_alloc(trans, k, flags);
})));
+ bch_err_fn(c, ret);
+ return ret;
}
/* startup/shutdown */
@@ -2204,10 +2256,10 @@ void bch2_fs_ec_stop(struct bch_fs *c)
static bool bch2_fs_ec_flush_done(struct bch_fs *c)
{
- bool ret;
+ sched_annotate_sleep();
mutex_lock(&c->ec_stripe_new_lock);
- ret = list_empty(&c->ec_stripe_new_list);
+ bool ret = list_empty(&c->ec_stripe_new_list);
mutex_unlock(&c->ec_stripe_new_lock);
return ret;