diff options
author | Jens Axboe <axboe@kernel.dk> | 2024-11-06 07:55:19 -0700 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2024-11-06 07:55:19 -0700 |
commit | 0b66deb16c88f0baa7b8c223211a3c815667ad6f (patch) | |
tree | f0919fc9e4f8e2ff1b249c7a291f7bba3e836617 | |
parent | 91ff97a7225996db1071cfacc209a4fccce2246f (diff) | |
parent | 6012169e8aae9c0eda38bbedcd7a1540a81220ae (diff) |
Merge tag 'md-6.13-20241105' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.13/block
Pull MD changes from Song:
"1. Enhance handling of faulty and blocked devices, by Yu Kuai.
2. raid5-ppl atomic improvement, by Uros Bizjak.
3. md-bitmap fix, by Yuan Can."
* tag 'md-6.13-20241105' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
md/md-bitmap: Add missing destroy_work_on_stack()
md/raid5: don't set Faulty rdev for blocked_rdev
md/raid10: don't wait for Faulty rdev in wait_blocked_rdev()
md/raid1: don't wait for Faulty rdev in wait_blocked_rdev()
md/raid1: factor out helper to handle blocked rdev from raid1_write_request()
md: don't record new badblocks for faulty rdev
md: don't wait faulty rdev in md_wait_for_blocked_rdev()
md: add a new helper rdev_blocked()
md/raid5-ppl: Use atomic64_inc_return() in ppl_new_iounit()
-rw-r--r-- | drivers/md/md-bitmap.c | 1 | ||||
-rw-r--r-- | drivers/md/md.c | 15 | ||||
-rw-r--r-- | drivers/md/md.h | 24 | ||||
-rw-r--r-- | drivers/md/raid1.c | 75 | ||||
-rw-r--r-- | drivers/md/raid10.c | 40 | ||||
-rw-r--r-- | drivers/md/raid5-ppl.c | 2 | ||||
-rw-r--r-- | drivers/md/raid5.c | 13 |
7 files changed, 101 insertions, 69 deletions
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 29da10e6f703..c3a42dd66ce5 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -1285,6 +1285,7 @@ static void bitmap_unplug_async(struct bitmap *bitmap) queue_work(md_bitmap_wq, &unplug_work.work); wait_for_completion(&done); + destroy_work_on_stack(&unplug_work.work); } static void bitmap_unplug(struct mddev *mddev, bool sync) diff --git a/drivers/md/md.c b/drivers/md/md.c index 179ee4afe937..bbe002ebd584 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9762,9 +9762,7 @@ EXPORT_SYMBOL(md_reap_sync_thread); void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev) { sysfs_notify_dirent_safe(rdev->sysfs_state); - wait_event_timeout(rdev->blocked_wait, - !test_bit(Blocked, &rdev->flags) && - !test_bit(BlockedBadBlocks, &rdev->flags), + wait_event_timeout(rdev->blocked_wait, !rdev_blocked(rdev), msecs_to_jiffies(5000)); rdev_dec_pending(rdev, mddev); } @@ -9793,6 +9791,17 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, { struct mddev *mddev = rdev->mddev; int rv; + + /* + * Recording new badblocks for faulty rdev will force unnecessary + * super block updating. This is fragile for external management because + * userspace daemon may trying to remove this device and deadlock may + * occur. This will be probably solved in the mdadm, but it is safer to + * avoid it. + */ + if (test_bit(Faulty, &rdev->flags)) + return 1; + if (is_new) s += rdev->new_data_offset; else diff --git a/drivers/md/md.h b/drivers/md/md.h index 5d2e6bd58e4d..4ba93af36126 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -1002,6 +1002,30 @@ static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio, trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector); } +static inline bool rdev_blocked(struct md_rdev *rdev) +{ + /* + * Blocked will be set by error handler and cleared by daemon after + * updating superblock, meanwhile write IO should be blocked to prevent + * reading old data after power failure. + */ + if (test_bit(Blocked, &rdev->flags)) + return true; + + /* + * Faulty device should not be accessed anymore, there is no need to + * wait for bad block to be acknowledged. + */ + if (test_bit(Faulty, &rdev->flags)) + return false; + + /* rdev is blocked by badblocks. */ + if (test_bit(BlockedBadBlocks, &rdev->flags)) + return true; + + return false; +} + #define mddev_add_trace_msg(mddev, fmt, args...) \ do { \ if (!mddev_is_dm(mddev)) \ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 6c9d24203f39..cd3e94dceabc 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1412,6 +1412,40 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, submit_bio_noacct(read_bio); } +static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio) +{ + struct r1conf *conf = mddev->private; + int disks = conf->raid_disks * 2; + int i; + +retry: + for (i = 0; i < disks; i++) { + struct md_rdev *rdev = conf->mirrors[i].rdev; + + if (!rdev) + continue; + + /* don't write here until the bad block is acknowledged */ + if (test_bit(WriteErrorSeen, &rdev->flags) && + rdev_has_badblock(rdev, bio->bi_iter.bi_sector, + bio_sectors(bio)) < 0) + set_bit(BlockedBadBlocks, &rdev->flags); + + if (rdev_blocked(rdev)) { + if (bio->bi_opf & REQ_NOWAIT) + return false; + + mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked", + rdev->raid_disk); + atomic_inc(&rdev->nr_pending); + md_wait_for_blocked_rdev(rdev, rdev->mddev); + goto retry; + } + } + + return true; +} + static void raid1_write_request(struct mddev *mddev, struct bio *bio, int max_write_sectors) { @@ -1419,7 +1453,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, struct r1bio *r1_bio; int i, disks; unsigned long flags; - struct md_rdev *blocked_rdev; int first_clone; int max_sectors; bool write_behind = false; @@ -1457,7 +1490,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, return; } - retry_write: + if (!wait_blocked_rdev(mddev, bio)) { + bio_wouldblock_error(bio); + return; + } + r1_bio = alloc_r1bio(mddev, bio); r1_bio->sectors = max_write_sectors; @@ -1473,7 +1510,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, */ disks = conf->raid_disks * 2; - blocked_rdev = NULL; max_sectors = r1_bio->sectors; for (i = 0; i < disks; i++) { struct md_rdev *rdev = conf->mirrors[i].rdev; @@ -1486,11 +1522,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, if (!is_discard && rdev && test_bit(WriteMostly, &rdev->flags)) write_behind = true; - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - atomic_inc(&rdev->nr_pending); - blocked_rdev = rdev; - break; - } r1_bio->bios[i] = NULL; if (!rdev || test_bit(Faulty, &rdev->flags)) { if (i < conf->raid_disks) @@ -1506,13 +1537,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, is_bad = is_badblock(rdev, r1_bio->sector, max_sectors, &first_bad, &bad_sectors); - if (is_bad < 0) { - /* mustn't write here until the bad block is - * acknowledged*/ - set_bit(BlockedBadBlocks, &rdev->flags); - blocked_rdev = rdev; - break; - } if (is_bad && first_bad <= r1_bio->sector) { /* Cannot write here at all */ bad_sectors -= (r1_bio->sector - first_bad); @@ -1543,27 +1567,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, r1_bio->bios[i] = bio; } - if (unlikely(blocked_rdev)) { - /* Wait for this device to become unblocked */ - int j; - - for (j = 0; j < i; j++) - if (r1_bio->bios[j]) - rdev_dec_pending(conf->mirrors[j].rdev, mddev); - mempool_free(r1_bio, &conf->r1bio_pool); - allow_barrier(conf, bio->bi_iter.bi_sector); - - if (bio->bi_opf & REQ_NOWAIT) { - bio_wouldblock_error(bio); - return; - } - mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked", - blocked_rdev->raid_disk); - md_wait_for_blocked_rdev(blocked_rdev, mddev); - wait_barrier(conf, bio->bi_iter.bi_sector, false); - goto retry_write; - } - /* * When using a bitmap, we may call alloc_behind_master_bio below. * alloc_behind_master_bio allocates a copy of the data payload a page diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f3bf1116794a..ff73db2f6c41 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1285,9 +1285,9 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio) { - int i; struct r10conf *conf = mddev->private; struct md_rdev *blocked_rdev; + int i; retry_wait: blocked_rdev = NULL; @@ -1295,40 +1295,36 @@ retry_wait: struct md_rdev *rdev, *rrdev; rdev = conf->mirrors[i].rdev; - rrdev = conf->mirrors[i].replacement; - if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - atomic_inc(&rdev->nr_pending); - blocked_rdev = rdev; - break; - } - if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) { - atomic_inc(&rrdev->nr_pending); - blocked_rdev = rrdev; - break; - } - - if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) { + if (rdev) { sector_t dev_sector = r10_bio->devs[i].addr; /* * Discard request doesn't care the write result * so it doesn't need to wait blocked disk here. */ - if (!r10_bio->sectors) - continue; - - if (rdev_has_badblock(rdev, dev_sector, - r10_bio->sectors) < 0) { + if (test_bit(WriteErrorSeen, &rdev->flags) && + r10_bio->sectors && + rdev_has_badblock(rdev, dev_sector, + r10_bio->sectors) < 0) /* - * Mustn't write here until the bad block - * is acknowledged + * Mustn't write here until the bad + * block is acknowledged */ - atomic_inc(&rdev->nr_pending); set_bit(BlockedBadBlocks, &rdev->flags); + + if (rdev_blocked(rdev)) { blocked_rdev = rdev; + atomic_inc(&rdev->nr_pending); break; } } + + rrdev = conf->mirrors[i].replacement; + if (rrdev && rdev_blocked(rrdev)) { + atomic_inc(&rrdev->nr_pending); + blocked_rdev = rrdev; + break; + } } if (unlikely(blocked_rdev)) { diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index a70cbec12ed0..37c4da5311ca 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -258,7 +258,7 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log, memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED); pplhdr->signature = cpu_to_le32(ppl_conf->signature); - io->seq = atomic64_add_return(1, &ppl_conf->seq); + io->seq = atomic64_inc_return(&ppl_conf->seq); pplhdr->generation = cpu_to_le64(io->seq); return io; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index dc2ea636d173..f5ac81dd21b2 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4724,14 +4724,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) if (rdev) { is_bad = rdev_has_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf)); - if (s->blocked_rdev == NULL - && (test_bit(Blocked, &rdev->flags) - || is_bad < 0)) { + if (s->blocked_rdev == NULL) { if (is_bad < 0) - set_bit(BlockedBadBlocks, - &rdev->flags); - s->blocked_rdev = rdev; - atomic_inc(&rdev->nr_pending); + set_bit(BlockedBadBlocks, &rdev->flags); + if (rdev_blocked(rdev)) { + s->blocked_rdev = rdev; + atomic_inc(&rdev->nr_pending); + } } } clear_bit(R5_Insync, &dev->flags); |