author     Linus Torvalds <torvalds@linux-foundation.org>   2025-10-02 10:16:56 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2025-10-02 10:16:56 -0700
commit     e1b1d03ceec343362524318c076b110066ffe305 (patch)
tree       ca71105f13d893118eab4feb826d20cca066c53d /drivers/md/raid1.c
parent     5832d26433f2bd0d28f8b12526e3c2fdb203507f (diff)
parent     130e6de62107116eba124647116276266be0f84c (diff)
Merge tag 'for-6.18/block-20250929' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull block updates from Jens Axboe:
- NVMe pull request via Keith:
- FC target fixes (Daniel)
- Authentication fixes and updates (Martin, Chris)
- Admin controller handling (Kamaljit)
- Target lockdep assertions (Max)
- Keep-alive updates for discovery (Alastair)
- Suspend quirk (Georg)
- MD pull request via Yu:
- Add support for a lockless bitmap.
A key feature of the new bitmap is that the IO fast path is
lockless. If a user issues many write IOs to the same bitmap
bit in a short time, only the first write pays the overhead of
updating the bitmap bit; the following writes add no overhead.
(A conceptual sketch of this first-writer-pays pattern follows
after this list.)
Because only written data needs to be resynced or recovered,
creating a new array or replacing a disk no longer requires a
full-disk resync/recovery.
- Switch ->getgeo() and ->bios_param() to using struct gendisk rather
than struct block_device.
- Rust block changes via Andreas. This series adds configuration via
configfs and remote completion to the rnull driver. The series also
includes a set of changes to the rust block device driver API: a few
cleanup patches, and a few features supporting the rnull changes.
The series removes the raw buffer formatting logic from
`kernel::block` and improves the logic available in `kernel::string`
to support the same use as the removed logic.
- floppy arch cleanups
- Reduce the number of dereferencing needed for ublk commands
- Restrict the socket types supported by nbd. This was mostly done to
eliminate a class of issues perpetually reported by syzbot, which
exercises nonsensical socket setups.
- A few s390 dasd block fixes
- Fix a few issues around atomic writes
- Improve DMA iteration for integrity requests
- Improve how iovecs are treated with regard to O_DIRECT alignment
constraints.
We used to require each segment to adhere to the constraints; now
only the request as a whole needs to. (A hedged userspace sketch
follows after this list.)
- Clean up and improve p2p support, enabling use of p2p for metadata
payloads
- Improve locking of request lookup, using SRCU where appropriate
- Use page references properly for brd, avoiding very long RCU sections
- Fix ordering of recursively submitted IOs
- Clean up and improve updating nr_requests for a live device
- Various fixes and cleanups
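Below is a minimal, hedged sketch of the "first writer pays" idea behind the
lockless bitmap fast path described in the MD item above. It is a userspace
illustration only, not the kernel implementation; the bitmap size, the chunk
granularity, and the write_to_chunk() helper are invented for the example.

/* Conceptual illustration only -- not the md code. Repeated writes to an
 * already-dirty chunk only do a relaxed load; only the first writer takes
 * the slow path that actually marks the chunk dirty.
 */
#include <stdatomic.h>
#include <stdio.h>

#define BITS_PER_WORD 64UL

static _Atomic unsigned long dirty[4];	/* covers 256 hypothetical chunks */

static void write_to_chunk(unsigned long chunk)
{
	unsigned long mask = 1UL << (chunk % BITS_PER_WORD);
	_Atomic unsigned long *word = &dirty[chunk / BITS_PER_WORD];

	/* Fast path: bit already set, this write adds no bitmap overhead. */
	if (atomic_load_explicit(word, memory_order_acquire) & mask)
		return;

	/* Slow path: only the first writer to this chunk gets here. */
	if (!(atomic_fetch_or_explicit(word, mask, memory_order_acq_rel) & mask))
		printf("chunk %lu marked dirty (slow path taken once)\n", chunk);
}

int main(void)
{
	for (int i = 0; i < 5; i++)
		write_to_chunk(42);	/* only the first call does extra work */
	return 0;
}

The point is that writes hitting an already-dirty chunk never take a lock or
touch shared state beyond one relaxed load, which is what keeps the fast path
cheap when many writes land on the same bitmap bit in a short time.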
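And here is the hedged userspace sketch referenced in the O_DIRECT item: a
direct read built from two 256-byte iovec segments that together form one
512-byte logical block. Under the old per-segment rule such a vector would be
rejected; under the relaxed rule only the request as a whole has to satisfy
the alignment constraints. The device path is a placeholder, a 512-byte
logical block size is assumed, and whether each buffer must still meet the
device's reported dma alignment is an assumption, so check the queue limits
before relying on this exact layout.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	struct iovec iov[2];
	void *a, *b;
	int fd = open("/dev/sdX", O_RDONLY | O_DIRECT);	/* placeholder device */

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Two page-aligned buffers, each carrying half of one 512-byte block. */
	if (posix_memalign(&a, 4096, 4096) || posix_memalign(&b, 4096, 4096))
		return 1;

	iov[0] = (struct iovec){ .iov_base = a, .iov_len = 256 };
	iov[1] = (struct iovec){ .iov_base = b, .iov_len = 256 };

	/* The request as a whole: 512 bytes at offset 0, block aligned. */
	if (preadv(fd, iov, 2, 0) < 0)
		perror("preadv");
	else
		puts("512-byte direct read built from two 256-byte segments");

	free(a);
	free(b);
	close(fd);
	return 0;
}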
* tag 'for-6.18/block-20250929' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (164 commits)
s390/dasd: enforce dma_alignment to ensure proper buffer validation
s390/dasd: Return BLK_STS_INVAL for EINVAL from do_dasd_request
ublk: remove redundant zone op check in ublk_setup_iod()
nvme: Use non zero KATO for persistent discovery connections
nvmet: add safety check for subsys lock
nvme-core: use nvme_is_io_ctrl() for I/O controller check
nvme-core: do ioccsz/iorcsz validation only for I/O controllers
nvme-core: add method to check for an I/O controller
blk-cgroup: fix possible deadlock while configuring policy
blk-mq: fix null-ptr-deref in blk_mq_free_tags() from error path
blk-mq: Fix more tag iteration function documentation
selftests: ublk: fix behavior when fio is not installed
ublk: don't access ublk_queue in ublk_unmap_io()
ublk: pass ublk_io to __ublk_complete_rq()
ublk: don't access ublk_queue in ublk_need_complete_req()
ublk: don't access ublk_queue in ublk_check_commit_and_fetch()
ublk: don't pass ublk_queue to ublk_fetch()
ublk: don't access ublk_queue in ublk_config_io_buf()
ublk: don't access ublk_queue in ublk_check_fetch_buf()
ublk: pass q_id and tag to __ublk_check_and_get_req()
...
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--   drivers/md/raid1.c   119
1 file changed, 61 insertions(+), 58 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index d30b82beeb92..592a40233004 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -167,7 +167,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 		bio = bio_kmalloc(RESYNC_PAGES, gfp_flags);
 		if (!bio)
 			goto out_free_bio;
-		bio_init(bio, NULL, bio->bi_inline_vecs, RESYNC_PAGES, 0);
+		bio_init_inline(bio, NULL, RESYNC_PAGES, 0);
 		r1_bio->bios[j] = bio;
 	}
 	/*
@@ -1317,7 +1317,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	struct raid1_info *mirror;
 	struct bio *read_bio;
 	int max_sectors;
-	int rdisk, error;
+	int rdisk;
 	bool r1bio_existed = !!r1_bio;
 
 	/*
@@ -1366,7 +1366,8 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 				    (unsigned long long)r1_bio->sector,
 				    mirror->rdev->bdev);
 
-	if (test_bit(WriteMostly, &mirror->rdev->flags)) {
+	if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+	    md_bitmap_enabled(mddev, false)) {
 		/*
 		 * Reading from a write-mostly device must take care not to
 		 * over-take any writes that are 'behind'
@@ -1376,16 +1377,13 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	}
 
 	if (max_sectors < bio_sectors(bio)) {
-		struct bio *split = bio_split(bio, max_sectors,
-					      gfp, &conf->bio_split);
-
-		if (IS_ERR(split)) {
-			error = PTR_ERR(split);
+		bio = bio_submit_split_bioset(bio, max_sectors,
+					      &conf->bio_split);
+		if (!bio) {
+			set_bit(R1BIO_Returned, &r1_bio->state);
 			goto err_handle;
 		}
-		bio_chain(split, bio);
-		submit_bio_noacct(bio);
-		bio = split;
+		r1_bio->master_bio = bio;
 		r1_bio->sectors = max_sectors;
 	}
 
@@ -1413,8 +1411,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 
 err_handle:
 	atomic_dec(&mirror->rdev->nr_pending);
-	bio->bi_status = errno_to_blk_status(error);
-	set_bit(R1BIO_Uptodate, &r1_bio->state);
 	raid_end_bio_io(r1_bio);
 }
 
@@ -1452,12 +1448,36 @@ retry:
 	return true;
 }
 
+static void raid1_start_write_behind(struct mddev *mddev, struct r1bio *r1_bio,
+				     struct bio *bio)
+{
+	unsigned long max_write_behind = mddev->bitmap_info.max_write_behind;
+	struct md_bitmap_stats stats;
+	int err;
+
+	/* behind write rely on bitmap, see bitmap_operations */
+	if (!md_bitmap_enabled(mddev, false))
+		return;
+
+	err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
+	if (err)
+		return;
+
+	/* Don't do behind IO if reader is waiting, or there are too many. */
+	if (!stats.behind_wait && stats.behind_writes < max_write_behind)
+		alloc_behind_master_bio(r1_bio, bio);
+
+	if (test_bit(R1BIO_BehindIO, &r1_bio->state))
+		mddev->bitmap_ops->start_behind_write(mddev);
+
+}
+
 static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 				int max_write_sectors)
 {
 	struct r1conf *conf = mddev->private;
 	struct r1bio *r1_bio;
-	int i, disks, k, error;
+	int i, disks, k;
 	unsigned long flags;
 	int first_clone;
 	int max_sectors;
@@ -1561,10 +1581,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 			 * complexity of supporting that is not worth
 			 * the benefit.
 			 */
-			if (bio->bi_opf & REQ_ATOMIC) {
-				error = -EIO;
+			if (bio->bi_opf & REQ_ATOMIC)
 				goto err_handle;
-			}
 
 			good_sectors = first_bad - r1_bio->sector;
 			if (good_sectors < max_sectors)
@@ -1584,16 +1602,13 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	max_sectors = min_t(int, max_sectors,
 			    BIO_MAX_VECS * (PAGE_SIZE >> 9));
 	if (max_sectors < bio_sectors(bio)) {
-		struct bio *split = bio_split(bio, max_sectors,
-					      GFP_NOIO, &conf->bio_split);
-
-		if (IS_ERR(split)) {
-			error = PTR_ERR(split);
+		bio = bio_submit_split_bioset(bio, max_sectors,
+					      &conf->bio_split);
+		if (!bio) {
+			set_bit(R1BIO_Returned, &r1_bio->state);
 			goto err_handle;
 		}
-		bio_chain(split, bio);
-		submit_bio_noacct(bio);
-		bio = split;
+		r1_bio->master_bio = bio;
 		r1_bio->sectors = max_sectors;
 	}
 
@@ -1612,22 +1627,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 			continue;
 
 		if (first_clone) {
-			unsigned long max_write_behind =
-				mddev->bitmap_info.max_write_behind;
-			struct md_bitmap_stats stats;
-			int err;
-
-			/* do behind I/O ?
-			 * Not if there are too many, or cannot
-			 * allocate memory, or a reader on WriteMostly
-			 * is waiting for behind writes to flush */
-			err = mddev->bitmap_ops->get_stats(mddev->bitmap, &stats);
-			if (!err && write_behind && !stats.behind_wait &&
-			    stats.behind_writes < max_write_behind)
-				alloc_behind_master_bio(r1_bio, bio);
-
-			if (test_bit(R1BIO_BehindIO, &r1_bio->state))
-				mddev->bitmap_ops->start_behind_write(mddev);
+			if (write_behind)
+				raid1_start_write_behind(mddev, r1_bio, bio);
 
 			first_clone = 0;
 		}
@@ -1683,8 +1684,6 @@ err_handle:
 		}
 	}
 
-	bio->bi_status = errno_to_blk_status(error);
-	set_bit(R1BIO_Uptodate, &r1_bio->state);
 	raid_end_bio_io(r1_bio);
 }
 
@@ -2057,7 +2056,7 @@ static void abort_sync_write(struct mddev *mddev, struct r1bio *r1_bio)
 
 	/* make sure these bits don't get cleared. */
 	do {
-		mddev->bitmap_ops->end_sync(mddev, s, &sync_blocks);
+		md_bitmap_end_sync(mddev, s, &sync_blocks);
 		s += sync_blocks;
 		sectors_to_go -= sync_blocks;
 	} while (sectors_to_go > 0);
@@ -2804,12 +2803,13 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 		 * We can find the current addess in mddev->curr_resync
 		 */
 		if (mddev->curr_resync < max_sector) /* aborted */
-			mddev->bitmap_ops->end_sync(mddev, mddev->curr_resync,
-						    &sync_blocks);
+			md_bitmap_end_sync(mddev, mddev->curr_resync,
+					   &sync_blocks);
 		else /* completed sync */
 			conf->fullsync = 0;
 
-		mddev->bitmap_ops->close_sync(mddev);
+		if (md_bitmap_enabled(mddev, false))
+			mddev->bitmap_ops->close_sync(mddev);
 		close_sync(conf);
 
 		if (mddev_is_clustered(mddev)) {
@@ -2829,7 +2829,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 	/* before building a request, check if we can skip these blocks..
 	 * This call the bitmap_start_sync doesn't actually record anything
 	 */
-	if (!mddev->bitmap_ops->start_sync(mddev, sector_nr, &sync_blocks, true) &&
+	if (!md_bitmap_start_sync(mddev, sector_nr, &sync_blocks, true) &&
 	    !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 		/* We can skip this block, and probably several more */
 		*skipped = 1;
@@ -2846,10 +2846,11 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 	/* we are incrementing sector_nr below. To be safe, we check against
 	 * sector_nr + two times RESYNC_SECTORS
 	 */
-
-	mddev->bitmap_ops->cond_end_sync(mddev, sector_nr,
-		mddev_is_clustered(mddev) &&
-		(sector_nr + 2 * RESYNC_SECTORS > conf->cluster_sync_high));
+	if (md_bitmap_enabled(mddev, false))
+		mddev->bitmap_ops->cond_end_sync(mddev, sector_nr,
+			mddev_is_clustered(mddev) &&
+			(sector_nr + 2 * RESYNC_SECTORS >
+			 conf->cluster_sync_high));
 
 	if (raise_barrier(conf, sector_nr))
 		return 0;
@@ -3004,8 +3005,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 		if (len == 0)
 			break;
 		if (sync_blocks == 0) {
-			if (!mddev->bitmap_ops->start_sync(mddev, sector_nr,
-							   &sync_blocks, still_degraded) &&
+			if (!md_bitmap_start_sync(mddev, sector_nr,
+						  &sync_blocks, still_degraded) &&
 			    !conf->fullsync &&
 			    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
 				break;
@@ -3325,15 +3326,17 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors)
 	 * worth it.
 	 */
 	sector_t newsize = raid1_size(mddev, sectors, 0);
-	int ret;
 
 	if (mddev->external_size &&
 	    mddev->array_sectors > newsize)
 		return -EINVAL;
 
-	ret = mddev->bitmap_ops->resize(mddev, newsize, 0, false);
-	if (ret)
-		return ret;
+	if (md_bitmap_enabled(mddev, false)) {
+		int ret = mddev->bitmap_ops->resize(mddev, newsize, 0);
+
+		if (ret)
+			return ret;
+	}
 
 	md_set_array_sectors(mddev, newsize);
 	if (sectors > mddev->dev_sectors &&