diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-08-22 09:29:51 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-08-22 09:29:51 -0400 |
commit | a2e94e80790bb103ca72f8a2991f43c80474a4b6 (patch) | |
tree | 5f7e444890f5d36d2d6df904331386c5f9c7c8ad /drivers | |
parent | d28de4fc0aaa8db6c0163e37c6d4d07f062a08db (diff) | |
parent | 370ac285f23aecae40600851fb4a1a9e75e50973 (diff) |
Merge tag 'block-6.17-20250822' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe:
"A set of fixes for block that should go into this tree. A bit larger
than what I usually have at this point in time, a lot of that is the
continued fixing of the lockdep annotation for queue freezing that we
recently added, which has highlighted a number of little issues here
and there. This contains:
- MD pull request via Yu:
- Add a legacy_async_del_gendisk mode, to prevent a user tools
regression. New user tools releases will not use such a mode,
the old release with a new kernel now will have warning about
deprecated behavior, and we prepare to remove this legacy mode
after about a year later
- The rename in kernel causing user tools build failure, revert
the rename in mdp_superblock_s
- Fix a regression that interrupted resync can be shown as
recover from mdstat or sysfs
- Improve file size detection for loop, particularly for networked
file systems, by using getattr to get the size rather than the
cached inode size.
- Hotplug CPU lock vs queue freeze fix
- Lockdep fix while updating the number of hardware queues
- Fix stacking for PI devices
- Silence bio_check_eod() for the known case of device removal where
the size is truncated to 0 sectors"
* tag 'block-6.17-20250822' of git://git.kernel.dk/linux:
block: avoid cpu_hotplug_lock depedency on freeze_lock
block: decrement block_rq_qos static key in rq_qos_del()
block: skip q->rq_qos check in rq_qos_done_bio()
blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues
block: tone down bio_check_eod
loop: use vfs_getattr_nosec for accurate file size
loop: Consolidate size calculation logic into lo_calculate_size()
block: remove newlines from the warnings in blk_validate_integrity_limits
block: handle pi_tuple_size in queue_limits_stack_integrity
selftests: ublk: Use ARRAY_SIZE() macro to improve code
md: fix sync_action incorrect display during resync
md: add helper rdev_needs_recovery()
md: keep recovery_cp in mdp_superblock_s
md: add legacy_async_del_gendisk mode
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/loop.c | 39 | ||||
-rw-r--r-- | drivers/md/md.c | 122 |
2 files changed, 113 insertions, 48 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1b6ee91f8eb9..57263c273f0f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -137,20 +137,29 @@ static void loop_global_unlock(struct loop_device *lo, bool global) static int max_part; static int part_shift; -static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) +static loff_t lo_calculate_size(struct loop_device *lo, struct file *file) { + struct kstat stat; loff_t loopsize; + int ret; - /* Compute loopsize in bytes */ - loopsize = i_size_read(file->f_mapping->host); - if (offset > 0) - loopsize -= offset; + /* + * Get the accurate file size. This provides better results than + * cached inode data, particularly for network filesystems where + * metadata may be stale. + */ + ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0); + if (ret) + return 0; + + loopsize = stat.size; + if (lo->lo_offset > 0) + loopsize -= lo->lo_offset; /* offset is beyond i_size, weird but possible */ if (loopsize < 0) return 0; - - if (sizelimit > 0 && sizelimit < loopsize) - loopsize = sizelimit; + if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) + loopsize = lo->lo_sizelimit; /* * Unfortunately, if we want to do I/O on the device, * the number of 512-byte sectors has to fit into a sector_t. @@ -158,11 +167,6 @@ static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) return loopsize >> 9; } -static loff_t get_loop_size(struct loop_device *lo, struct file *file) -{ - return get_size(lo->lo_offset, lo->lo_sizelimit, file); -} - /* * We support direct I/O only if lo_offset is aligned with the logical I/O size * of backing device, and the logical block size of loop is bigger than that of @@ -569,7 +573,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, error = -EINVAL; /* size of the new backing store needs to be the same */ - if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) + if (lo_calculate_size(lo, file) != lo_calculate_size(lo, old_file)) goto out_err; /* @@ -1063,7 +1067,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode, loop_update_dio(lo); loop_sysfs_init(lo); - size = get_loop_size(lo, file); + size = lo_calculate_size(lo, file); loop_set_size(lo, size); /* Order wrt reading lo_state in loop_validate_file(). */ @@ -1255,8 +1259,7 @@ out_unfreeze: if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); if (!err && size_changed) { - loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, - lo->lo_backing_file); + loff_t new_size = lo_calculate_size(lo, lo->lo_backing_file); loop_set_size(lo, new_size); } out_unlock: @@ -1399,7 +1402,7 @@ static int loop_set_capacity(struct loop_device *lo) if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - size = get_loop_size(lo, lo->lo_backing_file); + size = lo_calculate_size(lo, lo->lo_backing_file); loop_set_size(lo, size); return 0; diff --git a/drivers/md/md.c b/drivers/md/md.c index ac85ec73a409..1baaf52c603c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -339,6 +339,7 @@ static int start_readonly; * so all the races disappear. */ static bool create_on_open = true; +static bool legacy_async_del_gendisk = true; /* * We have a system wide 'event count' that is incremented @@ -877,15 +878,18 @@ void mddev_unlock(struct mddev *mddev) export_rdev(rdev, mddev); } - /* Call del_gendisk after release reconfig_mutex to avoid - * deadlock (e.g. call del_gendisk under the lock and an - * access to sysfs files waits the lock) - * And MD_DELETED is only used for md raid which is set in - * do_md_stop. dm raid only uses md_stop to stop. So dm raid - * doesn't need to check MD_DELETED when getting reconfig lock - */ - if (test_bit(MD_DELETED, &mddev->flags)) - del_gendisk(mddev->gendisk); + if (!legacy_async_del_gendisk) { + /* + * Call del_gendisk after release reconfig_mutex to avoid + * deadlock (e.g. call del_gendisk under the lock and an + * access to sysfs files waits the lock) + * And MD_DELETED is only used for md raid which is set in + * do_md_stop. dm raid only uses md_stop to stop. So dm raid + * doesn't need to check MD_DELETED when getting reconfig lock + */ + if (test_bit(MD_DELETED, &mddev->flags)) + del_gendisk(mddev->gendisk); + } } EXPORT_SYMBOL_GPL(mddev_unlock); @@ -1419,7 +1423,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru else { if (sb->events_hi == sb->cp_events_hi && sb->events_lo == sb->cp_events_lo) { - mddev->resync_offset = sb->resync_offset; + mddev->resync_offset = sb->recovery_cp; } else mddev->resync_offset = 0; } @@ -1547,13 +1551,13 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) mddev->minor_version = sb->minor_version; if (mddev->in_sync) { - sb->resync_offset = mddev->resync_offset; + sb->recovery_cp = mddev->resync_offset; sb->cp_events_hi = (mddev->events>>32); sb->cp_events_lo = (u32)mddev->events; if (mddev->resync_offset == MaxSector) sb->state = (1<< MD_SB_CLEAN); } else - sb->resync_offset = 0; + sb->recovery_cp = 0; sb->layout = mddev->layout; sb->chunk_size = mddev->chunk_sectors << 9; @@ -4835,9 +4839,42 @@ out_unlock: static struct md_sysfs_entry md_metadata = __ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store); +static bool rdev_needs_recovery(struct md_rdev *rdev, sector_t sectors) +{ + return rdev->raid_disk >= 0 && + !test_bit(Journal, &rdev->flags) && + !test_bit(Faulty, &rdev->flags) && + !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < sectors; +} + +static enum sync_action md_get_active_sync_action(struct mddev *mddev) +{ + struct md_rdev *rdev; + bool is_recover = false; + + if (mddev->resync_offset < MaxSector) + return ACTION_RESYNC; + + if (mddev->reshape_position != MaxSector) + return ACTION_RESHAPE; + + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { + if (rdev_needs_recovery(rdev, MaxSector)) { + is_recover = true; + break; + } + } + rcu_read_unlock(); + + return is_recover ? ACTION_RECOVER : ACTION_IDLE; +} + enum sync_action md_sync_action(struct mddev *mddev) { unsigned long recovery = mddev->recovery; + enum sync_action active_action; /* * frozen has the highest priority, means running sync_thread will be @@ -4861,8 +4898,17 @@ enum sync_action md_sync_action(struct mddev *mddev) !test_bit(MD_RECOVERY_NEEDED, &recovery)) return ACTION_IDLE; - if (test_bit(MD_RECOVERY_RESHAPE, &recovery) || - mddev->reshape_position != MaxSector) + /* + * Check if any sync operation (resync/recover/reshape) is + * currently active. This ensures that only one sync operation + * can run at a time. Returns the type of active operation, or + * ACTION_IDLE if none are active. + */ + active_action = md_get_active_sync_action(mddev); + if (active_action != ACTION_IDLE) + return active_action; + + if (test_bit(MD_RECOVERY_RESHAPE, &recovery)) return ACTION_RESHAPE; if (test_bit(MD_RECOVERY_RECOVER, &recovery)) @@ -5818,6 +5864,13 @@ static void md_kobj_release(struct kobject *ko) { struct mddev *mddev = container_of(ko, struct mddev, kobj); + if (legacy_async_del_gendisk) { + if (mddev->sysfs_state) + sysfs_put(mddev->sysfs_state); + if (mddev->sysfs_level) + sysfs_put(mddev->sysfs_level); + del_gendisk(mddev->gendisk); + } put_disk(mddev->gendisk); } @@ -6021,6 +6074,9 @@ static int md_alloc_and_put(dev_t dev, char *name) { struct mddev *mddev = md_alloc(dev, name); + if (legacy_async_del_gendisk) + pr_warn("md: async del_gendisk mode will be removed in future, please upgrade to mdadm-4.5+\n"); + if (IS_ERR(mddev)) return PTR_ERR(mddev); mddev_put(mddev); @@ -6431,10 +6487,22 @@ static void md_clean(struct mddev *mddev) mddev->persistent = 0; mddev->level = LEVEL_NONE; mddev->clevel[0] = 0; - /* if UNTIL_STOP is set, it's cleared here */ - mddev->hold_active = 0; - /* Don't clear MD_CLOSING, or mddev can be opened again. */ - mddev->flags &= BIT_ULL_MASK(MD_CLOSING); + + /* + * For legacy_async_del_gendisk mode, it can stop the array in the + * middle of assembling it, then it still can access the array. So + * it needs to clear MD_CLOSING. If not legacy_async_del_gendisk, + * it can't open the array again after stopping it. So it doesn't + * clear MD_CLOSING. + */ + if (legacy_async_del_gendisk && mddev->hold_active) { + clear_bit(MD_CLOSING, &mddev->flags); + } else { + /* if UNTIL_STOP is set, it's cleared here */ + mddev->hold_active = 0; + /* Don't clear MD_CLOSING, or mddev can be opened again. */ + mddev->flags &= BIT_ULL_MASK(MD_CLOSING); + } mddev->sb_flags = 0; mddev->ro = MD_RDWR; mddev->metadata_type[0] = 0; @@ -6658,7 +6726,8 @@ static int do_md_stop(struct mddev *mddev, int mode) export_array(mddev); md_clean(mddev); - set_bit(MD_DELETED, &mddev->flags); + if (!legacy_async_del_gendisk) + set_bit(MD_DELETED, &mddev->flags); } md_new_event(); sysfs_notify_dirent_safe(mddev->sysfs_state); @@ -8968,11 +9037,7 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action) start = MaxSector; rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - !test_bit(Journal, &rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < start) + if (rdev_needs_recovery(rdev, start)) start = rdev->recovery_offset; rcu_read_unlock(); @@ -9331,12 +9396,8 @@ void md_do_sync(struct md_thread *thread) test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) { rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) - if (rdev->raid_disk >= 0 && - mddev->delta_disks >= 0 && - !test_bit(Journal, &rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < mddev->curr_resync) + if (mddev->delta_disks >= 0 && + rdev_needs_recovery(rdev, mddev->curr_resync)) rdev->recovery_offset = mddev->curr_resync; rcu_read_unlock(); } @@ -10392,6 +10453,7 @@ module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR); module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR); module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR); module_param(create_on_open, bool, S_IRUSR|S_IWUSR); +module_param(legacy_async_del_gendisk, bool, 0600); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD RAID framework"); |