summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-08-22 09:29:51 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2025-08-22 09:29:51 -0400
commita2e94e80790bb103ca72f8a2991f43c80474a4b6 (patch)
tree5f7e444890f5d36d2d6df904331386c5f9c7c8ad /drivers
parentd28de4fc0aaa8db6c0163e37c6d4d07f062a08db (diff)
parent370ac285f23aecae40600851fb4a1a9e75e50973 (diff)
Merge tag 'block-6.17-20250822' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe: "A set of fixes for block that should go into this tree. A bit larger than what I usually have at this point in time, a lot of that is the continued fixing of the lockdep annotation for queue freezing that we recently added, which has highlighted a number of little issues here and there. This contains: - MD pull request via Yu: - Add a legacy_async_del_gendisk mode, to prevent a user tools regression. New user tools releases will not use such a mode, the old release with a new kernel now will have warning about deprecated behavior, and we prepare to remove this legacy mode after about a year later - The rename in kernel causing user tools build failure, revert the rename in mdp_superblock_s - Fix a regression that interrupted resync can be shown as recover from mdstat or sysfs - Improve file size detection for loop, particularly for networked file systems, by using getattr to get the size rather than the cached inode size. - Hotplug CPU lock vs queue freeze fix - Lockdep fix while updating the number of hardware queues - Fix stacking for PI devices - Silence bio_check_eod() for the known case of device removal where the size is truncated to 0 sectors" * tag 'block-6.17-20250822' of git://git.kernel.dk/linux: block: avoid cpu_hotplug_lock depedency on freeze_lock block: decrement block_rq_qos static key in rq_qos_del() block: skip q->rq_qos check in rq_qos_done_bio() blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues block: tone down bio_check_eod loop: use vfs_getattr_nosec for accurate file size loop: Consolidate size calculation logic into lo_calculate_size() block: remove newlines from the warnings in blk_validate_integrity_limits block: handle pi_tuple_size in queue_limits_stack_integrity selftests: ublk: Use ARRAY_SIZE() macro to improve code md: fix sync_action incorrect display during resync md: add helper rdev_needs_recovery() md: keep recovery_cp in mdp_superblock_s md: add legacy_async_del_gendisk mode
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/loop.c39
-rw-r--r--drivers/md/md.c122
2 files changed, 113 insertions, 48 deletions
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1b6ee91f8eb9..57263c273f0f 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -137,20 +137,29 @@ static void loop_global_unlock(struct loop_device *lo, bool global)
static int max_part;
static int part_shift;
-static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file)
+static loff_t lo_calculate_size(struct loop_device *lo, struct file *file)
{
+ struct kstat stat;
loff_t loopsize;
+ int ret;
- /* Compute loopsize in bytes */
- loopsize = i_size_read(file->f_mapping->host);
- if (offset > 0)
- loopsize -= offset;
+ /*
+ * Get the accurate file size. This provides better results than
+ * cached inode data, particularly for network filesystems where
+ * metadata may be stale.
+ */
+ ret = vfs_getattr_nosec(&file->f_path, &stat, STATX_SIZE, 0);
+ if (ret)
+ return 0;
+
+ loopsize = stat.size;
+ if (lo->lo_offset > 0)
+ loopsize -= lo->lo_offset;
/* offset is beyond i_size, weird but possible */
if (loopsize < 0)
return 0;
-
- if (sizelimit > 0 && sizelimit < loopsize)
- loopsize = sizelimit;
+ if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
+ loopsize = lo->lo_sizelimit;
/*
* Unfortunately, if we want to do I/O on the device,
* the number of 512-byte sectors has to fit into a sector_t.
@@ -158,11 +167,6 @@ static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file)
return loopsize >> 9;
}
-static loff_t get_loop_size(struct loop_device *lo, struct file *file)
-{
- return get_size(lo->lo_offset, lo->lo_sizelimit, file);
-}
-
/*
* We support direct I/O only if lo_offset is aligned with the logical I/O size
* of backing device, and the logical block size of loop is bigger than that of
@@ -569,7 +573,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
error = -EINVAL;
/* size of the new backing store needs to be the same */
- if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
+ if (lo_calculate_size(lo, file) != lo_calculate_size(lo, old_file))
goto out_err;
/*
@@ -1063,7 +1067,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
loop_update_dio(lo);
loop_sysfs_init(lo);
- size = get_loop_size(lo, file);
+ size = lo_calculate_size(lo, file);
loop_set_size(lo, size);
/* Order wrt reading lo_state in loop_validate_file(). */
@@ -1255,8 +1259,7 @@ out_unfreeze:
if (partscan)
clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
if (!err && size_changed) {
- loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
- lo->lo_backing_file);
+ loff_t new_size = lo_calculate_size(lo, lo->lo_backing_file);
loop_set_size(lo, new_size);
}
out_unlock:
@@ -1399,7 +1402,7 @@ static int loop_set_capacity(struct loop_device *lo)
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- size = get_loop_size(lo, lo->lo_backing_file);
+ size = lo_calculate_size(lo, lo->lo_backing_file);
loop_set_size(lo, size);
return 0;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ac85ec73a409..1baaf52c603c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -339,6 +339,7 @@ static int start_readonly;
* so all the races disappear.
*/
static bool create_on_open = true;
+static bool legacy_async_del_gendisk = true;
/*
* We have a system wide 'event count' that is incremented
@@ -877,15 +878,18 @@ void mddev_unlock(struct mddev *mddev)
export_rdev(rdev, mddev);
}
- /* Call del_gendisk after release reconfig_mutex to avoid
- * deadlock (e.g. call del_gendisk under the lock and an
- * access to sysfs files waits the lock)
- * And MD_DELETED is only used for md raid which is set in
- * do_md_stop. dm raid only uses md_stop to stop. So dm raid
- * doesn't need to check MD_DELETED when getting reconfig lock
- */
- if (test_bit(MD_DELETED, &mddev->flags))
- del_gendisk(mddev->gendisk);
+ if (!legacy_async_del_gendisk) {
+ /*
+ * Call del_gendisk after release reconfig_mutex to avoid
+ * deadlock (e.g. call del_gendisk under the lock and an
+ * access to sysfs files waits the lock)
+ * And MD_DELETED is only used for md raid which is set in
+ * do_md_stop. dm raid only uses md_stop to stop. So dm raid
+ * doesn't need to check MD_DELETED when getting reconfig lock
+ */
+ if (test_bit(MD_DELETED, &mddev->flags))
+ del_gendisk(mddev->gendisk);
+ }
}
EXPORT_SYMBOL_GPL(mddev_unlock);
@@ -1419,7 +1423,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, stru
else {
if (sb->events_hi == sb->cp_events_hi &&
sb->events_lo == sb->cp_events_lo) {
- mddev->resync_offset = sb->resync_offset;
+ mddev->resync_offset = sb->recovery_cp;
} else
mddev->resync_offset = 0;
}
@@ -1547,13 +1551,13 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
mddev->minor_version = sb->minor_version;
if (mddev->in_sync)
{
- sb->resync_offset = mddev->resync_offset;
+ sb->recovery_cp = mddev->resync_offset;
sb->cp_events_hi = (mddev->events>>32);
sb->cp_events_lo = (u32)mddev->events;
if (mddev->resync_offset == MaxSector)
sb->state = (1<< MD_SB_CLEAN);
} else
- sb->resync_offset = 0;
+ sb->recovery_cp = 0;
sb->layout = mddev->layout;
sb->chunk_size = mddev->chunk_sectors << 9;
@@ -4835,9 +4839,42 @@ out_unlock:
static struct md_sysfs_entry md_metadata =
__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
+static bool rdev_needs_recovery(struct md_rdev *rdev, sector_t sectors)
+{
+ return rdev->raid_disk >= 0 &&
+ !test_bit(Journal, &rdev->flags) &&
+ !test_bit(Faulty, &rdev->flags) &&
+ !test_bit(In_sync, &rdev->flags) &&
+ rdev->recovery_offset < sectors;
+}
+
+static enum sync_action md_get_active_sync_action(struct mddev *mddev)
+{
+ struct md_rdev *rdev;
+ bool is_recover = false;
+
+ if (mddev->resync_offset < MaxSector)
+ return ACTION_RESYNC;
+
+ if (mddev->reshape_position != MaxSector)
+ return ACTION_RESHAPE;
+
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev) {
+ if (rdev_needs_recovery(rdev, MaxSector)) {
+ is_recover = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return is_recover ? ACTION_RECOVER : ACTION_IDLE;
+}
+
enum sync_action md_sync_action(struct mddev *mddev)
{
unsigned long recovery = mddev->recovery;
+ enum sync_action active_action;
/*
* frozen has the highest priority, means running sync_thread will be
@@ -4861,8 +4898,17 @@ enum sync_action md_sync_action(struct mddev *mddev)
!test_bit(MD_RECOVERY_NEEDED, &recovery))
return ACTION_IDLE;
- if (test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
- mddev->reshape_position != MaxSector)
+ /*
+ * Check if any sync operation (resync/recover/reshape) is
+ * currently active. This ensures that only one sync operation
+ * can run at a time. Returns the type of active operation, or
+ * ACTION_IDLE if none are active.
+ */
+ active_action = md_get_active_sync_action(mddev);
+ if (active_action != ACTION_IDLE)
+ return active_action;
+
+ if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
return ACTION_RESHAPE;
if (test_bit(MD_RECOVERY_RECOVER, &recovery))
@@ -5818,6 +5864,13 @@ static void md_kobj_release(struct kobject *ko)
{
struct mddev *mddev = container_of(ko, struct mddev, kobj);
+ if (legacy_async_del_gendisk) {
+ if (mddev->sysfs_state)
+ sysfs_put(mddev->sysfs_state);
+ if (mddev->sysfs_level)
+ sysfs_put(mddev->sysfs_level);
+ del_gendisk(mddev->gendisk);
+ }
put_disk(mddev->gendisk);
}
@@ -6021,6 +6074,9 @@ static int md_alloc_and_put(dev_t dev, char *name)
{
struct mddev *mddev = md_alloc(dev, name);
+ if (legacy_async_del_gendisk)
+ pr_warn("md: async del_gendisk mode will be removed in future, please upgrade to mdadm-4.5+\n");
+
if (IS_ERR(mddev))
return PTR_ERR(mddev);
mddev_put(mddev);
@@ -6431,10 +6487,22 @@ static void md_clean(struct mddev *mddev)
mddev->persistent = 0;
mddev->level = LEVEL_NONE;
mddev->clevel[0] = 0;
- /* if UNTIL_STOP is set, it's cleared here */
- mddev->hold_active = 0;
- /* Don't clear MD_CLOSING, or mddev can be opened again. */
- mddev->flags &= BIT_ULL_MASK(MD_CLOSING);
+
+ /*
+ * For legacy_async_del_gendisk mode, it can stop the array in the
+ * middle of assembling it, then it still can access the array. So
+ * it needs to clear MD_CLOSING. If not legacy_async_del_gendisk,
+ * it can't open the array again after stopping it. So it doesn't
+ * clear MD_CLOSING.
+ */
+ if (legacy_async_del_gendisk && mddev->hold_active) {
+ clear_bit(MD_CLOSING, &mddev->flags);
+ } else {
+ /* if UNTIL_STOP is set, it's cleared here */
+ mddev->hold_active = 0;
+ /* Don't clear MD_CLOSING, or mddev can be opened again. */
+ mddev->flags &= BIT_ULL_MASK(MD_CLOSING);
+ }
mddev->sb_flags = 0;
mddev->ro = MD_RDWR;
mddev->metadata_type[0] = 0;
@@ -6658,7 +6726,8 @@ static int do_md_stop(struct mddev *mddev, int mode)
export_array(mddev);
md_clean(mddev);
- set_bit(MD_DELETED, &mddev->flags);
+ if (!legacy_async_del_gendisk)
+ set_bit(MD_DELETED, &mddev->flags);
}
md_new_event();
sysfs_notify_dirent_safe(mddev->sysfs_state);
@@ -8968,11 +9037,7 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
start = MaxSector;
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev)
- if (rdev->raid_disk >= 0 &&
- !test_bit(Journal, &rdev->flags) &&
- !test_bit(Faulty, &rdev->flags) &&
- !test_bit(In_sync, &rdev->flags) &&
- rdev->recovery_offset < start)
+ if (rdev_needs_recovery(rdev, start))
start = rdev->recovery_offset;
rcu_read_unlock();
@@ -9331,12 +9396,8 @@ void md_do_sync(struct md_thread *thread)
test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev)
- if (rdev->raid_disk >= 0 &&
- mddev->delta_disks >= 0 &&
- !test_bit(Journal, &rdev->flags) &&
- !test_bit(Faulty, &rdev->flags) &&
- !test_bit(In_sync, &rdev->flags) &&
- rdev->recovery_offset < mddev->curr_resync)
+ if (mddev->delta_disks >= 0 &&
+ rdev_needs_recovery(rdev, mddev->curr_resync))
rdev->recovery_offset = mddev->curr_resync;
rcu_read_unlock();
}
@@ -10392,6 +10453,7 @@ module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
+module_param(legacy_async_del_gendisk, bool, 0600);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");