summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2025-03-13 05:34:51 -0600
committerJens Axboe <axboe@kernel.dk>2025-03-13 05:34:51 -0600
commit017ff379b60b98c17b82f878b4eb751dc99eb21c (patch)
tree3602e7e803e5f78ad4164f3616f21b8637440e22 /drivers/md
parent26064d3e2b4d9a14df1072980e558c636fb023ea (diff)
parent3db4404435397a345431b45f57876a3df133f3b4 (diff)
Merge tag 'md-6.15-20250312' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux into for-6.15/block
Merge MD changes from Yu: "- fix recovery can preempt resync (Li Nan) - fix md-bitmap IO limit (Su Yue) - fix raid10 discard with REQ_NOWAIT (Xiao Ni) - fix raid1 memory leak (Zheng Qixing) - fix mddev uaf (Yu Kuai) - fix raid1,raid10 IO flags (Yu Kuai) - some refactor and cleanup (Yu Kuai)" * tag 'md-6.15-20250312' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux: md/raid10: wait barrier before returning discard request with REQ_NOWAIT md/md-bitmap: fix wrong bitmap_limit for clustermd when write sb md/raid1,raid10: don't ignore IO flags md/raid5: merge reshape_progress checking inside get_reshape_loc() md: fix mddev uaf while iterating all_mddevs list md: switch md-cluster to use md_submodle_head md: don't export md_cluster_ops md/md-cluster: cleanup md_cluster_ops reference md: switch personalities to use md_submodule_head md: introduce struct md_submodule_head and APIs md: only include md-cluster.h if necessary md: merge common code into find_pers() md/raid1: fix memory leak in raid1_run() if no active rdev md: ensure resync is prioritized over recovery
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/md-bitmap.c14
-rw-r--r--drivers/md/md-cluster.c18
-rw-r--r--drivers/md/md-cluster.h6
-rw-r--r--drivers/md/md-linear.c15
-rw-r--r--drivers/md/md.c295
-rw-r--r--drivers/md/md.h48
-rw-r--r--drivers/md/raid0.c18
-rw-r--r--drivers/md/raid1-10.c4
-rw-r--r--drivers/md/raid1.c46
-rw-r--r--drivers/md/raid10.c52
-rw-r--r--drivers/md/raid5.c91
11 files changed, 338 insertions, 269 deletions
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 23c09d22fcdb..44ec9b17cfd3 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -29,8 +29,10 @@
#include <linux/buffer_head.h>
#include <linux/seq_file.h>
#include <trace/events/block.h>
+
#include "md.h"
#include "md-bitmap.h"
+#include "md-cluster.h"
#define BITMAP_MAJOR_LO 3
/* version 4 insists the bitmap is in little-endian order
@@ -426,8 +428,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
struct block_device *bdev;
struct mddev *mddev = bitmap->mddev;
struct bitmap_storage *store = &bitmap->storage;
- unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) <<
- PAGE_SHIFT;
+ unsigned long num_pages = bitmap->storage.file_pages;
+ unsigned int bitmap_limit = (num_pages - pg_index % num_pages) << PAGE_SHIFT;
loff_t sboff, offset = mddev->bitmap_info.offset;
sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
unsigned int size = PAGE_SIZE;
@@ -436,7 +438,7 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
/* we compare length (page numbers), not page offset. */
- if ((pg_index - store->sb_index) == store->file_pages - 1) {
+ if ((pg_index - store->sb_index) == num_pages - 1) {
unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
if (last_page_size == 0)
@@ -942,7 +944,7 @@ out:
bmname(bitmap), err);
goto out_no_sb;
}
- bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
+ bitmap->cluster_slot = bitmap->mddev->cluster_ops->slot_number(bitmap->mddev);
goto re_read;
}
@@ -2021,7 +2023,7 @@ static void md_bitmap_free(void *data)
sysfs_put(bitmap->sysfs_can_clear);
if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
- bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
+ bitmap->cluster_slot == bitmap->mddev->cluster_ops->slot_number(bitmap->mddev))
md_cluster_stop(bitmap->mddev);
/* Shouldn't be needed - but just in case.... */
@@ -2229,7 +2231,7 @@ static int bitmap_load(struct mddev *mddev)
mddev_create_serial_pool(mddev, rdev);
if (mddev_is_clustered(mddev))
- md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
+ mddev->cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
/* Clear out old bitmap info first: Either there is none, or we
* are resuming after someone else has possibly changed things,
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 6595f89becdb..94221d964d4f 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -1166,7 +1166,7 @@ static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsiz
struct dlm_lock_resource *bm_lockres;
char str[64];
- if (i == md_cluster_ops->slot_number(mddev))
+ if (i == slot_number(mddev))
continue;
bitmap = mddev->bitmap_ops->get_from_slot(mddev, i);
@@ -1216,7 +1216,7 @@ out:
*/
static int cluster_check_sync_size(struct mddev *mddev)
{
- int current_slot = md_cluster_ops->slot_number(mddev);
+ int current_slot = slot_number(mddev);
int node_num = mddev->bitmap_info.nodes;
struct dlm_lock_resource *bm_lockres;
struct md_bitmap_stats stats;
@@ -1612,7 +1612,14 @@ out:
return err;
}
-static const struct md_cluster_operations cluster_ops = {
+static struct md_cluster_operations cluster_ops = {
+ .head = {
+ .type = MD_CLUSTER,
+ .id = ID_CLUSTER,
+ .name = "cluster",
+ .owner = THIS_MODULE,
+ },
+
.join = join,
.leave = leave,
.slot_number = slot_number,
@@ -1642,13 +1649,12 @@ static int __init cluster_init(void)
{
pr_warn("md-cluster: support raid1 and raid10 (limited support)\n");
pr_info("Registering Cluster MD functions\n");
- register_md_cluster_operations(&cluster_ops, THIS_MODULE);
- return 0;
+ return register_md_submodule(&cluster_ops.head);
}
static void cluster_exit(void)
{
- unregister_md_cluster_operations();
+ unregister_md_submodule(&cluster_ops.head);
}
module_init(cluster_init);
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index 470bf18ffde5..8fb06d853173 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -10,6 +10,8 @@ struct mddev;
struct md_rdev;
struct md_cluster_operations {
+ struct md_submodule_head head;
+
int (*join)(struct mddev *mddev, int nodes);
int (*leave)(struct mddev *mddev);
int (*slot_number)(struct mddev *mddev);
@@ -35,4 +37,8 @@ struct md_cluster_operations {
void (*update_size)(struct mddev *mddev, sector_t old_dev_sectors);
};
+extern int md_setup_cluster(struct mddev *mddev, int nodes);
+extern void md_cluster_stop(struct mddev *mddev);
+extern void md_reload_sb(struct mddev *mddev, int raid_disk);
+
#endif /* _MD_CLUSTER_H */
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 369aed044b40..5d9b08115375 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -5,7 +5,6 @@
*/
#include <linux/blkdev.h>
-#include <linux/raid/md_u.h>
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -320,9 +319,13 @@ static void linear_quiesce(struct mddev *mddev, int state)
}
static struct md_personality linear_personality = {
- .name = "linear",
- .level = LEVEL_LINEAR,
- .owner = THIS_MODULE,
+ .head = {
+ .type = MD_PERSONALITY,
+ .id = ID_LINEAR,
+ .name = "linear",
+ .owner = THIS_MODULE,
+ },
+
.make_request = linear_make_request,
.run = linear_run,
.free = linear_free,
@@ -335,12 +338,12 @@ static struct md_personality linear_personality = {
static int __init linear_init(void)
{
- return register_md_personality(&linear_personality);
+ return register_md_submodule(&linear_personality.head);
}
static void linear_exit(void)
{
- unregister_md_personality(&linear_personality);
+ unregister_md_submodule(&linear_personality.head);
}
module_init(linear_init);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4092fb94842d..438e71e45c16 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -79,16 +79,10 @@ static const char *action_name[NR_SYNC_ACTIONS] = {
[ACTION_IDLE] = "idle",
};
-/* pers_list is a list of registered personalities protected by pers_lock. */
-static LIST_HEAD(pers_list);
-static DEFINE_SPINLOCK(pers_lock);
+static DEFINE_XARRAY(md_submodule);
static const struct kobj_type md_ktype;
-const struct md_cluster_operations *md_cluster_ops;
-EXPORT_SYMBOL(md_cluster_ops);
-static struct module *md_cluster_mod;
-
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
@@ -629,6 +623,12 @@ static void __mddev_put(struct mddev *mddev)
queue_work(md_misc_wq, &mddev->del_work);
}
+static void mddev_put_locked(struct mddev *mddev)
+{
+ if (atomic_dec_and_test(&mddev->active))
+ __mddev_put(mddev);
+}
+
void mddev_put(struct mddev *mddev)
{
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
@@ -888,16 +888,40 @@ struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
}
EXPORT_SYMBOL_GPL(md_find_rdev_rcu);
-static struct md_personality *find_pers(int level, char *clevel)
+static struct md_personality *get_pers(int level, char *clevel)
{
- struct md_personality *pers;
- list_for_each_entry(pers, &pers_list, list) {
- if (level != LEVEL_NONE && pers->level == level)
- return pers;
- if (strcmp(pers->name, clevel)==0)
- return pers;
+ struct md_personality *ret = NULL;
+ struct md_submodule_head *head;
+ unsigned long i;
+
+ xa_lock(&md_submodule);
+ xa_for_each(&md_submodule, i, head) {
+ if (head->type != MD_PERSONALITY)
+ continue;
+ if ((level != LEVEL_NONE && head->id == level) ||
+ !strcmp(head->name, clevel)) {
+ if (try_module_get(head->owner))
+ ret = (void *)head;
+ break;
+ }
}
- return NULL;
+ xa_unlock(&md_submodule);
+
+ if (!ret) {
+ if (level != LEVEL_NONE)
+ pr_warn("md: personality for level %d is not loaded!\n",
+ level);
+ else
+ pr_warn("md: personality for level %s is not loaded!\n",
+ clevel);
+ }
+
+ return ret;
+}
+
+static void put_pers(struct md_personality *pers)
+{
+ module_put(pers->head.owner);
}
/* return the offset of the super block in 512byte sectors */
@@ -1180,7 +1204,7 @@ int md_check_no_bitmap(struct mddev *mddev)
if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
return 0;
pr_warn("%s: bitmaps are not supported for %s\n",
- mdname(mddev), mddev->pers->name);
+ mdname(mddev), mddev->pers->head.name);
return 1;
}
EXPORT_SYMBOL(md_check_no_bitmap);
@@ -2626,11 +2650,11 @@ repeat:
force_change = 1;
if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
nospares = 1;
- ret = md_cluster_ops->metadata_update_start(mddev);
+ ret = mddev->cluster_ops->metadata_update_start(mddev);
/* Has someone else has updated the sb */
if (!does_sb_need_changing(mddev)) {
if (ret == 0)
- md_cluster_ops->metadata_update_cancel(mddev);
+ mddev->cluster_ops->metadata_update_cancel(mddev);
bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
BIT(MD_SB_CHANGE_DEVS) |
BIT(MD_SB_CHANGE_CLEAN));
@@ -2770,7 +2794,7 @@ rewrite:
/* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
if (mddev_is_clustered(mddev) && ret == 0)
- md_cluster_ops->metadata_update_finish(mddev);
+ mddev->cluster_ops->metadata_update_finish(mddev);
if (mddev->in_sync != sync_req ||
!bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
@@ -2929,7 +2953,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
else {
err = 0;
if (mddev_is_clustered(mddev))
- err = md_cluster_ops->remove_disk(mddev, rdev);
+ err = mddev->cluster_ops->remove_disk(mddev, rdev);
if (err == 0) {
md_kick_rdev_from_array(rdev);
@@ -3039,7 +3063,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
* by this node eventually
*/
if (!mddev_is_clustered(rdev->mddev) ||
- (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
+ (err = mddev->cluster_ops->gather_bitmaps(rdev)) == 0) {
clear_bit(Faulty, &rdev->flags);
err = add_bound_rdev(rdev);
}
@@ -3847,7 +3871,7 @@ level_show(struct mddev *mddev, char *page)
spin_lock(&mddev->lock);
p = mddev->pers;
if (p)
- ret = sprintf(page, "%s\n", p->name);
+ ret = sprintf(page, "%s\n", p->head.name);
else if (mddev->clevel[0])
ret = sprintf(page, "%s\n", mddev->clevel);
else if (mddev->level != LEVEL_NONE)
@@ -3904,7 +3928,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
rv = -EINVAL;
if (!mddev->pers->quiesce) {
pr_warn("md: %s: %s does not support online personality change\n",
- mdname(mddev), mddev->pers->name);
+ mdname(mddev), mddev->pers->head.name);
goto out_unlock;
}
@@ -3918,24 +3942,20 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
if (request_module("md-%s", clevel) != 0)
request_module("md-level-%s", clevel);
- spin_lock(&pers_lock);
- pers = find_pers(level, clevel);
- if (!pers || !try_module_get(pers->owner)) {
- spin_unlock(&pers_lock);
- pr_warn("md: personality %s not loaded\n", clevel);
+ pers = get_pers(level, clevel);
+ if (!pers) {
rv = -EINVAL;
goto out_unlock;
}
- spin_unlock(&pers_lock);
if (pers == mddev->pers) {
/* Nothing to do! */
- module_put(pers->owner);
+ put_pers(pers);
rv = len;
goto out_unlock;
}
if (!pers->takeover) {
- module_put(pers->owner);
+ put_pers(pers);
pr_warn("md: %s: %s does not support personality takeover\n",
mdname(mddev), clevel);
rv = -EINVAL;
@@ -3956,7 +3976,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->raid_disks -= mddev->delta_disks;
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;
- module_put(pers->owner);
+ put_pers(pers);
pr_warn("md: %s: %s would not accept array\n",
mdname(mddev), clevel);
rv = PTR_ERR(priv);
@@ -3971,7 +3991,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
oldpriv = mddev->private;
mddev->pers = pers;
mddev->private = priv;
- strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+ strscpy(mddev->clevel, pers->head.name, sizeof(mddev->clevel));
mddev->level = mddev->new_level;
mddev->layout = mddev->new_layout;
mddev->chunk_sectors = mddev->new_chunk_sectors;
@@ -4013,7 +4033,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->to_remove = &md_redundancy_group;
}
- module_put(oldpers->owner);
+ put_pers(oldpers);
rdev_for_each(rdev, mddev) {
if (rdev->raid_disk < 0)
@@ -5571,7 +5591,7 @@ __ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
{
- if (mddev->pers == NULL || (mddev->pers->level != 1))
+ if (mddev->pers == NULL || (mddev->pers->head.id != ID_RAID1))
return sprintf(page, "n/a\n");
else
return sprintf(page, "%d\n", mddev->serialize_policy);
@@ -5597,7 +5617,7 @@ serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
err = mddev_suspend_and_lock(mddev);
if (err)
return err;
- if (mddev->pers == NULL || (mddev->pers->level != 1)) {
+ if (mddev->pers == NULL || (mddev->pers->head.id != ID_RAID1)) {
pr_err("md: serialize_policy is only effective for raid1\n");
err = -EINVAL;
goto unlock;
@@ -6083,30 +6103,21 @@ int md_run(struct mddev *mddev)
goto exit_sync_set;
}
- spin_lock(&pers_lock);
- pers = find_pers(mddev->level, mddev->clevel);
- if (!pers || !try_module_get(pers->owner)) {
- spin_unlock(&pers_lock);
- if (mddev->level != LEVEL_NONE)
- pr_warn("md: personality for level %d is not loaded!\n",
- mddev->level);
- else
- pr_warn("md: personality for level %s is not loaded!\n",
- mddev->clevel);
+ pers = get_pers(mddev->level, mddev->clevel);
+ if (!pers) {
err = -EINVAL;
goto abort;
}
- spin_unlock(&pers_lock);
- if (mddev->level != pers->level) {
- mddev->level = pers->level;
- mddev->new_level = pers->level;
+ if (mddev->level != pers->head.id) {
+ mddev->level = pers->head.id;
+ mddev->new_level = pers->head.id;
}
- strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
+ strscpy(mddev->clevel, pers->head.name, sizeof(mddev->clevel));
if (mddev->reshape_position != MaxSector &&
pers->start_reshape == NULL) {
/* This personality cannot handle reshaping... */
- module_put(pers->owner);
+ put_pers(pers);
err = -EINVAL;
goto abort;
}
@@ -6233,7 +6244,7 @@ bitmap_abort:
if (mddev->private)
pers->free(mddev, mddev->private);
mddev->private = NULL;
- module_put(pers->owner);
+ put_pers(pers);
mddev->bitmap_ops->destroy(mddev);
abort:
bioset_exit(&mddev->io_clone_set);
@@ -6454,7 +6465,7 @@ static void __md_stop(struct mddev *mddev)
mddev->private = NULL;
if (pers->sync_request && mddev->to_remove == NULL)
mddev->to_remove = &md_redundancy_group;
- module_put(pers->owner);
+ put_pers(pers);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
bioset_exit(&mddev->bio_set);
@@ -6970,7 +6981,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
set_bit(Candidate, &rdev->flags);
else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
/* --add initiated by this node */
- err = md_cluster_ops->add_new_disk(mddev, rdev);
+ err = mddev->cluster_ops->add_new_disk(mddev, rdev);
if (err) {
export_rdev(rdev, mddev);
return err;
@@ -6987,14 +6998,14 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
if (mddev_is_clustered(mddev)) {
if (info->state & (1 << MD_DISK_CANDIDATE)) {
if (!err) {
- err = md_cluster_ops->new_disk_ack(mddev,
- err == 0);
+ err = mddev->cluster_ops->new_disk_ack(
+ mddev, err == 0);
if (err)
md_kick_rdev_from_array(rdev);
}
} else {
if (err)
- md_cluster_ops->add_new_disk_cancel(mddev);
+ mddev->cluster_ops->add_new_disk_cancel(mddev);
else
err = add_bound_rdev(rdev);
}
@@ -7074,10 +7085,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
goto busy;
kick_rdev:
- if (mddev_is_clustered(mddev)) {
- if (md_cluster_ops->remove_disk(mddev, rdev))
- goto busy;
- }
+ if (mddev_is_clustered(mddev) &&
+ mddev->cluster_ops->remove_disk(mddev, rdev))
+ goto busy;
md_kick_rdev_from_array(rdev);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
@@ -7380,7 +7390,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
rv = mddev->pers->resize(mddev, num_sectors);
if (!rv) {
if (mddev_is_clustered(mddev))
- md_cluster_ops->update_size(mddev, old_dev_sectors);
+ mddev->cluster_ops->update_size(mddev, old_dev_sectors);
else if (!mddev_is_dm(mddev))
set_capacity_and_notify(mddev->gendisk,
mddev->array_sectors);
@@ -7428,6 +7438,28 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
return rv;
}
+static int get_cluster_ops(struct mddev *mddev)
+{
+ xa_lock(&md_submodule);
+ mddev->cluster_ops = xa_load(&md_submodule, ID_CLUSTER);
+ if (mddev->cluster_ops &&
+ !try_module_get(mddev->cluster_ops->head.owner))
+ mddev->cluster_ops = NULL;
+ xa_unlock(&md_submodule);
+
+ return mddev->cluster_ops == NULL ? -ENOENT : 0;
+}
+
+static void put_cluster_ops(struct mddev *mddev)
+{
+ if (!mddev->cluster_ops)
+ return;
+
+ mddev->cluster_ops->leave(mddev);
+ module_put(mddev->cluster_ops->head.owner);
+ mddev->cluster_ops = NULL;
+}
+
/*
* update_array_info is used to change the configuration of an
* on-line array.
@@ -7536,16 +7568,15 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
if (mddev->bitmap_info.nodes) {
/* hold PW on all the bitmap lock */
- if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) {
+ if (mddev->cluster_ops->lock_all_bitmaps(mddev) <= 0) {
pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n");
rv = -EPERM;
- md_cluster_ops->unlock_all_bitmaps(mddev);
+ mddev->cluster_ops->unlock_all_bitmaps(mddev);
goto err;
}
mddev->bitmap_info.nodes = 0;
- md_cluster_ops->leave(mddev);
- module_put(md_cluster_mod);
+ put_cluster_ops(mddev);
mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
}
mddev->bitmap_ops->destroy(mddev);
@@ -7829,7 +7860,7 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
case CLUSTERED_DISK_NACK:
if (mddev_is_clustered(mddev))
- md_cluster_ops->new_disk_ack(mddev, false);
+ mddev->cluster_ops->new_disk_ack(mddev, false);
else
err = -EINVAL;
goto unlock;
@@ -8111,7 +8142,8 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
return;
mddev->pers->error_handler(mddev, rdev);
- if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
+ if (mddev->pers->head.id == ID_RAID0 ||
+ mddev->pers->head.id == ID_LINEAR)
return;
if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
@@ -8149,14 +8181,17 @@ static void status_unused(struct seq_file *seq)
static void status_personalities(struct seq_file *seq)
{
- struct md_personality *pers;
+ struct md_submodule_head *head;
+ unsigned long i;
seq_puts(seq, "Personalities : ");
- spin_lock(&pers_lock);
- list_for_each_entry(pers, &pers_list, list)
- seq_printf(seq, "[%s] ", pers->name);
- spin_unlock(&pers_lock);
+ xa_lock(&md_submodule);
+ xa_for_each(&md_submodule, i, head)
+ if (head->type == MD_PERSONALITY)
+ seq_printf(seq, "[%s] ", head->name);
+ xa_unlock(&md_submodule);
+
seq_puts(seq, "\n");
}
@@ -8379,7 +8414,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, " (read-only)");
if (mddev->ro == MD_AUTO_READ)
seq_printf(seq, " (auto-read-only)");
- seq_printf(seq, " %s", mddev->pers->name);
+ seq_printf(seq, " %s", mddev->pers->head.name);
} else {
seq_printf(seq, "inactive");
}
@@ -8448,9 +8483,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs))
status_unused(seq);
- if (atomic_dec_and_test(&mddev->active))
- __mddev_put(mddev);
-
+ mddev_put_locked(mddev);
return 0;
}
@@ -8501,67 +8534,34 @@ static const struct proc_ops mdstat_proc_ops = {
.proc_poll = mdstat_poll,
};
-int register_md_personality(struct md_personality *p)
-{
- pr_debug("md: %s personality registered for level %d\n",
- p->name, p->level);
- spin_lock(&pers_lock);
- list_add_tail(&p->list, &pers_list);
- spin_unlock(&pers_lock);
- return 0;
-}
-EXPORT_SYMBOL(register_md_personality);
-
-int unregister_md_personality(struct md_personality *p)
-{
- pr_debug("md: %s personality unregistered\n", p->name);
- spin_lock(&pers_lock);
- list_del_init(&p->list);
- spin_unlock(&pers_lock);
- return 0;
-}
-EXPORT_SYMBOL(unregister_md_personality);
-
-int register_md_cluster_operations(const struct md_cluster_operations *ops,
- struct module *module)
+int register_md_submodule(struct md_submodule_head *msh)
{
- int ret = 0;
- spin_lock(&pers_lock);
- if (md_cluster_ops != NULL)
- ret = -EALREADY;
- else {
- md_cluster_ops = ops;
- md_cluster_mod = module;
- }
- spin_unlock(&pers_lock);
- return ret;
+ return xa_insert(&md_submodule, msh->id, msh, GFP_KERNEL);
}
-EXPORT_SYMBOL(register_md_cluster_operations);
+EXPORT_SYMBOL_GPL(register_md_submodule);
-int unregister_md_cluster_operations(void)
+void unregister_md_submodule(struct md_submodule_head *msh)
{
- spin_lock(&pers_lock);
- md_cluster_ops = NULL;
- spin_unlock(&pers_lock);
- return 0;
+ xa_erase(&md_submodule, msh->id);
}
-EXPORT_SYMBOL(unregister_md_cluster_operations);
+EXPORT_SYMBOL_GPL(unregister_md_submodule);
int md_setup_cluster(struct mddev *mddev, int nodes)
{
- int ret;
- if (!md_cluster_ops)
+ int ret = get_cluster_ops(mddev);
+
+ if (ret) {
request_module("md-cluster");
- spin_lock(&pers_lock);
+ ret = get_cluster_ops(mddev);
+ }
+
/* ensure module won't be unloaded */
- if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
+ if (ret) {
pr_warn("can't find md-cluster module or get its reference.\n");
- spin_unlock(&pers_lock);
- return -ENOENT;
+ return ret;
}
- spin_unlock(&pers_lock);
- ret = md_cluster_ops->join(mddev, nodes);
+ ret = mddev->cluster_ops->join(mddev, nodes);
if (!ret)
mddev->safemode_delay = 0;
return ret;
@@ -8569,10 +8569,7 @@ int md_setup_cluster(struct mddev *mddev, int nodes)
void md_cluster_stop(struct mddev *mddev)
{
- if (!md_cluster_ops)
- return;
- md_cluster_ops->leave(mddev);
- module_put(md_cluster_mod);
+ put_cluster_ops(mddev);
}
static int is_mddev_idle(struct mddev *mddev, int init)
@@ -8965,7 +8962,7 @@ void md_do_sync(struct md_thread *thread)
}
if (mddev_is_clustered(mddev)) {
- ret = md_cluster_ops->resync_start(mddev);
+ ret = mddev->cluster_ops->resync_start(mddev);
if (ret)
goto skip;
@@ -8992,7 +8989,7 @@ void md_do_sync(struct md_thread *thread)
*
*/
if (mddev_is_clustered(mddev))
- md_cluster_ops->resync_start_notify(mddev);
+ mddev->cluster_ops->resync_start_notify(mddev);
do {
int mddev2_minor = -1;
mddev->curr_resync = MD_RESYNC_DELAYED;
@@ -9447,6 +9444,13 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
return true;
}
+ /* Check if resync is in progress. */
+ if (mddev->recovery_cp < MaxSector) {
+ set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ return true;
+ }
+
/*
* Remove any failed drives, then add spares if possible. Spares are
* also removed and re-added, to allow the personality to fail the
@@ -9463,13 +9467,6 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
return true;
}
- /* Check if recovery is in progress. */
- if (mddev->recovery_cp < MaxSector) {
- set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
- clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
- return true;
- }
-
/* Delay to choose resync/check/repair in md_do_sync(). */
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
return true;
@@ -9776,7 +9773,7 @@ void md_reap_sync_thread(struct mddev *mddev)
* call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
* clustered raid */
if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
- md_cluster_ops->resync_finish(mddev);
+ mddev->cluster_ops->resync_finish(mddev);
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -9784,13 +9781,13 @@ void md_reap_sync_thread(struct mddev *mddev)
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
/*
- * We call md_cluster_ops->update_size here because sync_size could
+ * We call mddev->cluster_ops->update_size here because sync_size could
* be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
* so it is time to update size across cluster.
*/
if (mddev_is_clustered(mddev) && is_reshaped
&& !test_bit(MD_CLOSING, &mddev->flags))
- md_cluster_ops->update_size(mddev, old_dev_sectors);
+ mddev->cluster_ops->update_size(mddev, old_dev_sectors);
/* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
sysfs_notify_dirent_safe(mddev->sysfs_completed);
@@ -9882,11 +9879,11 @@ EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
static int md_notify_reboot(struct notifier_block *this,
unsigned long code, void *x)
{
- struct mddev *mddev, *n;
+ struct mddev *mddev;
int need_delay = 0;
spin_lock(&all_mddevs_lock);
- list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
+ list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
if (!mddev_get(mddev))
continue;
spin_unlock(&all_mddevs_lock);
@@ -9898,8 +9895,8 @@ static int md_notify_reboot(struct notifier_block *this,
mddev_unlock(mddev);
}
need_delay = 1;
- mddev_put(mddev);
spin_lock(&all_mddevs_lock);
+ mddev_put_locked(mddev);
}
spin_unlock(&all_mddevs_lock);
@@ -10016,7 +10013,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
!(le32_to_cpu(sb->feature_map) &
MD_FEATURE_RESHAPE_ACTIVE) &&
- !md_cluster_ops->resync_status_get(mddev)) {
+ !mddev->cluster_ops->resync_status_get(mddev)) {
/*
* -1 to make raid1_add_disk() set conf->fullsync
* to 1. This could avoid skipping sync when the
@@ -10232,7 +10229,7 @@ void md_autostart_arrays(int part)
static __exit void md_exit(void)
{
- struct mddev *mddev, *n;
+ struct mddev *mddev;
int delay = 1;
unregister_blkdev(MD_MAJOR,"md");
@@ -10253,7 +10250,7 @@ static __exit void md_exit(void)
remove_proc_entry("mdstat", NULL);
spin_lock(&all_mddevs_lock);
- list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
+ list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
if (!mddev_get(mddev))
continue;
spin_unlock(&all_mddevs_lock);
@@ -10265,8 +10262,8 @@ static __exit void md_exit(void)
* the mddev for destruction by a workqueue, and the
* destroy_workqueue() below will wait for that to complete.
*/
- mddev_put(mddev);
spin_lock(&all_mddevs_lock);
+ mddev_put_locked(mddev);
}
spin_unlock(&all_mddevs_lock);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 6edc0f71b7d4..1cf00a04bcdd 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -18,11 +18,37 @@
#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
+#include <linux/raid/md_u.h>
#include <trace/events/block.h>
-#include "md-cluster.h"
#define MaxSector (~(sector_t)0)
+enum md_submodule_type {
+ MD_PERSONALITY = 0,
+ MD_CLUSTER,
+ MD_BITMAP, /* TODO */
+};
+
+enum md_submodule_id {
+ ID_LINEAR = LEVEL_LINEAR,
+ ID_RAID0 = 0,
+ ID_RAID1 = 1,
+ ID_RAID4 = 4,
+ ID_RAID5 = 5,
+ ID_RAID6 = 6,
+ ID_RAID10 = 10,
+ ID_CLUSTER,
+ ID_BITMAP, /* TODO */
+ ID_LLBITMAP, /* TODO */
+};
+
+struct md_submodule_head {
+ enum md_submodule_type type;
+ enum md_submodule_id id;
+ const char *name;
+ struct module *owner;
+};
+
/*
* These flags should really be called "NO_RETRY" rather than
* "FAILFAST" because they don't make any promise about time lapse,
@@ -294,6 +320,7 @@ extern bool rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
extern void rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
struct md_cluster_info;
+struct md_cluster_operations;
/**
* enum mddev_flags - md device flags.
@@ -576,6 +603,7 @@ struct mddev {
mempool_t *serial_info_pool;
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
+ struct md_cluster_operations *cluster_ops;
unsigned int good_device_nr; /* good device num within cluster raid */
unsigned int noio_flag; /* for memalloc scope API */
@@ -699,10 +727,8 @@ static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
struct md_personality
{
- char *name;
- int level;
- struct list_head list;
- struct module *owner;
+ struct md_submodule_head head;
+
bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio);
/*
* start up works that do NOT require md_thread. tasks that
@@ -843,13 +869,9 @@ static inline void safe_put_page(struct page *p)
if (p) put_page(p);
}
-extern int register_md_personality(struct md_personality *p);
-extern int unregister_md_personality(struct md_personality *p);
-extern int register_md_cluster_operations(const struct md_cluster_operations *ops,
- struct module *module);
-extern int unregister_md_cluster_operations(void);
-extern int md_setup_cluster(struct mddev *mddev, int nodes);
-extern void md_cluster_stop(struct mddev *mddev);
+int register_md_submodule(struct md_submodule_head *msh);
+void unregister_md_submodule(struct md_submodule_head *msh);
+
extern struct md_thread *md_register_thread(
void (*run)(struct md_thread *thread),
struct mddev *mddev,
@@ -906,7 +928,6 @@ extern void md_idle_sync_thread(struct mddev *mddev);
extern void md_frozen_sync_thread(struct mddev *mddev);
extern void md_unfrozen_sync_thread(struct mddev *mddev);
-extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev);
extern void mddev_destroy_serial_pool(struct mddev *mddev,
@@ -928,7 +949,6 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
}
}
-extern const struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 8fc9339b00c7..2aec92e6e0a9 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -811,9 +811,13 @@ static void raid0_quiesce(struct mddev *mddev, int quiesce)
static struct md_personality raid0_personality=
{
- .name = "raid0",
- .level = 0,
- .owner = THIS_MODULE,
+ .head = {
+ .type = MD_PERSONALITY,
+ .id = ID_RAID0,
+ .name = "raid0",
+ .owner = THIS_MODULE,
+ },
+
.make_request = raid0_make_request,
.run = raid0_run,
.free = raid0_free,
@@ -824,14 +828,14 @@ static struct md_personality raid0_personality=
.error_handler = raid0_error,
};
-static int __init raid0_init (void)
+static int __init raid0_init(void)
{
- return register_md_personality (&raid0_personality);
+ return register_md_submodule(&raid0_personality.head);
}
-static void raid0_exit (void)
+static void __exit raid0_exit(void)
{
- unregister_md_personality (&raid0_personality);
+ unregister_md_submodule(&raid0_personality.head);
}
module_init(raid0_init);
diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index 62b980b12f93..c7efd8aab675 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -287,8 +287,8 @@ static inline bool raid1_should_read_first(struct mddev *mddev,
return true;
if (mddev_is_clustered(mddev) &&
- md_cluster_ops->area_resyncing(mddev, READ, this_sector,
- this_sector + len))
+ mddev->cluster_ops->area_resyncing(mddev, READ, this_sector,
+ this_sector + len))
return true;
return false;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 0b2839105857..f06effec0a15 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -36,6 +36,7 @@
#include "md.h"
#include "raid1.h"
#include "md-bitmap.h"
+#include "md-cluster.h"
#define UNSUPPORTED_MDDEV_FLAGS \
((1L << MD_HAS_JOURNAL) | \
@@ -45,6 +46,7 @@
static void allow_barrier(struct r1conf *conf, sector_t sector_nr);
static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
+static void raid1_free(struct mddev *mddev, void *priv);
#define RAID_1_10_NAME "raid1"
#include "raid1-10.c"
@@ -1315,8 +1317,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
struct r1conf *conf = mddev->private;
struct raid1_info *mirror;
struct bio *read_bio;
- const enum req_op op = bio_op(bio);
- const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
int max_sectors;
int rdisk, error;
bool r1bio_existed = !!r1_bio;
@@ -1404,7 +1404,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_iter.bi_sector = r1_bio->sector +
mirror->rdev->data_offset;
read_bio->bi_end_io = raid1_end_read_request;
- read_bio->bi_opf = op | do_sync;
if (test_bit(FailFast, &mirror->rdev->flags) &&
test_bit(R1BIO_FailFast, &r1_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
@@ -1467,7 +1466,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
bool is_discard = (bio_op(bio) == REQ_OP_DISCARD);
if (mddev_is_clustered(mddev) &&
- md_cluster_ops->area_resyncing(mddev, WRITE,
+ mddev->cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector, bio_end_sector(bio))) {
DEFINE_WAIT(w);
@@ -1478,7 +1477,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
for (;;) {
prepare_to_wait(&conf->wait_barrier,
&w, TASK_IDLE);
- if (!md_cluster_ops->area_resyncing(mddev, WRITE,
+ if (!mddev->cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector,
bio_end_sector(bio)))
break;
@@ -1653,8 +1652,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset);
mbio->bi_end_io = raid1_end_write_request;
- mbio->bi_opf = bio_op(bio) |
- (bio->bi_opf & (REQ_SYNC | REQ_FUA | REQ_ATOMIC));
if (test_bit(FailFast, &rdev->flags) &&
!test_bit(WriteMostly, &rdev->flags) &&
conf->raid_disks - mddev->degraded > 1)
@@ -3038,9 +3035,9 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
conf->cluster_sync_low = mddev->curr_resync_completed;
conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS;
/* Send resync message */
- md_cluster_ops->resync_info_update(mddev,
- conf->cluster_sync_low,
- conf->cluster_sync_high);
+ mddev->cluster_ops->resync_info_update(mddev,
+ conf->cluster_sync_low,
+ conf->cluster_sync_high);
}
/* For a user-requested sync, we read all readable devices and do a
@@ -3258,8 +3255,11 @@ static int raid1_run(struct mddev *mddev)
if (!mddev_is_dm(mddev)) {
ret = raid1_set_limits(mddev);
- if (ret)
+ if (ret) {
+ if (!mddev->private)
+ raid1_free(mddev, conf);
return ret;
+ }
}
mddev->degraded = 0;
@@ -3273,6 +3273,8 @@ static int raid1_run(struct mddev *mddev)
*/
if (conf->raid_disks - mddev->degraded < 1) {
md_unregister_thread(mddev, &conf->thread);
+ if (!mddev->private)
+ raid1_free(mddev, conf);
return -EINVAL;
}
@@ -3493,9 +3495,13 @@ static void *raid1_takeover(struct mddev *mddev)
static struct md_personality raid1_personality =
{
- .name = "raid1",
- .level = 1,
- .owner = THIS_MODULE,
+ .head = {
+ .type = MD_PERSONALITY,
+ .id = ID_RAID1,
+ .name = "raid1",
+ .owner = THIS_MODULE,
+ },
+
.make_request = raid1_make_request,
.run = raid1_run,
.free = raid1_free,
@@ -3512,18 +3518,18 @@ static struct md_personality raid1_personality =
.takeover = raid1_takeover,
};
-static int __init raid_init(void)
+static int __init raid1_init(void)
{
- return register_md_personality(&raid1_personality);
+ return register_md_submodule(&raid1_personality.head);
}
-static void raid_exit(void)
+static void __exit raid1_exit(void)
{
- unregister_md_personality(&raid1_personality);
+ unregister_md_submodule(&raid1_personality.head);
}
-module_init(raid_init);
-module_exit(raid_exit);
+module_init(raid1_init);
+module_exit(raid1_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD");
MODULE_ALIAS("md-personality-3"); /* RAID1 */
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a8664e29aada..d412aeeafc18 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -24,6 +24,7 @@
#include "raid10.h"
#include "raid0.h"
#include "md-bitmap.h"
+#include "md-cluster.h"
/*
* RAID10 provides a combination of RAID0 and RAID1 functionality.
@@ -1146,8 +1147,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
{
struct r10conf *conf = mddev->private;
struct bio *read_bio;
- const enum req_op op = bio_op(bio);
- const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
int max_sectors;
struct md_rdev *rdev;
char b[BDEVNAME_SIZE];
@@ -1228,7 +1227,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
choose_data_offset(r10_bio, rdev);
read_bio->bi_end_io = raid10_end_read_request;
- read_bio->bi_opf = op | do_sync;
if (test_bit(FailFast, &rdev->flags) &&
test_bit(R10BIO_FailFast, &r10_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
@@ -1247,10 +1245,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
struct bio *bio, bool replacement,
int n_copy)
{
- const enum req_op op = bio_op(bio);
- const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
- const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;
- const blk_opf_t do_atomic = bio->bi_opf & REQ_ATOMIC;
unsigned long flags;
struct r10conf *conf = mddev->private;
struct md_rdev *rdev;
@@ -1269,7 +1263,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr +
choose_data_offset(r10_bio, rdev));
mbio->bi_end_io = raid10_end_write_request;
- mbio->bi_opf = op | do_sync | do_fua | do_atomic;
if (!replacement && test_bit(FailFast,
&conf->mirrors[devnum].rdev->flags)
&& enough(conf, devnum))
@@ -1355,9 +1348,9 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
int error;
if ((mddev_is_clustered(mddev) &&
- md_cluster_ops->area_resyncing(mddev, WRITE,
- bio->bi_iter.bi_sector,
- bio_end_sector(bio)))) {
+ mddev->cluster_ops->area_resyncing(mddev, WRITE,
+ bio->bi_iter.bi_sector,
+ bio_end_sector(bio)))) {
DEFINE_WAIT(w);
/* Bail out if REQ_NOWAIT is set for the bio */
if (bio->bi_opf & REQ_NOWAIT) {
@@ -1367,7 +1360,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
for (;;) {
prepare_to_wait(&conf->wait_barrier,
&w, TASK_IDLE);
- if (!md_cluster_ops->area_resyncing(mddev, WRITE,
+ if (!mddev->cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector, bio_end_sector(bio)))
break;
schedule();
@@ -1631,11 +1624,10 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
return -EAGAIN;
- if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) {
+ if (!wait_barrier(conf, bio->bi_opf & REQ_NOWAIT)) {
bio_wouldblock_error(bio);
return 0;
}
- wait_barrier(conf, false);
/*
* Check reshape again to avoid reshape happens after checking
@@ -3716,7 +3708,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
conf->cluster_sync_low = mddev->curr_resync_completed;
raid10_set_cluster_sync_high(conf);
/* Send resync message */
- md_cluster_ops->resync_info_update(mddev,
+ mddev->cluster_ops->resync_info_update(mddev,
conf->cluster_sync_low,
conf->cluster_sync_high);
}
@@ -3749,7 +3741,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
}
if (broadcast_msg) {
raid10_set_cluster_sync_high(conf);
- md_cluster_ops->resync_info_update(mddev,
+ mddev->cluster_ops->resync_info_update(mddev,
conf->cluster_sync_low,
conf->cluster_sync_high);
}
@@ -4543,7 +4535,7 @@ static int raid10_start_reshape(struct mddev *mddev)
if (ret)
goto abort;
- ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
+ ret = mddev->cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
if (ret) {
mddev->bitmap_ops->resize(mddev, oldsize, 0, false);
goto abort;
@@ -4834,7 +4826,7 @@ read_more:
conf->cluster_sync_low = sb_reshape_pos;
}
- md_cluster_ops->resync_info_update(mddev, conf->cluster_sync_low,
+ mddev->cluster_ops->resync_info_update(mddev, conf->cluster_sync_low,
conf->cluster_sync_high);
}
@@ -4979,7 +4971,7 @@ static void raid10_update_reshape_pos(struct mddev *mddev)
struct r10conf *conf = mddev->private;
sector_t lo, hi;
- md_cluster_ops->resync_info_get(mddev, &lo, &hi);
+ mddev->cluster_ops->resync_info_get(mddev, &lo, &hi);
if (((mddev->reshape_position <= hi) && (mddev->reshape_position >= lo))
|| mddev->reshape_position == MaxSector)
conf->reshape_progress = mddev->reshape_position;
@@ -5125,9 +5117,13 @@ static void raid10_finish_reshape(struct mddev *mddev)
static struct md_personality raid10_personality =
{
- .name = "raid10",
- .level = 10,
- .owner = THIS_MODULE,
+ .head = {
+ .type = MD_PERSONALITY,
+ .id = ID_RAID10,
+ .name = "raid10",
+ .owner = THIS_MODULE,
+ },
+
.make_request = raid10_make_request,
.run = raid10_run,
.free = raid10_free,
@@ -5147,18 +5143,18 @@ static struct md_personality raid10_personality =
.update_reshape_pos = raid10_update_reshape_pos,
};
-static int __init raid_init(void)
+static int __init raid10_init(void)
{
- return register_md_personality(&raid10_personality);
+ return register_md_submodule(&raid10_personality.head);
}
-static void raid_exit(void)
+static void __exit raid10_exit(void)
{
- unregister_md_personality(&raid10_personality);
+ unregister_md_submodule(&raid10_personality.head);
}
-module_init(raid_init);
-module_exit(raid_exit);
+module_init(raid10_init);
+module_exit(raid10_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD");
MODULE_ALIAS("md-personality-9"); /* RAID10 */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5c79429acc64..6389383166c0 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5858,6 +5858,9 @@ static enum reshape_loc get_reshape_loc(struct mddev *mddev,
struct r5conf *conf, sector_t logical_sector)
{
sector_t reshape_progress, reshape_safe;
+
+ if (likely(conf->reshape_progress == MaxSector))
+ return LOC_NO_RESHAPE;
/*
* Spinlock is needed as reshape_progress may be
* 64bit on a 32bit platform, and so it might be
@@ -5935,22 +5938,19 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
const int rw = bio_data_dir(bi);
enum stripe_result ret;
struct stripe_head *sh;
+ enum reshape_loc loc;
sector_t new_sector;
int previous = 0, flags = 0;
int seq, dd_idx;
seq = read_seqcount_begin(&conf->gen_lock);
-
- if (unlikely(conf->reshape_progress != MaxSector)) {
- enum reshape_loc loc = get_reshape_loc(mddev, conf,
- logical_sector);
- if (loc == LOC_INSIDE_RESHAPE) {
- ret = STRIPE_SCHEDULE_AND_RETRY;
- goto out;
- }
- if (loc == LOC_AHEAD_OF_RESHAPE)
- previous = 1;
+ loc = get_reshape_loc(mddev, conf, logical_sector);
+ if (loc == LOC_INSIDE_RESHAPE) {
+ ret = STRIPE_SCHEDULE_AND_RETRY;
+ goto out;
}
+ if (loc == LOC_AHEAD_OF_RESHAPE)
+ previous = 1;
new_sector = raid5_compute_sector(conf, logical_sector, previous,
&dd_idx, NULL);
@@ -6127,7 +6127,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
if ((bi->bi_opf & REQ_NOWAIT) &&
- (conf->reshape_progress != MaxSector) &&
get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) {
bio_wouldblock_error(bi);
if (rw == WRITE)
@@ -8954,9 +8953,13 @@ static void raid5_prepare_suspend(struct mddev *mddev)
static struct md_personality raid6_personality =
{
- .name = "raid6",
- .level = 6,
- .owner = THIS_MODULE,
+ .head = {
+ .type = MD_PERSONALITY,
+ .id = ID_RAID6,
+ .name = "raid6",
+ .owner = THIS_MODULE,
+ },
+
.make_request = raid5_make_request,
.run = raid5_run,
.start = raid5_start,
@@ -8980,9 +8983,13 @@ static struct md_personality raid6_personality =
};
static struct md_personality raid5_personality =
{
- .name = "raid5",
- .level = 5,
- .owner = THIS_MODULE,
+ .head = {
+ .type = MD_PERSONALITY,
+ .id = ID_RAID5,
+ .name = "raid5",
+ .owner = THIS_MODULE,
+ },
+
.make_request = raid5_make_request,
.run = raid5_run,
.start = raid5_start,
@@ -9007,9 +9014,13 @@ static struct md_personality raid5_personality =
static struct md_personality raid4_personality =
{
- .name = "raid4",
- .level = 4,
- .owner = THIS_MODULE,
+ .head = {
+ .type = MD_PERSONALITY,
+ .id = ID_RAID4,
+ .name = "raid4",
+ .owner = THIS_MODULE,
+ },
+
.make_request = raid5_make_request,
.run = raid5_run,
.start = raid5_start,
@@ -9045,21 +9056,39 @@ static int __init raid5_init(void)
"md/raid5:prepare",
raid456_cpu_up_prepare,
raid456_cpu_dead);
- if (ret) {
- destroy_workqueue(raid5_wq);
- return ret;
- }
- register_md_personality(&raid6_personality);
- register_md_personality(&raid5_personality);
- register_md_personality(&raid4_personality);
+ if (ret)
+ goto err_destroy_wq;
+
+ ret = register_md_submodule(&raid6_personality.head);
+ if (ret)
+ goto err_cpuhp_remove;
+
+ ret = register_md_submodule(&raid5_personality.head);
+ if (ret)
+ goto err_unregister_raid6;
+
+ ret = register_md_submodule(&raid4_personality.head);
+ if (ret)
+ goto err_unregister_raid5;
+
return 0;
+
+err_unregister_raid5:
+ unregister_md_submodule(&raid5_personality.head);
+err_unregister_raid6:
+ unregister_md_submodule(&raid6_personality.head);
+err_cpuhp_remove:
+ cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
+err_destroy_wq:
+ destroy_workqueue(raid5_wq);
+ return ret;
}
-static void raid5_exit(void)
+static void __exit raid5_exit(void)
{
- unregister_md_personality(&raid6_personality);
- unregister_md_personality(&raid5_personality);
- unregister_md_personality(&raid4_personality);
+ unregister_md_submodule(&raid6_personality.head);
+ unregister_md_submodule(&raid5_personality.head);
+ unregister_md_submodule(&raid4_personality.head);
cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
destroy_workqueue(raid5_wq);
}