summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/md/raid5-cache.c6
-rw-r--r--drivers/md/raid5.c95
-rw-r--r--drivers/md/raid5.h2
3 files changed, 52 insertions, 51 deletions
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 23f2cbcf1a6c..b4f7b79fd187 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2798,7 +2798,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
{
struct r5l_log *log = READ_ONCE(conf->log);
int i;
- int do_wakeup = 0;
sector_t tree_index;
void __rcu **pslot;
uintptr_t refcount;
@@ -2815,7 +2814,7 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
for (i = sh->disks; i--; ) {
clear_bit(R5_InJournal, &sh->dev[i].flags);
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
- do_wakeup = 1;
+ wake_up_bit(&sh->dev[i].flags, R5_Overlap);
}
/*
@@ -2828,9 +2827,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
if (atomic_dec_and_test(&conf->pending_full_writes))
md_wakeup_thread(conf->mddev->thread);
- if (do_wakeup)
- wake_up(&conf->wait_for_overlap);
-
spin_lock_irq(&log->stripe_in_journal_lock);
list_del_init(&sh->r5c);
spin_unlock_irq(&log->stripe_in_journal_lock);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c84a7e0263cd..dc2ea636d173 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2337,7 +2337,7 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
if (test_and_clear_bit(R5_Overlap, &dev->flags))
- wake_up(&sh->raid_conf->wait_for_overlap);
+ wake_up_bit(&dev->flags, R5_Overlap);
}
}
local_unlock(&conf->percpu->lock);
@@ -3473,7 +3473,7 @@ static bool stripe_bio_overlaps(struct stripe_head *sh, struct bio *bi,
* With PPL only writes to consecutive data chunks within a
* stripe are allowed because for a single stripe_head we can
* only have one PPL entry at a time, which describes one data
- * range. Not really an overlap, but wait_for_overlap can be
+ * range. Not really an overlap, but R5_Overlap can be
* used to handle this.
*/
sector_t sector;
@@ -3652,7 +3652,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
log_stripe_write_finished(sh);
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
- wake_up(&conf->wait_for_overlap);
+ wake_up_bit(&sh->dev[i].flags, R5_Overlap);
while (bi && bi->bi_iter.bi_sector <
sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
@@ -3697,7 +3697,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].toread = NULL;
spin_unlock_irq(&sh->stripe_lock);
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
- wake_up(&conf->wait_for_overlap);
+ wake_up_bit(&sh->dev[i].flags, R5_Overlap);
if (bi)
s->to_read--;
while (bi && bi->bi_iter.bi_sector <
@@ -3736,7 +3736,7 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
BUG_ON(sh->batch_head);
clear_bit(STRIPE_SYNCING, &sh->state);
if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
- wake_up(&conf->wait_for_overlap);
+ wake_up_bit(&sh->dev[sh->pd_idx].flags, R5_Overlap);
s->syncing = 0;
s->replacing = 0;
/* There is nothing more to do for sync/check/repair.
@@ -4877,7 +4877,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
{
struct stripe_head *sh, *next;
int i;
- int do_wakeup = 0;
list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) {
@@ -4913,7 +4912,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
spin_unlock_irq(&sh->stripe_lock);
for (i = 0; i < sh->disks; i++) {
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
- do_wakeup = 1;
+ wake_up_bit(&sh->dev[i].flags, R5_Overlap);
sh->dev[i].flags = head_sh->dev[i].flags &
(~((1 << R5_WriteError) | (1 << R5_Overlap)));
}
@@ -4927,12 +4926,9 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
spin_unlock_irq(&head_sh->stripe_lock);
for (i = 0; i < head_sh->disks; i++)
if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
- do_wakeup = 1;
+ wake_up_bit(&head_sh->dev[i].flags, R5_Overlap);
if (head_sh->state & handle_flags)
set_bit(STRIPE_HANDLE, &head_sh->state);
-
- if (do_wakeup)
- wake_up(&head_sh->raid_conf->wait_for_overlap);
}
static void handle_stripe(struct stripe_head *sh)
@@ -5198,7 +5194,7 @@ static void handle_stripe(struct stripe_head *sh)
md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1);
clear_bit(STRIPE_SYNCING, &sh->state);
if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
- wake_up(&conf->wait_for_overlap);
+ wake_up_bit(&sh->dev[sh->pd_idx].flags, R5_Overlap);
}
/* If the failed drives are just a ReadError, then we might need
@@ -5261,7 +5257,7 @@ static void handle_stripe(struct stripe_head *sh)
} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes);
- wake_up(&conf->wait_for_overlap);
+ wake_up(&conf->wait_for_reshape);
md_done_sync(conf->mddev, RAID5_STRIPE_SECTORS(conf), 1);
}
@@ -5755,12 +5751,11 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
int d;
again:
sh = raid5_get_active_stripe(conf, NULL, logical_sector, 0);
- prepare_to_wait(&conf->wait_for_overlap, &w,
- TASK_UNINTERRUPTIBLE);
set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
if (test_bit(STRIPE_SYNCING, &sh->state)) {
raid5_release_stripe(sh);
- schedule();
+ wait_on_bit(&sh->dev[sh->pd_idx].flags, R5_Overlap,
+ TASK_UNINTERRUPTIBLE);
goto again;
}
clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
@@ -5772,12 +5767,12 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
set_bit(R5_Overlap, &sh->dev[d].flags);
spin_unlock_irq(&sh->stripe_lock);
raid5_release_stripe(sh);
- schedule();
+ wait_on_bit(&sh->dev[d].flags, R5_Overlap,
+ TASK_UNINTERRUPTIBLE);
goto again;
}
}
set_bit(STRIPE_DISCARD, &sh->state);
- finish_wait(&conf->wait_for_overlap, &w);
sh->overwrite_disks = 0;
for (d = 0; d < conf->raid_disks; d++) {
if (d == sh->pd_idx || d == sh->qd_idx)
@@ -5854,7 +5849,6 @@ static int add_all_stripe_bios(struct r5conf *conf,
struct bio *bi, int forwrite, int previous)
{
int dd_idx;
- int ret = 1;
spin_lock_irq(&sh->stripe_lock);
@@ -5870,14 +5864,19 @@ static int add_all_stripe_bios(struct r5conf *conf,
if (stripe_bio_overlaps(sh, bi, dd_idx, forwrite)) {
set_bit(R5_Overlap, &dev->flags);
- ret = 0;
- continue;
+ spin_unlock_irq(&sh->stripe_lock);
+ raid5_release_stripe(sh);
+ /* release batch_last before wait to avoid risk of deadlock */
+ if (ctx->batch_last) {
+ raid5_release_stripe(ctx->batch_last);
+ ctx->batch_last = NULL;
+ }
+ md_wakeup_thread(conf->mddev->thread);
+ wait_on_bit(&dev->flags, R5_Overlap, TASK_UNINTERRUPTIBLE);
+ return 0;
}
}
- if (!ret)
- goto out;
-
for (dd_idx = 0; dd_idx < sh->disks; dd_idx++) {
struct r5dev *dev = &sh->dev[dd_idx];
@@ -5893,9 +5892,8 @@ static int add_all_stripe_bios(struct r5conf *conf,
RAID5_STRIPE_SHIFT(conf), ctx->sectors_to_do);
}
-out:
spin_unlock_irq(&sh->stripe_lock);
- return ret;
+ return 1;
}
enum reshape_loc {
@@ -5991,17 +5989,17 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
goto out_release;
}
- if (test_bit(STRIPE_EXPANDING, &sh->state) ||
- !add_all_stripe_bios(conf, ctx, sh, bi, rw, previous)) {
- /*
- * Stripe is busy expanding or add failed due to
- * overlap. Flush everything and wait a while.
- */
+ if (test_bit(STRIPE_EXPANDING, &sh->state)) {
md_wakeup_thread(mddev->thread);
ret = STRIPE_SCHEDULE_AND_RETRY;
goto out_release;
}
+ if (!add_all_stripe_bios(conf, ctx, sh, bi, rw, previous)) {
+ ret = STRIPE_RETRY;
+ goto out;
+ }
+
if (stripe_can_batch(sh)) {
stripe_add_to_batch_list(conf, sh, ctx->batch_last);
if (ctx->batch_last)
@@ -6072,6 +6070,7 @@ static sector_t raid5_bio_lowest_chunk_sector(struct r5conf *conf,
static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ bool on_wq;
struct r5conf *conf = mddev->private;
sector_t logical_sector;
struct stripe_request_ctx ctx = {};
@@ -6145,11 +6144,15 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
* sequential IO pattern. We don't bother with the optimization when
* reshaping as the performance benefit is not worth the complexity.
*/
- if (likely(conf->reshape_progress == MaxSector))
+ if (likely(conf->reshape_progress == MaxSector)) {
logical_sector = raid5_bio_lowest_chunk_sector(conf, bi);
+ on_wq = false;
+ } else {
+ add_wait_queue(&conf->wait_for_reshape, &wait);
+ on_wq = true;
+ }
s = (logical_sector - ctx.first_sector) >> RAID5_STRIPE_SHIFT(conf);
- add_wait_queue(&conf->wait_for_overlap, &wait);
while (1) {
res = make_stripe_request(mddev, conf, &ctx, logical_sector,
bi);
@@ -6160,6 +6163,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
continue;
if (res == STRIPE_SCHEDULE_AND_RETRY) {
+ WARN_ON_ONCE(!on_wq);
/*
* Must release the reference to batch_last before
* scheduling and waiting for work to be done,
@@ -6184,7 +6188,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
logical_sector = ctx.first_sector +
(s << RAID5_STRIPE_SHIFT(conf));
}
- remove_wait_queue(&conf->wait_for_overlap, &wait);
+ if (unlikely(on_wq))
+ remove_wait_queue(&conf->wait_for_reshape, &wait);
if (ctx.batch_last)
raid5_release_stripe(ctx.batch_last);
@@ -6337,7 +6342,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
: (safepos < writepos && readpos > writepos)) ||
time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
/* Cannot proceed until we've updated the superblock... */
- wait_event(conf->wait_for_overlap,
+ wait_event(conf->wait_for_reshape,
atomic_read(&conf->reshape_stripes)==0
|| test_bit(MD_RECOVERY_INTR, &mddev->recovery));
if (atomic_read(&conf->reshape_stripes) != 0)
@@ -6363,7 +6368,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
spin_lock_irq(&conf->device_lock);
conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock);
- wake_up(&conf->wait_for_overlap);
+ wake_up(&conf->wait_for_reshape);
sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
@@ -6446,7 +6451,7 @@ finish:
(sector_nr - mddev->curr_resync_completed) * 2
>= mddev->resync_max - mddev->curr_resync_completed) {
/* Cannot proceed until we've updated the superblock... */
- wait_event(conf->wait_for_overlap,
+ wait_event(conf->wait_for_reshape,
atomic_read(&conf->reshape_stripes) == 0
|| test_bit(MD_RECOVERY_INTR, &mddev->recovery));
if (atomic_read(&conf->reshape_stripes) != 0)
@@ -6472,7 +6477,7 @@ finish:
spin_lock_irq(&conf->device_lock);
conf->reshape_safe = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock);
- wake_up(&conf->wait_for_overlap);
+ wake_up(&conf->wait_for_reshape);
sysfs_notify_dirent_safe(mddev->sysfs_completed);
}
ret:
@@ -6507,7 +6512,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
}
/* Allow raid5_quiesce to complete */
- wait_event(conf->wait_for_overlap, conf->quiesce != 2);
+ wait_event(conf->wait_for_reshape, conf->quiesce != 2);
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
return reshape_request(mddev, sector_nr, skipped);
@@ -7493,7 +7498,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
init_waitqueue_head(&conf->wait_for_quiescent);
init_waitqueue_head(&conf->wait_for_stripe);
- init_waitqueue_head(&conf->wait_for_overlap);
+ init_waitqueue_head(&conf->wait_for_reshape);
INIT_LIST_HEAD(&conf->handle_list);
INIT_LIST_HEAD(&conf->loprio_list);
INIT_LIST_HEAD(&conf->hold_list);
@@ -8552,7 +8557,7 @@ static void end_reshape(struct r5conf *conf)
!test_bit(In_sync, &rdev->flags))
rdev->recovery_offset = MaxSector;
spin_unlock_irq(&conf->device_lock);
- wake_up(&conf->wait_for_overlap);
+ wake_up(&conf->wait_for_reshape);
mddev_update_io_opt(conf->mddev,
conf->raid_disks - conf->max_degraded);
@@ -8616,13 +8621,13 @@ static void raid5_quiesce(struct mddev *mddev, int quiesce)
conf->quiesce = 1;
unlock_all_device_hash_locks_irq(conf);
/* allow reshape to continue */
- wake_up(&conf->wait_for_overlap);
+ wake_up(&conf->wait_for_reshape);
} else {
/* re-enable writes */
lock_all_device_hash_locks_irq(conf);
conf->quiesce = 0;
wake_up(&conf->wait_for_quiescent);
- wake_up(&conf->wait_for_overlap);
+ wake_up(&conf->wait_for_reshape);
unlock_all_device_hash_locks_irq(conf);
}
log_quiesce(conf, quiesce);
@@ -8941,7 +8946,7 @@ static void raid5_prepare_suspend(struct mddev *mddev)
{
struct r5conf *conf = mddev->private;
- wake_up(&conf->wait_for_overlap);
+ wake_up(&conf->wait_for_reshape);
}
static struct md_personality raid6_personality =
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 9b5a7dc3f2a0..896ecfc4afa6 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -668,7 +668,7 @@ struct r5conf {
struct llist_head released_stripes;
wait_queue_head_t wait_for_quiescent;
wait_queue_head_t wait_for_stripe;
- wait_queue_head_t wait_for_overlap;
+ wait_queue_head_t wait_for_reshape;
unsigned long cache_state;
struct shrinker *shrinker;
int pool_size; /* number of disks in stripeheads in pool */