Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r-- | block/blk-throttle.c | 411
1 file changed, 274 insertions, 137 deletions
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index d6dd2e047874..bd15357f23bd 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -143,7 +143,8 @@ static inline unsigned int throtl_bio_data_size(struct bio *bio)
 static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
 {
 	INIT_LIST_HEAD(&qn->node);
-	bio_list_init(&qn->bios);
+	bio_list_init(&qn->bios_bps);
+	bio_list_init(&qn->bios_iops);
 	qn->tg = tg;
 }
 
@@ -151,18 +152,32 @@ static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
  * throtl_qnode_add_bio - add a bio to a throtl_qnode and activate it
  * @bio: bio being added
  * @qn: qnode to add bio to
- * @queued: the service_queue->queued[] list @qn belongs to
+ * @sq: the service_queue @qn belongs to
  *
- * Add @bio to @qn and put @qn on @queued if it's not already on.
+ * Add @bio to @qn and put @qn on @sq->queued if it's not already on.
  * @qn->tg's reference count is bumped when @qn is activated. See the
  * comment on top of throtl_qnode definition for details.
  */
 static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
-				 struct list_head *queued)
+				 struct throtl_service_queue *sq)
 {
-	bio_list_add(&qn->bios, bio);
+	bool rw = bio_data_dir(bio);
+
+	/*
+	 * Split bios have already been throttled by bps, so they are
+	 * directly queued into the iops path.
+	 */
+	if (bio_flagged(bio, BIO_TG_BPS_THROTTLED) ||
+	    bio_flagged(bio, BIO_BPS_THROTTLED)) {
+		bio_list_add(&qn->bios_iops, bio);
+		sq->nr_queued_iops[rw]++;
+	} else {
+		bio_list_add(&qn->bios_bps, bio);
+		sq->nr_queued_bps[rw]++;
+	}
+
 	if (list_empty(&qn->node)) {
-		list_add_tail(&qn->node, queued);
+		list_add_tail(&qn->node, &sq->queued[rw]);
 		blkg_get(tg_to_blkg(qn->tg));
 	}
 }
@@ -170,6 +185,10 @@ static void throtl_qnode_add_bio(struct bio *bio, struct throtl_qnode *qn,
 /**
  * throtl_peek_queued - peek the first bio on a qnode list
  * @queued: the qnode list to peek
+ *
+ * Always take a bio from the head of the iops queue first. If the queue is
+ * empty, we then take it from the bps queue to maintain the overall idea of
+ * fetching bios from the head.
  */
 static struct bio *throtl_peek_queued(struct list_head *queued)
 {
@@ -180,28 +199,33 @@ static struct bio *throtl_peek_queued(struct list_head *queued)
 		return NULL;
 
 	qn = list_first_entry(queued, struct throtl_qnode, node);
-	bio = bio_list_peek(&qn->bios);
+	bio = bio_list_peek(&qn->bios_iops);
+	if (!bio)
+		bio = bio_list_peek(&qn->bios_bps);
 	WARN_ON_ONCE(!bio);
 	return bio;
 }
 
 /**
  * throtl_pop_queued - pop the first bio form a qnode list
- * @queued: the qnode list to pop a bio from
+ * @sq: the service_queue to pop a bio from
  * @tg_to_put: optional out argument for throtl_grp to put
+ * @rw: read/write
  *
- * Pop the first bio from the qnode list @queued. After popping, the first
- * qnode is removed from @queued if empty or moved to the end of @queued so
- * that the popping order is round-robin.
+ * Pop the first bio from the qnode list @sq->queued. Note that we firstly
+ * focus on the iops list because bios are ultimately dispatched from it.
+ * After popping, the first qnode is removed from @sq->queued if empty or moved
+ * to the end of @sq->queued so that the popping order is round-robin.
  *
 * When the first qnode is removed, its associated throtl_grp should be put
 * too. If @tg_to_put is NULL, this function automatically puts it;
 * otherwise, *@tg_to_put is set to the throtl_grp to put and the caller is
 * responsible for putting it.
  */
-static struct bio *throtl_pop_queued(struct list_head *queued,
-				     struct throtl_grp **tg_to_put)
+static struct bio *throtl_pop_queued(struct throtl_service_queue *sq,
+				     struct throtl_grp **tg_to_put, bool rw)
 {
+	struct list_head *queued = &sq->queued[rw];
 	struct throtl_qnode *qn;
 	struct bio *bio;
 
@@ -209,10 +233,17 @@ static struct bio *throtl_pop_queued(struct list_head *queued,
 		return NULL;
 
 	qn = list_first_entry(queued, struct throtl_qnode, node);
-	bio = bio_list_pop(&qn->bios);
+	bio = bio_list_pop(&qn->bios_iops);
+	if (bio) {
+		sq->nr_queued_iops[rw]--;
+	} else {
+		bio = bio_list_pop(&qn->bios_bps);
+		if (bio)
+			sq->nr_queued_bps[rw]--;
+	}
 	WARN_ON_ONCE(!bio);
 
-	if (bio_list_empty(&qn->bios)) {
+	if (bio_list_empty(&qn->bios_bps) && bio_list_empty(&qn->bios_iops)) {
 		list_del_init(&qn->node);
 		if (tg_to_put)
 			*tg_to_put = qn->tg;
@@ -520,6 +551,9 @@ static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
 static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
 				       unsigned long jiffy_end)
 {
+	if (!time_before(tg->slice_end[rw], jiffy_end))
+		return;
+
 	throtl_set_slice_end(tg, rw, jiffy_end);
 	throtl_log(&tg->service_queue,
 		   "[%c] extend slice start=%lu end=%lu jiffies=%lu",
@@ -536,6 +570,11 @@ static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
 	return true;
 }
 
+static unsigned int sq_queued(struct throtl_service_queue *sq, int type)
+{
+	return sq->nr_queued_bps[type] + sq->nr_queued_iops[type];
+}
+
 static unsigned int calculate_io_allowed(u32 iops_limit,
 					 unsigned long jiffy_elapsed)
 {
@@ -571,6 +610,48 @@ static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
 	return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
 }
 
+static long long throtl_trim_bps(struct throtl_grp *tg, bool rw,
+				 unsigned long time_elapsed)
+{
+	u64 bps_limit = tg_bps_limit(tg, rw);
+	long long bytes_trim;
+
+	if (bps_limit == U64_MAX)
+		return 0;
+
+	/* Need to consider the case of bytes_allowed overflow. */
+	bytes_trim = calculate_bytes_allowed(bps_limit, time_elapsed);
+	if (bytes_trim <= 0 || tg->bytes_disp[rw] < bytes_trim) {
+		bytes_trim = tg->bytes_disp[rw];
+		tg->bytes_disp[rw] = 0;
+	} else {
+		tg->bytes_disp[rw] -= bytes_trim;
+	}
+
+	return bytes_trim;
+}
+
+static int throtl_trim_iops(struct throtl_grp *tg, bool rw,
+			    unsigned long time_elapsed)
+{
+	u32 iops_limit = tg_iops_limit(tg, rw);
+	int io_trim;
+
+	if (iops_limit == UINT_MAX)
+		return 0;
+
+	/* Need to consider the case of io_allowed overflow. */
+	io_trim = calculate_io_allowed(iops_limit, time_elapsed);
+	if (io_trim <= 0 || tg->io_disp[rw] < io_trim) {
+		io_trim = tg->io_disp[rw];
+		tg->io_disp[rw] = 0;
+	} else {
+		tg->io_disp[rw] -= io_trim;
+	}
+
+	return io_trim;
+}
+
 /* Trim the used slices and adjust slice start accordingly */
 static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
 {
@@ -612,22 +693,11 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
 	 * one extra slice is preserved for deviation.
 	 */
 	time_elapsed -= tg->td->throtl_slice;
-	bytes_trim = calculate_bytes_allowed(tg_bps_limit(tg, rw),
-					     time_elapsed);
-	io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed);
-	if (bytes_trim <= 0 && io_trim <= 0)
+	bytes_trim = throtl_trim_bps(tg, rw, time_elapsed);
+	io_trim = throtl_trim_iops(tg, rw, time_elapsed);
+	if (!bytes_trim && !io_trim)
 		return;
 
-	if ((long long)tg->bytes_disp[rw] >= bytes_trim)
-		tg->bytes_disp[rw] -= bytes_trim;
-	else
-		tg->bytes_disp[rw] = 0;
-
-	if ((int)tg->io_disp[rw] >= io_trim)
-		tg->io_disp[rw] -= io_trim;
-	else
-		tg->io_disp[rw] = 0;
-
 	tg->slice_start[rw] += time_elapsed;
 
 	throtl_log(&tg->service_queue,
@@ -643,21 +713,41 @@ static void __tg_update_carryover(struct throtl_grp *tg, bool rw,
 	unsigned long jiffy_elapsed = jiffies - tg->slice_start[rw];
 	u64 bps_limit = tg_bps_limit(tg, rw);
 	u32 iops_limit = tg_iops_limit(tg, rw);
+	long long bytes_allowed;
+	int io_allowed;
+
+	/*
+	 * If the queue is empty, carryover handling is not needed. In such cases,
+	 * tg->[bytes/io]_disp should be reset to 0 to avoid impacting the dispatch
+	 * of subsequent bios. The same handling applies when the previous BPS/IOPS
+	 * limit was set to max.
+	 */
+	if (sq_queued(&tg->service_queue, rw) == 0) {
+		tg->bytes_disp[rw] = 0;
+		tg->io_disp[rw] = 0;
+		return;
+	}
 
 	/*
 	 * If config is updated while bios are still throttled, calculate and
-	 * accumulate how many bytes/ios are waited across changes. And
-	 * carryover_bytes/ios will be used to calculate new wait time under new
-	 * configuration.
+	 * accumulate how many bytes/ios are waited across changes. And use the
+	 * calculated carryover (@bytes/@ios) to update [bytes/io]_disp, which
+	 * will be used to calculate new wait time under new configuration.
+	 * And we need to consider the case of bytes/io_allowed overflow.
 	 */
-	if (bps_limit != U64_MAX)
-		*bytes = calculate_bytes_allowed(bps_limit, jiffy_elapsed) -
-			tg->bytes_disp[rw];
-	if (iops_limit != UINT_MAX)
-		*ios = calculate_io_allowed(iops_limit, jiffy_elapsed) -
-			tg->io_disp[rw];
-	tg->bytes_disp[rw] -= *bytes;
-	tg->io_disp[rw] -= *ios;
+	if (bps_limit != U64_MAX) {
+		bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed);
+		if (bytes_allowed > 0)
+			*bytes = bytes_allowed - tg->bytes_disp[rw];
+	}
+	if (iops_limit != UINT_MAX) {
+		io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed);
+		if (io_allowed > 0)
+			*ios = io_allowed - tg->io_disp[rw];
+	}
+
+	tg->bytes_disp[rw] = -*bytes;
+	tg->io_disp[rw] = -*ios;
 }
 
 static void tg_update_carryover(struct throtl_grp *tg)
@@ -665,12 +755,10 @@ static void tg_update_carryover(struct throtl_grp *tg)
 	long long bytes[2] = {0};
 	int ios[2] = {0};
 
-	if (tg->service_queue.nr_queued[READ])
-		__tg_update_carryover(tg, READ, &bytes[READ], &ios[READ]);
-	if (tg->service_queue.nr_queued[WRITE])
-		__tg_update_carryover(tg, WRITE, &bytes[WRITE], &ios[WRITE]);
+	__tg_update_carryover(tg, READ, &bytes[READ], &ios[READ]);
+	__tg_update_carryover(tg, WRITE, &bytes[WRITE], &ios[WRITE]);
 
-	/* see comments in struct throtl_grp for meaning of these fields. */
+	/* see comments in struct throtl_grp for meaning of carryover. */
 	throtl_log(&tg->service_queue, "%s: %lld %lld %d %d\n", __func__,
 		   bytes[READ], bytes[WRITE], ios[READ], ios[WRITE]);
 }
@@ -682,10 +770,6 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
 	int io_allowed;
 	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
 
-	if (iops_limit == UINT_MAX) {
-		return 0;
-	}
-
 	jiffy_elapsed = jiffies - tg->slice_start[rw];
 
 	/* Round up to the next throttle slice, wait time must be nonzero */
@@ -711,11 +795,6 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
 	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
 	unsigned int bio_size = throtl_bio_data_size(bio);
 
-	/* no need to throttle if this bio's bytes have been accounted */
-	if (bps_limit == U64_MAX || bio_flagged(bio, BIO_BPS_THROTTLED)) {
-		return 0;
-	}
-
 	jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
 
 	/* Slice has just started. Consider one slice interval */
@@ -724,7 +803,9 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
 
 	jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
 	bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd);
-	if (bytes_allowed > 0 && tg->bytes_disp[rw] + bio_size <= bytes_allowed)
+	/* Need to consider the case of bytes_allowed overflow. */
+	if ((bytes_allowed > 0 && tg->bytes_disp[rw] + bio_size <= bytes_allowed)
+	    || bytes_allowed < 0)
 		return 0;
 
 	/* Calc approx time to dispatch */
@@ -742,17 +823,82 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
 	return jiffy_wait;
 }
 
+static void throtl_charge_bps_bio(struct throtl_grp *tg, struct bio *bio)
+{
+	unsigned int bio_size = throtl_bio_data_size(bio);
+
+	/* Charge the bio to the group */
+	if (!bio_flagged(bio, BIO_BPS_THROTTLED) &&
+	    !bio_flagged(bio, BIO_TG_BPS_THROTTLED)) {
+		bio_set_flag(bio, BIO_TG_BPS_THROTTLED);
+		tg->bytes_disp[bio_data_dir(bio)] += bio_size;
+	}
+}
+
+static void throtl_charge_iops_bio(struct throtl_grp *tg, struct bio *bio)
+{
+	bio_clear_flag(bio, BIO_TG_BPS_THROTTLED);
+	tg->io_disp[bio_data_dir(bio)]++;
+}
+
 /*
- * Returns whether one can dispatch a bio or not. Also returns approx number
- * of jiffies to wait before this bio is with-in IO rate and can be dispatched
+ * If previous slice expired, start a new one otherwise renew/extend existing
+ * slice to make sure it is at least throtl_slice interval long since now. New
+ * slice is started only for empty throttle group. If there is queued bio, that
+ * means there should be an active slice and it should be extended instead.
  */
-static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
-			    unsigned long *wait)
+static void tg_update_slice(struct throtl_grp *tg, bool rw)
+{
+	if (throtl_slice_used(tg, rw) &&
+	    sq_queued(&tg->service_queue, rw) == 0)
+		throtl_start_new_slice(tg, rw, true);
+	else
+		throtl_extend_slice(tg, rw, jiffies + tg->td->throtl_slice);
+}
+
+static unsigned long tg_dispatch_bps_time(struct throtl_grp *tg, struct bio *bio)
 {
 	bool rw = bio_data_dir(bio);
-	unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
 	u64 bps_limit = tg_bps_limit(tg, rw);
+	unsigned long bps_wait;
+
+	/* no need to throttle if this bio's bytes have been accounted */
+	if (bps_limit == U64_MAX || tg->flags & THROTL_TG_CANCELING ||
+	    bio_flagged(bio, BIO_BPS_THROTTLED) ||
+	    bio_flagged(bio, BIO_TG_BPS_THROTTLED))
+		return 0;
+
+	tg_update_slice(tg, rw);
+	bps_wait = tg_within_bps_limit(tg, bio, bps_limit);
+	throtl_extend_slice(tg, rw, jiffies + bps_wait);
+
+	return bps_wait;
+}
+
+static unsigned long tg_dispatch_iops_time(struct throtl_grp *tg, struct bio *bio)
+{
+	bool rw = bio_data_dir(bio);
 	u32 iops_limit = tg_iops_limit(tg, rw);
+	unsigned long iops_wait;
+
+	if (iops_limit == UINT_MAX || tg->flags & THROTL_TG_CANCELING)
+		return 0;
+
+	tg_update_slice(tg, rw);
+	iops_wait = tg_within_iops_limit(tg, bio, iops_limit);
+	throtl_extend_slice(tg, rw, jiffies + iops_wait);
+
+	return iops_wait;
+}
+
+/*
+ * Returns approx number of jiffies to wait before this bio is with-in IO rate
+ * and can be moved to other queue or dispatched.
+ */
+static unsigned long tg_dispatch_time(struct throtl_grp *tg, struct bio *bio)
+{
+	bool rw = bio_data_dir(bio);
+	unsigned long wait;
 
 	/*
 	 * Currently whole state machine of group depends on first bio
@@ -760,62 +906,20 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 	 * this function with a different bio if there are other bios
 	 * queued.
 	 */
-	BUG_ON(tg->service_queue.nr_queued[rw] &&
+	BUG_ON(sq_queued(&tg->service_queue, rw) &&
 	       bio != throtl_peek_queued(&tg->service_queue.queued[rw]));
 
-	/* If tg->bps = -1, then BW is unlimited */
-	if ((bps_limit == U64_MAX && iops_limit == UINT_MAX) ||
-	    tg->flags & THROTL_TG_CANCELING) {
-		if (wait)
-			*wait = 0;
-		return true;
-	}
+	wait = tg_dispatch_bps_time(tg, bio);
+	if (wait != 0)
+		return wait;
 
 	/*
-	 * If previous slice expired, start a new one otherwise renew/extend
-	 * existing slice to make sure it is at least throtl_slice interval
-	 * long since now. New slice is started only for empty throttle group.
-	 * If there is queued bio, that means there should be an active
-	 * slice and it should be extended instead.
+	 * Charge bps here because @bio will be directly placed into the
+	 * iops queue afterward.
 	 */
-	if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
-		throtl_start_new_slice(tg, rw, true);
-	else {
-		if (time_before(tg->slice_end[rw],
-				jiffies + tg->td->throtl_slice))
-			throtl_extend_slice(tg, rw,
-					    jiffies + tg->td->throtl_slice);
-	}
-
-	bps_wait = tg_within_bps_limit(tg, bio, bps_limit);
-	iops_wait = tg_within_iops_limit(tg, bio, iops_limit);
-	if (bps_wait + iops_wait == 0) {
-		if (wait)
-			*wait = 0;
-		return true;
-	}
-
-	max_wait = max(bps_wait, iops_wait);
-
-	if (wait)
-		*wait = max_wait;
-
-	if (time_before(tg->slice_end[rw], jiffies + max_wait))
-		throtl_extend_slice(tg, rw, jiffies + max_wait);
-
-	return false;
-}
-
-static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
-{
-	bool rw = bio_data_dir(bio);
-	unsigned int bio_size = throtl_bio_data_size(bio);
+	throtl_charge_bps_bio(tg, bio);
 
-	/* Charge the bio to the group */
-	if (!bio_flagged(bio, BIO_BPS_THROTTLED))
-		tg->bytes_disp[rw] += bio_size;
-
-	tg->io_disp[rw]++;
+	return tg_dispatch_iops_time(tg, bio);
 }
 
 /**
@@ -842,28 +946,36 @@ static void throtl_add_bio_tg(struct bio *bio, struct throtl_qnode *qn,
 	 * dispatched. Mark that @tg was empty. This is automatically
 	 * cleared on the next tg_update_disptime().
 	 */
-	if (!sq->nr_queued[rw])
+	if (sq_queued(sq, rw) == 0)
 		tg->flags |= THROTL_TG_WAS_EMPTY;
 
-	throtl_qnode_add_bio(bio, qn, &sq->queued[rw]);
+	throtl_qnode_add_bio(bio, qn, sq);
+
+	/*
+	 * Since we have split the queues, when the iops queue is
+	 * previously empty and a new @bio is added into the first @qn,
+	 * we also need to update the @tg->disptime.
+	 */
+	if (bio_flagged(bio, BIO_BPS_THROTTLED) &&
+	    bio == throtl_peek_queued(&sq->queued[rw]))
+		tg->flags |= THROTL_TG_IOPS_WAS_EMPTY;
 
-	sq->nr_queued[rw]++;
 	throtl_enqueue_tg(tg);
 }
 
 static void tg_update_disptime(struct throtl_grp *tg)
 {
 	struct throtl_service_queue *sq = &tg->service_queue;
-	unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
+	unsigned long read_wait = -1, write_wait = -1, min_wait, disptime;
 	struct bio *bio;
 
 	bio = throtl_peek_queued(&sq->queued[READ]);
 	if (bio)
-		tg_may_dispatch(tg, bio, &read_wait);
+		read_wait = tg_dispatch_time(tg, bio);
 
 	bio = throtl_peek_queued(&sq->queued[WRITE]);
 	if (bio)
-		tg_may_dispatch(tg, bio, &write_wait);
+		write_wait = tg_dispatch_time(tg, bio);
 
 	min_wait = min(read_wait, write_wait);
 	disptime = jiffies + min_wait;
@@ -875,6 +987,7 @@ static void tg_update_disptime(struct throtl_grp *tg)
 
 	/* see throtl_add_bio_tg() */
 	tg->flags &= ~THROTL_TG_WAS_EMPTY;
+	tg->flags &= ~THROTL_TG_IOPS_WAS_EMPTY;
 }
 
 static void start_parent_slice_with_credit(struct throtl_grp *child_tg,
@@ -901,10 +1014,9 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
 	 * getting released prematurely. Remember the tg to put and put it
 	 * after @bio is transferred to @parent_sq.
 	 */
-	bio = throtl_pop_queued(&sq->queued[rw], &tg_to_put);
-	sq->nr_queued[rw]--;
+	bio = throtl_pop_queued(sq, &tg_to_put, rw);
 
-	throtl_charge_bio(tg, bio);
+	throtl_charge_iops_bio(tg, bio);
 
 	/*
 	 * If our parent is another tg, we just need to transfer @bio to
@@ -919,7 +1031,7 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
 	} else {
 		bio_set_flag(bio, BIO_BPS_THROTTLED);
 		throtl_qnode_add_bio(bio, &tg->qnode_on_parent[rw],
-				     &parent_sq->queued[rw]);
+				     parent_sq);
 		BUG_ON(tg->td->nr_queued[rw] <= 0);
 		tg->td->nr_queued[rw]--;
 	}
@@ -941,7 +1053,7 @@ static int throtl_dispatch_tg(struct throtl_grp *tg)
 	/* Try to dispatch 75% READS and 25% WRITES */
 
 	while ((bio = throtl_peek_queued(&sq->queued[READ])) &&
-	       tg_may_dispatch(tg, bio, NULL)) {
+	       tg_dispatch_time(tg, bio) == 0) {
 
 		tg_dispatch_one_bio(tg, READ);
 		nr_reads++;
@@ -951,7 +1063,7 @@
 	}
 
 	while ((bio = throtl_peek_queued(&sq->queued[WRITE])) &&
-	       tg_may_dispatch(tg, bio, NULL)) {
+	       tg_dispatch_time(tg, bio) == 0) {
 
 		tg_dispatch_one_bio(tg, WRITE);
 		nr_writes++;
@@ -984,7 +1096,7 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
 		nr_disp += throtl_dispatch_tg(tg);
 
 		sq = &tg->service_queue;
-		if (sq->nr_queued[READ] || sq->nr_queued[WRITE])
+		if (sq_queued(sq, READ) || sq_queued(sq, WRITE))
 			tg_update_disptime(tg);
 		else
 			throtl_dequeue_tg(tg);
@@ -1037,9 +1149,11 @@ again:
 	dispatched = false;
 
 	while (true) {
+		unsigned int __maybe_unused bio_cnt_r = sq_queued(sq, READ);
+		unsigned int __maybe_unused bio_cnt_w = sq_queued(sq, WRITE);
+
 		throtl_log(sq, "dispatch nr_queued=%u read=%u write=%u",
-			   sq->nr_queued[READ] + sq->nr_queued[WRITE],
-			   sq->nr_queued[READ], sq->nr_queued[WRITE]);
+			   bio_cnt_r + bio_cnt_w, bio_cnt_r, bio_cnt_w);
 
 		ret = throtl_select_dispatch(sq);
 		if (ret) {
@@ -1061,7 +1175,8 @@
 
 	if (parent_sq) {
 		/* @parent_sq is another throl_grp, propagate dispatch */
-		if (tg->flags & THROTL_TG_WAS_EMPTY) {
+		if (tg->flags & THROTL_TG_WAS_EMPTY ||
+		    tg->flags & THROTL_TG_IOPS_WAS_EMPTY) {
 			tg_update_disptime(tg);
 			if (!throtl_schedule_next_dispatch(parent_sq, false)) {
 				/* window is already open, repeat dispatching */
@@ -1101,7 +1216,7 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work)
 
 	spin_lock_irq(&q->queue_lock);
 	for (rw = READ; rw <= WRITE; rw++)
-		while ((bio = throtl_pop_queued(&td_sq->queued[rw], NULL)))
+		while ((bio = throtl_pop_queued(td_sq, NULL, rw)))
 			bio_list_add(&bio_list_on_stack, bio);
 	spin_unlock_irq(&q->queue_lock);
 
@@ -1606,11 +1721,30 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
 
 static bool tg_within_limit(struct throtl_grp *tg, struct bio *bio, bool rw)
 {
-	/* throtl is FIFO - if bios are already queued, should queue */
-	if (tg->service_queue.nr_queued[rw])
+	struct throtl_service_queue *sq = &tg->service_queue;
+
+	/*
+	 * For a split bio, we need to specifically distinguish whether the
+	 * iops queue is empty.
+	 */
+	if (bio_flagged(bio, BIO_BPS_THROTTLED))
+		return sq->nr_queued_iops[rw] == 0 &&
+			tg_dispatch_iops_time(tg, bio) == 0;
+
+	/*
+	 * Throtl is FIFO - if bios are already queued, should queue.
+	 * If the bps queue is empty and @bio is within the bps limit, charge
+	 * bps here for direct placement into the iops queue.
+	 */
+	if (sq_queued(&tg->service_queue, rw)) {
+		if (sq->nr_queued_bps[rw] == 0 &&
+		    tg_dispatch_bps_time(tg, bio) == 0)
+			throtl_charge_bps_bio(tg, bio);
+
 		return false;
+	}
 
-	return tg_may_dispatch(tg, bio, NULL);
+	return tg_dispatch_time(tg, bio) == 0;
 }
 
 bool __blk_throtl_bio(struct bio *bio)
@@ -1631,7 +1765,7 @@
 	while (true) {
 		if (tg_within_limit(tg, bio, rw)) {
 			/* within limits, let's charge and dispatch directly */
-			throtl_charge_bio(tg, bio);
+			throtl_charge_iops_bio(tg, bio);
 
 			/*
 			 * We need to trim slice even when bios are not being
@@ -1654,7 +1788,8 @@
 			 * control algorithm is adaptive, and extra IO bytes
 			 * will be throttled for paying the debt
 			 */
-			throtl_charge_bio(tg, bio);
+			throtl_charge_bps_bio(tg, bio);
+			throtl_charge_iops_bio(tg, bio);
 		} else {
 			/* if above limits, break to queue */
 			break;
@@ -1680,7 +1815,7 @@
 		   tg->bytes_disp[rw], bio->bi_iter.bi_size,
 		   tg_bps_limit(tg, rw),
 		   tg->io_disp[rw], tg_iops_limit(tg, rw),
-		   sq->nr_queued[READ], sq->nr_queued[WRITE]);
+		   sq_queued(sq, READ), sq_queued(sq, WRITE));
 
 	td->nr_queued[rw]++;
 	throtl_add_bio_tg(bio, qn, tg);
@@ -1688,11 +1823,13 @@
 
 	/*
 	 * Update @tg's dispatch time and force schedule dispatch if @tg
-	 * was empty before @bio. The forced scheduling isn't likely to
-	 * cause undue delay as @bio is likely to be dispatched directly if
-	 * its @tg's disptime is not in the future.
+	 * was empty before @bio, or the iops queue is empty and @bio will
+	 * add to. The forced scheduling isn't likely to cause undue
+	 * delay as @bio is likely to be dispatched directly if its @tg's
+	 * disptime is not in the future.
 	 */
-	if (tg->flags & THROTL_TG_WAS_EMPTY) {
+	if (tg->flags & THROTL_TG_WAS_EMPTY ||
+	    tg->flags & THROTL_TG_IOPS_WAS_EMPTY) {
 		tg_update_disptime(tg);
 		throtl_schedule_next_dispatch(tg->service_queue.parent_sq, true);
 	}
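
Editor's note: the core idea of this patch is that each qnode now carries two bio lists - bios waiting on the bps limit and bios that have already been bps-charged and only wait on the iops limit - and that peek/pop always drain the iops list before the bps list. The following is a minimal userspace C sketch of just that ordering rule; the types and helpers below (struct qnode, qnode_add_bio, qnode_pop_bio, the bps_throttled flag) are illustrative stand-ins and not the kernel's throtl_qnode, throtl_service_queue or BIO_*_THROTTLED flags.

/*
 * Simplified model of the split bps/iops queues: a bio that is already
 * bps-throttled (e.g. a split bio) goes straight onto the iops list,
 * everything else starts on the bps list, and dispatch drains the iops
 * list first. Illustration only; not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct bio {                    /* stand-in for struct bio */
	int id;
	bool bps_throttled;     /* models BIO_(TG_)BPS_THROTTLED */
	struct bio *next;
};

struct bio_list {               /* minimal singly linked FIFO */
	struct bio *head, *tail;
};

static void bio_list_add(struct bio_list *bl, struct bio *bio)
{
	bio->next = NULL;
	if (bl->tail)
		bl->tail->next = bio;
	else
		bl->head = bio;
	bl->tail = bio;
}

static struct bio *bio_list_pop(struct bio_list *bl)
{
	struct bio *bio = bl->head;

	if (bio) {
		bl->head = bio->next;
		if (!bl->head)
			bl->tail = NULL;
	}
	return bio;
}

struct qnode {                  /* stand-in for struct throtl_qnode */
	struct bio_list bios_bps;
	struct bio_list bios_iops;
};

/* Queueing rule: bps-throttled bios go directly to the iops list. */
static void qnode_add_bio(struct qnode *qn, struct bio *bio)
{
	if (bio->bps_throttled)
		bio_list_add(&qn->bios_iops, bio);
	else
		bio_list_add(&qn->bios_bps, bio);
}

/* Pop ordering mirrored from throtl_pop_queued(): iops first, then bps. */
static struct bio *qnode_pop_bio(struct qnode *qn)
{
	struct bio *bio = bio_list_pop(&qn->bios_iops);

	if (!bio)
		bio = bio_list_pop(&qn->bios_bps);
	return bio;
}

int main(void)
{
	struct qnode qn = { { NULL, NULL }, { NULL, NULL } };
	struct bio bios[3] = {
		{ .id = 1, .bps_throttled = false },
		{ .id = 2, .bps_throttled = true },   /* e.g. a split bio */
		{ .id = 3, .bps_throttled = false },
	};
	struct bio *bio;

	for (int i = 0; i < 3; i++)
		qnode_add_bio(&qn, &bios[i]);

	/* Prints bio 2 first (iops list), then bios 1 and 3 (bps list). */
	while ((bio = qnode_pop_bio(&qn)))
		printf("dispatch bio %d\n", bio->id);
	return 0;
}

Draining the iops list first does not break FIFO ordering because, as the patch's comments explain, a bio only reaches the iops list after it has already been charged against (or exempted from) the bps limit, so it is logically ahead of anything still waiting on bps.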