Commit 24f5a90f authored by Ming Lei, committed by Jens Axboe
Browse files

blk-mq: quiesce queue during switching io sched and updating nr_requests



Dispatch may still be in progress after the queue is frozen, so we have to
quiesce the queue before switching the IO scheduler and updating nr_requests.

Also, while switching IO schedulers, blk_mq_run_hw_queue() may still be
called from elsewhere (such as from nvme_reset_work()) before the IO
scheduler's per-hctx data has been set up, which can cause an oops even
inside blk_mq_hctx_has_pending(). For example, it can run just between:

        ret = e->ops.mq.init_sched(q, e);
AND
        ret = e->ops.mq.init_hctx(hctx, i)

inside blk_mq_init_sched().

This basically reverts commit 7a148c2f ("block: don't call
blk_mq_quiesce_queue() after queue is frozen"), and makes sure that
blk_mq_hctx_has_pending() won't be called while the queue is quiesced.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Fixes: 7a148c2f ("block: don't call blk_mq_quiesce_queue() after queue is frozen")
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent c2856ae2
Loading
Loading
Loading
Loading
+26 −1
Original line number Diff line number Diff line
@@ -1285,7 +1285,30 @@ EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);

bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
	if (blk_mq_hctx_has_pending(hctx)) {
	int srcu_idx;
	bool need_run;

	/*
	 * When queue is quiesced, we may be switching io scheduler, or
	 * updating nr_hw_queues, or other things, and we can't run queue
	 * any more, even __blk_mq_hctx_has_pending() can't be called safely.
	 *
	 * And queue will be rerun in blk_mq_unquiesce_queue() if it is
	 * quiesced.
	 */
	if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
		rcu_read_lock();
		need_run = !blk_queue_quiesced(hctx->queue) &&
			blk_mq_hctx_has_pending(hctx);
		rcu_read_unlock();
	} else {
		srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
		need_run = !blk_queue_quiesced(hctx->queue) &&
			blk_mq_hctx_has_pending(hctx);
		srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
	}

	if (need_run) {
		__blk_mq_delay_run_hw_queue(hctx, async, 0);
		return true;
	}
@@ -2710,6 +2733,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
		return -EINVAL;

	blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	ret = 0;
	queue_for_each_hw_ctx(q, hctx, i) {
@@ -2733,6 +2757,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
	if (!ret)
		q->nr_requests = nr;

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);

	return ret;
+2 −0
Original line number Diff line number Diff line
@@ -968,6 +968,7 @@ static int elevator_switch_mq(struct request_queue *q,
	int ret;

	blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	if (q->elevator) {
		if (q->elevator->registered)
@@ -994,6 +995,7 @@ static int elevator_switch_mq(struct request_queue *q,
		blk_add_trace_msg(q, "elv switch: none");

out:
	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q);
	return ret;
}