Commit fd9c40f6 authored by Bart Van Assche's avatar Bart Van Assche Committed by Jens Axboe
Browse files

block: Revert v5.0 blk_mq_request_issue_directly() changes



blk_mq_try_issue_directly() can return BLK_STS*_RESOURCE for requests that
have been queued. If that happens when blk_mq_try_issue_directly() is called
by the dm-mpath driver then dm-mpath will try to resubmit a request that is
already queued and a kernel crash follows. Since it is nontrivial to fix
blk_mq_request_issue_directly(), revert the blk_mq_request_issue_directly()
changes that went into kernel v5.0.

This patch reverts the following commits:
* d6a51a97 ("blk-mq: replace and kill blk_mq_request_issue_directly") # v5.0.
* 5b7a6f12 ("blk-mq: issue directly with bypass 'false' in blk_mq_sched_insert_requests") # v5.0.
* 7f556a44 ("blk-mq: refactor the code of issue request directly") # v5.0.

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Jianchao Wang <jianchao.w.wang@oracle.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: James Smart <james.smart@broadcom.com>
Cc: Dongli Zhang <dongli.zhang@oracle.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: <stable@vger.kernel.org>
Reported-by: default avatarLaurence Oberman <loberman@redhat.com>
Tested-by: default avatarLaurence Oberman <loberman@redhat.com>
Fixes: 7f556a44 ("blk-mq: refactor the code of issue request directly") # v5.0.
Signed-off-by: default avatarBart Van Assche <bvanassche@acm.org>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent f0d17625
Loading
Loading
Loading
Loading
+1 −3
Original line number Diff line number Diff line
@@ -1245,8 +1245,6 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
 */
blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
{
	blk_qc_t unused;

	if (blk_cloned_rq_check_limits(q, rq))
		return BLK_STS_IOERR;

@@ -1262,7 +1260,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
	 * bypass a potential scheduler on the bottom device for
	 * insert.
	 */
	return blk_mq_try_issue_directly(rq->mq_hctx, rq, &unused, true, true);
	return blk_mq_request_issue_directly(rq, true);
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);

+5 −3
Original line number Diff line number Diff line
@@ -423,9 +423,11 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
		 * busy in case of 'none' scheduler, and this way may save
		 * us one extra enqueue & dequeue to sw queue.
		 */
		if (!hctx->dispatch_busy && !e && !run_queue_async)
		if (!hctx->dispatch_busy && !e && !run_queue_async) {
			blk_mq_try_issue_list_directly(hctx, list);
		else
			if (list_empty(list))
				return;
		}
		blk_mq_insert_requests(hctx, ctx, list);
	}

+63 −59
Original line number Diff line number Diff line
@@ -1801,95 +1801,99 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
	return ret;
}

blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
						struct request *rq,
						blk_qc_t *cookie,
						bool bypass, bool last)
						bool bypass_insert, bool last)
{
	struct request_queue *q = rq->q;
	bool run_queue = true;
	blk_status_t ret = BLK_STS_RESOURCE;
	int srcu_idx;
	bool force = false;

	hctx_lock(hctx, &srcu_idx);
	/*
	 * hctx_lock is needed before checking quiesced flag.
	 * RCU or SRCU read lock is needed before checking quiesced flag.
	 *
	 * When queue is stopped or quiesced, ignore 'bypass', insert
	 * and return BLK_STS_OK to caller, and avoid driver to try to
	 * dispatch again.
	 * When queue is stopped or quiesced, ignore 'bypass_insert' from
	 * blk_mq_request_issue_directly(), and return BLK_STS_OK to caller,
	 * and avoid driver to try to dispatch again.
	 */
	if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q))) {
	if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
		run_queue = false;
		bypass = false;
		goto out_unlock;
		bypass_insert = false;
		goto insert;
	}

	if (unlikely(q->elevator && !bypass))
		goto out_unlock;
	if (q->elevator && !bypass_insert)
		goto insert;

	if (!blk_mq_get_dispatch_budget(hctx))
		goto out_unlock;
		goto insert;

	if (!blk_mq_get_driver_tag(rq)) {
		blk_mq_put_dispatch_budget(hctx);
		goto out_unlock;
		goto insert;
	}

	/*
	 * Always add a request that has been through
	 *.queue_rq() to the hardware dispatch list.
	 */
	force = true;
	ret = __blk_mq_issue_directly(hctx, rq, cookie, last);
out_unlock:
	hctx_unlock(hctx, srcu_idx);
	switch (ret) {
	case BLK_STS_OK:
		break;
	case BLK_STS_DEV_RESOURCE:
	case BLK_STS_RESOURCE:
		if (force) {
	return __blk_mq_issue_directly(hctx, rq, cookie, last);
insert:
	if (bypass_insert)
		return BLK_STS_RESOURCE;

	blk_mq_request_bypass_insert(rq, run_queue);
			/*
			 * We have to return BLK_STS_OK for the DM
			 * to avoid livelock. Otherwise, we return
			 * the real result to indicate whether the
			 * request is direct-issued successfully.
			 */
			ret = bypass ? BLK_STS_OK : ret;
		} else if (!bypass) {
			blk_mq_sched_insert_request(rq, false,
						    run_queue, false);
	return BLK_STS_OK;
}
		break;
	default:
		if (!bypass)

static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
		struct request *rq, blk_qc_t *cookie)
{
	blk_status_t ret;
	int srcu_idx;

	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);

	hctx_lock(hctx, &srcu_idx);

	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
		blk_mq_request_bypass_insert(rq, true);
	else if (ret != BLK_STS_OK)
		blk_mq_end_request(rq, ret);
		break;

	hctx_unlock(hctx, srcu_idx);
}

blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
{
	blk_status_t ret;
	int srcu_idx;
	blk_qc_t unused_cookie;
	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;

	hctx_lock(hctx, &srcu_idx);
	ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last);
	hctx_unlock(hctx, srcu_idx);

	return ret;
}

void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
		struct list_head *list)
{
	blk_qc_t unused;
	blk_status_t ret = BLK_STS_OK;

	while (!list_empty(list)) {
		blk_status_t ret;
		struct request *rq = list_first_entry(list, struct request,
				queuelist);

		list_del_init(&rq->queuelist);
		if (ret == BLK_STS_OK)
			ret = blk_mq_try_issue_directly(hctx, rq, &unused,
							false,
		ret = blk_mq_request_issue_directly(rq, list_empty(list));
		if (ret != BLK_STS_OK) {
			if (ret == BLK_STS_RESOURCE ||
					ret == BLK_STS_DEV_RESOURCE) {
				blk_mq_request_bypass_insert(rq,
							list_empty(list));
		else
			blk_mq_sched_insert_request(rq, false, true, false);
				break;
			}
			blk_mq_end_request(rq, ret);
		}
	}

	/*
@@ -1897,7 +1901,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
	 * the driver there was more coming, but that turned out to
	 * be a lie.
	 */
	if (ret != BLK_STS_OK && hctx->queue->mq_ops->commit_rqs)
	if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs)
		hctx->queue->mq_ops->commit_rqs(hctx);
}

@@ -2012,13 +2016,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
			data.hctx = same_queue_rq->mq_hctx;
			trace_block_unplug(q, 1, true);
			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
					&cookie, false, true);
					&cookie);
		}
	} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
			!data.hctx->dispatch_busy)) {
		blk_mq_put_ctx(data.ctx);
		blk_mq_bio_to_request(rq, bio);
		blk_mq_try_issue_directly(data.hctx, rq, &cookie, false, true);
		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
	} else {
		blk_mq_put_ctx(data.ctx);
		blk_mq_bio_to_request(rq, bio);
+2 −4
Original line number Diff line number Diff line
@@ -70,10 +70,8 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
				struct list_head *list);

blk_status_t blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
						struct request *rq,
						blk_qc_t *cookie,
						bool bypass, bool last);
/* Used by blk_insert_cloned_request() to issue request directly */
blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last);
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
				    struct list_head *list);