Commit 64b28683 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-linus-20180204' of git://git.kernel.dk/linux-block

Pull more block updates from Jens Axboe:
 "Most of this is fixes and not new code/features:

   - skd fix from Arnd, fixing a build error dependent on slab allocator
     type.

   - blk-mq scheduler discard merging fixes, one from me and one from
     Keith. This fixes a segment miscalculation for blk-mq-sched, where
     we mistakenly think two segments are physically contiguous even
     though the request isn't carrying real data. Also fixes a bio-to-rq
     merge case.

   - Don't re-set a bit on the buffer_head flags, if it's already set.
     This can cause scalability concerns on bigger machines and
     workloads. From Kemi Wang.

   - Add BLK_STS_DEV_RESOURCE return value to blk-mq, allowing us to
     distinguish between a local (device related) resource starvation
     and a global one. The latter might happen without IO being in
     flight, so it has to be handled a bit differently. From Ming"

* tag 'for-linus-20180204' of git://git.kernel.dk/linux-block:
  block: skd: fix incorrect linux/slab_def.h inclusion
  buffer: Avoid setting buffer bits that are already set
  blk-mq-sched: Enable merging discard bio into request
  blk-mq: fix discard merge with scheduler attached
  blk-mq: introduce BLK_STS_DEV_RESOURCE
parents d3658c22 1d518775
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -145,6 +145,7 @@ static const struct {
	[BLK_STS_MEDIUM]	= { -ENODATA,	"critical medium" },
	[BLK_STS_PROTECTION]	= { -EILSEQ,	"protection" },
	[BLK_STS_RESOURCE]	= { -ENOMEM,	"kernel resource" },
	[BLK_STS_DEV_RESOURCE]	= { -EBUSY,	"device resource" },
	[BLK_STS_AGAIN]		= { -EAGAIN,	"nonblocking retry" },

	/* device mapper special case, should not leak out: */
@@ -3282,6 +3283,8 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
{
	if (bio_has_data(bio))
		rq->nr_phys_segments = bio_phys_segments(q, bio);
	else if (bio_op(bio) == REQ_OP_DISCARD)
		rq->nr_phys_segments = 1;

	rq->__data_len = bio->bi_iter.bi_size;
	rq->bio = rq->biotail = bio;
+26 −3
Original line number Diff line number Diff line
@@ -550,6 +550,24 @@ static bool req_no_special_merge(struct request *req)
	return !q->mq_ops && req->special;
}

/*
 * Check whether the discard request @next can be folded into @req.
 *
 * The merge is refused when @req already carries at least
 * queue_max_discard_segments(@q) discard segments, or when the sectors
 * of @req plus the sectors of @next's first bio would exceed
 * blk_rq_get_max_sectors() for @req at its current position.
 *
 * On success @req->nr_phys_segments is set to the combined discard
 * segment count of both requests and true is returned. On refusal
 * @req is marked via req_set_nomerge() and false is returned.
 */
static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct request *next)
{
	unsigned short nr_segs = blk_rq_nr_discard_segments(req);
	bool mergeable;

	/* Both limits must hold; the size check is only made if the first passes. */
	mergeable = nr_segs < queue_max_discard_segments(q) &&
		    blk_rq_sectors(req) + bio_sectors(next->bio) <=
		    blk_rq_get_max_sectors(req, blk_rq_pos(req));

	if (!mergeable) {
		req_set_nomerge(q, req);
		return false;
	}

	req->nr_phys_segments = nr_segs + blk_rq_nr_discard_segments(next);
	return true;
}

static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
@@ -683,9 +701,13 @@ static struct request *attempt_merge(struct request_queue *q,
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
	 * will have updated segment counts, update sector
	 * counts here.
	 * counts here. Handle DISCARDs separately, as they
	 * have separate settings.
	 */
	if (!ll_merge_requests_fn(q, req, next))
	if (req_op(req) == REQ_OP_DISCARD) {
		if (!req_attempt_discard_merge(q, req, next))
			return NULL;
	} else if (!ll_merge_requests_fn(q, req, next))
		return NULL;

	/*
@@ -715,6 +737,7 @@ static struct request *attempt_merge(struct request_queue *q,

	req->__data_len += blk_rq_bytes(next);

	if (req_op(req) != REQ_OP_DISCARD)
		elv_merge_requests(q, req, next);

	/*
+2 −0
Original line number Diff line number Diff line
@@ -259,6 +259,8 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	case ELEVATOR_DISCARD_MERGE:
		return bio_attempt_discard_merge(q, rq, bio);
	default:
		return false;
	}
+16 −4
Original line number Diff line number Diff line
@@ -1162,6 +1162,8 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
	return true;
}

/*
 * Delay passed to blk_mq_delay_run_hw_queue() when a dispatch pass ends
 * with BLK_STS_RESOURCE while the hardware queue still needs a restart.
 */
#define BLK_MQ_RESOURCE_DELAY	3		/* ms units */

bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
			     bool got_budget)
{
@@ -1169,6 +1171,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
	struct request *rq, *nxt;
	bool no_tag = false;
	int errors, queued;
	blk_status_t ret = BLK_STS_OK;

	if (list_empty(list))
		return false;
@@ -1181,7 +1184,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
	errors = queued = 0;
	do {
		struct blk_mq_queue_data bd;
		blk_status_t ret;

		rq = list_first_entry(list, struct request, queuelist);
		if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
@@ -1226,7 +1228,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
		}

		ret = q->mq_ops->queue_rq(hctx, &bd);
		if (ret == BLK_STS_RESOURCE) {
		if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
			/*
			 * If an I/O scheduler has been configured and we got a
			 * driver tag for the next request already, free it
@@ -1257,6 +1259,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
	 * that is where we will continue on next queue run.
	 */
	if (!list_empty(list)) {
		bool needs_restart;

		spin_lock(&hctx->lock);
		list_splice_init(list, &hctx->dispatch);
		spin_unlock(&hctx->lock);
@@ -1280,10 +1284,17 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
		 * - Some but not all block drivers stop a queue before
		 *   returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
		 *   and dm-rq.
		 *
		 * If driver returns BLK_STS_RESOURCE and SCHED_RESTART
		 * bit is set, run queue after a delay to avoid IO stalls
		 * that could otherwise occur if the queue is idle.
		 */
		if (!blk_mq_sched_needs_restart(hctx) ||
		needs_restart = blk_mq_sched_needs_restart(hctx);
		if (!needs_restart ||
		    (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
			blk_mq_run_hw_queue(hctx, true);
		else if (needs_restart && (ret == BLK_STS_RESOURCE))
			blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
	}

	return (queued + errors) != 0;
@@ -1764,6 +1775,7 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
		*cookie = new_cookie;
		break;
	case BLK_STS_RESOURCE:
	case BLK_STS_DEV_RESOURCE:
		__blk_mq_requeue_request(rq);
		break;
	default:
@@ -1826,7 +1838,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
	hctx_lock(hctx, &srcu_idx);

	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
	if (ret == BLK_STS_RESOURCE)
	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
		blk_mq_sched_insert_request(rq, false, true, false);
	else if (ret != BLK_STS_OK)
		blk_mq_end_request(rq, ret);
+1 −1
Original line number Diff line number Diff line
@@ -1230,7 +1230,7 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
				return BLK_STS_OK;
			} else
				/* requeue request */
				return BLK_STS_RESOURCE;
				return BLK_STS_DEV_RESOURCE;
		}
	}

Loading