Commit edda4153 authored by Linus Torvalds

Merge tag 'for-linus-20180413' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Followup fixes for this merge window. This contains:

   - Series from Ming, fixing corner cases in our CPU <-> queue mapping.

     This triggered repeated warnings, especially on s390, but I also hit
     it in CPU hotplug/unplug testing while doing IO on NVMe on x86-64.

   - Another fix from Ming, ensuring that we always order budget and
     driver tag identically, avoiding a deadlock on QD=1 devices (a toy
     model of the ordering rule follows this message).

   - Loop locking regression fix from this merge window, from Omar.

   - Another loop locking fix, this time missing an unlock, from Tetsuo
     Handa.

   - Fix for racing IO submission with device removal from Bart.

   - sr reference fix from me, fixing a case where disk change or
     getevents can race with device removal.

   - Set of nvme fixes by way of Keith, from various contributors"
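
The QD=1 deadlock mentioned above is the classic resource-ordering problem: with one unit of budget and one driver tag available, two submission paths that acquire them in opposite orders can each end up holding one resource while waiting forever for the other. A minimal userspace sketch of the rule the fix enforces — every path takes "budget" strictly before "tag" — using illustrative names, not kernel code:

/* Toy model of the ordering rule behind the QD=1 deadlock fix.
 * Both paths acquire the two limited resources in the same order;
 * if one path swapped the two locks, each thread could hold one
 * resource and block forever on the other. Compile: cc -pthread. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t budget = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t tag = PTHREAD_MUTEX_INITIALIZER;

static void *submit_io(void *name)
{
	pthread_mutex_lock(&budget);	/* always budget first... */
	pthread_mutex_lock(&tag);	/* ...then the driver tag */
	printf("%s: dispatched\n", (const char *)name);
	pthread_mutex_unlock(&tag);
	pthread_mutex_unlock(&budget);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, submit_io, "direct-issue path");
	pthread_create(&b, NULL, submit_io, "dispatch-list path");
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}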

* tag 'for-linus-20180413' of git://git.kernel.dk/linux-block: (28 commits)
  nvme: expand nvmf_check_if_ready checks
  nvme: Use admin command effects for admin commands
  nvmet: fix space padding in serial number
  nvme: check return value of init_srcu_struct function
  nvmet: Fix nvmet_execute_write_zeroes sector count
  nvme-pci: Separate IO and admin queue IRQ vectors
  nvme-pci: Remove unused queue parameter
  nvme-pci: Skip queue deletion if there are no queues
  nvme: target: fix buffer overflow
  nvme: don't send keep-alives to the discovery controller
  nvme: unexport nvme_start_keep_alive
  nvme-loop: fix kernel oops in case of unhandled command
  nvme: enforce 64bit offset for nvme_get_log_ext fn
  sr: get/drop reference to device in revalidate and check_events
  blk-mq: Revert "blk-mq: reimplement blk_mq_hw_queue_mapped"
  blk-mq: Avoid that submitting a bio concurrently with device removal triggers a crash
  backing: silence compiler warning using __printf
  blk-mq: remove code for dealing with remapping queue
  blk-mq: reimplement blk_mq_hw_queue_mapped
  blk-mq: don't check queue mapped in __blk_mq_delay_run_hw_queue()
  ...
parents 3e565a35 bb06ec31

block/blk-core.c: +29 −6

@@ -2385,8 +2385,20 @@ blk_qc_t generic_make_request(struct bio *bio)
 	 * yet.
 	 */
 	struct bio_list bio_list_on_stack[2];
+	blk_mq_req_flags_t flags = 0;
+	struct request_queue *q = bio->bi_disk->queue;
 	blk_qc_t ret = BLK_QC_T_NONE;
 
+	if (bio->bi_opf & REQ_NOWAIT)
+		flags = BLK_MQ_REQ_NOWAIT;
+	if (blk_queue_enter(q, flags) < 0) {
+		if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
+			bio_wouldblock_error(bio);
+		else
+			bio_io_error(bio);
+		return ret;
+	}
+
 	if (!generic_make_request_checks(bio))
 		goto out;
@@ -2423,11 +2435,22 @@ blk_qc_t generic_make_request(struct bio *bio)
 	bio_list_init(&bio_list_on_stack[0]);
 	current->bio_list = bio_list_on_stack;
 	do {
-		struct request_queue *q = bio->bi_disk->queue;
-		blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
-			BLK_MQ_REQ_NOWAIT : 0;
+		bool enter_succeeded = true;
+
+		if (unlikely(q != bio->bi_disk->queue)) {
+			if (q)
+				blk_queue_exit(q);
+			q = bio->bi_disk->queue;
+			flags = 0;
+			if (bio->bi_opf & REQ_NOWAIT)
+				flags = BLK_MQ_REQ_NOWAIT;
+			if (blk_queue_enter(q, flags) < 0) {
+				enter_succeeded = false;
+				q = NULL;
+			}
+		}
 
-		if (likely(blk_queue_enter(q, flags) == 0)) {
+		if (enter_succeeded) {
 			struct bio_list lower, same;
 
 			/* Create a fresh bio_list for all subordinate requests */
@@ -2435,8 +2458,6 @@ blk_qc_t generic_make_request(struct bio *bio)
 			bio_list_init(&bio_list_on_stack[0]);
 			ret = q->make_request_fn(q, bio);
 
-			blk_queue_exit(q);
-
 			/* sort new bios into those for a lower level
 			 * and those for the same level
 			 */
@@ -2463,6 +2484,8 @@ blk_qc_t generic_make_request(struct bio *bio)
 	current->bio_list = NULL; /* deactivate */
 
 out:
+	if (q)
+		blk_queue_exit(q);
 	return ret;
 }
 EXPORT_SYMBOL(generic_make_request);
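
The pattern in the hunks above — take a reference on the queue before using it, release it on every exit path, and fail new entries once the queue is dying — is what closes the race between bio submission and device removal. A rough userspace analogue of that guard, assuming nothing about the kernel's percpu-refcount internals (all names here are invented for illustration):

/* Rough analogue of the blk_queue_enter()/blk_queue_exit() guard:
 * submitters take a reference and back out if teardown has begun,
 * so removal can drain the count safely. Not the kernel API. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_queue {
	atomic_int refs;
	atomic_bool dying;
};

static int toy_queue_enter(struct toy_queue *q)
{
	atomic_fetch_add(&q->refs, 1);
	if (atomic_load(&q->dying)) {
		atomic_fetch_sub(&q->refs, 1);	/* undo and fail */
		return -1;
	}
	return 0;
}

static void toy_queue_exit(struct toy_queue *q)
{
	atomic_fetch_sub(&q->refs, 1);
}

int main(void)
{
	struct toy_queue q = { 0 };

	if (toy_queue_enter(&q) == 0) {
		printf("submitting I/O under the reference\n");
		toy_queue_exit(&q);
	}
	atomic_store(&q.dying, true);
	printf("enter after dying: %d\n", toy_queue_enter(&q));
	return 0;
}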

block/blk-mq-cpumap.c: +0 −5

@@ -16,11 +16,6 @@
 
 static int cpu_to_queue_index(unsigned int nr_queues, const int cpu)
 {
-	/*
-	 * Non present CPU will be mapped to queue index 0.
-	 */
-	if (!cpu_present(cpu))
-		return 0;
 	return cpu % nr_queues;
 }
 
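With the cpu_present() special case gone, every possible CPU — online, offline, or merely possible — keeps a stable mapping through the modulo, which is what prevents the remapping churn behind the s390 warnings when CPUs come and go. A standalone sketch of the resulting spread (the queue count is an illustrative value):

/* Sketch of the simplified mapping: each CPU maps to cpu % nr_queues
 * instead of non-present CPUs being funneled to queue 0. */
#include <stdio.h>

static int cpu_to_queue_index(unsigned int nr_queues, int cpu)
{
	return cpu % nr_queues;
}

int main(void)
{
	const unsigned int nr_queues = 4;	/* illustrative */

	for (int cpu = 0; cpu < 8; cpu++)
		printf("cpu %d -> hw queue %d\n",
		       cpu, cpu_to_queue_index(nr_queues, cpu));
	return 0;
}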

block/blk-mq-debugfs.c: +0 −1

@@ -235,7 +235,6 @@ static const char *const hctx_state_name[] = {
 	HCTX_STATE_NAME(STOPPED),
 	HCTX_STATE_NAME(TAG_ACTIVE),
 	HCTX_STATE_NAME(SCHED_RESTART),
-	HCTX_STATE_NAME(START_ON_RUN),
 };
 #undef HCTX_STATE_NAME
 
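For readers unfamiliar with the idiom, HCTX_STATE_NAME builds a name table indexed by the flag value, using token pasting and stringification; dropping the START_ON_RUN entry keeps the table in sync with the flag's removal elsewhere in this pull. A self-contained toy version of the same macro pattern (toy names, not the kernel's):

/* The "stringify table" idiom used by hctx_state_name[] above:
 * index by the pasted enum value, store the bare name as a string. */
#include <stdio.h>

enum { TOY_S_STOPPED, TOY_S_TAG_ACTIVE, TOY_S_SCHED_RESTART };

#define TOY_STATE_NAME(name) [TOY_S_##name] = #name
static const char *const toy_state_name[] = {
	TOY_STATE_NAME(STOPPED),
	TOY_STATE_NAME(TAG_ACTIVE),
	TOY_STATE_NAME(SCHED_RESTART),
};
#undef TOY_STATE_NAME

int main(void)
{
	printf("%s\n", toy_state_name[TOY_S_SCHED_RESTART]);
	return 0;
}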

block/blk-mq.c: +33 −89

@@ -1180,7 +1180,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		struct blk_mq_queue_data bd;
 
 		rq = list_first_entry(list, struct request, queuelist);
-		if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
+
+		hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
+		if (!got_budget && !blk_mq_get_dispatch_budget(hctx))
+			break;
+
+		if (!blk_mq_get_driver_tag(rq, NULL, false)) {
 			/*
 			 * The initial allocation attempt failed, so we need to
 			 * rerun the hardware queue when a tag is freed. The
@@ -1189,7 +1194,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 			 * we'll re-run it below.
 			 */
 			if (!blk_mq_mark_tag_wait(&hctx, rq)) {
-				if (got_budget)
-					blk_mq_put_dispatch_budget(hctx);
+				blk_mq_put_dispatch_budget(hctx);
 				/*
 				 * For non-shared tags, the RESTART check
@@ -1201,11 +1205,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 			}
 		}
 
-		if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
-			blk_mq_put_driver_tag(rq);
-			break;
-		}
-
 		list_del_init(&rq->queuelist);
 
 		bd.rq = rq;
@@ -1336,6 +1335,15 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	hctx_unlock(hctx, srcu_idx);
 }
 
+static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
+{
+	int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
+
+	if (cpu >= nr_cpu_ids)
+		cpu = cpumask_first(hctx->cpumask);
+	return cpu;
+}
+
 /*
  * It'd be great if the workqueue API had a way to pass
  * in a mask and had some smarts for more clever placement.
@@ -1345,26 +1353,17 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 {
 	bool tried = false;
+	int next_cpu = hctx->next_cpu;
 
 	if (hctx->queue->nr_hw_queues == 1)
 		return WORK_CPU_UNBOUND;
 
 	if (--hctx->next_cpu_batch <= 0) {
-		int next_cpu;
 select_cpu:
-		next_cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask,
+		next_cpu = cpumask_next_and(next_cpu, hctx->cpumask,
 				cpu_online_mask);
 		if (next_cpu >= nr_cpu_ids)
-			next_cpu = cpumask_first_and(hctx->cpumask,cpu_online_mask);
-
-		/*
-		 * No online CPU is found, so have to make sure hctx->next_cpu
-		 * is set correctly for not breaking workqueue.
-		 */
-		if (next_cpu >= nr_cpu_ids)
-			hctx->next_cpu = cpumask_first(hctx->cpumask);
-		else
-			hctx->next_cpu = next_cpu;
+			next_cpu = blk_mq_first_mapped_cpu(hctx);
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
 	}
 
@@ -1372,7 +1371,7 @@ select_cpu:
 	 * Do unbound schedule if we can't find a online CPU for this hctx,
 	 * and it should only happen in the path of handling CPU DEAD.
 	 */
-	if (!cpu_online(hctx->next_cpu)) {
+	if (!cpu_online(next_cpu)) {
 		if (!tried) {
 			tried = true;
 			goto select_cpu;
@@ -1382,18 +1381,18 @@ select_cpu:
 		 * Make sure to re-select CPU next time once after CPUs
 		 * in hctx->cpumask become online again.
 		 */
+		hctx->next_cpu = next_cpu;
 		hctx->next_cpu_batch = 1;
 		return WORK_CPU_UNBOUND;
 	}
-	return hctx->next_cpu;
+
+	hctx->next_cpu = next_cpu;
+	return next_cpu;
 }
 
 static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
 					unsigned long msecs)
 {
-	if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
-		return;
-
 	if (unlikely(blk_mq_hctx_stopped(hctx)))
 		return;
 
@@ -1560,40 +1559,14 @@ static void blk_mq_run_work_fn(struct work_struct *work)
 	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
 
 	/*
-	 * If we are stopped, don't run the queue. The exception is if
-	 * BLK_MQ_S_START_ON_RUN is set. For that case, we auto-clear
-	 * the STOPPED bit and run it.
+	 * If we are stopped, don't run the queue.
 	 */
-	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) {
-		if (!test_bit(BLK_MQ_S_START_ON_RUN, &hctx->state))
-			return;
-
-		clear_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
+	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state))
 		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
-	}
 
 	__blk_mq_run_hw_queue(hctx);
 }
 
-
-void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
-{
-	if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
-		return;
-
-	/*
-	 * Stop the hw queue, then modify currently delayed work.
-	 * This should prevent us from running the queue prematurely.
-	 * Mark the queue as auto-clearing STOPPED when it runs.
-	 */
-	blk_mq_stop_hw_queue(hctx);
-	set_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
-	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
-					&hctx->run_work,
-					msecs_to_jiffies(msecs));
-}
-EXPORT_SYMBOL(blk_mq_delay_queue);
-
 static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
 					    struct request *rq,
 					    bool at_head)
@@ -1804,11 +1777,11 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 	if (q->elevator && !bypass_insert)
 		goto insert;
 
-	if (!blk_mq_get_driver_tag(rq, NULL, false))
+	if (!blk_mq_get_dispatch_budget(hctx))
 		goto insert;
 
-	if (!blk_mq_get_dispatch_budget(hctx)) {
-		blk_mq_put_driver_tag(rq);
+	if (!blk_mq_get_driver_tag(rq, NULL, false)) {
+		blk_mq_put_dispatch_budget(hctx);
 		goto insert;
 	}
 
@@ -2356,7 +2329,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
 
 static void blk_mq_map_swqueue(struct request_queue *q)
 {
-	unsigned int i, hctx_idx;
+	unsigned int i;
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	struct blk_mq_tag_set *set = q->tag_set;
@@ -2373,23 +2346,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 
 	/*
 	 * Map software to hardware queues.
-	 *
-	 * If the cpu isn't present, the cpu is mapped to first hctx.
 	 */
 	for_each_possible_cpu(i) {
-		hctx_idx = q->mq_map[i];
-		/* unmapped hw queue can be remapped after CPU topo changed */
-		if (!set->tags[hctx_idx] &&
-		    !__blk_mq_alloc_rq_map(set, hctx_idx)) {
-			/*
-			 * If tags initialization fail for some hctx,
-			 * that hctx won't be brought online.  In this
-			 * case, remap the current ctx to hctx[0] which
-			 * is guaranteed to always have tags allocated
-			 */
-			q->mq_map[i] = 0;
-		}
-
 		ctx = per_cpu_ptr(q->queue_ctx, i);
 		hctx = blk_mq_map_queue(q, i);
 
@@ -2401,21 +2359,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	mutex_unlock(&q->sysfs_lock);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		/*
-		 * If no software queues are mapped to this hardware queue,
-		 * disable it and free the request entries.
-		 */
-		if (!hctx->nr_ctx) {
-			/* Never unmap queue 0.  We need it as a
-			 * fallback in case of a new remap fails
-			 * allocation
-			 */
-			if (i && set->tags[i])
-				blk_mq_free_map_and_requests(set, i);
-
-			hctx->tags = NULL;
-			continue;
-		}
+		/* every hctx should get mapped by at least one CPU */
+		WARN_ON(!hctx->nr_ctx);
 
 		hctx->tags = set->tags[i];
 		WARN_ON(!hctx->tags);
@@ -2430,8 +2375,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		/*
 		 * Initialize batch roundrobin counts
 		 */
-		hctx->next_cpu = cpumask_first_and(hctx->cpumask,
-				cpu_online_mask);
+		hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
 	}
 }
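
Taken together, blk_mq_first_mapped_cpu() and the reworked blk_mq_hctx_next_cpu() round-robin an hctx's work over its online CPUs and fall back to the first mapped CPU when none are online, without storing intermediate candidates in hctx->next_cpu. A userspace model of that selection, representing cpumasks as plain bitmasks (an assumption made only for the sketch):

/* Model of the next-CPU selection: advance round-robin through the
 * online CPUs in the hctx mask, wrap around, and fall back to the
 * first mapped CPU when no mapped CPU is online. */
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 8

static int next_set_bit(uint32_t mask, int after)
{
	for (int cpu = after + 1; cpu < NR_CPUS; cpu++)
		if (mask & (1u << cpu))
			return cpu;
	return NR_CPUS;	/* like nr_cpu_ids: nothing found */
}

static int first_mapped_cpu(uint32_t hctx_mask, uint32_t online)
{
	int cpu = next_set_bit(hctx_mask & online, -1);

	if (cpu >= NR_CPUS)	/* no mapped CPU is online */
		cpu = next_set_bit(hctx_mask, -1);
	return cpu;
}

static int next_cpu(uint32_t hctx_mask, uint32_t online, int prev)
{
	int cpu = next_set_bit(hctx_mask & online, prev);

	if (cpu >= NR_CPUS)	/* wrap around the mask */
		cpu = first_mapped_cpu(hctx_mask, online);
	return cpu;
}

int main(void)
{
	uint32_t hctx_mask = 0x16;	/* CPUs 1, 2, 4 mapped */
	uint32_t online = 0x12;		/* CPUs 1, 4 online */
	int cpu = -1;

	for (int i = 0; i < 4; i++) {
		cpu = next_cpu(hctx_mask, online, cpu);
		printf("run work on cpu %d\n", cpu);	/* 1, 4, 1, 4 */
	}
	return 0;
}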

drivers/block/loop.c: +26 −19

@@ -1103,11 +1103,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	if (info->lo_encrypt_type) {
 		unsigned int type = info->lo_encrypt_type;
 
-		if (type >= MAX_LO_CRYPT)
-			return -EINVAL;
+		if (type >= MAX_LO_CRYPT) {
+			err = -EINVAL;
+			goto exit;
+		}
 		xfer = xfer_funcs[type];
-		if (xfer == NULL)
-			return -EINVAL;
+		if (xfer == NULL) {
+			err = -EINVAL;
+			goto exit;
+		}
 	} else
 		xfer = NULL;
 
@@ -1283,11 +1287,12 @@ static int
 loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
 	struct loop_info info;
 	struct loop_info64 info64;
-	int err = 0;
+	int err;
 
-	if (!arg)
-		err = -EINVAL;
-	if (!err)
-		err = loop_get_status(lo, &info64);
+	if (!arg) {
+		mutex_unlock(&lo->lo_ctl_mutex);
+		return -EINVAL;
+	}
+	err = loop_get_status(lo, &info64);
 	if (!err)
 		err = loop_info64_to_old(&info64, &info);
@@ -1300,11 +1305,12 @@ loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
 static int
 loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
 	struct loop_info64 info64;
-	int err = 0;
+	int err;
 
-	if (!arg)
-		err = -EINVAL;
-	if (!err)
-		err = loop_get_status(lo, &info64);
+	if (!arg) {
+		mutex_unlock(&lo->lo_ctl_mutex);
+		return -EINVAL;
+	}
+	err = loop_get_status(lo, &info64);
 	if (!err && copy_to_user(arg, &info64, sizeof(info64)))
 		err = -EFAULT;
@@ -1529,11 +1535,12 @@ loop_get_status_compat(struct loop_device *lo,
 		       struct compat_loop_info __user *arg)
 {
 	struct loop_info64 info64;
-	int err = 0;
+	int err;
 
-	if (!arg)
-		err = -EINVAL;
-	if (!err)
-		err = loop_get_status(lo, &info64);
+	if (!arg) {
+		mutex_unlock(&lo->lo_ctl_mutex);
+		return -EINVAL;
+	}
+	err = loop_get_status(lo, &info64);
 	if (!err)
 		err = loop_info64_to_compat(&info64, arg);
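
All three loop_get_status_* fixes above repair the same bug class: the function is entered with lo_ctl_mutex held, and an early return path skipped the unlock. The usual kernel defense is to funnel every exit through a single unlock point, as loop_set_status does with its exit label. A toy illustration of that discipline (plain pthreads, with the lock taken and released in the same function for simplicity; not the loop driver):

/* The bug class fixed above: an early "return -EINVAL" that skips
 * the unlock. Routing every error through one exit label makes the
 * unlock impossible to miss. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ctl_mutex = PTHREAD_MUTEX_INITIALIZER;

static int get_status(const int *arg)
{
	int err = 0;

	pthread_mutex_lock(&ctl_mutex);
	if (!arg) {
		err = -22;	/* -EINVAL */
		goto unlock;	/* never a bare return while locked */
	}
	printf("status: %d\n", *arg);
unlock:
	pthread_mutex_unlock(&ctl_mutex);
	return err;
}

int main(void)
{
	int v = 7;

	get_status(&v);
	printf("NULL arg -> %d\n", get_status(NULL));
	return 0;
}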