Commit 439b84fa authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge branch 'for-5.5/block' into for-5.5/drivers

Pull in dependencies for the new zoned open/close/finish support.

* for-5.5/block: (32 commits)
  block: add zone open, close and finish ioctl support
  block: add zone open, close and finish operations
  block: Simplify REQ_OP_ZONE_RESET_ALL handling
  block: Remove REQ_OP_ZONE_RESET plugging
  block: Warn if elevator= parameter is used
  block: avoid blk_bio_segment_split for small I/O operations
  blk-mq: make sure that line break can be printed
  block: sed-opal: Introduce Opal Datastore UID
  block: sed-opal: Add support to read/write opal tables generically
  block: sed-opal: Generalizing write data to any opal table
  bdev: Refresh bdev size for disks without partitioning
  bdev: Factor out bdev revalidation into a common helper
  blk-mq: avoid sysfs buffer overflow with too many CPU cores
  blk-mq: Make blk_mq_run_hw_queue() return void
  fcntl: fix typo in RWH_WRITE_LIFE_NOT_SET r/w hint name
  blk-mq: fill header with kernel-doc
  blk-mq: remove needless goto from blk_mq_get_driver_tag
  block: reorder bio::__bi_remaining for better packing
  block: Reduce the amount of memory used for tag sets
  block: Reduce the amount of memory required per request queue
  ...
parents 64fab729 e876df1f
Loading
Loading
Loading
Loading
+11 −5
Original line number Diff line number Diff line
@@ -132,6 +132,9 @@ static const char *const blk_op_name[] = {
	REQ_OP_NAME(SECURE_ERASE),
	REQ_OP_NAME(ZONE_RESET),
	REQ_OP_NAME(ZONE_RESET_ALL),
	REQ_OP_NAME(ZONE_OPEN),
	REQ_OP_NAME(ZONE_CLOSE),
	REQ_OP_NAME(ZONE_FINISH),
	REQ_OP_NAME(WRITE_SAME),
	REQ_OP_NAME(WRITE_ZEROES),
	REQ_OP_NAME(SCSI_IN),
@@ -336,14 +339,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 */
void blk_cleanup_queue(struct request_queue *q)
{
	WARN_ON_ONCE(blk_queue_registered(q));

	/* mark @q DYING, no new request or merges will be allowed afterwards */
	mutex_lock(&q->sysfs_lock);
	blk_set_queue_dying(q);

	blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
	blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
	mutex_unlock(&q->sysfs_lock);

	/*
	 * Drain all requests queued before DYING marking. Set DEAD flag to
@@ -849,10 +852,10 @@ static inline int blk_partition_remap(struct bio *bio)
		goto out;

	/*
	 * Zone reset does not include bi_size so bio_sectors() is always 0.
	 * Include a test for the reset op code and perform the remap if needed.
	 * Zone management bios do not have a sector count but they do have
	 * a start sector filled out and need to be remapped.
	 */
	if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
	if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) {
		if (bio_check_eod(bio, part_nr_sects_read(p)))
			goto out;
		bio->bi_iter.bi_sector += p->start_sect;
@@ -936,6 +939,9 @@ generic_make_request_checks(struct bio *bio)
			goto not_supported;
		break;
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		if (!blk_queue_is_zoned(q))
			goto not_supported;
		break;
+2 −0
Original line number Diff line number Diff line
@@ -55,6 +55,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
	rq->rq_disk = bd_disk;
	rq->end_io = done;

	blk_account_io_start(rq, true);

	/*
	 * don't check dying flag for MQ because the request won't
	 * be reused after dying flag is set
+15 −1
Original line number Diff line number Diff line
@@ -293,7 +293,7 @@ split:
void __blk_queue_split(struct request_queue *q, struct bio **bio,
		unsigned int *nr_segs)
{
	struct bio *split;
	struct bio *split = NULL;

	switch (bio_op(*bio)) {
	case REQ_OP_DISCARD:
@@ -309,6 +309,20 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
				nr_segs);
		break;
	default:
		/*
		 * All drivers must accept single-segments bios that are <=
		 * PAGE_SIZE.  This is a quick and dirty check that relies on
		 * the fact that bi_io_vec[0] is always valid if a bio has data.
		 * The check might lead to occasional false negatives when bios
		 * are cloned, but compared to the performance impact of cloned
		 * bios themselves the loop below doesn't matter anyway.
		 */
		if (!q->limits.chunk_sectors &&
		    (*bio)->bi_vcnt == 1 &&
		    (*bio)->bi_io_vec[0].bv_len <= PAGE_SIZE) {
			*nr_segs = 1;
			break;
		}
		split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
		break;
	}
+14 −17
Original line number Diff line number Diff line
@@ -74,9 +74,7 @@ static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
	if (!entry->show)
		return -EIO;

	res = -ENOENT;
	mutex_lock(&q->sysfs_lock);
	if (!blk_queue_dying(q))
	res = entry->show(ctx, page);
	mutex_unlock(&q->sysfs_lock);
	return res;
@@ -97,9 +95,7 @@ static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
	if (!entry->store)
		return -EIO;

	res = -ENOENT;
	mutex_lock(&q->sysfs_lock);
	if (!blk_queue_dying(q))
	res = entry->store(ctx, page, length);
	mutex_unlock(&q->sysfs_lock);
	return res;
@@ -120,9 +116,7 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
	if (!entry->show)
		return -EIO;

	res = -ENOENT;
	mutex_lock(&q->sysfs_lock);
	if (!blk_queue_dying(q))
	res = entry->show(hctx, page);
	mutex_unlock(&q->sysfs_lock);
	return res;
@@ -144,9 +138,7 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
	if (!entry->store)
		return -EIO;

	res = -ENOENT;
	mutex_lock(&q->sysfs_lock);
	if (!blk_queue_dying(q))
	res = entry->store(hctx, page, length);
	mutex_unlock(&q->sysfs_lock);
	return res;
@@ -166,20 +158,25 @@ static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,

static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
{
	const size_t size = PAGE_SIZE - 1;
	unsigned int i, first = 1;
	ssize_t ret = 0;
	int ret = 0, pos = 0;

	for_each_cpu(i, hctx->cpumask) {
		if (first)
			ret += sprintf(ret + page, "%u", i);
			ret = snprintf(pos + page, size - pos, "%u", i);
		else
			ret += sprintf(ret + page, ", %u", i);
			ret = snprintf(pos + page, size - pos, ", %u", i);

		if (ret >= size - pos)
			break;

		first = 0;
		pos += ret;
	}

	ret += sprintf(ret + page, "\n");
	return ret;
	ret = snprintf(pos + page, size + 1 - pos, "\n");
	return pos + ret;
}

static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
+58 −72
Original line number Diff line number Diff line
@@ -93,7 +93,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,

struct mq_inflight {
	struct hd_struct *part;
	unsigned int *inflight;
	unsigned int inflight[2];
};

static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
@@ -102,45 +102,29 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
{
	struct mq_inflight *mi = priv;

	/*
	 * index[0] counts the specific partition that was asked for.
	 */
	if (rq->part == mi->part)
		mi->inflight[0]++;
		mi->inflight[rq_data_dir(rq)]++;

	return true;
}

unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part)
{
	unsigned inflight[2];
	struct mq_inflight mi = { .part = part, .inflight = inflight, };
	struct mq_inflight mi = { .part = part };

	inflight[0] = inflight[1] = 0;
	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);

	return inflight[0];
}

static bool blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
				     struct request *rq, void *priv,
				     bool reserved)
{
	struct mq_inflight *mi = priv;

	if (rq->part == mi->part)
		mi->inflight[rq_data_dir(rq)]++;

	return true;
	return mi.inflight[0] + mi.inflight[1];
}

void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
			 unsigned int inflight[2])
{
	struct mq_inflight mi = { .part = part, .inflight = inflight, };
	struct mq_inflight mi = { .part = part };

	inflight[0] = inflight[1] = 0;
	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
	blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
	inflight[0] = mi.inflight[0];
	inflight[1] = mi.inflight[1];
}

void blk_freeze_queue_start(struct request_queue *q)
@@ -663,18 +647,6 @@ bool blk_mq_complete_request(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_complete_request);

int blk_mq_request_started(struct request *rq)
{
	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
}
EXPORT_SYMBOL_GPL(blk_mq_request_started);

int blk_mq_request_completed(struct request *rq)
{
	return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
}
EXPORT_SYMBOL_GPL(blk_mq_request_completed);

void blk_mq_start_request(struct request *rq)
{
	struct request_queue *q = rq->q;
@@ -1064,7 +1036,7 @@ bool blk_mq_get_driver_tag(struct request *rq)
	bool shared;

	if (rq->tag != -1)
		goto done;
		return true;

	if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
		data.flags |= BLK_MQ_REQ_RESERVED;
@@ -1079,7 +1051,6 @@ bool blk_mq_get_driver_tag(struct request *rq)
		data.hctx->tags->rqs[rq->tag] = rq;
	}

done:
	return rq->tag != -1;
}

@@ -1486,7 +1457,7 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
}
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);

bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
	int srcu_idx;
	bool need_run;
@@ -1504,12 +1475,8 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
		blk_mq_hctx_has_pending(hctx);
	hctx_unlock(hctx, srcu_idx);

	if (need_run) {
	if (need_run)
		__blk_mq_delay_run_hw_queue(hctx, async, 0);
		return true;
	}

	return false;
}
EXPORT_SYMBOL(blk_mq_run_hw_queue);

@@ -2789,6 +2756,23 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
	int i, j, end;
	struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;

	if (q->nr_hw_queues < set->nr_hw_queues) {
		struct blk_mq_hw_ctx **new_hctxs;

		new_hctxs = kcalloc_node(set->nr_hw_queues,
				       sizeof(*new_hctxs), GFP_KERNEL,
				       set->numa_node);
		if (!new_hctxs)
			return;
		if (hctxs)
			memcpy(new_hctxs, hctxs, q->nr_hw_queues *
			       sizeof(*hctxs));
		q->queue_hw_ctx = new_hctxs;
		q->nr_hw_queues = set->nr_hw_queues;
		kfree(hctxs);
		hctxs = new_hctxs;
	}

	/* protect against switching io scheduler  */
	mutex_lock(&q->sysfs_lock);
	for (i = 0; i < set->nr_hw_queues; i++) {
@@ -2844,19 +2828,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
	mutex_unlock(&q->sysfs_lock);
}

/*
 * Maximum number of hardware queues we support. For single sets, we'll never
 * have more than the CPUs (software queues). For multiple sets, the tag_set
 * user may have set ->nr_hw_queues larger.
 */
static unsigned int nr_hw_queues(struct blk_mq_tag_set *set)
{
	if (set->nr_maps == 1)
		return nr_cpu_ids;

	return max(set->nr_hw_queues, nr_cpu_ids);
}

struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
						  struct request_queue *q,
						  bool elevator_init)
@@ -2876,12 +2847,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
	/* init q->mq_kobj and sw queues' kobjects */
	blk_mq_sysfs_init(q);

	q->nr_queues = nr_hw_queues(set);
	q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)),
						GFP_KERNEL, set->numa_node);
	if (!q->queue_hw_ctx)
		goto err_sys_init;

	INIT_LIST_HEAD(&q->unused_hctx_list);
	spin_lock_init(&q->unused_hctx_lock);

@@ -2929,7 +2894,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
err_hctxs:
	kfree(q->queue_hw_ctx);
	q->nr_hw_queues = 0;
err_sys_init:
	blk_mq_sysfs_deinit(q);
err_poll:
	blk_stat_free_callback(q->poll_cb);
@@ -3030,6 +2994,29 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
	}
}

static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
				  int cur_nr_hw_queues, int new_nr_hw_queues)
{
	struct blk_mq_tags **new_tags;

	if (cur_nr_hw_queues >= new_nr_hw_queues)
		return 0;

	new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
				GFP_KERNEL, set->numa_node);
	if (!new_tags)
		return -ENOMEM;

	if (set->tags)
		memcpy(new_tags, set->tags, cur_nr_hw_queues *
		       sizeof(*set->tags));
	kfree(set->tags);
	set->tags = new_tags;
	set->nr_hw_queues = new_nr_hw_queues;

	return 0;
}

/*
 * Alloc a tag set to be associated with one or more request queues.
 * May fail with EINVAL for various error conditions. May adjust the
@@ -3083,9 +3070,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
	if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
		set->nr_hw_queues = nr_cpu_ids;

	set->tags = kcalloc_node(nr_hw_queues(set), sizeof(struct blk_mq_tags *),
				 GFP_KERNEL, set->numa_node);
	if (!set->tags)
	if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0)
		return -ENOMEM;

	ret = -ENOMEM;
@@ -3126,7 +3111,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
{
	int i, j;

	for (i = 0; i < nr_hw_queues(set); i++)
	for (i = 0; i < set->nr_hw_queues; i++)
		blk_mq_free_map_and_requests(set, i);

	for (j = 0; j < set->nr_maps; j++) {
@@ -3270,10 +3255,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,

	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_freeze_queue(q);
	/*
	 * Sync with blk_mq_queue_tag_busy_iter.
	 */
	synchronize_rcu();
	/*
	 * Switch IO scheduler to 'none', cleaning up the data associated
	 * with the previous scheduler. We will switch back once we are done
@@ -3288,6 +3269,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
		blk_mq_sysfs_unregister(q);
	}

	if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
	    0)
		goto reregister;

	prev_nr_hw_queues = set->nr_hw_queues;
	set->nr_hw_queues = nr_hw_queues;
	blk_mq_update_queue_map(set);
@@ -3304,6 +3289,7 @@ fallback:
		blk_mq_map_swqueue(q);
	}

reregister:
	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_sysfs_register(q);
		blk_mq_debugfs_register_hctxs(q);
Loading