Commit 39a9f97e authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge branch 'for-3.16/blk-mq-tagging' into for-3.16/core



Signed-off-by: default avatarJens Axboe <axboe@fb.com>

Conflicts:
	block/blk-mq-tag.c
parents 1429d7c9 0d2602ca
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -208,6 +208,11 @@ static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
	return blk_mq_tag_sysfs_show(hctx->tags, page);
}

static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
{
	return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
}

static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
{
	unsigned int i, first = 1;
@@ -267,6 +272,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
	.attr = {.name = "dispatched", .mode = S_IRUGO },
	.show = blk_mq_hw_sysfs_dispatched_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
	.attr = {.name = "active", .mode = S_IRUGO },
	.show = blk_mq_hw_sysfs_active_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
	.attr = {.name = "pending", .mode = S_IRUGO },
	.show = blk_mq_hw_sysfs_rq_list_show,
@@ -287,6 +296,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
	&blk_mq_hw_sysfs_pending.attr,
	&blk_mq_hw_sysfs_tags.attr,
	&blk_mq_hw_sysfs_cpus.attr,
	&blk_mq_hw_sysfs_active.attr,
	NULL,
};

+95 −17
Original line number Diff line number Diff line
@@ -7,13 +7,12 @@
#include "blk-mq.h"
#include "blk-mq-tag.h"

void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx,
			  bool reserved)
void blk_mq_wait_for_tags(struct blk_mq_hw_ctx *hctx, bool reserved)
{
	int tag, zero = 0;

	tag = blk_mq_get_tag(tags, hctx, &zero, __GFP_WAIT, reserved);
	blk_mq_put_tag(tags, tag, &zero);
	tag = blk_mq_get_tag(hctx, &zero, __GFP_WAIT, reserved);
	blk_mq_put_tag(hctx, tag, &zero);
}

static bool bt_has_free_tags(struct blk_mq_bitmap_tags *bt)
@@ -40,6 +39,84 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
	return bt_has_free_tags(&tags->bitmap_tags);
}

static inline void bt_index_inc(unsigned int *index)
{
	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
}

/*
 * If a previously inactive queue goes active, bump the active user count.
 */
bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
	    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		atomic_inc(&hctx->tags->active_queues);

	return true;
}

/*
 * If a previously busy queue goes inactive, potential waiters could now
 * be allowed to queue. Wake them up and check.
 */
void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
	struct blk_mq_tags *tags = hctx->tags;
	struct blk_mq_bitmap_tags *bt;
	int i, wake_index;

	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return;

	atomic_dec(&tags->active_queues);

	/*
	 * Will only throttle depth on non-reserved tags
	 */
	bt = &tags->bitmap_tags;
	wake_index = bt->wake_index;
	for (i = 0; i < BT_WAIT_QUEUES; i++) {
		struct bt_wait_state *bs = &bt->bs[wake_index];

		if (waitqueue_active(&bs->wait))
			wake_up(&bs->wait);

		bt_index_inc(&wake_index);
	}
}

/*
 * For shared tag users, we track the number of currently active users
 * and attempt to provide a fair share of the tag depth for each of them.
 */
static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
				  struct blk_mq_bitmap_tags *bt)
{
	unsigned int depth, users;

	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
		return true;
	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
		return true;

	/*
	 * Don't try dividing an ant
	 */
	if (bt->depth == 1)
		return true;

	users = atomic_read(&hctx->tags->active_queues);
	if (!users)
		return true;

	/*
	 * Allow at least some tags
	 */
	depth = max((bt->depth + users - 1) / users, 4U);
	return atomic_read(&hctx->nr_active) < depth;
}

static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
{
	int tag, org_last_tag, end;
@@ -78,11 +155,15 @@ restart:
 * multiple users will tend to stick to different cachelines, at least
 * until the map is exhausted.
 */
static int __bt_get(struct blk_mq_bitmap_tags *bt, unsigned int *tag_cache)
static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
		    unsigned int *tag_cache)
{
	unsigned int last_tag, org_last_tag;
	int index, i, tag;

	if (!hctx_may_queue(hctx, bt))
		return -1;

	last_tag = org_last_tag = *tag_cache;
	index = TAG_TO_INDEX(bt, last_tag);

@@ -117,11 +198,6 @@ done:
	return tag;
}

static inline void bt_index_inc(unsigned int *index)
{
	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
}

static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
					 struct blk_mq_hw_ctx *hctx)
{
@@ -142,7 +218,7 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
	DEFINE_WAIT(wait);
	int tag;

	tag = __bt_get(bt, last_tag);
	tag = __bt_get(hctx, bt, last_tag);
	if (tag != -1)
		return tag;

@@ -156,7 +232,7 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
		was_empty = list_empty(&wait.task_list);
		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);

		tag = __bt_get(bt, last_tag);
		tag = __bt_get(hctx, bt, last_tag);
		if (tag != -1)
			break;

@@ -200,14 +276,13 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags,
	return tag;
}

unsigned int blk_mq_get_tag(struct blk_mq_tags *tags,
			    struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
			    gfp_t gfp, bool reserved)
{
	if (!reserved)
		return __blk_mq_get_tag(tags, hctx, last_tag, gfp);
		return __blk_mq_get_tag(hctx->tags, hctx, last_tag, gfp);

	return __blk_mq_get_reserved_tag(tags, gfp);
	return __blk_mq_get_reserved_tag(hctx->tags, gfp);
}

static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
@@ -265,9 +340,11 @@ static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
	bt_clear_tag(&tags->breserved_tags, tag);
}

void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag,
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
		    unsigned int *last_tag)
{
	struct blk_mq_tags *tags = hctx->tags;

	if (tag >= tags->nr_reserved_tags) {
		const int real_tag = tag - tags->nr_reserved_tags;

@@ -465,6 +542,7 @@ ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
	res = bt_unused_tags(&tags->breserved_tags);

	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
	page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));

	return page - orig_page;
}
+24 −3
Original line number Diff line number Diff line
@@ -35,6 +35,8 @@ struct blk_mq_tags {
	unsigned int nr_tags;
	unsigned int nr_reserved_tags;

	atomic_t active_queues;

	struct blk_mq_bitmap_tags bitmap_tags;
	struct blk_mq_bitmap_tags breserved_tags;

@@ -46,9 +48,9 @@ struct blk_mq_tags {
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
extern void blk_mq_free_tags(struct blk_mq_tags *tags);

extern unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
extern void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, bool reserved);
extern void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag, unsigned int *last_tag);
extern unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
extern void blk_mq_wait_for_tags(struct blk_mq_hw_ctx *hctx, bool reserved);
extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, unsigned int *last_tag);
extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data);
extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
@@ -65,4 +67,23 @@ enum {
	BLK_MQ_TAG_MAX		= BLK_MQ_TAG_FAIL - 1,
};

extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);

static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
{
	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
		return false;

	return __blk_mq_tag_busy(hctx);
}

static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
{
	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
		return;

	__blk_mq_tag_idle(hctx);
}

#endif
+79 −6
Original line number Diff line number Diff line
@@ -99,9 +99,16 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
	struct request *rq;
	unsigned int tag;

	tag = blk_mq_get_tag(hctx->tags, hctx, &ctx->last_tag, gfp, reserved);
	tag = blk_mq_get_tag(hctx, &ctx->last_tag, gfp, reserved);
	if (tag != BLK_MQ_TAG_FAIL) {
		rq = hctx->tags->rqs[tag];

		rq->cmd_flags = 0;
		if (blk_mq_tag_busy(hctx)) {
			rq->cmd_flags = REQ_MQ_INFLIGHT;
			atomic_inc(&hctx->nr_active);
		}

		rq->tag = tag;
		return rq;
	}
@@ -209,7 +216,7 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
	/* csd/requeue_work/fifo_time is initialized before use */
	rq->q = q;
	rq->mq_ctx = ctx;
	rq->cmd_flags = rw_flags;
	rq->cmd_flags |= rw_flags;
	rq->cmd_type = 0;
	/* do not touch atomic flags, it needs atomic ops against the timer */
	rq->cpu = -1;
@@ -281,7 +288,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
			break;
		}

		blk_mq_wait_for_tags(hctx->tags, hctx, reserved);
		blk_mq_wait_for_tags(hctx, reserved);
	} while (1);

	return rq;
@@ -322,8 +329,11 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
	const int tag = rq->tag;
	struct request_queue *q = rq->q;

	if (rq->cmd_flags & REQ_MQ_INFLIGHT)
		atomic_dec(&hctx->nr_active);

	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
	blk_mq_put_tag(hctx->tags, tag, &ctx->last_tag);
	blk_mq_put_tag(hctx, tag, &ctx->last_tag);
	blk_mq_queue_exit(q);
}

@@ -590,8 +600,13 @@ static void blk_mq_rq_timer(unsigned long data)
	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set);

	if (next_set)
		mod_timer(&q->timeout, round_jiffies_up(next));
	if (next_set) {
		next = blk_rq_timeout(round_jiffies_up(next));
		mod_timer(&q->timeout, next);
	} else {
		queue_for_each_hw_ctx(q, hctx, i)
			blk_mq_tag_idle(hctx);
	}
}

/*
@@ -1501,6 +1516,56 @@ static void blk_mq_map_swqueue(struct request_queue *q)
	}
}

static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
{
	struct blk_mq_hw_ctx *hctx;
	struct request_queue *q;
	bool shared;
	int i;

	if (set->tag_list.next == set->tag_list.prev)
		shared = false;
	else
		shared = true;

	list_for_each_entry(q, &set->tag_list, tag_set_list) {
		blk_mq_freeze_queue(q);

		queue_for_each_hw_ctx(q, hctx, i) {
			if (shared)
				hctx->flags |= BLK_MQ_F_TAG_SHARED;
			else
				hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
		}
		blk_mq_unfreeze_queue(q);
	}
}

static void blk_mq_del_queue_tag_set(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;

	blk_mq_freeze_queue(q);

	mutex_lock(&set->tag_list_lock);
	list_del_init(&q->tag_set_list);
	blk_mq_update_tag_set_depth(set);
	mutex_unlock(&set->tag_list_lock);

	blk_mq_unfreeze_queue(q);
}

static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
				     struct request_queue *q)
{
	q->tag_set = set;

	mutex_lock(&set->tag_list_lock);
	list_add_tail(&q->tag_set_list, &set->tag_list);
	blk_mq_update_tag_set_depth(set);
	mutex_unlock(&set->tag_list_lock);
}

struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
{
	struct blk_mq_hw_ctx **hctxs;
@@ -1526,6 +1591,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
		if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL))
			goto err_hctxs;

		atomic_set(&hctxs[i]->nr_active, 0);
		hctxs[i]->numa_node = NUMA_NO_NODE;
		hctxs[i]->queue_num = i;
	}
@@ -1578,6 +1644,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
	list_add_tail(&q->all_q_node, &all_q_list);
	mutex_unlock(&all_q_mutex);

	blk_mq_add_queue_tag_set(set, q);

	return q;

err_flush_rq:
@@ -1605,6 +1673,8 @@ void blk_mq_free_queue(struct request_queue *q)
	struct blk_mq_hw_ctx *hctx;
	int i;

	blk_mq_del_queue_tag_set(q);

	queue_for_each_hw_ctx(q, hctx, i) {
		kfree(hctx->ctxs);
		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
@@ -1696,6 +1766,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
			goto out_unwind;
	}

	mutex_init(&set->tag_list_lock);
	INIT_LIST_HEAD(&set->tag_list);

	return 0;

out_unwind:
+12 −1
Original line number Diff line number Diff line
@@ -166,6 +166,17 @@ void blk_abort_request(struct request *req)
}
EXPORT_SYMBOL_GPL(blk_abort_request);

unsigned long blk_rq_timeout(unsigned long timeout)
{
	unsigned long maxt;

	maxt = round_jiffies_up(jiffies + BLK_MAX_TIMEOUT);
	if (time_after(timeout, maxt))
		timeout = maxt;

	return timeout;
}

/**
 * blk_add_timer - Start timeout timer for a single request
 * @req:	request that is about to start running.
@@ -200,7 +211,7 @@ void blk_add_timer(struct request *req)
	 * than an existing one, modify the timer. Round up to next nearest
	 * second.
	 */
	expiry = round_jiffies_up(req->deadline);
	expiry = blk_rq_timeout(round_jiffies_up(req->deadline));

	if (!timer_pending(&q->timeout) ||
	    time_before(expiry, q->timeout.expires)) {
Loading