Commit ff856bad authored by Jens Axboe
Browse files

[BLOCK] ll_rw_blk: Enable out-of-order request completions through softirq



Request completion can be a quite heavy process, since it needs to
iterate through the entire request and complete the bio's it holds.
This patch adds blk_complete_request() which moves this processing
into a dedicated block softirq.

Signed-off-by: Jens Axboe <axboe@suse.de>
parent 5367f2d6
Loading
Loading
Loading
Loading
+105 −1
Original line number Diff line number Diff line
@@ -27,6 +27,8 @@
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>

/*
 * for max sense size
@@ -69,6 +71,8 @@ unsigned long blk_max_low_pfn, blk_max_pfn;
EXPORT_SYMBOL(blk_max_low_pfn);
EXPORT_SYMBOL(blk_max_pfn);

static DEFINE_PER_CPU(struct list_head, blk_cpu_done);

/* Amount of time in which a process may batch requests */
#define BLK_BATCH_TIME	(HZ/50UL)

@@ -207,6 +211,13 @@ void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)

EXPORT_SYMBOL(blk_queue_merge_bvec);

/**
 * blk_queue_softirq_done - set the softirq completion callback of a queue
 * @q:   the request queue whose callback is being set
 * @fn:  the function stored in @q->softirq_done_fn
 *
 * Description:
 *    Records @fn as the queue's softirq completion callback.
 *    blk_done_softirq() invokes it for every request taken off the
 *    per-CPU done list, and blk_complete_request() hits BUG_ON() if
 *    no callback has been set on the request's queue.
 **/
void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
{
	q->softirq_done_fn = fn;
}

EXPORT_SYMBOL(blk_queue_softirq_done);

/**
 * blk_queue_make_request - define an alternate make_request function for a device
 * @q:  the request queue for the device to be affected
@@ -270,6 +281,7 @@ EXPORT_SYMBOL(blk_queue_make_request);
static inline void rq_init(request_queue_t *q, struct request *rq)
{
	INIT_LIST_HEAD(&rq->queuelist);
	INIT_LIST_HEAD(&rq->donelist);

	rq->errors = 0;
	rq->rq_status = RQ_ACTIVE;
@@ -286,6 +298,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
	rq->sense = NULL;
	rq->end_io = NULL;
	rq->end_io_data = NULL;
	rq->completion_data = NULL;
}

/**
@@ -3286,6 +3299,87 @@ int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes)

EXPORT_SYMBOL(end_that_request_chunk);

/*
 * splice the completion data to a local structure and hand off to
 * process_completion_queue() to complete the requests
 */
static void blk_done_softirq(struct softirq_action *h)
{
	struct list_head *cpu_list;
	LIST_HEAD(local_list);

	local_irq_disable();
	cpu_list = &__get_cpu_var(blk_cpu_done);
	list_splice_init(cpu_list, &local_list);
	local_irq_enable();

	while (!list_empty(&local_list)) {
		struct request *rq = list_entry(local_list.next, struct request, donelist);

		list_del_init(&rq->donelist);
		rq->q->softirq_done_fn(rq);
	}
}

#ifdef CONFIG_HOTPLUG_CPU

/*
 * CPU hotplug callback. For CPU_DEAD, the departed CPU's list of
 * completed requests is spliced onto the current CPU's list and the
 * block softirq is raised so they get processed here. Every other
 * action is ignored. NOTIFY_OK is returned in all cases.
 */
static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
			  void *hcpu)
{
	int dead_cpu;

	if (action != CPU_DEAD)
		return NOTIFY_OK;

	/* hcpu carries the CPU number, not a real pointer */
	dead_cpu = (unsigned long) hcpu;

	local_irq_disable();
	list_splice_init(&per_cpu(blk_cpu_done, dead_cpu),
			 &__get_cpu_var(blk_cpu_done));
	raise_softirq_irqoff(BLOCK_SOFTIRQ);
	local_irq_enable();

	return NOTIFY_OK;
}


/*
 * Notifier block that routes CPU events to blk_cpu_notify(). It is
 * handed to register_cpu_notifier() from blk_dev_init() when
 * CONFIG_HOTPLUG_CPU is enabled.
 */
static struct notifier_block __devinitdata blk_cpu_notifier = {
	.notifier_call	= blk_cpu_notify,
};

#endif /* CONFIG_HOTPLUG_CPU */

/**
 * blk_complete_request - end I/O on a request
 * @req:      the request being processed
 *
 * Description:
 *     Ends all I/O on a request. Partial completions are not handled,
 *     unless the driver implements them itself in its completion
 *     callback through requeueing. The actual completion happens
 *     out-of-order, through a softirq handler. The user must have
 *     registered a completion callback through blk_queue_softirq_done().
 **/

void blk_complete_request(struct request *req)
{
	unsigned long irq_flags;

	/* a queue without a completion callback cannot use this path */
	BUG_ON(!req->q->softirq_done_fn);

	/*
	 * Queue the request at the tail of this CPU's done list and raise
	 * the block softirq, all with local interrupts disabled.
	 */
	local_irq_save(irq_flags);
	list_add_tail(&req->donelist, &__get_cpu_var(blk_cpu_done));
	raise_softirq_irqoff(BLOCK_SOFTIRQ);
	local_irq_restore(irq_flags);
}

EXPORT_SYMBOL(blk_complete_request);
	
/*
 * queue lock must be held
 */
@@ -3364,6 +3458,8 @@ EXPORT_SYMBOL(kblockd_flush);

int __init blk_dev_init(void)
{
	int i;

	kblockd_workqueue = create_workqueue("kblockd");
	if (!kblockd_workqueue)
		panic("Failed to create kblockd\n");
@@ -3377,6 +3473,14 @@ int __init blk_dev_init(void)
	iocontext_cachep = kmem_cache_create("blkdev_ioc",
			sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);

	for (i = 0; i < NR_CPUS; i++)
		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));

	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
#ifdef CONFIG_HOTPLUG_CPU
	register_cpu_notifier(&blk_cpu_notifier);
#endif

	blk_max_low_pfn = max_low_pfn;
	blk_max_pfn = max_pfn;

+18 −3
Original line number Diff line number Diff line
@@ -118,9 +118,9 @@ struct request_list {
 * try to put the fields that are referenced together in the same cacheline
 */
struct request {
	struct list_head queuelist; /* looking for ->queue? you must _not_
				     * access it directly, use
				     * blkdev_dequeue_request! */
	struct list_head queuelist;
	struct list_head donelist;

	unsigned long flags;		/* see REQ_ bits below */

	/* Maintain bio traversal state for part by part I/O submission.
@@ -141,6 +141,7 @@ struct request {
	struct bio *biotail;

	void *elevator_private;
	void *completion_data;

	unsigned short ioprio;

@@ -291,6 +292,7 @@ typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
typedef void (activity_fn) (void *data, int rw);
typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *);
typedef void (prepare_flush_fn) (request_queue_t *, struct request *);
typedef void (softirq_done_fn)(struct request *);

enum blk_queue_state {
	Queue_down,
@@ -332,6 +334,7 @@ struct request_queue
	activity_fn		*activity_fn;
	issue_flush_fn		*issue_flush_fn;
	prepare_flush_fn	*prepare_flush_fn;
	softirq_done_fn		*softirq_done_fn;

	/*
	 * Dispatch queue sorting
@@ -646,6 +649,17 @@ extern int end_that_request_first(struct request *, int, int);
extern int end_that_request_chunk(struct request *, int, int);
extern void end_that_request_last(struct request *, int);
extern void end_request(struct request *req, int uptodate);
extern void blk_complete_request(struct request *);

/*
 * Report whether @nr_bytes covers the whole of @rq. For fs requests the
 * byte count is compared with hard_nr_sectors << 9 (sectors times 512);
 * for pc requests it is compared with data_len. Any other request type
 * yields 0.
 */
static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
{
	int done = 0;

	if (blk_fs_request(rq))
		done = nr_bytes >= (rq->hard_nr_sectors << 9);
	else if (blk_pc_request(rq))
		done = nr_bytes >= rq->data_len;

	return done;
}

/*
 * end_that_request_first/chunk() takes an uptodate argument. we account
@@ -694,6 +708,7 @@ extern void blk_queue_segment_boundary(request_queue_t *, unsigned long);
extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn);
extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
extern void blk_queue_dma_alignment(request_queue_t *, int);
extern void blk_queue_softirq_done(request_queue_t *, softirq_done_fn *);
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *);
extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
+1 −0
Original line number Diff line number Diff line
@@ -112,6 +112,7 @@ enum
	TIMER_SOFTIRQ,
	NET_TX_SOFTIRQ,
	NET_RX_SOFTIRQ,
	BLOCK_SOFTIRQ,
	SCSI_SOFTIRQ,
	TASKLET_SOFTIRQ
};