Commit a8474ce2 authored by Jens Axboe's avatar Jens Axboe
Browse files

SCSI: support for allocating large scatterlists



This is what enables large commands. If we need to allocate an
sgtable that doesn't fit in a single page, allocate several
SCSI_MAX_SG_SEGMENTS sized tables and chain them together.

SCSI defaults to large chained sg tables, if the arch supports it.

Signed-off-by: default avatarJens Axboe <jens.axboe@oracle.com>
parent 0cde8d95
Loading
Loading
Loading
Loading
+162 −47
Original line number Diff line number Diff line
@@ -41,27 +41,13 @@ struct scsi_host_sg_pool {
	mempool_t	*pool;
};

#if (SCSI_MAX_PHYS_SEGMENTS < 32)
#error SCSI_MAX_PHYS_SEGMENTS is too small
#endif

#define SP(x) { x, "sgpool-" #x }
static struct scsi_host_sg_pool scsi_sg_pools[] = {
	SP(8),
	SP(16),
	SP(32),
#if (SCSI_MAX_PHYS_SEGMENTS > 32)
	SP(64),
#if (SCSI_MAX_PHYS_SEGMENTS > 64)
	SP(128),
#if (SCSI_MAX_PHYS_SEGMENTS > 128)
	SP(256),
#if (SCSI_MAX_PHYS_SEGMENTS > 256)
#error SCSI_MAX_PHYS_SEGMENTS is too large
#endif
#endif
#endif
#endif
};
#undef SP

@@ -698,45 +684,126 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
	return NULL;
}

struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
{
	struct scsi_host_sg_pool *sgp;
	struct scatterlist *sgl;
/*
 * The maximum number of SG segments that we will put inside a scatterlist
 * (unless chaining is used). Should ideally fit inside a single page, to
 * avoid a higher order allocation.
 */
#define SCSI_MAX_SG_SEGMENTS	128

	BUG_ON(!cmd->use_sg);
/*
 * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
 * is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
 */
#define SCSI_MAX_SG_CHAIN_SEGMENTS	2048

static inline unsigned int scsi_sgtable_index(unsigned short nents)
{
	unsigned int index;

	switch (cmd->use_sg) {
	switch (nents) {
	case 1 ... 8:
		cmd->sglist_len = 0;
		index = 0;
		break;
	case 9 ... 16:
		cmd->sglist_len = 1;
		index = 1;
		break;
	case 17 ... 32:
		cmd->sglist_len = 2;
		index = 2;
		break;
#if (SCSI_MAX_PHYS_SEGMENTS > 32)
	case 33 ... 64:
		cmd->sglist_len = 3;
		break;
#if (SCSI_MAX_PHYS_SEGMENTS > 64)
	case 65 ... 128:
		cmd->sglist_len = 4;
		index = 3;
		break;
#if (SCSI_MAX_PHYS_SEGMENTS  > 128)
	case 129 ... 256:
		cmd->sglist_len = 5;
	case 65 ... SCSI_MAX_SG_SEGMENTS:
		index = 4;
		break;
#endif
#endif
#endif
	default:
		return NULL;
		printk(KERN_ERR "scsi: bad segment count=%d\n", nents);
		BUG();
	}

	sgp = scsi_sg_pools + cmd->sglist_len;
	return index;
}

struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
{
	struct scsi_host_sg_pool *sgp;
	struct scatterlist *sgl, *prev, *ret;
	unsigned int index;
	int this, left;

	BUG_ON(!cmd->use_sg);

	left = cmd->use_sg;
	ret = prev = NULL;
	do {
		this = left;
		if (this > SCSI_MAX_SG_SEGMENTS) {
			this = SCSI_MAX_SG_SEGMENTS - 1;
			index = SG_MEMPOOL_NR - 1;
		} else
			index = scsi_sgtable_index(this);

		left -= this;

		sgp = scsi_sg_pools + index;

		sgl = mempool_alloc(sgp->pool, gfp_mask);
	return sgl;
		if (unlikely(!sgl))
			goto enomem;

		memset(sgl, 0, sizeof(*sgl) * sgp->size);

		/*
		 * first loop through, set initial index and return value
		 */
		if (!ret) {
			cmd->sglist_len = index;
			ret = sgl;
		}

		/*
		 * chain previous sglist, if any. we know the previous
		 * sglist must be the biggest one, or we would not have
		 * ended up doing another loop.
		 */
		if (prev)
			sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);

		/*
		 * don't allow subsequent mempool allocs to sleep, it would
		 * violate the mempool principle.
		 */
		gfp_mask &= ~__GFP_WAIT;
		gfp_mask |= __GFP_HIGH;
		prev = sgl;
	} while (left);

	/*
	 * ->use_sg may get modified after dma mapping has potentially
	 * shrunk the number of segments, so keep a copy of it for free.
	 */
	cmd->__use_sg = cmd->use_sg;
	return ret;
enomem:
	if (ret) {
		/*
		 * Free entries chained off ret. Since we were trying to
		 * allocate another sglist, we know that all entries are of
		 * the max size.
		 */
		sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1;
		prev = ret;
		ret = &ret[SCSI_MAX_SG_SEGMENTS - 1];

		while ((sgl = sg_chain_ptr(ret)) != NULL) {
			ret = &sgl[SCSI_MAX_SG_SEGMENTS - 1];
			mempool_free(sgl, sgp->pool);
		}

		mempool_free(prev, sgp->pool);
	}
	return NULL;
}

EXPORT_SYMBOL(scsi_alloc_sgtable);
@@ -748,6 +815,42 @@ void scsi_free_sgtable(struct scsi_cmnd *cmd)

	BUG_ON(cmd->sglist_len >= SG_MEMPOOL_NR);

	/*
	 * if this is the biggest size sglist, check if we have
	 * chained parts we need to free
	 */
	if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) {
		unsigned short this, left;
		struct scatterlist *next;
		unsigned int index;

		left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1);
		next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]);
		while (left && next) {
			sgl = next;
			this = left;
			if (this > SCSI_MAX_SG_SEGMENTS) {
				this = SCSI_MAX_SG_SEGMENTS - 1;
				index = SG_MEMPOOL_NR - 1;
			} else
				index = scsi_sgtable_index(this);

			left -= this;

			sgp = scsi_sg_pools + index;

			if (left)
				next = sg_chain_ptr(&sgl[sgp->size - 1]);

			mempool_free(sgl, sgp->pool);
		}

		/*
		 * Restore original, will be freed below
		 */
		sgl = cmd->request_buffer;
	}

	sgp = scsi_sg_pools + cmd->sglist_len;
	mempool_free(sgl, sgp->pool);
}
@@ -988,7 +1091,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
static int scsi_init_io(struct scsi_cmnd *cmd)
{
	struct request     *req = cmd->request;
	struct scatterlist *sgpnt;
	int		   count;

	/*
@@ -1001,14 +1103,13 @@ static int scsi_init_io(struct scsi_cmnd *cmd)
	/*
	 * If sg table allocation fails, requeue request later.
	 */
	sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
	if (unlikely(!sgpnt)) {
	cmd->request_buffer = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
	if (unlikely(!cmd->request_buffer)) {
		scsi_unprep_request(req);
		return BLKPREP_DEFER;
	}

	req->buffer = NULL;
	cmd->request_buffer = (char *) sgpnt;
	if (blk_pc_request(req))
		cmd->request_bufflen = req->data_len;
	else
@@ -1533,8 +1634,22 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
	if (!q)
		return NULL;

	/*
	 * this limit is imposed by hardware restrictions
	 */
	blk_queue_max_hw_segments(q, shost->sg_tablesize);
	blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);

	/*
	 * In the future, sg chaining support will be mandatory and this
	 * ifdef can then go away. Right now we don't have all archs
	 * converted, so better keep it safe.
	 */
#ifdef ARCH_HAS_SG_CHAIN
	blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
#else
	blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS);
#endif

	blk_queue_max_sectors(q, shost->max_sectors);
	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
	blk_queue_segment_boundary(q, shost->dma_boundary);
+0 −7
Original line number Diff line number Diff line
@@ -10,13 +10,6 @@

#include <linux/types.h>

/*
 *	The maximum sg list length SCSI can cope with
 *	(currently must be a power of 2 between 32 and 256)
 */
#define SCSI_MAX_PHYS_SEGMENTS	MAX_PHYS_SEGMENTS


/*
 *	SCSI command lengths
 */
+1 −0
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ struct scsi_cmnd {
	/* These elements define the operation we ultimately want to perform */
	unsigned short use_sg;	/* Number of pieces of scatter-gather */
	unsigned short sglist_len;	/* size of malloc'd scatter-gather list */
	unsigned short __use_sg;

	unsigned underflow;	/* Return error if less than
				   this amount is transferred */