Commit dea26209 authored by Bart Van Assche, committed by Doug Ledford

IB/srpt: Change default behavior from using SRQ to using RC



Although RC mode needs more resources, it has three advantages over SRQ
mode:
- It works with all RDMA adapters, even those that do not support
  SRQ.
- Posting WRs and polling WCs does not trigger lock contention
  because only one thread at a time accesses a WR or WC queue in
  non-SRQ mode.
- The end-to-end flow control mechanism is used.

From the IB spec:

    C9-150.2.1: For QPs that are not associated with an SRQ, each HCA
    receive queue shall generate end-to-end flow control credits. If
    a QP is associated with an SRQ, the HCA receive queue shall not
    generate end-to-end flow control credits.

Add new configfs attributes that allow configuring which mode to use
(/sys/kernel/config/target/srpt/$GUID/$GUID/attrib/use_srq); see the
usage sketch below. Note: only the attribute for port 1 is relevant on
multi-port adapters.
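
As a usage sketch ($GUID stands for the target and port GUID directories
created by the driver, exactly as in the path above; any value other
than 0 or 1 is rejected with -EINVAL by the store function):

    # Read the current setting: 0 = RC (the new default), 1 = SRQ.
    cat /sys/kernel/config/target/srpt/$GUID/$GUID/attrib/use_srq

    # Request SRQ mode for this port; on multi-port adapters only the
    # port 1 attribute is consulted, as noted above.
    echo 1 > /sys/kernel/config/target/srpt/$GUID/$GUID/attrib/use_srq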

Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 74333f12
drivers/infiniband/ulp/srpt/ib_srpt.c: +117 −35
@@ -295,6 +295,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
{
	struct srpt_device *sdev = sport->sdev;
	struct ib_dm_ioc_profile *iocp;
	int send_queue_depth;

	iocp = (struct ib_dm_ioc_profile *)mad->data;

@@ -310,6 +311,12 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
		return;
	}

	if (sdev->use_srq)
		send_queue_depth = sdev->srq_size;
	else
		send_queue_depth = min(SRPT_RQ_SIZE,
				       sdev->device->attrs.max_qp_wr);

	memset(iocp, 0, sizeof(*iocp));
	strcpy(iocp->id_string, SRPT_ID_STRING);
	iocp->guid = cpu_to_be64(srpt_service_guid);
@@ -322,7 +329,7 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
	iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
	iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
	iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
	iocp->send_queue_depth = cpu_to_be16(sdev->srq_size);
	iocp->send_queue_depth = cpu_to_be16(send_queue_depth);
	iocp->rdma_read_depth = 4;
	iocp->send_size = cpu_to_be32(srp_max_req_size);
	iocp->rdma_size = cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size,
@@ -686,6 +693,9 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
{
	int i;

	if (!ioctx_ring)
		return;

	for (i = 0; i < ring_size; ++i)
		srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir);
	kfree(ioctx_ring);
@@ -757,7 +767,7 @@ static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx,
/**
 * srpt_post_recv() - Post an IB receive request.
 */
static int srpt_post_recv(struct srpt_device *sdev,
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
			  struct srpt_recv_ioctx *ioctx)
{
	struct ib_sge list;
@@ -774,7 +784,10 @@ static int srpt_post_recv(struct srpt_device *sdev,
	wr.sg_list = &list;
	wr.num_sge = 1;

	if (sdev->use_srq)
		return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
	else
		return ib_post_recv(ch->qp, &wr, &bad_wr);
}

/**
@@ -1517,7 +1530,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
		break;
	}

	srpt_post_recv(ch->sport->sdev, recv_ioctx);
	srpt_post_recv(ch->sport->sdev, ch, recv_ioctx);
	return;

out_wait:
@@ -1616,7 +1629,7 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
	struct srpt_device *sdev = sport->sdev;
	const struct ib_device_attr *attrs = &sdev->device->attrs;
	u32 srp_sq_size = sport->port_attrib.srp_sq_size;
	int ret;
	int i, ret;

	WARN_ON(ch->rq_size < 1);

@@ -1640,7 +1653,6 @@ retry:
		= (void(*)(struct ib_event *, void*))srpt_qp_event;
	qp_init->send_cq = ch->cq;
	qp_init->recv_cq = ch->cq;
	qp_init->srq = sdev->srq;
	qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_init->qp_type = IB_QPT_RC;
	/*
@@ -1654,6 +1666,12 @@ retry:
	qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
	qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE);
	qp_init->port_num = ch->sport->port;
	if (sdev->use_srq) {
		qp_init->srq = sdev->srq;
	} else {
		qp_init->cap.max_recv_wr = ch->rq_size;
		qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge;
	}

	ch->qp = ib_create_qp(sdev->pd, qp_init);
	if (IS_ERR(ch->qp)) {
@@ -1669,6 +1687,10 @@ retry:
		goto err_destroy_cq;
	}

	if (!sdev->use_srq)
		for (i = 0; i < ch->rq_size; i++)
			srpt_post_recv(sdev, ch, ch->ioctx_recv_ring[i]);

	atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr);

	pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n",
@@ -1818,6 +1840,10 @@ static void srpt_release_channel_work(struct work_struct *w)
			     ch->sport->sdev, ch->rq_size,
			     ch->rsp_size, DMA_TO_DEVICE);

	srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
			     sdev, ch->rq_size,
			     srp_max_req_size, DMA_FROM_DEVICE);

	mutex_lock(&sdev->mutex);
	list_del_init(&ch->list);
	if (ch->release_done)
@@ -1975,6 +2001,19 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
		ch->ioctx_ring[i]->ch = ch;
		list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list);
	}
	if (!sdev->use_srq) {
		ch->ioctx_recv_ring = (struct srpt_recv_ioctx **)
			srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
					      sizeof(*ch->ioctx_recv_ring[0]),
					      srp_max_req_size,
					      DMA_FROM_DEVICE);
		if (!ch->ioctx_recv_ring) {
			pr_err("rejected SRP_LOGIN_REQ because creating a new QP RQ ring failed.\n");
			rej->reason =
			    cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
			goto free_ring;
		}
	}

	ret = srpt_create_ch_ib(ch);
	if (ret) {
@@ -1982,7 +2021,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
			      SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
		pr_err("rejected SRP_LOGIN_REQ because creating"
		       " a new RDMA channel failed.\n");
		goto free_ring;
		goto free_recv_ring;
	}

	ret = srpt_ch_qp_rtr(ch, ch->qp);
@@ -2073,6 +2112,11 @@ release_channel:
destroy_ib:
	srpt_destroy_ch_ib(ch);

free_recv_ring:
	srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
			     ch->sport->sdev, ch->rq_size,
			     srp_max_req_size, DMA_FROM_DEVICE);

free_ring:
	srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
			     ch->sport->sdev, ch->rq_size,
@@ -2502,20 +2546,38 @@ static void srpt_add_one(struct ib_device *device)
	srq_attr.attr.srq_limit = 0;
	srq_attr.srq_type = IB_SRQT_BASIC;

	sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
	if (IS_ERR(sdev->srq))
		goto err_pd;
	sdev->srq = sdev->port[0].port_attrib.use_srq ?
		ib_create_srq(sdev->pd, &srq_attr) : ERR_PTR(-ENOTSUPP);
	if (IS_ERR(sdev->srq)) {
		pr_debug("ib_create_srq() failed: %ld\n", PTR_ERR(sdev->srq));

	pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
		 __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr,
		/* SRQ not supported. */
		sdev->use_srq = false;
	} else {
		pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n",
			 sdev->srq_size, sdev->device->attrs.max_srq_wr,
			 device->name);

		sdev->use_srq = true;

		sdev->ioctx_ring = (struct srpt_recv_ioctx **)
			srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
					      sizeof(*sdev->ioctx_ring[0]),
					      srp_max_req_size,
					      DMA_FROM_DEVICE);
		if (!sdev->ioctx_ring)
			goto err_pd;

		for (i = 0; i < sdev->srq_size; ++i)
			srpt_post_recv(sdev, NULL, sdev->ioctx_ring[i]);
	}

	if (!srpt_service_guid)
		srpt_service_guid = be64_to_cpu(device->node_guid);

	sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
	if (IS_ERR(sdev->cm_id))
		goto err_srq;
		goto err_ring;

	/* print out target login information */
	pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,"
@@ -2535,16 +2597,6 @@ static void srpt_add_one(struct ib_device *device)
			      srpt_event_handler);
	ib_register_event_handler(&sdev->event_handler);

	sdev->ioctx_ring = (struct srpt_recv_ioctx **)
		srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
				      sizeof(*sdev->ioctx_ring[0]),
				      srp_max_req_size, DMA_FROM_DEVICE);
	if (!sdev->ioctx_ring)
		goto err_event;

	for (i = 0; i < sdev->srq_size; ++i)
		srpt_post_recv(sdev, sdev->ioctx_ring[i]);

	WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));

	for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
@@ -2554,12 +2606,13 @@ static void srpt_add_one(struct ib_device *device)
		sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
		sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE;
		sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE;
		sport->port_attrib.use_srq = false;
		INIT_WORK(&sport->work, srpt_refresh_port_work);

		if (srpt_refresh_port(sport)) {
			pr_err("MAD registration failed for %s-%d.\n",
			       sdev->device->name, i);
			goto err_ring;
			goto err_event;
		}
	}

@@ -2572,16 +2625,16 @@ out:
	pr_debug("added %s.\n", device->name);
	return;

err_ring:
	srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
			     sdev->srq_size, srp_max_req_size,
			     DMA_FROM_DEVICE);
err_event:
	ib_unregister_event_handler(&sdev->event_handler);
err_cm:
	ib_destroy_cm_id(sdev->cm_id);
err_srq:
err_ring:
	if (sdev->use_srq)
		ib_destroy_srq(sdev->srq);
	srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
			     sdev->srq_size, srp_max_req_size,
			     DMA_FROM_DEVICE);
err_pd:
	ib_dealloc_pd(sdev->pd);
free_dev:
@@ -2625,12 +2678,12 @@ static void srpt_remove_one(struct ib_device *device, void *client_data)
	spin_unlock(&srpt_dev_lock);
	srpt_release_sdev(sdev);

	if (sdev->use_srq)
		ib_destroy_srq(sdev->srq);
	ib_dealloc_pd(sdev->pd);

	srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
			     sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE);
	sdev->ioctx_ring = NULL;
	ib_dealloc_pd(sdev->pd);

	kfree(sdev);
}

@@ -2928,14 +2981,43 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item,
	return count;
}

static ssize_t srpt_tpg_attrib_use_srq_show(struct config_item *item,
					    char *page)
{
	struct se_portal_group *se_tpg = attrib_to_tpg(item);
	struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);

	return sprintf(page, "%d\n", sport->port_attrib.use_srq);
}

static ssize_t srpt_tpg_attrib_use_srq_store(struct config_item *item,
					     const char *page, size_t count)
{
	struct se_portal_group *se_tpg = attrib_to_tpg(item);
	struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
	unsigned long val;
	int ret;

	ret = kstrtoul(page, 0, &val);
	if (ret < 0)
		return ret;
	if (val != !!val)
		return -EINVAL;
	sport->port_attrib.use_srq = val;

	return count;
}

CONFIGFS_ATTR(srpt_tpg_attrib_,  srp_max_rdma_size);
CONFIGFS_ATTR(srpt_tpg_attrib_,  srp_max_rsp_size);
CONFIGFS_ATTR(srpt_tpg_attrib_,  srp_sq_size);
CONFIGFS_ATTR(srpt_tpg_attrib_,  use_srq);

static struct configfs_attribute *srpt_tpg_attrib_attrs[] = {
	&srpt_tpg_attrib_attr_srp_max_rdma_size,
	&srpt_tpg_attrib_attr_srp_max_rsp_size,
	&srpt_tpg_attrib_attr_srp_sq_size,
	&srpt_tpg_attrib_attr_use_srq,
	NULL,
};

drivers/infiniband/ulp/srpt/ib_srpt.h: +6 −0
@@ -252,6 +252,7 @@ enum rdma_ch_state {
 * @free_list:     Head of list with free send I/O contexts.
 * @state:         channel state. See also enum rdma_ch_state.
 * @ioctx_ring:    Send ring.
 * @ioctx_recv_ring: Receive I/O context ring.
 * @list:          Node for insertion in the srpt_device.rch_list list.
 * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
 *                 list contains struct srpt_ioctx elements and is protected
@@ -281,6 +282,7 @@ struct srpt_rdma_ch {
	struct list_head	free_list;
	enum rdma_ch_state	state;
	struct srpt_send_ioctx	**ioctx_ring;
	struct srpt_recv_ioctx	**ioctx_recv_ring;
	struct list_head	list;
	struct list_head	cmd_wait_list;
	struct se_session	*sess;
@@ -295,11 +297,13 @@ struct srpt_rdma_ch {
 * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections.
 * @srp_max_rsp_size: Maximum size of SRP response messages in bytes.
 * @srp_sq_size: Shared receive queue (SRQ) size.
 * @use_srq: Whether or not to use SRQ.
 */
struct srpt_port_attrib {
	u32			srp_max_rdma_size;
	u32			srp_max_rsp_size;
	u32			srp_sq_size;
	bool			use_srq;
};

/**
@@ -347,6 +351,7 @@ struct srpt_port {
 * @srq:           Per-HCA SRQ (shared receive queue).
 * @cm_id:         Connection identifier.
 * @srq_size:      SRQ size.
 * @use_srq:       Whether or not to use SRQ.
 * @ioctx_ring:    Per-HCA SRQ.
 * @rch_list:      Per-device channel list -- see also srpt_rdma_ch.list.
 * @ch_releaseQ:   Enables waiting for removal from rch_list.
@@ -362,6 +367,7 @@ struct srpt_device {
	struct ib_srq		*srq;
	struct ib_cm_id		*cm_id;
	int			srq_size;
	bool			use_srq;
	struct srpt_recv_ioctx	**ioctx_ring;
	struct list_head	rch_list;
	wait_queue_head_t	ch_releaseQ;