Commit 93aa8e0a authored by Chuck Lever, committed by Anna Schumaker

xprtrdma: Merge struct rpcrdma_ia into struct rpcrdma_ep



I eventually want to allocate rpcrdma_ep separately from struct
rpcrdma_xprt so that on occasion there can be more than one ep per
xprt.

The new struct rpcrdma_ep will contain all the fields currently in
rpcrdma_ia and in rpcrdma_ep: the device and CM settings for the
connection, along with the per-connection settings negotiated with
the remote peer.

Take this opportunity to rename the existing ep fields from rep_* to
re_* to disambiguate these from struct rpcrdma_rep.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
parent d6ccebf9
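For orientation, here is a rough sketch of the merged endpoint, assembled purely from the fields visible in the diffs below. The field types are inferred from the old rpcrdma_ia/rpcrdma_ep members, and the real definition in xprt_rdma.h carries additional members, so treat this as illustrative:

/* Illustrative sketch only -- not the verbatim kernel definition. */
struct rpcrdma_ep {
	struct rdma_cm_id	*re_id;			/* was ia->ri_id */
	struct ib_pd		*re_pd;			/* was ia->ri_pd */
	struct ib_qp_init_attr	re_attr;		/* was ep->rep_attr */
	enum ib_mr_type		re_mrtype;		/* was ia->ri_mrtype */
	unsigned int		re_max_fr_depth;	/* was ia->ri_max_frwr_depth */
	unsigned int		re_max_rdma_segs;	/* was ia->ri_max_rdma_segs */
	unsigned int		re_max_requests;	/* was ep->rep_max_requests */
	unsigned int		re_inline_send;		/* was ep->rep_inline_send */
	unsigned int		re_inline_recv;		/* was ep->rep_inline_recv */
	unsigned int		re_max_inline_send;	/* was ep->rep_max_inline_send */
	unsigned int		re_max_inline_recv;	/* was ep->rep_max_inline_recv */
	int			re_connect_status;	/* was ep->rep_connected */
	int			re_receive_count;	/* was ep->rep_receive_count */
	bool			re_implicit_roundup;	/* was ia->ri_implicit_roundup */
};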
include/trace/events/rpcrdma.h +6 −6
@@ -104,7 +104,7 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class,
	TP_fast_assign(
		__entry->r_xprt = r_xprt;
		__entry->rc = rc;
-		__entry->connect_status = r_xprt->rx_ep.rep_connected;
+		__entry->connect_status = r_xprt->rx_ep.re_connect_status;
		__assign_str(addr, rpcrdma_addrstr(r_xprt));
		__assign_str(port, rpcrdma_portstr(r_xprt));
	),
@@ -394,10 +394,10 @@ TRACE_EVENT(xprtrdma_inline_thresh,
		const struct rpcrdma_ep *ep = &r_xprt->rx_ep;

		__entry->r_xprt = r_xprt;
-		__entry->inline_send = ep->rep_inline_send;
-		__entry->inline_recv = ep->rep_inline_recv;
-		__entry->max_send = ep->rep_max_inline_send;
-		__entry->max_recv = ep->rep_max_inline_recv;
+		__entry->inline_send = ep->re_inline_send;
+		__entry->inline_recv = ep->re_inline_recv;
+		__entry->max_send = ep->re_max_inline_send;
+		__entry->max_recv = ep->re_max_inline_recv;
		__assign_str(addr, rpcrdma_addrstr(r_xprt));
		__assign_str(port, rpcrdma_portstr(r_xprt));
	),
@@ -803,7 +803,7 @@ TRACE_EVENT(xprtrdma_post_recvs,
		__entry->r_xprt = r_xprt;
		__entry->count = count;
		__entry->status = status;
-		__entry->posted = r_xprt->rx_ep.rep_receive_count;
+		__entry->posted = r_xprt->rx_ep.re_receive_count;
		__assign_str(addr, rpcrdma_addrstr(r_xprt));
		__assign_str(port, rpcrdma_portstr(r_xprt));
	),
net/sunrpc/xprtrdma/backchannel.c +2 −2
@@ -47,7 +47,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	size_t maxmsg;

-	maxmsg = min_t(unsigned int, ep->rep_inline_send, ep->rep_inline_recv);
+	maxmsg = min_t(unsigned int, ep->re_inline_send, ep->re_inline_recv);
	maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE);
	return maxmsg - RPCRDMA_HDRLEN_MIN;
}
@@ -190,7 +190,7 @@ create_req:
	if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS)
		return NULL;

-	size = min_t(size_t, r_xprt->rx_ep.rep_inline_recv, PAGE_SIZE);
+	size = min_t(size_t, r_xprt->rx_ep.re_inline_recv, PAGE_SIZE);
	req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL);
	if (!req)
		return NULL;
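To make the backchannel payload arithmetic in xprt_rdma_bc_maxpayload() concrete, here is a worked example with assumed values; both inline sizes are negotiated per connection, so these numbers are illustrative only:

/* Assumed: re_inline_send = 4096, re_inline_recv = 1024, PAGE_SIZE = 4096.
 * maxmsg = min(4096, 1024)      -> 1024
 * maxmsg = min(1024, PAGE_SIZE) -> 1024
 * max backchannel payload = 1024 - RPCRDMA_HDRLEN_MIN
 */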
net/sunrpc/xprtrdma/frwr_ops.c +51 −57
@@ -74,7 +74,7 @@ static void frwr_mr_recycle(struct rpcrdma_mr *mr)

	if (mr->mr_dir != DMA_NONE) {
		trace_xprtrdma_mr_unmap(mr);
-		ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
+		ib_dma_unmap_sg(r_xprt->rx_ep.re_id->device,
				mr->mr_sg, mr->mr_nents, mr->mr_dir);
		mr->mr_dir = DMA_NONE;
	}
@@ -115,13 +115,13 @@ void frwr_reset(struct rpcrdma_req *req)
 */
int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
{
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	unsigned int depth = ia->ri_max_frwr_depth;
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	unsigned int depth = ep->re_max_fr_depth;
	struct scatterlist *sg;
	struct ib_mr *frmr;
	int rc;

-	frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
+	frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth);
	if (IS_ERR(frmr))
		goto out_mr_err;

@@ -151,29 +151,24 @@ out_list_err:

/**
 * frwr_query_device - Prepare a transport for use with FRWR
- * @r_xprt: controlling transport instance
+ * @ep: endpoint to fill in
 * @device: RDMA device to query
 *
 * On success, sets:
- *	ep->rep_attr
- *	ep->rep_max_requests
- *	ia->ri_max_rdma_segs
- *
- * And these FRWR-related fields:
- *	ia->ri_max_frwr_depth
- *	ia->ri_mrtype
+ *	ep->re_attr
+ *	ep->re_max_requests
+ *	ep->re_max_rdma_segs
+ *	ep->re_max_fr_depth
+ *	ep->re_mrtype
 *
 * Return values:
 *   On success, returns zero.
 *   %-EINVAL - the device does not support FRWR memory registration
 *   %-ENOMEM - the device is not sufficiently capable for NFS/RDMA
 */
-int frwr_query_device(struct rpcrdma_xprt *r_xprt,
-		      const struct ib_device *device)
+int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)
{
	const struct ib_device_attr *attrs = &device->attrs;
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	int max_qp_wr, depth, delta;
	unsigned int max_sge;

@@ -190,23 +185,23 @@ int frwr_query_device(struct rpcrdma_xprt *r_xprt,
		pr_err("rpcrdma: HCA provides only %u send SGEs\n", max_sge);
		return -ENOMEM;
	}
-	ep->rep_attr.cap.max_send_sge = max_sge;
-	ep->rep_attr.cap.max_recv_sge = 1;
+	ep->re_attr.cap.max_send_sge = max_sge;
+	ep->re_attr.cap.max_recv_sge = 1;

-	ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
+	ep->re_mrtype = IB_MR_TYPE_MEM_REG;
	if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
-		ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
+		ep->re_mrtype = IB_MR_TYPE_SG_GAPS;

	/* Quirk: Some devices advertise a large max_fast_reg_page_list_len
	 * capability, but perform optimally when the MRs are not larger
	 * than a page.
	 */
	if (attrs->max_sge_rd > RPCRDMA_MAX_HDR_SEGS)
-		ia->ri_max_frwr_depth = attrs->max_sge_rd;
+		ep->re_max_fr_depth = attrs->max_sge_rd;
	else
-		ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len;
-	if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS)
-		ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS;
+		ep->re_max_fr_depth = attrs->max_fast_reg_page_list_len;
+	if (ep->re_max_fr_depth > RPCRDMA_MAX_DATA_SEGS)
+		ep->re_max_fr_depth = RPCRDMA_MAX_DATA_SEGS;

	/* Add room for frwr register and invalidate WRs.
	 * 1. FRWR reg WR for head
@@ -222,11 +217,11 @@ int frwr_query_device(struct rpcrdma_xprt *r_xprt,
	/* Calculate N if the device max FRWR depth is smaller than
	 * RPCRDMA_MAX_DATA_SEGS.
	 */
-	if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
-		delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
+	if (ep->re_max_fr_depth < RPCRDMA_MAX_DATA_SEGS) {
+		delta = RPCRDMA_MAX_DATA_SEGS - ep->re_max_fr_depth;
		do {
			depth += 2; /* FRWR reg + invalidate */
-			delta -= ia->ri_max_frwr_depth;
+			delta -= ep->re_max_fr_depth;
		} while (delta > 0);
	}

@@ -235,34 +230,34 @@ int frwr_query_device(struct rpcrdma_xprt *r_xprt,
	max_qp_wr -= 1;
	if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
		return -ENOMEM;
-	if (ep->rep_max_requests > max_qp_wr)
-		ep->rep_max_requests = max_qp_wr;
-	ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
-	if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
-		ep->rep_max_requests = max_qp_wr / depth;
-		if (!ep->rep_max_requests)
+	if (ep->re_max_requests > max_qp_wr)
+		ep->re_max_requests = max_qp_wr;
+	ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth;
+	if (ep->re_attr.cap.max_send_wr > max_qp_wr) {
+		ep->re_max_requests = max_qp_wr / depth;
+		if (!ep->re_max_requests)
			return -ENOMEM;
-		ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
+		ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth;
	}
-	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
-	ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
-	ep->rep_attr.cap.max_recv_wr = ep->rep_max_requests;
-	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
-	ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
-
-	ia->ri_max_rdma_segs =
-		DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ia->ri_max_frwr_depth);
+	ep->re_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
+	ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
+	ep->re_attr.cap.max_recv_wr = ep->re_max_requests;
+	ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+	ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
+
+	ep->re_max_rdma_segs =
+		DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ep->re_max_fr_depth);
	/* Reply chunks require segments for head and tail buffers */
-	ia->ri_max_rdma_segs += 2;
-	if (ia->ri_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS)
-		ia->ri_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS;
+	ep->re_max_rdma_segs += 2;
+	if (ep->re_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS)
+		ep->re_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS;

	/* Ensure the underlying device is capable of conveying the
	 * largest r/wsize NFS will ask for. This guarantees that
	 * failing over from one RDMA device to another will not
	 * break NFS I/O.
	 */
-	if ((ia->ri_max_rdma_segs * ia->ri_max_frwr_depth) < RPCRDMA_MAX_SEGS)
+	if ((ep->re_max_rdma_segs * ep->re_max_fr_depth) < RPCRDMA_MAX_SEGS)
		return -ENOMEM;

	return 0;
@@ -288,14 +283,14 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
				int nsegs, bool writing, __be32 xid,
				struct rpcrdma_mr *mr)
{
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	struct ib_reg_wr *reg_wr;
	int i, n, dma_nents;
	struct ib_mr *ibmr;
	u8 key;

-	if (nsegs > ia->ri_max_frwr_depth)
-		nsegs = ia->ri_max_frwr_depth;
+	if (nsegs > ep->re_max_fr_depth)
+		nsegs = ep->re_max_fr_depth;
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mr->mr_sg[i],
@@ -308,7 +303,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,

		++seg;
		++i;
-		if (ia->ri_mrtype == IB_MR_TYPE_SG_GAPS)
+		if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS)
			continue;
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
@@ -317,7 +312,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
	mr->mr_dir = rpcrdma_data_dir(writing);
	mr->mr_nents = i;

-	dma_nents = ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, mr->mr_nents,
+	dma_nents = ib_dma_map_sg(ep->re_id->device, mr->mr_sg, mr->mr_nents,
				  mr->mr_dir);
	if (!dma_nents)
		goto out_dmamap_err;
@@ -391,7 +386,6 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
 */
int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct ib_send_wr *post_wr;
	struct rpcrdma_mr *mr;

@@ -411,7 +405,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
		post_wr = &frwr->fr_regwr.wr;
	}

-	return ib_post_send(ia->ri_id->qp, post_wr, NULL);
+	return ib_post_send(r_xprt->rx_ep.re_id->qp, post_wr, NULL);
}

/**
@@ -538,10 +532,10 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
-	 * unless ri_id->qp is a valid pointer.
+	 * unless re_id->qp is a valid pointer.
	 */
	bad_wr = NULL;
-	rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
+	rc = ib_post_send(r_xprt->rx_ep.re_id->qp, first, &bad_wr);

	/* The final LOCAL_INV WR in the chain is supposed to
	 * do the wake. If it was never posted, the wake will
@@ -643,10 +637,10 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
-	 * unless ri_id->qp is a valid pointer.
+	 * unless re_id->qp is a valid pointer.
	 */
	bad_wr = NULL;
-	rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
+	rc = ib_post_send(r_xprt->rx_ep.re_id->qp, first, &bad_wr);
	if (!rc)
		return;

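The send-queue sizing in frwr_query_device() can be read as a small pure function. The sketch below mirrors the loop visible in the hunk above; BASE_DEPTH and the RPCRDMA_MAX_DATA_SEGS value are stand-ins (the real code derives the base depth from the fixed register/invalidate WRs each request needs), so treat this as a model of the math rather than the kernel code:

/* Model of the WR-budget math in frwr_query_device(). */
#define RPCRDMA_MAX_DATA_SEGS	64	/* illustrative value */
#define BASE_DEPTH		7	/* assumed per-request WR count */

static unsigned int frwr_send_depth(unsigned int max_fr_depth)
{
	unsigned int depth = BASE_DEPTH;
	int delta;

	/* When one MR cannot cover RPCRDMA_MAX_DATA_SEGS segments, each
	 * additional MR costs one FRWR reg WR plus one invalidate WR.
	 */
	if (max_fr_depth < RPCRDMA_MAX_DATA_SEGS) {
		delta = RPCRDMA_MAX_DATA_SEGS - max_fr_depth;
		do {
			depth += 2;	/* FRWR reg + invalidate */
			delta -= max_fr_depth;
		} while (delta > 0);
	}
	return depth;
}

re_attr.cap.max_send_wr is then re_max_requests * depth, clamped against the device's max_qp_wr, plus extra slots for backchannel WRs and one for ib_drain_sq.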
net/sunrpc/xprtrdma/rpc_rdma.c +15 −16
@@ -103,21 +103,20 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)

/**
 * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
- * @r_xprt: transport instance to initialize
+ * @ep: endpoint to initialize
 *
 * The max_inline fields contain the maximum size of an RPC message
 * so the marshaling code doesn't have to repeat this calculation
 * for every RPC.
 */
-void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
+void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep)
{
-	unsigned int maxsegs = r_xprt->rx_ia.ri_max_rdma_segs;
-	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	unsigned int maxsegs = ep->re_max_rdma_segs;

-	ep->rep_max_inline_send =
-		ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs);
-	ep->rep_max_inline_recv =
-		ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
+	ep->re_max_inline_send =
+		ep->re_inline_send - rpcrdma_max_call_header_size(maxsegs);
+	ep->re_max_inline_recv =
+		ep->re_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}

/* The client can send a request inline as long as the RPCRDMA header
@@ -134,7 +133,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
	struct xdr_buf *xdr = &rqst->rq_snd_buf;
	unsigned int count, remaining, offset;

-	if (xdr->len > r_xprt->rx_ep.rep_max_inline_send)
+	if (xdr->len > r_xprt->rx_ep.re_max_inline_send)
		return false;

	if (xdr->page_len) {
@@ -145,7 +144,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
			remaining -= min_t(unsigned int,
					   PAGE_SIZE - offset, remaining);
			offset = 0;
-			if (++count > r_xprt->rx_ep.rep_attr.cap.max_send_sge)
+			if (++count > r_xprt->rx_ep.re_attr.cap.max_send_sge)
				return false;
		}
	}
@@ -162,7 +161,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
				   struct rpc_rqst *rqst)
{
-	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv;
+	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.re_max_inline_recv;
}

/* The client is required to provide a Reply chunk if the maximum
@@ -176,7 +175,7 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
	const struct xdr_buf *buf = &rqst->rq_rcv_buf;

	return (buf->head[0].iov_len + buf->tail[0].iov_len) <
-		r_xprt->rx_ep.rep_max_inline_recv;
+		r_xprt->rx_ep.re_max_inline_recv;
}

/* Split @vec on page boundaries into SGEs. FMR registers pages, not
@@ -255,7 +254,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
	/* When encoding a Read chunk, the tail iovec contains an
	 * XDR pad and may be omitted.
	 */
-	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
+	if (type == rpcrdma_readch && r_xprt->rx_ep.re_implicit_roundup)
		goto out;

	/* When encoding a Write chunk, some servers need to see an
@@ -263,7 +262,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
	 * layer provides space in the tail iovec that may be used
	 * for this purpose.
	 */
-	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
+	if (type == rpcrdma_writech && r_xprt->rx_ep.re_implicit_roundup)
		goto out;

	if (xdrbuf->tail[0].iov_len)
@@ -1476,8 +1475,8 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)

	if (credits == 0)
		credits = 1;	/* don't deadlock */
-	else if (credits > r_xprt->rx_ep.rep_max_requests)
-		credits = r_xprt->rx_ep.rep_max_requests;
+	else if (credits > r_xprt->rx_ep.re_max_requests)
+		credits = r_xprt->rx_ep.re_max_requests;
	if (buf->rb_credits != credits)
		rpcrdma_update_cwnd(r_xprt, credits);
	rpcrdma_post_recvs(r_xprt, false);
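The thresholds computed in rpcrdma_set_max_header_sizes() feed the inline predicates above. A worked example with an assumed header size (the real value depends on maxsegs via rpcrdma_max_call_header_size(), so these numbers are illustrative):

/* Assumed: re_inline_send = 4096, worst-case call header = 140 bytes.
 *   re_max_inline_send = 4096 - 140 = 3956
 * rpcrdma_args_inline() then sends any rq_snd_buf of <= 3956 bytes
 * inline; larger arguments are conveyed via chunks.
 */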
net/sunrpc/xprtrdma/transport.c +6 −3
@@ -238,11 +238,12 @@ xprt_rdma_connect_worker(struct work_struct *work)
	struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
						   rx_connect_worker.work);
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	int rc;

	rc = rpcrdma_xprt_connect(r_xprt);
	xprt_clear_connecting(xprt);
-	if (r_xprt->rx_ep.rep_connected > 0) {
+	if (ep->re_connect_status > 0) {
		xprt->stat.connect_count++;
		xprt->stat.connect_time += (long)jiffies -
					   xprt->stat.connect_start;
@@ -265,7 +266,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

	trace_xprtrdma_op_inject_dsc(r_xprt);
-	rdma_disconnect(r_xprt->rx_ia.ri_id);
+	rdma_disconnect(r_xprt->rx_ep.re_id);
}

/**
@@ -355,6 +356,7 @@ xprt_setup_rdma(struct xprt_create *args)

	INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
			  xprt_rdma_connect_worker);
+
	xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;

	dprintk("RPC:       %s: %s:%s\n", __func__,
@@ -489,10 +491,11 @@ static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	unsigned long delay;

	delay = 0;
-	if (r_xprt->rx_ep.rep_connected != 0) {
+	if (ep->re_connect_status != 0) {
		delay = xprt_reconnect_delay(xprt);
		xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO);
	}