Commit f764a1e1 authored by Trond Myklebust

Merge tag 'nfs-rdma-for-5.7-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

NFSoRDMA Client Updates for Linux 5.7

New Features:
- Allow one active connection and several zombie connections to prevent
  blocking if the remote server is unresponsive.

Bugfixes and Cleanups:
- Enhance MR-related trace points
- Refactor connection set-up and disconnect functions
- Make Protection Domains per-connection instead of per-transport
- Merge struct rpcrdma_ia into rpcrdma_ep (see the sketch below)
parents 1de3af98 e28ce900
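
To make the last cleanup item concrete, here is a rough before/after sketch of the rpcrdma_ia/rpcrdma_ep merge, assembled from the hunks below. The struct layouts are abbreviated and illustrative, not the full definitions (those live in net/sunrpc/xprtrdma/xprt_rdma.h, which is not part of the visible diff); the call-site lines are quoted verbatim from the frwr_ops.c hunks.

/* Before (Linux 5.6): connection state is split across two structs
 * embedded directly in the transport, so it cannot outlive the transport.
 */
struct rpcrdma_xprt {
	struct rpc_xprt rx_xprt;
	struct rpcrdma_ia rx_ia;	/* ri_id, ri_pd, ri_mrtype, ri_max_frwr_depth, ... */
	struct rpcrdma_ep rx_ep;	/* rep_attr, rep_max_requests, rep_inline_send, ... */
	/* ... */
};

/* After (Linux 5.7): a single rpcrdma_ep carries all of it and is held by
 * pointer, so each reconnect can install a fresh endpoint (with its own
 * per-connection Protection Domain) while "zombie" connections drain on
 * their own.
 */
struct rpcrdma_xprt {
	struct rpc_xprt rx_xprt;
	struct rpcrdma_ep *rx_ep;	/* re_id, re_pd, re_mrtype, re_max_fr_depth, re_attr, ... */
	/* ... */
};

/* Call sites then change from dereferencing two structs:
 *
 *	frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
 *	rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
 *
 * to going through the endpoint alone:
 *
 *	frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth);
 *	rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);
 */
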
+63 −90
@@ -104,12 +104,12 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class,
	TP_fast_assign(
		__entry->r_xprt = r_xprt;
		__entry->rc = rc;
-		__entry->connect_status = r_xprt->rx_ep.rep_connected;
+		__entry->connect_status = r_xprt->rx_ep->re_connect_status;
		__assign_str(addr, rpcrdma_addrstr(r_xprt));
		__assign_str(port, rpcrdma_portstr(r_xprt));
	),

	TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connect status=%d",
	TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connection status=%d",
		__get_str(addr), __get_str(port), __entry->r_xprt,
		__entry->rc, __entry->connect_status
	)
@@ -228,20 +228,20 @@ DECLARE_EVENT_CLASS(xprtrdma_frwr_done,
	TP_ARGS(wc, frwr),

	TP_STRUCT__entry(
-		__field(const void *, mr)
+		__field(u32, mr_id)
		__field(unsigned int, status)
		__field(unsigned int, vendor_err)
	),

	TP_fast_assign(
-		__entry->mr = container_of(frwr, struct rpcrdma_mr, frwr);
+		__entry->mr_id = frwr->fr_mr->res.id;
		__entry->status = wc->status;
		__entry->vendor_err = __entry->status ? wc->vendor_err : 0;
	),

	TP_printk(
		"mr=%p: %s (%u/0x%x)",
		__entry->mr, rdma_show_wc_status(__entry->status),
		"mr.id=%u: %s (%u/0x%x)",
		__entry->mr_id, rdma_show_wc_status(__entry->status),
		__entry->status, __entry->vendor_err
	)
);
@@ -274,7 +274,8 @@ DECLARE_EVENT_CLASS(xprtrdma_mr,
	TP_ARGS(mr),

	TP_STRUCT__entry(
-		__field(const void *, mr)
+		__field(u32, mr_id)
+		__field(int, nents)
		__field(u32, handle)
		__field(u32, length)
		__field(u64, offset)
@@ -282,15 +283,16 @@ DECLARE_EVENT_CLASS(xprtrdma_mr,
	),

	TP_fast_assign(
-		__entry->mr = mr;
+		__entry->mr_id  = mr->frwr.fr_mr->res.id;
+		__entry->nents  = mr->mr_nents;
		__entry->handle = mr->mr_handle;
		__entry->length = mr->mr_length;
		__entry->offset = mr->mr_offset;
		__entry->dir    = mr->mr_dir;
	),

	TP_printk("mr=%p %u@0x%016llx:0x%08x (%s)",
		__entry->mr, __entry->length,
	TP_printk("mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s)",
		__entry->mr_id, __entry->nents, __entry->length,
		(unsigned long long)__entry->offset, __entry->handle,
		xprtrdma_show_direction(__entry->dir)
	)
@@ -340,68 +342,37 @@ DECLARE_EVENT_CLASS(xprtrdma_cb_event,
 ** Connection events
 **/

-TRACE_EVENT(xprtrdma_cm_event,
-	TP_PROTO(
-		const struct rpcrdma_xprt *r_xprt,
-		struct rdma_cm_event *event
-	),
-
-	TP_ARGS(r_xprt, event),
-
-	TP_STRUCT__entry(
-		__field(const void *, r_xprt)
-		__field(unsigned int, event)
-		__field(int, status)
-		__string(addr, rpcrdma_addrstr(r_xprt))
-		__string(port, rpcrdma_portstr(r_xprt))
-	),
-
-	TP_fast_assign(
-		__entry->r_xprt = r_xprt;
-		__entry->event = event->event;
-		__entry->status = event->status;
-		__assign_str(addr, rpcrdma_addrstr(r_xprt));
-		__assign_str(port, rpcrdma_portstr(r_xprt));
-	),
-
-	TP_printk("peer=[%s]:%s r_xprt=%p: %s (%u/%d)",
-		__get_str(addr), __get_str(port),
-		__entry->r_xprt, rdma_show_cm_event(__entry->event),
-		__entry->event, __entry->status
-	)
-);

TRACE_EVENT(xprtrdma_inline_thresh,
	TP_PROTO(
-		const struct rpcrdma_xprt *r_xprt
+		const struct rpcrdma_ep *ep
	),

-	TP_ARGS(r_xprt),
+	TP_ARGS(ep),

	TP_STRUCT__entry(
-		__field(const void *, r_xprt)
		__field(unsigned int, inline_send)
		__field(unsigned int, inline_recv)
		__field(unsigned int, max_send)
		__field(unsigned int, max_recv)
-		__string(addr, rpcrdma_addrstr(r_xprt))
-		__string(port, rpcrdma_portstr(r_xprt))
+		__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+		__array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
	),

	TP_fast_assign(
-		const struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+		const struct rdma_cm_id *id = ep->re_id;

-		__entry->r_xprt = r_xprt;
-		__entry->inline_send = ep->rep_inline_send;
-		__entry->inline_recv = ep->rep_inline_recv;
-		__entry->max_send = ep->rep_max_inline_send;
-		__entry->max_recv = ep->rep_max_inline_recv;
-		__assign_str(addr, rpcrdma_addrstr(r_xprt));
-		__assign_str(port, rpcrdma_portstr(r_xprt));
+		__entry->inline_send = ep->re_inline_send;
+		__entry->inline_recv = ep->re_inline_recv;
+		__entry->max_send = ep->re_max_inline_send;
+		__entry->max_recv = ep->re_max_inline_recv;
+		memcpy(__entry->srcaddr, &id->route.addr.src_addr,
+		       sizeof(struct sockaddr_in6));
+		memcpy(__entry->dstaddr, &id->route.addr.dst_addr,
+		       sizeof(struct sockaddr_in6));
	),

	TP_printk("peer=[%s]:%s r_xprt=%p neg send/recv=%u/%u, calc send/recv=%u/%u",
		__get_str(addr), __get_str(port), __entry->r_xprt,
	TP_printk("%pISpc -> %pISpc neg send/recv=%u/%u, calc send/recv=%u/%u",
		__entry->srcaddr, __entry->dstaddr,
		__entry->inline_send, __entry->inline_recv,
		__entry->max_send, __entry->max_recv
	)
@@ -409,11 +380,10 @@ TRACE_EVENT(xprtrdma_inline_thresh,

DEFINE_CONN_EVENT(connect);
DEFINE_CONN_EVENT(disconnect);
+DEFINE_CONN_EVENT(flush_dct);

DEFINE_RXPRT_EVENT(xprtrdma_create);
DEFINE_RXPRT_EVENT(xprtrdma_op_destroy);
-DEFINE_RXPRT_EVENT(xprtrdma_remove);
-DEFINE_RXPRT_EVENT(xprtrdma_reinsert);
DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc);
DEFINE_RXPRT_EVENT(xprtrdma_op_close);
DEFINE_RXPRT_EVENT(xprtrdma_op_setport);
@@ -480,32 +450,33 @@ TRACE_EVENT(xprtrdma_op_set_cto,

TRACE_EVENT(xprtrdma_qp_event,
	TP_PROTO(
-		const struct rpcrdma_xprt *r_xprt,
+		const struct rpcrdma_ep *ep,
		const struct ib_event *event
	),

-	TP_ARGS(r_xprt, event),
+	TP_ARGS(ep, event),

	TP_STRUCT__entry(
-		__field(const void *, r_xprt)
-		__field(unsigned int, event)
+		__field(unsigned long, event)
		__string(name, event->device->name)
-		__string(addr, rpcrdma_addrstr(r_xprt))
-		__string(port, rpcrdma_portstr(r_xprt))
+		__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+		__array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
	),

	TP_fast_assign(
-		__entry->r_xprt = r_xprt;
+		const struct rdma_cm_id *id = ep->re_id;

		__entry->event = event->event;
		__assign_str(name, event->device->name);
-		__assign_str(addr, rpcrdma_addrstr(r_xprt));
-		__assign_str(port, rpcrdma_portstr(r_xprt));
+		memcpy(__entry->srcaddr, &id->route.addr.src_addr,
+		       sizeof(struct sockaddr_in6));
+		memcpy(__entry->dstaddr, &id->route.addr.dst_addr,
+		       sizeof(struct sockaddr_in6));
	),

	TP_printk("peer=[%s]:%s r_xprt=%p: dev %s: %s (%u)",
		__get_str(addr), __get_str(port), __entry->r_xprt,
		__get_str(name), rdma_show_ib_event(__entry->event),
		__entry->event
	TP_printk("%pISpc -> %pISpc device=%s %s (%lu)",
		__entry->srcaddr, __entry->dstaddr, __get_str(name),
		rdma_show_ib_event(__entry->event), __entry->event
	)
);

@@ -801,7 +772,7 @@ TRACE_EVENT(xprtrdma_post_recvs,
		__entry->r_xprt = r_xprt;
		__entry->count = count;
		__entry->status = status;
-		__entry->posted = r_xprt->rx_ep.rep_receive_count;
+		__entry->posted = r_xprt->rx_ep->re_receive_count;
		__assign_str(addr, rpcrdma_addrstr(r_xprt));
		__assign_str(port, rpcrdma_portstr(r_xprt));
	),
@@ -920,17 +891,17 @@ TRACE_EVENT(xprtrdma_frwr_alloc,
	TP_ARGS(mr, rc),

	TP_STRUCT__entry(
-		__field(const void *, mr)
+		__field(u32, mr_id)
		__field(int, rc)
	),

	TP_fast_assign(
-		__entry->mr = mr;
+		__entry->mr_id = mr->frwr.fr_mr->res.id;
		__entry->rc = rc;
	),

	TP_printk("mr=%p: rc=%d",
		__entry->mr, __entry->rc
	TP_printk("mr.id=%u: rc=%d",
		__entry->mr_id, __entry->rc
	)
);

@@ -943,7 +914,8 @@ TRACE_EVENT(xprtrdma_frwr_dereg,
	TP_ARGS(mr, rc),

	TP_STRUCT__entry(
-		__field(const void *, mr)
+		__field(u32, mr_id)
+		__field(int, nents)
		__field(u32, handle)
		__field(u32, length)
		__field(u64, offset)
@@ -952,7 +924,8 @@ TRACE_EVENT(xprtrdma_frwr_dereg,
	),

	TP_fast_assign(
-		__entry->mr = mr;
+		__entry->mr_id  = mr->frwr.fr_mr->res.id;
+		__entry->nents  = mr->mr_nents;
		__entry->handle = mr->mr_handle;
		__entry->length = mr->mr_length;
		__entry->offset = mr->mr_offset;
@@ -960,8 +933,8 @@ TRACE_EVENT(xprtrdma_frwr_dereg,
		__entry->rc	= rc;
	),

	TP_printk("mr=%p %u@0x%016llx:0x%08x (%s): rc=%d",
		__entry->mr, __entry->length,
	TP_printk("mr.id=%u nents=%d %u@0x%016llx:0x%08x (%s): rc=%d",
		__entry->mr_id, __entry->nents, __entry->length,
		(unsigned long long)__entry->offset, __entry->handle,
		xprtrdma_show_direction(__entry->dir),
		__entry->rc
@@ -977,21 +950,21 @@ TRACE_EVENT(xprtrdma_frwr_sgerr,
	TP_ARGS(mr, sg_nents),

	TP_STRUCT__entry(
-		__field(const void *, mr)
+		__field(u32, mr_id)
		__field(u64, addr)
		__field(u32, dir)
		__field(int, nents)
	),

	TP_fast_assign(
-		__entry->mr = mr;
+		__entry->mr_id = mr->frwr.fr_mr->res.id;
		__entry->addr = mr->mr_sg->dma_address;
		__entry->dir = mr->mr_dir;
		__entry->nents = sg_nents;
	),

	TP_printk("mr=%p dma addr=0x%llx (%s) sg_nents=%d",
		__entry->mr, __entry->addr,
	TP_printk("mr.id=%u DMA addr=0x%llx (%s) sg_nents=%d",
		__entry->mr_id, __entry->addr,
		xprtrdma_show_direction(__entry->dir),
		__entry->nents
	)
@@ -1006,7 +979,7 @@ TRACE_EVENT(xprtrdma_frwr_maperr,
	TP_ARGS(mr, num_mapped),

	TP_STRUCT__entry(
-		__field(const void *, mr)
+		__field(u32, mr_id)
		__field(u64, addr)
		__field(u32, dir)
		__field(int, num_mapped)
@@ -1014,15 +987,15 @@ TRACE_EVENT(xprtrdma_frwr_maperr,
	),

	TP_fast_assign(
-		__entry->mr = mr;
+		__entry->mr_id = mr->frwr.fr_mr->res.id;
		__entry->addr = mr->mr_sg->dma_address;
		__entry->dir = mr->mr_dir;
		__entry->num_mapped = num_mapped;
		__entry->nents = mr->mr_nents;
	),

	TP_printk("mr=%p dma addr=0x%llx (%s) nents=%d of %d",
		__entry->mr, __entry->addr,
	TP_printk("mr.id=%u DMA addr=0x%llx (%s) nents=%d of %d",
		__entry->mr_id, __entry->addr,
		xprtrdma_show_direction(__entry->dir),
		__entry->num_mapped, __entry->nents
	)
@@ -1031,7 +1004,7 @@ TRACE_EVENT(xprtrdma_frwr_maperr,
DEFINE_MR_EVENT(localinv);
DEFINE_MR_EVENT(map);
DEFINE_MR_EVENT(unmap);
-DEFINE_MR_EVENT(remoteinv);
+DEFINE_MR_EVENT(reminv);
DEFINE_MR_EVENT(recycle);

TRACE_EVENT(xprtrdma_dma_maperr,
+4 −4
@@ -44,10 +44,10 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
-	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	size_t maxmsg;

-	maxmsg = min_t(unsigned int, ep->rep_inline_send, ep->rep_inline_recv);
+	maxmsg = min_t(unsigned int, ep->re_inline_send, ep->re_inline_recv);
	maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE);
	return maxmsg - RPCRDMA_HDRLEN_MIN;
}
@@ -115,7 +115,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
	if (rc < 0)
		goto failed_marshal;

-	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+	if (rpcrdma_post_sends(r_xprt, req))
		goto drop_connection;
	return 0;

@@ -190,7 +190,7 @@ create_req:
	if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS)
		return NULL;

-	size = min_t(size_t, r_xprt->rx_ep.rep_inline_recv, PAGE_SIZE);
+	size = min_t(size_t, r_xprt->rx_ep->re_inline_recv, PAGE_SIZE);
	req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL);
	if (!req)
		return NULL;
+81 −73
@@ -52,7 +52,7 @@

/**
 * frwr_release_mr - Destroy one MR
- * @mr: MR allocated by frwr_init_mr
+ * @mr: MR allocated by frwr_mr_init
 *
 */
void frwr_release_mr(struct rpcrdma_mr *mr)
@@ -74,7 +74,7 @@ static void frwr_mr_recycle(struct rpcrdma_mr *mr)

	if (mr->mr_dir != DMA_NONE) {
		trace_xprtrdma_mr_unmap(mr);
-		ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device,
+		ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device,
				mr->mr_sg, mr->mr_nents, mr->mr_dir);
		mr->mr_dir = DMA_NONE;
	}
@@ -106,21 +106,22 @@ void frwr_reset(struct rpcrdma_req *req)
}

/**
- * frwr_init_mr - Initialize one MR
- * @ia: interface adapter
+ * frwr_mr_init - Initialize one MR
+ * @r_xprt: controlling transport instance
 * @mr: generic MR to prepare for FRWR
 *
 * Returns zero if successful. Otherwise a negative errno
 * is returned.
 */
-int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
+int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
{
-	unsigned int depth = ia->ri_max_frwr_depth;
+	struct rpcrdma_ep *ep = r_xprt->rx_ep;
+	unsigned int depth = ep->re_max_fr_depth;
	struct scatterlist *sg;
	struct ib_mr *frmr;
	int rc;

-	frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
+	frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth);
	if (IS_ERR(frmr))
		goto out_mr_err;

@@ -128,6 +129,7 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
	if (!sg)
		goto out_list_err;

+	mr->mr_xprt = r_xprt;
	mr->frwr.fr_mr = frmr;
	mr->mr_dir = DMA_NONE;
	INIT_LIST_HEAD(&mr->mr_list);
@@ -149,29 +151,24 @@ out_list_err:

/**
 * frwr_query_device - Prepare a transport for use with FRWR
- * @r_xprt: controlling transport instance
+ * @ep: endpoint to fill in
 * @device: RDMA device to query
 *
 * On success, sets:
- *	ep->rep_attr
- *	ep->rep_max_requests
- *	ia->ri_max_rdma_segs
- *
- * And these FRWR-related fields:
- *	ia->ri_max_frwr_depth
- *	ia->ri_mrtype
+ *	ep->re_attr
+ *	ep->re_max_requests
+ *	ep->re_max_rdma_segs
+ *	ep->re_max_fr_depth
+ *	ep->re_mrtype
 *
 * Return values:
 *   On success, returns zero.
 *   %-EINVAL - the device does not support FRWR memory registration
 *   %-ENOMEM - the device is not sufficiently capable for NFS/RDMA
 */
-int frwr_query_device(struct rpcrdma_xprt *r_xprt,
-		      const struct ib_device *device)
+int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)
{
	const struct ib_device_attr *attrs = &device->attrs;
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	int max_qp_wr, depth, delta;
	unsigned int max_sge;

@@ -188,23 +185,23 @@ int frwr_query_device(struct rpcrdma_xprt *r_xprt,
		pr_err("rpcrdma: HCA provides only %u send SGEs\n", max_sge);
		return -ENOMEM;
	}
-	ep->rep_attr.cap.max_send_sge = max_sge;
-	ep->rep_attr.cap.max_recv_sge = 1;
+	ep->re_attr.cap.max_send_sge = max_sge;
+	ep->re_attr.cap.max_recv_sge = 1;

-	ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
+	ep->re_mrtype = IB_MR_TYPE_MEM_REG;
	if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
-		ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
+		ep->re_mrtype = IB_MR_TYPE_SG_GAPS;

	/* Quirk: Some devices advertise a large max_fast_reg_page_list_len
	 * capability, but perform optimally when the MRs are not larger
	 * than a page.
	 */
	if (attrs->max_sge_rd > RPCRDMA_MAX_HDR_SEGS)
-		ia->ri_max_frwr_depth = attrs->max_sge_rd;
+		ep->re_max_fr_depth = attrs->max_sge_rd;
	else
-		ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len;
-	if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS)
-		ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS;
+		ep->re_max_fr_depth = attrs->max_fast_reg_page_list_len;
+	if (ep->re_max_fr_depth > RPCRDMA_MAX_DATA_SEGS)
+		ep->re_max_fr_depth = RPCRDMA_MAX_DATA_SEGS;

	/* Add room for frwr register and invalidate WRs.
	 * 1. FRWR reg WR for head
@@ -220,11 +217,11 @@ int frwr_query_device(struct rpcrdma_xprt *r_xprt,
	/* Calculate N if the device max FRWR depth is smaller than
	 * RPCRDMA_MAX_DATA_SEGS.
	 */
-	if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
-		delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
+	if (ep->re_max_fr_depth < RPCRDMA_MAX_DATA_SEGS) {
+		delta = RPCRDMA_MAX_DATA_SEGS - ep->re_max_fr_depth;
		do {
			depth += 2; /* FRWR reg + invalidate */
-			delta -= ia->ri_max_frwr_depth;
+			delta -= ep->re_max_fr_depth;
		} while (delta > 0);
	}

@@ -233,34 +230,34 @@ int frwr_query_device(struct rpcrdma_xprt *r_xprt,
	max_qp_wr -= 1;
	if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
		return -ENOMEM;
-	if (ep->rep_max_requests > max_qp_wr)
-		ep->rep_max_requests = max_qp_wr;
-	ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
-	if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
-		ep->rep_max_requests = max_qp_wr / depth;
-		if (!ep->rep_max_requests)
+	if (ep->re_max_requests > max_qp_wr)
+		ep->re_max_requests = max_qp_wr;
+	ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth;
+	if (ep->re_attr.cap.max_send_wr > max_qp_wr) {
+		ep->re_max_requests = max_qp_wr / depth;
+		if (!ep->re_max_requests)
			return -ENOMEM;
-		ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth;
+		ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth;
	}
-	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
-	ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
-	ep->rep_attr.cap.max_recv_wr = ep->rep_max_requests;
-	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
-	ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
-
-	ia->ri_max_rdma_segs =
-		DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ia->ri_max_frwr_depth);
+	ep->re_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
+	ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
+	ep->re_attr.cap.max_recv_wr = ep->re_max_requests;
+	ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+	ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
+
+	ep->re_max_rdma_segs =
+		DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ep->re_max_fr_depth);
	/* Reply chunks require segments for head and tail buffers */
-	ia->ri_max_rdma_segs += 2;
-	if (ia->ri_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS)
-		ia->ri_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS;
+	ep->re_max_rdma_segs += 2;
+	if (ep->re_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS)
+		ep->re_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS;

	/* Ensure the underlying device is capable of conveying the
	 * largest r/wsize NFS will ask for. This guarantees that
	 * failing over from one RDMA device to another will not
	 * break NFS I/O.
	 */
-	if ((ia->ri_max_rdma_segs * ia->ri_max_frwr_depth) < RPCRDMA_MAX_SEGS)
+	if ((ep->re_max_rdma_segs * ep->re_max_fr_depth) < RPCRDMA_MAX_SEGS)
		return -ENOMEM;

	return 0;
@@ -286,14 +283,14 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
				int nsegs, bool writing, __be32 xid,
				struct rpcrdma_mr *mr)
{
-	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct ib_reg_wr *reg_wr;
	int i, n, dma_nents;
	struct ib_mr *ibmr;
	u8 key;

-	if (nsegs > ia->ri_max_frwr_depth)
-		nsegs = ia->ri_max_frwr_depth;
+	if (nsegs > ep->re_max_fr_depth)
+		nsegs = ep->re_max_fr_depth;
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mr->mr_sg[i],
@@ -306,7 +303,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,

		++seg;
		++i;
-		if (ia->ri_mrtype == IB_MR_TYPE_SG_GAPS)
+		if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS)
			continue;
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
@@ -315,7 +312,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
	mr->mr_dir = rpcrdma_data_dir(writing);
	mr->mr_nents = i;

-	dma_nents = ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, mr->mr_nents,
+	dma_nents = ib_dma_map_sg(ep->re_id->device, mr->mr_sg, mr->mr_nents,
				  mr->mr_dir);
	if (!dma_nents)
		goto out_dmamap_err;
@@ -356,8 +353,8 @@ out_mapmr_err:

/**
 * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
- * @cq:	completion queue (ignored)
- * @wc:	completed WR
+ * @cq: completion queue
+ * @wc: WCE for a completed FastReg WR
 *
 */
static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
@@ -369,20 +366,25 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_fastreg(wc, frwr);
	/* The MR will get recycled when the associated req is retransmitted */

+	rpcrdma_flush_disconnect(cq, wc);
}

/**
- * frwr_send - post Send WR containing the RPC Call message
- * @ia: interface adapter
- * @req: Prepared RPC Call
+ * frwr_send - post Send WRs containing the RPC Call message
+ * @r_xprt: controlling transport instance
+ * @req: prepared RPC Call
 *
 * For FRWR, chain any FastReg WRs to the Send WR. Only a
 * single ib_post_send call is needed to register memory
 * and then post the Send WR.
 *
- * Returns the result of ib_post_send.
+ * Returns the return code from ib_post_send.
+ *
+ * Caller must hold the transport send lock to ensure that the
+ * pointers to the transport's rdma_cm_id and QP are stable.
 */
-int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *post_wr;
	struct rpcrdma_mr *mr;
@@ -403,7 +405,7 @@ int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
		post_wr = &frwr->fr_regwr.wr;
	}

-	return ib_post_send(ia->ri_id->qp, post_wr, NULL);
+	return ib_post_send(r_xprt->rx_ep->re_id->qp, post_wr, NULL);
}

/**
@@ -419,7 +421,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
	list_for_each_entry(mr, mrs, mr_list)
		if (mr->mr_handle == rep->rr_inv_rkey) {
			list_del_init(&mr->mr_list);
-			trace_xprtrdma_mr_remoteinv(mr);
+			trace_xprtrdma_mr_reminv(mr);
			rpcrdma_mr_put(mr);
			break;	/* only one invalidated MR per RPC */
		}
@@ -435,8 +437,8 @@ static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)

/**
 * frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC
- * @cq:	completion queue (ignored)
- * @wc:	completed WR
+ * @cq: completion queue
+ * @wc: WCE for a completed LocalInv WR
 *
 */
static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
@@ -449,12 +451,14 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_li(wc, frwr);
	__frwr_release_mr(wc, mr);

+	rpcrdma_flush_disconnect(cq, wc);
}

/**
 * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC
- * @cq:	completion queue (ignored)
- * @wc:	completed WR
+ * @cq: completion queue
+ * @wc: WCE for a completed LocalInv WR
 *
 * Awaken anyone waiting for an MR to finish being fenced.
 */
@@ -469,6 +473,8 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
	trace_xprtrdma_wc_li_wake(wc, frwr);
	__frwr_release_mr(wc, mr);
	complete(&frwr->fr_linv_done);

+	rpcrdma_flush_disconnect(cq, wc);
}

/**
@@ -526,10 +532,10 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
-	 * unless ri_id->qp is a valid pointer.
+	 * unless re_id->qp is a valid pointer.
	 */
	bad_wr = NULL;
-	rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
+	rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);

	/* The final LOCAL_INV WR in the chain is supposed to
	 * do the wake. If it was never posted, the wake will
@@ -556,8 +562,8 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)

/**
 * frwr_wc_localinv_done - Invoked by RDMA provider for a signaled LOCAL_INV WC
- * @cq:	completion queue (ignored)
- * @wc:	completed WR
+ * @cq:	completion queue
+ * @wc:	WCE for a completed LocalInv WR
 *
 */
static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -575,6 +581,8 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
	/* Ensure @rep is generated before __frwr_release_mr */
	smp_rmb();
	rpcrdma_complete_rqst(rep);

+	rpcrdma_flush_disconnect(cq, wc);
}

/**
@@ -629,10 +637,10 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
-	 * unless ri_id->qp is a valid pointer.
+	 * unless re_id->qp is a valid pointer.
	 */
	bad_wr = NULL;
-	rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
+	rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr);
	if (!rc)
		return;

+16 −16
@@ -103,21 +103,20 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)

/**
 * rpcrdma_set_max_header_sizes - Initialize inline payload sizes
- * @r_xprt: transport instance to initialize
+ * @ep: endpoint to initialize
 *
 * The max_inline fields contain the maximum size of an RPC message
 * so the marshaling code doesn't have to repeat this calculation
 * for every RPC.
 */
-void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
+void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep)
{
-	unsigned int maxsegs = r_xprt->rx_ia.ri_max_rdma_segs;
-	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+	unsigned int maxsegs = ep->re_max_rdma_segs;

-	ep->rep_max_inline_send =
-		ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs);
-	ep->rep_max_inline_recv =
-		ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
+	ep->re_max_inline_send =
+		ep->re_inline_send - rpcrdma_max_call_header_size(maxsegs);
+	ep->re_max_inline_recv =
+		ep->re_inline_recv - rpcrdma_max_reply_header_size(maxsegs);
}

/* The client can send a request inline as long as the RPCRDMA header
@@ -132,9 +131,10 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
				struct rpc_rqst *rqst)
{
	struct xdr_buf *xdr = &rqst->rq_snd_buf;
+	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	unsigned int count, remaining, offset;

-	if (xdr->len > r_xprt->rx_ep.rep_max_inline_send)
+	if (xdr->len > ep->re_max_inline_send)
		return false;

	if (xdr->page_len) {
@@ -145,7 +145,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
			remaining -= min_t(unsigned int,
					   PAGE_SIZE - offset, remaining);
			offset = 0;
-			if (++count > r_xprt->rx_ep.rep_attr.cap.max_send_sge)
+			if (++count > ep->re_attr.cap.max_send_sge)
				return false;
		}
	}
@@ -162,7 +162,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
				   struct rpc_rqst *rqst)
{
-	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv;
+	return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep->re_max_inline_recv;
}

/* The client is required to provide a Reply chunk if the maximum
@@ -176,7 +176,7 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
	const struct xdr_buf *buf = &rqst->rq_rcv_buf;

	return (buf->head[0].iov_len + buf->tail[0].iov_len) <
-		r_xprt->rx_ep.rep_max_inline_recv;
+		r_xprt->rx_ep->re_max_inline_recv;
}

/* Split @vec on page boundaries into SGEs. FMR registers pages, not
@@ -255,7 +255,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
	/* When encoding a Read chunk, the tail iovec contains an
	 * XDR pad and may be omitted.
	 */
-	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
+	if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup)
		goto out;

	/* When encoding a Write chunk, some servers need to see an
@@ -263,7 +263,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
	 * layer provides space in the tail iovec that may be used
	 * for this purpose.
	 */
-	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
+	if (type == rpcrdma_writech && r_xprt->rx_ep->re_implicit_roundup)
		goto out;

	if (xdrbuf->tail[0].iov_len)
@@ -1476,8 +1476,8 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)

	if (credits == 0)
		credits = 1;	/* don't deadlock */
-	else if (credits > r_xprt->rx_ep.rep_max_requests)
-		credits = r_xprt->rx_ep.rep_max_requests;
+	else if (credits > r_xprt->rx_ep->re_max_requests)
+		credits = r_xprt->rx_ep->re_max_requests;
	if (buf->rb_credits != credits)
		rpcrdma_update_cwnd(r_xprt, credits);
	rpcrdma_post_recvs(r_xprt, false);
+22 −50

File changed.

Preview size limit exceeded, changes collapsed.
