Commit 0dabe948 authored by Chuck Lever

svcrdma: Avoid DMA mapping small RPC Replies



On some platforms, DMA mapping part of a page is more costly than
copying bytes. Indeed, not involving the I/O MMU can help the
RPC/RDMA transport scale better for tiny I/Os across more RDMA
devices. This is because interaction with the I/O MMU is eliminated
for each of these small I/Os. Without the explicit unmapping, the
NIC no longer needs to do a costly internal TLB shoot down for
buffers that are just a handful of bytes.

Since pull-up is now a more frequent operation, I've introduced a
trace point in the pull-up path. It can be used for debugging or by
user-space tools that count pull-up frequency; a short sketch of
enabling and reading the event from user space follows the commit
metadata below.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
parent aee4b74a
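As a rough illustration of how a user-space tool might consume the new
trace point, here is a minimal sketch. It assumes tracefs is mounted at
/sys/kernel/tracing and that the event is registered under the rpcrdma
trace system; both are assumptions to verify against the running
kernel, and the program is illustrative only, not part of this patch.

/* Sketch: enable svcrdma_send_pullup and print each occurrence read
 * from trace_pipe. Paths assume tracefs at /sys/kernel/tracing and the
 * "rpcrdma" trace system; run as root and adjust as needed.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *enable =
		"/sys/kernel/tracing/events/rpcrdma/svcrdma_send_pullup/enable";
	char line[512];
	unsigned long count = 0;
	FILE *f;

	f = fopen(enable, "w");
	if (!f) {
		perror(enable);
		return 1;
	}
	fputs("1\n", f);
	fclose(f);

	f = fopen("/sys/kernel/tracing/trace_pipe", "r");
	if (!f) {
		perror("trace_pipe");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "svcrdma_send_pullup:"))
			printf("%lu: %s", ++count, line);
	fclose(f);
	return 0;
}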
+1 −0
@@ -52,6 +52,7 @@

/* Default and maximum inline threshold sizes */
enum {
	RPCRDMA_PULLUP_THRESH = RPCRDMA_V1_DEF_INLINE_SIZE >> 1,
	RPCRDMA_DEF_INLINE_THRESH = 4096,
	RPCRDMA_MAX_INLINE_THRESH = 65536
};
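Assuming RPCRDMA_V1_DEF_INLINE_SIZE is 1024 in this tree (an assumption
worth confirming in rpc_rdma.h), the shift above puts
RPCRDMA_PULLUP_THRESH at 512 bytes: a Reply whose transport header plus
message total stays under half a kilobyte is copied rather than
DMA-mapped.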
+18 −0
@@ -1639,6 +1639,24 @@ TRACE_EVENT(svcrdma_dma_map_rwctx,
	)
);

TRACE_EVENT(svcrdma_send_pullup,
	TP_PROTO(
		unsigned int len
	),

	TP_ARGS(len),

	TP_STRUCT__entry(
		__field(unsigned int, len)
	),

	TP_fast_assign(
		__entry->len = len;
	),

	TP_printk("len=%u", __entry->len)
);
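A note on what this event records: the caller below passes
sctxt->sc_sges[0].length after the message has been copied in, so a
record such as "svcrdma_send_pullup: len=236" (an illustrative value)
reports the total bytes now carried by the first Send SGE (transport
header plus the pulled-up RPC message), not xdr->len alone.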

TRACE_EVENT(svcrdma_send_failed,
	TP_PROTO(
		const struct svc_rqst *rqst,
+12 −1
@@ -541,6 +541,7 @@ static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
/**
 * svc_rdma_pull_up_needed - Determine whether to use pull-up
 * @rdma: controlling transport
 * @sctxt: send_ctxt for the Send WR
 * @rctxt: Write and Reply chunks provided by client
 * @xdr: xdr_buf containing RPC message to transmit
 *
@@ -549,11 +550,20 @@ static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
 *	%false otherwise
 */
static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
				    struct svc_rdma_send_ctxt *sctxt,
				    const struct svc_rdma_recv_ctxt *rctxt,
				    struct xdr_buf *xdr)
{
	int elements;

	/* For small messages, copying bytes is cheaper than DMA mapping.
	 */
	if (sctxt->sc_hdrbuf.len + xdr->len < RPCRDMA_PULLUP_THRESH)
		return true;

	/* Check whether the xdr_buf has more elements than can
	 * fit in a single RDMA Send.
	 */
	/* xdr->head */
	elements = 1;

@@ -636,6 +646,7 @@ static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
		memcpy(dst, tailbase, taillen);

	sctxt->sc_sges[0].length += xdr->len;
	trace_svcrdma_send_pullup(sctxt->sc_sges[0].length);
	return 0;
}

@@ -675,7 +686,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
	/* For pull-up, svc_rdma_send() will sync the transport header.
	 * No additional DMA mapping is necessary.
	 */
-	if (svc_rdma_pull_up_needed(rdma, rctxt, xdr))
+	if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr))
		return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr);

	++sctxt->sc_cur_sge_no;