Commit 4e121fca authored by Trond Myklebust's avatar Trond Myklebust
Browse files

Merge tag 'nfs-rdma-for-5.5-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

NFSoRDMA Client Updates for Linux 5.5

New Features:
- New tracepoints for congestion control and Local Invalidate WRs

Bugfixes and Cleanups:
- Eliminate log noise in call_reserveresult
- Fix unstable connections after a reconnect
- Clean up some code duplication
- Close race between waking a sender and posting a receive
- Fix MR list corruption, and clean up MR usage
- Remove unused rpcrdma_sendctx fields
- Try to avoid DMA mapping pages if it is too costly
- Wake pending tasks if connection fails
- Replace some dprintk()s with tracepoints
parents f751c545 a52c23b8
Loading
Loading
Loading
Loading
+132 −66
Original line number Diff line number Diff line
@@ -85,6 +85,44 @@ DECLARE_EVENT_CLASS(xprtrdma_rxprt,
				),					\
				TP_ARGS(r_xprt))

DECLARE_EVENT_CLASS(xprtrdma_connect_class,
	TP_PROTO(
		const struct rpcrdma_xprt *r_xprt,
		int rc
	),

	TP_ARGS(r_xprt, rc),

	TP_STRUCT__entry(
		__field(const void *, r_xprt)
		__field(int, rc)
		__field(int, connect_status)
		__string(addr, rpcrdma_addrstr(r_xprt))
		__string(port, rpcrdma_portstr(r_xprt))
	),

	TP_fast_assign(
		__entry->r_xprt = r_xprt;
		__entry->rc = rc;
		__entry->connect_status = r_xprt->rx_ep.rep_connected;
		__assign_str(addr, rpcrdma_addrstr(r_xprt));
		__assign_str(port, rpcrdma_portstr(r_xprt));
	),

	TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connect status=%d",
		__get_str(addr), __get_str(port), __entry->r_xprt,
		__entry->rc, __entry->connect_status
	)
);

#define DEFINE_CONN_EVENT(name)						\
		DEFINE_EVENT(xprtrdma_connect_class, xprtrdma_##name,	\
				TP_PROTO(				\
					const struct rpcrdma_xprt *r_xprt, \
					int rc				\
				),					\
				TP_ARGS(r_xprt, rc))

DECLARE_EVENT_CLASS(xprtrdma_rdch_event,
	TP_PROTO(
		const struct rpc_task *task,
@@ -333,47 +371,81 @@ TRACE_EVENT(xprtrdma_cm_event,
	)
);

TRACE_EVENT(xprtrdma_disconnect,
TRACE_EVENT(xprtrdma_inline_thresh,
	TP_PROTO(
		const struct rpcrdma_xprt *r_xprt,
		int status
		const struct rpcrdma_xprt *r_xprt
	),

	TP_ARGS(r_xprt, status),
	TP_ARGS(r_xprt),

	TP_STRUCT__entry(
		__field(const void *, r_xprt)
		__field(int, status)
		__field(int, connected)
		__field(unsigned int, inline_send)
		__field(unsigned int, inline_recv)
		__field(unsigned int, max_send)
		__field(unsigned int, max_recv)
		__string(addr, rpcrdma_addrstr(r_xprt))
		__string(port, rpcrdma_portstr(r_xprt))
	),

	TP_fast_assign(
		const struct rpcrdma_ep *ep = &r_xprt->rx_ep;

		__entry->r_xprt = r_xprt;
		__entry->status = status;
		__entry->connected = r_xprt->rx_ep.rep_connected;
		__entry->inline_send = ep->rep_inline_send;
		__entry->inline_recv = ep->rep_inline_recv;
		__entry->max_send = ep->rep_max_inline_send;
		__entry->max_recv = ep->rep_max_inline_recv;
		__assign_str(addr, rpcrdma_addrstr(r_xprt));
		__assign_str(port, rpcrdma_portstr(r_xprt));
	),

	TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected",
		__get_str(addr), __get_str(port),
		__entry->r_xprt, __entry->status,
		__entry->connected == 1 ? "still " : "dis"
	TP_printk("peer=[%s]:%s r_xprt=%p neg send/recv=%u/%u, calc send/recv=%u/%u",
		__get_str(addr), __get_str(port), __entry->r_xprt,
		__entry->inline_send, __entry->inline_recv,
		__entry->max_send, __entry->max_recv
	)
);

DEFINE_RXPRT_EVENT(xprtrdma_conn_start);
DEFINE_RXPRT_EVENT(xprtrdma_conn_tout);
DEFINE_CONN_EVENT(connect);
DEFINE_CONN_EVENT(disconnect);

DEFINE_RXPRT_EVENT(xprtrdma_create);
DEFINE_RXPRT_EVENT(xprtrdma_op_destroy);
DEFINE_RXPRT_EVENT(xprtrdma_remove);
DEFINE_RXPRT_EVENT(xprtrdma_reinsert);
DEFINE_RXPRT_EVENT(xprtrdma_reconnect);
DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc);
DEFINE_RXPRT_EVENT(xprtrdma_op_close);
DEFINE_RXPRT_EVENT(xprtrdma_op_connect);
DEFINE_RXPRT_EVENT(xprtrdma_op_setport);

TRACE_EVENT(xprtrdma_op_connect,
	TP_PROTO(
		const struct rpcrdma_xprt *r_xprt,
		unsigned long delay
	),

	TP_ARGS(r_xprt, delay),

	TP_STRUCT__entry(
		__field(const void *, r_xprt)
		__field(unsigned long, delay)
		__string(addr, rpcrdma_addrstr(r_xprt))
		__string(port, rpcrdma_portstr(r_xprt))
	),

	TP_fast_assign(
		__entry->r_xprt = r_xprt;
		__entry->delay = delay;
		__assign_str(addr, rpcrdma_addrstr(r_xprt));
		__assign_str(port, rpcrdma_portstr(r_xprt));
	),

	TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu",
		__get_str(addr), __get_str(port), __entry->r_xprt,
		__entry->delay
	)
);


TRACE_EVENT(xprtrdma_op_set_cto,
	TP_PROTO(
@@ -532,6 +604,8 @@ DEFINE_WRCH_EVENT(write);
DEFINE_WRCH_EVENT(reply);

TRACE_DEFINE_ENUM(rpcrdma_noch);
TRACE_DEFINE_ENUM(rpcrdma_noch_pullup);
TRACE_DEFINE_ENUM(rpcrdma_noch_mapped);
TRACE_DEFINE_ENUM(rpcrdma_readch);
TRACE_DEFINE_ENUM(rpcrdma_areadch);
TRACE_DEFINE_ENUM(rpcrdma_writech);
@@ -540,6 +614,8 @@ TRACE_DEFINE_ENUM(rpcrdma_replych);
#define xprtrdma_show_chunktype(x)					\
		__print_symbolic(x,					\
				{ rpcrdma_noch, "inline" },		\
				{ rpcrdma_noch_pullup, "pullup" },	\
				{ rpcrdma_noch_mapped, "mapped" },	\
				{ rpcrdma_readch, "read list" },	\
				{ rpcrdma_areadch, "*read list" },	\
				{ rpcrdma_writech, "write list" },	\
@@ -667,9 +743,8 @@ TRACE_EVENT(xprtrdma_post_send,
		__entry->client_id = rqst->rq_task->tk_client ?
				     rqst->rq_task->tk_client->cl_clid : -1;
		__entry->req = req;
		__entry->num_sge = req->rl_sendctx->sc_wr.num_sge;
		__entry->signaled = req->rl_sendctx->sc_wr.send_flags &
				    IB_SEND_SIGNALED;
		__entry->num_sge = req->rl_wr.num_sge;
		__entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED;
		__entry->status = status;
	),

@@ -735,6 +810,31 @@ TRACE_EVENT(xprtrdma_post_recvs,
	)
);

TRACE_EVENT(xprtrdma_post_linv,
	TP_PROTO(
		const struct rpcrdma_req *req,
		int status
	),

	TP_ARGS(req, status),

	TP_STRUCT__entry(
		__field(const void *, req)
		__field(int, status)
		__field(u32, xid)
	),

	TP_fast_assign(
		__entry->req = req;
		__entry->status = status;
		__entry->xid = be32_to_cpu(req->rl_slot.rq_xid);
	),

	TP_printk("req=%p xid=0x%08x status=%d",
		__entry->req, __entry->xid, __entry->status
	)
);

/**
 ** Completion events
 **/
@@ -1021,66 +1121,32 @@ DEFINE_REPLY_EVENT(xprtrdma_reply_hdr);
TRACE_EVENT(xprtrdma_fixup,
	TP_PROTO(
		const struct rpc_rqst *rqst,
		int len,
		int hdrlen
		unsigned long fixup
	),

	TP_ARGS(rqst, len, hdrlen),
	TP_ARGS(rqst, fixup),

	TP_STRUCT__entry(
		__field(unsigned int, task_id)
		__field(unsigned int, client_id)
		__field(const void *, base)
		__field(int, len)
		__field(int, hdrlen)
	),

	TP_fast_assign(
		__entry->task_id = rqst->rq_task->tk_pid;
		__entry->client_id = rqst->rq_task->tk_client->cl_clid;
		__entry->base = rqst->rq_rcv_buf.head[0].iov_base;
		__entry->len = len;
		__entry->hdrlen = hdrlen;
	),

	TP_printk("task:%u@%u base=%p len=%d hdrlen=%d",
		__entry->task_id, __entry->client_id,
		__entry->base, __entry->len, __entry->hdrlen
	)
);

TRACE_EVENT(xprtrdma_fixup_pg,
	TP_PROTO(
		const struct rpc_rqst *rqst,
		int pageno,
		const void *pos,
		int len,
		int curlen
	),

	TP_ARGS(rqst, pageno, pos, len, curlen),

	TP_STRUCT__entry(
		__field(unsigned int, task_id)
		__field(unsigned int, client_id)
		__field(const void *, pos)
		__field(int, pageno)
		__field(int, len)
		__field(int, curlen)
		__field(unsigned long, fixup)
		__field(size_t, headlen)
		__field(unsigned int, pagelen)
		__field(size_t, taillen)
	),

	TP_fast_assign(
		__entry->task_id = rqst->rq_task->tk_pid;
		__entry->client_id = rqst->rq_task->tk_client->cl_clid;
		__entry->pos = pos;
		__entry->pageno = pageno;
		__entry->len = len;
		__entry->curlen = curlen;
		__entry->fixup = fixup;
		__entry->headlen = rqst->rq_rcv_buf.head[0].iov_len;
		__entry->pagelen = rqst->rq_rcv_buf.page_len;
		__entry->taillen = rqst->rq_rcv_buf.tail[0].iov_len;
	),

	TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d",
		__entry->task_id, __entry->client_id,
		__entry->pageno, __entry->pos, __entry->len, __entry->curlen
	TP_printk("task:%u@%u fixup=%lu xdr=%zu/%u/%zu",
		__entry->task_id, __entry->client_id, __entry->fixup,
		__entry->headlen, __entry->pagelen, __entry->taillen
	)
);

+93 −0
Original line number Diff line number Diff line
@@ -777,6 +777,99 @@ TRACE_EVENT(xprt_ping,
			__get_str(addr), __get_str(port), __entry->status)
);

DECLARE_EVENT_CLASS(xprt_writelock_event,
	TP_PROTO(
		const struct rpc_xprt *xprt, const struct rpc_task *task
	),

	TP_ARGS(xprt, task),

	TP_STRUCT__entry(
		__field(unsigned int, task_id)
		__field(unsigned int, client_id)
		__field(unsigned int, snd_task_id)
	),

	TP_fast_assign(
		if (task) {
			__entry->task_id = task->tk_pid;
			__entry->client_id = task->tk_client ?
					     task->tk_client->cl_clid : -1;
		} else {
			__entry->task_id = -1;
			__entry->client_id = -1;
		}
		__entry->snd_task_id = xprt->snd_task ?
					xprt->snd_task->tk_pid : -1;
	),

	TP_printk("task:%u@%u snd_task:%u",
			__entry->task_id, __entry->client_id,
			__entry->snd_task_id)
);

#define DEFINE_WRITELOCK_EVENT(name) \
	DEFINE_EVENT(xprt_writelock_event, xprt_##name, \
			TP_PROTO( \
				const struct rpc_xprt *xprt, \
				const struct rpc_task *task \
			), \
			TP_ARGS(xprt, task))

DEFINE_WRITELOCK_EVENT(reserve_xprt);
DEFINE_WRITELOCK_EVENT(release_xprt);

DECLARE_EVENT_CLASS(xprt_cong_event,
	TP_PROTO(
		const struct rpc_xprt *xprt, const struct rpc_task *task
	),

	TP_ARGS(xprt, task),

	TP_STRUCT__entry(
		__field(unsigned int, task_id)
		__field(unsigned int, client_id)
		__field(unsigned int, snd_task_id)
		__field(unsigned long, cong)
		__field(unsigned long, cwnd)
		__field(bool, wait)
	),

	TP_fast_assign(
		if (task) {
			__entry->task_id = task->tk_pid;
			__entry->client_id = task->tk_client ?
					     task->tk_client->cl_clid : -1;
		} else {
			__entry->task_id = -1;
			__entry->client_id = -1;
		}
		__entry->snd_task_id = xprt->snd_task ?
					xprt->snd_task->tk_pid : -1;
		__entry->cong = xprt->cong;
		__entry->cwnd = xprt->cwnd;
		__entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
	),

	TP_printk("task:%u@%u snd_task:%u cong=%lu cwnd=%lu%s",
			__entry->task_id, __entry->client_id,
			__entry->snd_task_id, __entry->cong, __entry->cwnd,
			__entry->wait ? " (wait)" : "")
);

#define DEFINE_CONG_EVENT(name) \
	DEFINE_EVENT(xprt_cong_event, xprt_##name, \
			TP_PROTO( \
				const struct rpc_xprt *xprt, \
				const struct rpc_task *task \
			), \
			TP_ARGS(xprt, task))

DEFINE_CONG_EVENT(reserve_cong);
DEFINE_CONG_EVENT(release_cong);
DEFINE_CONG_EVENT(get_cong);
DEFINE_CONG_EVENT(put_cong);

TRACE_EVENT(xs_stream_read_data,
	TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total),

+2 −12
Original line number Diff line number Diff line
@@ -1679,8 +1679,6 @@ call_reserveresult(struct rpc_task *task)
			return;
		}

		printk(KERN_ERR "%s: status=%d, but no request slot, exiting\n",
				__func__, status);
		rpc_call_rpcerror(task, -EIO);
		return;
	}
@@ -1689,11 +1687,8 @@ call_reserveresult(struct rpc_task *task)
	 * Even though there was an error, we may have acquired
	 * a request slot somehow.  Make sure not to leak it.
	 */
	if (task->tk_rqstp) {
		printk(KERN_ERR "%s: status=%d, request allocated anyway\n",
				__func__, status);
	if (task->tk_rqstp)
		xprt_release(task);
	}

	switch (status) {
	case -ENOMEM:
@@ -1702,15 +1697,10 @@ call_reserveresult(struct rpc_task *task)
	case -EAGAIN:	/* woken up; retry */
		task->tk_action = call_retry_reserve;
		return;
	case -EIO:	/* probably a shutdown */
		break;
	default:
		printk(KERN_ERR "%s: unrecognized error %d, exiting\n",
				__func__, status);
		break;
	}
		rpc_call_rpcerror(task, status);
	}
}

/*
 * 1c.	Retry reserving an RPC call slot
+13 −9
Original line number Diff line number Diff line
@@ -205,20 +205,20 @@ int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)

	if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
		if (task == xprt->snd_task)
			return 1;
			goto out_locked;
		goto out_sleep;
	}
	if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
		goto out_unlock;
	xprt->snd_task = task;

out_locked:
	trace_xprt_reserve_xprt(xprt, task);
	return 1;

out_unlock:
	xprt_clear_locked(xprt);
out_sleep:
	dprintk("RPC: %5u failed to lock transport %p\n",
			task->tk_pid, xprt);
	task->tk_status = -EAGAIN;
	if  (RPC_IS_SOFT(task))
		rpc_sleep_on_timeout(&xprt->sending, task, NULL,
@@ -269,23 +269,22 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)

	if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
		if (task == xprt->snd_task)
			return 1;
			goto out_locked;
		goto out_sleep;
	}
	if (req == NULL) {
		xprt->snd_task = task;
		return 1;
		goto out_locked;
	}
	if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
		goto out_unlock;
	if (!xprt_need_congestion_window_wait(xprt)) {
		xprt->snd_task = task;
		return 1;
		goto out_locked;
	}
out_unlock:
	xprt_clear_locked(xprt);
out_sleep:
	dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
	task->tk_status = -EAGAIN;
	if (RPC_IS_SOFT(task))
		rpc_sleep_on_timeout(&xprt->sending, task, NULL,
@@ -293,6 +292,9 @@ out_sleep:
	else
		rpc_sleep_on(&xprt->sending, task, NULL);
	return 0;
out_locked:
	trace_xprt_reserve_cong(xprt, task);
	return 1;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);

@@ -357,6 +359,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
		xprt_clear_locked(xprt);
		__xprt_lock_write_next(xprt);
	}
	trace_xprt_release_xprt(xprt, task);
}
EXPORT_SYMBOL_GPL(xprt_release_xprt);

@@ -374,6 +377,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
		xprt_clear_locked(xprt);
		__xprt_lock_write_next_cong(xprt);
	}
	trace_xprt_release_cong(xprt, task);
}
EXPORT_SYMBOL_GPL(xprt_release_xprt_cong);

@@ -395,8 +399,7 @@ __xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
	if (req->rq_cong)
		return 1;
	dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n",
			req->rq_task->tk_pid, xprt->cong, xprt->cwnd);
	trace_xprt_get_cong(xprt, req->rq_task);
	if (RPCXPRT_CONGESTED(xprt)) {
		xprt_set_congestion_window_wait(xprt);
		return 0;
@@ -418,6 +421,7 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
	req->rq_cong = 0;
	xprt->cong -= RPC_CWNDSCALE;
	xprt_test_and_clear_congestion_window_wait(xprt);
	trace_xprt_put_cong(xprt, req->rq_task);
	__xprt_lock_write_next_cong(xprt);
}

+1 −1
Original line number Diff line number Diff line
@@ -79,7 +79,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
	*p = xdr_zero;

	if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
				      &rqst->rq_snd_buf, rpcrdma_noch))
				      &rqst->rq_snd_buf, rpcrdma_noch_pullup))
		return -EIO;

	trace_xprtrdma_cb_reply(rqst);
Loading