Commit c410bf01 authored by David Howells
Browse files

rxrpc: Fix the excessive initial retransmission timeout



rxrpc currently uses a fixed 4s retransmission timeout until the RTT is
sufficiently sampled.  This can cause problems with some fileservers with
calls to the cache manager in the afs filesystem being dropped from the
fileserver because a packet goes missing and the retransmission timeout is
greater than the call expiry timeout.

Fix this by:

 (1) Copying the RTT/RTO calculation code from Linux's TCP implementation
     and altering it to fit rxrpc.

 (2) Altering the various users of the RTT to make use of the new SRTT
     value.

 (3) Replacing the use of rxrpc_resend_timeout with the calculated RTO
     value (which is needed in jiffies), along with a backoff.

Notes:

 (1) rxrpc provides RTT samples by matching the serial numbers on outgoing
     DATA packets that have the RXRPC_REQUEST_ACK set and PING ACK packets
     against the reference serial number in incoming REQUESTED ACK and
     PING-RESPONSE ACK packets.

 (2) Each packet that is transmitted on an rxrpc connection gets a new
     per-connection serial number, even for retransmissions, so an ACK can
     be cross-referenced to a specific trigger packet.  This allows RTT
     information to be drawn from retransmitted DATA packets also.

 (3) rxrpc maintains the RTT/RTO state on the rxrpc_peer record rather than
     on an rxrpc_call because many RPC calls won't live long enough to
     generate more than one sample.

 (4) The calculated SRTT value is in units of 8ths of a microsecond rather
     than nanoseconds.

The (S)RTT and RTO values are displayed in /proc/net/rxrpc/peers.

Fixes: 17926a79 ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
Signed-off-by: David Howells <dhowells@redhat.com>
parent 42c556fe
Loading
Loading
Loading
Loading
+5 −13
Original line number Diff line number Diff line
@@ -32,9 +32,8 @@ void afs_fileserver_probe_result(struct afs_call *call)
	struct afs_server *server = call->server;
	unsigned int server_index = call->server_index;
	unsigned int index = call->addr_ix;
	unsigned int rtt = UINT_MAX;
	unsigned int rtt_us;
	bool have_result = false;
	u64 _rtt;
	int ret = call->error;

	_enter("%pU,%u", &server->uuid, index);
@@ -93,15 +92,9 @@ responded:
		}
	}

	/* Get the RTT and scale it to fit into a 32-bit value that represents
	 * over a minute of time so that we can access it with one instruction
	 * on a 32-bit system.
	 */
	_rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
	_rtt /= 64;
	rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
	if (rtt < server->probe.rtt) {
		server->probe.rtt = rtt;
	rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall);
	if (rtt_us < server->probe.rtt) {
		server->probe.rtt = rtt_us;
		alist->preferred = index;
		have_result = true;
	}
@@ -113,8 +106,7 @@ out:
	spin_unlock(&server->probe_lock);

	_debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
	       server_index, index, &alist->addrs[index].transport,
	       (unsigned int)rtt, ret);
	       server_index, index, &alist->addrs[index].transport, rtt_us, ret);

	have_result |= afs_fs_probe_done(server);
	if (have_result)
+5 −13
Original line number Diff line number Diff line
@@ -31,10 +31,9 @@ void afs_vlserver_probe_result(struct afs_call *call)
	struct afs_addr_list *alist = call->alist;
	struct afs_vlserver *server = call->vlserver;
	unsigned int server_index = call->server_index;
	unsigned int rtt_us = 0;
	unsigned int index = call->addr_ix;
	unsigned int rtt = UINT_MAX;
	bool have_result = false;
	u64 _rtt;
	int ret = call->error;

	_enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code);
@@ -93,15 +92,9 @@ responded:
		}
	}

	/* Get the RTT and scale it to fit into a 32-bit value that represents
	 * over a minute of time so that we can access it with one instruction
	 * on a 32-bit system.
	 */
	_rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
	_rtt /= 64;
	rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
	if (rtt < server->probe.rtt) {
		server->probe.rtt = rtt;
	rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall);
	if (rtt_us < server->probe.rtt) {
		server->probe.rtt = rtt_us;
		alist->preferred = index;
		have_result = true;
	}
@@ -113,8 +106,7 @@ out:
	spin_unlock(&server->probe_lock);

	_debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
	       server_index, index, &alist->addrs[index].transport,
	       (unsigned int)rtt, ret);
	       server_index, index, &alist->addrs[index].transport, rtt_us, ret);

	have_result |= afs_vl_probe_done(server);
	if (have_result) {
+1 −1
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
			   struct sockaddr_rxrpc *);
u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *);
u32 rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
			       rxrpc_user_attach_call_t, unsigned long, gfp_t,
			       unsigned int);
+7 −10
Original line number Diff line number Diff line
@@ -1112,18 +1112,17 @@ TRACE_EVENT(rxrpc_rtt_tx,
TRACE_EVENT(rxrpc_rtt_rx,
	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
		     rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
		     s64 rtt, u8 nr, s64 avg),
		     u32 rtt, u32 rto),

	    TP_ARGS(call, why, send_serial, resp_serial, rtt, nr, avg),
	    TP_ARGS(call, why, send_serial, resp_serial, rtt, rto),

	    TP_STRUCT__entry(
		    __field(unsigned int,		call		)
		    __field(enum rxrpc_rtt_rx_trace,	why		)
		    __field(u8,				nr		)
		    __field(rxrpc_serial_t,		send_serial	)
		    __field(rxrpc_serial_t,		resp_serial	)
		    __field(s64,			rtt		)
		    __field(u64,			avg		)
		    __field(u32,			rtt		)
		    __field(u32,			rto		)
			     ),

	    TP_fast_assign(
@@ -1132,18 +1131,16 @@ TRACE_EVENT(rxrpc_rtt_rx,
		    __entry->send_serial = send_serial;
		    __entry->resp_serial = resp_serial;
		    __entry->rtt = rtt;
		    __entry->nr = nr;
		    __entry->avg = avg;
		    __entry->rto = rto;
			   ),

	    TP_printk("c=%08x %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld",
	    TP_printk("c=%08x %s sr=%08x rr=%08x rtt=%u rto=%u",
		      __entry->call,
		      __print_symbolic(__entry->why, rxrpc_rtt_rx_traces),
		      __entry->send_serial,
		      __entry->resp_serial,
		      __entry->rtt,
		      __entry->nr,
		      __entry->avg)
		      __entry->rto)
	    );

TRACE_EVENT(rxrpc_timer,
+1 −0
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@ rxrpc-y := \
	peer_event.o \
	peer_object.o \
	recvmsg.o \
	rtt.o \
	security.o \
	sendmsg.o \
	skbuff.o \
Loading