Commit f0e834e1 authored by David S. Miller
Browse files


David Howells says:

====================
rxrpc: Development

Here are some development patches for AF_RXRPC.  The most significant points
are:

 (1) Change the tracepoint that indicates a packet has been transmitted
     into one that indicates a packet is about to be transmitted.  Without
     this, the response tracepoint may occur first if the round trip is
     fast enough.

 (2) Sort out AFS address list handling to better enforce maximum capacity,
     to use helper functions to fill the lists and to do an insertion sort
     to order them.  This is here to make (3) easier.

 (3) Keep AF_INET addresses as AF_INET addresses rather than converting
     them to AF_INET6 in both AF_RXRPC and kAFS.  I hadn't realised that a
     UDP6 socket would just call down into UDP4 if given an AF_INET
     address.

 (4) Allow the timestamp on the first DATA packet of a reply to be
     retrieved by a kernel service.  This will give kAFS a more
     accurate base from which to calculate the callback promise expiration.

 (5) Allow the rxrpc protocol epoch value to be retrieved from an incoming
     call.  This will allow kAFS to determine if the fileserver restarted
     and if two addresses apparently assigned to the same fileserver
     actually are different boxes.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents bbb4c432 e908bcf4
Loading
Loading
Loading
Loading
+25 −0
Original line number Diff line number Diff line
@@ -1069,6 +1069,31 @@ The kernel interface functions are as follows:

     This function may transmit a PING ACK.

 (*) Get reply timestamp.

	bool rxrpc_kernel_get_reply_time(struct socket *sock,
					 struct rxrpc_call *call,
					 ktime_t *_ts)

     This allows the timestamp on the first DATA packet of the reply of a
     client call to be queried, provided that it is still in the Rx ring.  If
     successful, the timestamp will be stored into *_ts and true will be
     returned; false will be returned otherwise.

 (*) Get remote client epoch.

	u32 rxrpc_kernel_get_epoch(struct socket *sock,
				   struct rxrpc_call *call)

     This allows the epoch that's contained in packets of an incoming client
     call to be queried.  This value is returned.  The function is always
     successful if the call is still in progress.  It shouldn't be called once
     the call has expired.  Note that calling this on a local client call only
     returns the local epoch.

     This value can be used to determine if the remote client has been
     restarted as it shouldn't change otherwise.


=======================
CONFIGURABLE PARAMETERS
+50 −51
Original line number Diff line number Diff line
@@ -17,11 +17,6 @@
#include "internal.h"
#include "afs_fs.h"

//#define AFS_MAX_ADDRESSES
//	((unsigned int)((PAGE_SIZE - sizeof(struct afs_addr_list)) /
//			sizeof(struct sockaddr_rxrpc)))
#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))

/*
 * Release an address list.
 */
@@ -43,11 +38,15 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,

	_enter("%u,%u,%u", nr, service, port);

	if (nr > AFS_MAX_ADDRESSES)
		nr = AFS_MAX_ADDRESSES;

	alist = kzalloc(struct_size(alist, addrs, nr), GFP_KERNEL);
	if (!alist)
		return NULL;

	refcount_set(&alist->usage, 1);
	alist->max_addrs = nr;

	for (i = 0; i < nr; i++) {
		struct sockaddr_rxrpc *srx = &alist->addrs[i];
@@ -109,8 +108,6 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
	} while (p < end);

	_debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
	if (nr > AFS_MAX_ADDRESSES)
		nr = AFS_MAX_ADDRESSES;

	alist = afs_alloc_addrlist(nr, service, port);
	if (!alist)
@@ -119,8 +116,10 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
	/* Extract the addresses */
	p = text;
	do {
		struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs];
		const char *q, *stop;
		unsigned int xport = port;
		__be32 x[4];
		int family;

		if (*p == delim) {
			p++;
@@ -136,19 +135,12 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
					break;
		}

		if (in4_pton(p, q - p,
			     (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3],
			     -1, &stop)) {
			srx->transport.sin6.sin6_addr.s6_addr32[0] = 0;
			srx->transport.sin6.sin6_addr.s6_addr32[1] = 0;
			srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff);
		} else if (in6_pton(p, q - p,
				    srx->transport.sin6.sin6_addr.s6_addr,
				    -1, &stop)) {
			/* Nothing to do */
		} else {
		if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop))
			family = AF_INET;
		else if (in6_pton(p, q - p, (u8 *)x, -1, &stop))
			family = AF_INET6;
		else
			goto bad_address;
		}

		if (stop != q)
			goto bad_address;
@@ -160,7 +152,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
		if (p < end) {
			if (*p == '+') {
				/* Port number specification "+1234" */
				unsigned int xport = 0;
				xport = 0;
				p++;
				if (p >= end || !isdigit(*p))
					goto bad_address;
@@ -171,7 +163,6 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
						goto bad_address;
					p++;
				} while (p < end && isdigit(*p));
				srx->transport.sin6.sin6_port = htons(xport);
			} else if (*p == delim) {
				p++;
			} else {
@@ -179,8 +170,12 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
			}
		}

		alist->nr_addrs++;
	} while (p < end && alist->nr_addrs < AFS_MAX_ADDRESSES);
		if (family == AF_INET)
			afs_merge_fs_addr4(alist, x[0], xport);
		else
			afs_merge_fs_addr6(alist, x, xport);

	} while (p < end);

	_leave(" = [nr %u]", alist->nr_addrs);
	return alist;
@@ -237,19 +232,23 @@ struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
 */
void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
{
	struct sockaddr_in6 *a;
	__be16 xport = htons(port);
	struct sockaddr_rxrpc *srx;
	u32 addr = ntohl(xdr);
	int i;

	if (alist->nr_addrs >= alist->max_addrs)
		return;

	for (i = 0; i < alist->nr_ipv4; i++) {
		a = &alist->addrs[i].transport.sin6;
		if (xdr == a->sin6_addr.s6_addr32[3] &&
		    xport == a->sin6_port)
		struct sockaddr_in *a = &alist->addrs[i].transport.sin;
		u32 a_addr = ntohl(a->sin_addr.s_addr);
		u16 a_port = ntohs(a->sin_port);

		if (addr == a_addr && port == a_port)
			return;
		if (xdr == a->sin6_addr.s6_addr32[3] &&
		    (u16 __force)xport < (u16 __force)a->sin6_port)
		if (addr == a_addr && port < a_port)
			break;
		if ((u32 __force)xdr < (u32 __force)a->sin6_addr.s6_addr32[3])
		if (addr < a_addr)
			break;
	}

@@ -258,12 +257,11 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
			alist->addrs + i,
			sizeof(alist->addrs[0]) * (alist->nr_addrs - i));

	a = &alist->addrs[i].transport.sin6;
	a->sin6_port		  = xport;
	a->sin6_addr.s6_addr32[0] = 0;
	a->sin6_addr.s6_addr32[1] = 0;
	a->sin6_addr.s6_addr32[2] = htonl(0xffff);
	a->sin6_addr.s6_addr32[3] = xdr;
	srx = &alist->addrs[i];
	srx->transport_len = sizeof(srx->transport.sin);
	srx->transport.sin.sin_family = AF_INET;
	srx->transport.sin.sin_port = htons(port);
	srx->transport.sin.sin_addr.s_addr = xdr;
	alist->nr_ipv4++;
	alist->nr_addrs++;
}
@@ -273,18 +271,20 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
 */
void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
{
	struct sockaddr_in6 *a;
	__be16 xport = htons(port);
	struct sockaddr_rxrpc *srx;
	int i, diff;

	if (alist->nr_addrs >= alist->max_addrs)
		return;

	for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
		a = &alist->addrs[i].transport.sin6;
		struct sockaddr_in6 *a = &alist->addrs[i].transport.sin6;
		u16 a_port = ntohs(a->sin6_port);

		diff = memcmp(xdr, &a->sin6_addr, 16);
		if (diff == 0 &&
		    xport == a->sin6_port)
		if (diff == 0 && port == a_port)
			return;
		if (diff == 0 &&
		    (u16 __force)xport < (u16 __force)a->sin6_port)
		if (diff == 0 && port < a_port)
			break;
		if (diff < 0)
			break;
@@ -295,12 +295,11 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
			alist->addrs + i,
			sizeof(alist->addrs[0]) * (alist->nr_addrs - i));

	a = &alist->addrs[i].transport.sin6;
	a->sin6_port		  = xport;
	a->sin6_addr.s6_addr32[0] = xdr[0];
	a->sin6_addr.s6_addr32[1] = xdr[1];
	a->sin6_addr.s6_addr32[2] = xdr[2];
	a->sin6_addr.s6_addr32[3] = xdr[3];
	srx = &alist->addrs[i];
	srx->transport_len = sizeof(srx->transport.sin6);
	srx->transport.sin6.sin6_family = AF_INET6;
	srx->transport.sin6.sin6_port = htons(port);
	memcpy(&srx->transport.sin6.sin6_addr, xdr, 16);
	alist->nr_addrs++;
}

+5 −3
Original line number Diff line number Diff line
@@ -73,12 +73,14 @@ struct afs_addr_list {
	struct rcu_head		rcu;		/* Must be first */
	refcount_t		usage;
	u32			version;	/* Version */
	unsigned short		nr_addrs;
	unsigned short		index;		/* Address currently in use */
	unsigned short		nr_ipv4;	/* Number of IPv4 addresses */
	unsigned char		max_addrs;
	unsigned char		nr_addrs;
	unsigned char		index;		/* Address currently in use */
	unsigned char		nr_ipv4;	/* Number of IPv4 addresses */
	unsigned long		probed;		/* Mask of servers that have been probed */
	unsigned long		yfs;		/* Mask of servers that are YFS */
	struct sockaddr_rxrpc	addrs[];
#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
};

/*
+4 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#define _NET_RXRPC_H

#include <linux/rxrpc.h>
#include <linux/ktime.h>

struct key;
struct sock;
@@ -77,5 +78,8 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
			    enum rxrpc_call_completion *, u32 *);
u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
				 ktime_t *);

#endif /* _NET_RXRPC_H */
+16 −1
Original line number Diff line number Diff line
@@ -97,7 +97,8 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
	    srx->transport_len > len)
		return -EINVAL;

	if (srx->transport.family != rx->family)
	if (srx->transport.family != rx->family &&
	    srx->transport.family == AF_INET && rx->family != AF_INET6)
		return -EAFNOSUPPORT;

	switch (srx->transport.family) {
@@ -384,6 +385,20 @@ u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);

/**
 * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
 * @sock: The socket the call is on (not consulted by this function)
 * @call: The call to query
 *
 * Hand a kernel service the epoch stored in the protocol parameters of the
 * connection that @call is attached to.  The intended use is for a service
 * call, to see if the client at the other end rebooted.
 *
 * Return: the epoch value read from @call's connection.
 */
u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call)
{
	u32 epoch = call->conn->proto.epoch;

	return epoch;
}
EXPORT_SYMBOL(rxrpc_kernel_get_epoch);

/**
 * rxrpc_kernel_check_call - Check a call's state
 * @sock: The socket the call is on
Loading