Commit 88d7fcfa authored by Martin KaFai Lau's avatar Martin KaFai Lau Committed by David S. Miller
Browse files

net: inet_csk: Fix so_reuseport bind-address cache in tb->fast*



The commit 637bc8bb ("inet: reset tb->fastreuseport when adding a reuseport sk")
added a bind-address cache in tb->fast*.  The tb->fast* caches the address
of a sk which has successfully been binded with SO_REUSEPORT ON.  The idea
is to avoid the expensive conflict search in inet_csk_bind_conflict().

There is an issue with wildcard matching where sk_reuseport_match() should
have returned false but it is currently returning true.  It ends up
hiding bind conflict.  For example,

bind("[::1]:443"); /* without SO_REUSEPORT. Succeed. */
bind("[::2]:443"); /* with    SO_REUSEPORT. Succeed. */
bind("[::]:443");  /* with    SO_REUSEPORT. Still Succeed where it shouldn't */

The last bind("[::]:443") with SO_REUSEPORT on should have failed because
it should have a conflict with the very first bind("[::1]:443") which
has SO_REUSEPORT off.  However, the address "[::2]" is cached in
tb->fast* in the second bind. In the last bind, the sk_reuseport_match()
returns true because the binding sk's wildcard addr "[::]" matches with
the "[::2]" cached in tb->fast*.

The correct bind conflict is reported by removing the second
bind such that tb->fast* cache is not involved and forces the
bind("[::]:443") to go through the inet_csk_bind_conflict():

bind("[::1]:443"); /* without SO_REUSEPORT. Succeed. */
bind("[::]:443");  /* with    SO_REUSEPORT. -EADDRINUSE */

The expected behavior for sk_reuseport_match() is, it should only allow
the "cached" tb->fast* address to be used as a wildcard match but not
the address of the binding sk.  To do that, the current
"bool match_wildcard" arg is split into
"bool match_sk1_wildcard" and "bool match_sk2_wildcard".

This change only affects the sk_reuseport_match() which is only
used by inet_csk (e.g. TCP).
The other use cases are calling inet_rcv_saddr_equal() and
this patch makes it pass the same "match_wildcard" arg twice to
the "ipv[46]_rcv_saddr_equal(..., match_wildcard, match_wildcard)".

Cc: Josef Bacik <jbacik@fb.com>
Fixes: 637bc8bb ("inet: reset tb->fastreuseport when adding a reuseport sk")
Signed-off-by: default avatarMartin KaFai Lau <kafai@fb.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent c27a2043
Loading
Loading
Loading
Loading
+24 −19
Original line number Diff line number Diff line
@@ -24,9 +24,10 @@
#include <net/addrconf.h>

#if IS_ENABLED(CONFIG_IPV6)
/* match_wildcard == true:  IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6
 *                          only, and any IPv4 addresses if not IPv6 only
 * match_wildcard == false: addresses must be exactly the same, i.e.
/* match_sk*_wildcard == true:  IPV6_ADDR_ANY equals to any IPv6 addresses
 *				if IPv6 only, and any IPv4 addresses
 *				if not IPv6 only
 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
 *				IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
 *				and 0.0.0.0 equals to 0.0.0.0 only
 */
@@ -34,7 +35,8 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
				 const struct in6_addr *sk2_rcv_saddr6,
				 __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
				 bool sk1_ipv6only, bool sk2_ipv6only,
				 bool match_wildcard)
				 bool match_sk1_wildcard,
				 bool match_sk2_wildcard)
{
	int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -44,8 +46,8 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
		if (!sk2_ipv6only) {
			if (sk1_rcv_saddr == sk2_rcv_saddr)
				return true;
			if (!sk1_rcv_saddr || !sk2_rcv_saddr)
				return match_wildcard;
			return (match_sk1_wildcard && !sk1_rcv_saddr) ||
				(match_sk2_wildcard && !sk2_rcv_saddr);
		}
		return false;
	}
@@ -53,11 +55,11 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
	if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
		return true;

	if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
	if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard &&
	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
		return true;

	if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
	if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard &&
	    !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
		return true;

@@ -69,18 +71,19 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
}
#endif

/* match_wildcard == true:  0.0.0.0 equals to any IPv4 addresses
 * match_wildcard == false: addresses must be exactly the same, i.e.
/* match_sk*_wildcard == true:  0.0.0.0 equals to any IPv4 addresses
 * match_sk*_wildcard == false: addresses must be exactly the same, i.e.
 *				0.0.0.0 only equals to 0.0.0.0
 */
static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
				 bool sk2_ipv6only, bool match_wildcard)
				 bool sk2_ipv6only, bool match_sk1_wildcard,
				 bool match_sk2_wildcard)
{
	if (!sk2_ipv6only) {
		if (sk1_rcv_saddr == sk2_rcv_saddr)
			return true;
		if (!sk1_rcv_saddr || !sk2_rcv_saddr)
			return match_wildcard;
		return (match_sk1_wildcard && !sk1_rcv_saddr) ||
			(match_sk2_wildcard && !sk2_rcv_saddr);
	}
	return false;
}
@@ -96,10 +99,12 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
					    sk2->sk_rcv_saddr,
					    ipv6_only_sock(sk),
					    ipv6_only_sock(sk2),
					    match_wildcard,
					    match_wildcard);
#endif
	return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr,
				    ipv6_only_sock(sk2), match_wildcard);
				    ipv6_only_sock(sk2), match_wildcard,
				    match_wildcard);
}
EXPORT_SYMBOL(inet_rcv_saddr_equal);

@@ -285,10 +290,10 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
					    tb->fast_rcv_saddr,
					    sk->sk_rcv_saddr,
					    tb->fast_ipv6_only,
					    ipv6_only_sock(sk), true);
					    ipv6_only_sock(sk), true, false);
#endif
	return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr,
				    ipv6_only_sock(sk), true);
				    ipv6_only_sock(sk), true, false);
}

/* Obtain a reference to a local port for the given sock,