Commit 116e7dbe authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'gen-syn-cookie'



Petar Penkov says:

====================
This patch series introduces a BPF helper function that allows generating SYN
cookies from BPF. Currently, this helper is enabled at both the TC hook and the
XDP hook.

The first two patches in the series add/modify several TCP helper functions to
allow for SKB-less operation, as is the case at the XDP hook.

The third patch introduces the bpf_tcp_gen_syncookie helper function which
generates a SYN cookie for either XDP or TC programs. The return value of
this function contains both the MSS value, encoded in the cookie, and the
cookie itself.

The last three patches sync tools/ and add a test.

Performance evaluation:
I sent 10Mpps to a fixed port on a host with 2 10G bonded Mellanox 4 NICs from
random IPv6 source addresses. Without XDP I observed 7.2Mpps (syn-acks) being
sent out if the IPv6 packets carry 20 bytes of TCP options or 7.6Mpps if they
carry no options. If I attached a simple program that checks if a packet is
IPv6/TCP/SYN, looks up the socket, issues a cookie, and sends it back out after
swapping src/dest, recomputing the checksum, and setting the ACK flag, I
observed 10Mpps being sent back out.

Changes since v1:
1/ Added performance numbers to the cover letter
2/ Patch 2: Refactored a bit to fix compilation issues
3/ Patch 3: Changed ENOTSUPP to EOPNOTSUPP at Toke's suggestion

Changes since RFC:
1/ Cookie is returned in host order at Alexei's suggestion
2/ If cookies are not enabled via a sysctl, the helper function returns
   -ENOENT instead of -EINVAL at Lorenz's suggestion
3/ Fixed documentation to properly reflect that MSS is 16 bits at
   Lorenz's suggestion
4/ BPF helper requires TCP length to match ->doff field, rather than to simply
   be no more than 20 bytes at Eric and Alexei's suggestion
5/ Packet type is looked up from the packet version field, rather than from the
   socket. v4 packets are rejected on v6-only sockets but should work with
   dual stack listeners at Eric's suggestion
6/ Removed unnecessary `net` argument from helper function in patch 2 at
   Lorenz's suggestion
7/ Changed test to only pass MSS option so we can convince the verifier that the
   memory access is not out of bounds

Note that 7/ below illustrates the verifier might need to be extended to allow
passing a variable tcph->doff to the helper function like below:

__u32 thlen = tcph->doff * 4;
if (thlen < sizeof(*tcph))
	return;
__s64 cookie = bpf_tcp_gen_syncookie(sk, ipv4h, 20, tcph, thlen);
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents d3406913 91bc3578
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -414,6 +414,16 @@ void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
		       int estab, struct tcp_fastopen_cookie *foc);
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);

/*
 *	BPF SKB-less helpers
 */
u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
			 struct tcphdr *th, u32 *cookie);
u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie);
u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
			  const struct tcp_request_sock_ops *af_ops,
			  struct sock *sk, struct tcphdr *th);
/*
 *	TCP v4 functions exported for the inet6 API
 */
+29 −1
Original line number Diff line number Diff line
@@ -2714,6 +2714,33 @@ union bpf_attr {
 *		**-EPERM** if no permission to send the *sig*.
 *
 *		**-EAGAIN** if bpf program can try again.
 *
 * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
 *	Description
 *		Try to issue a SYN cookie for the packet with corresponding
 *		IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
 *
 *		*iph* points to the start of the IPv4 or IPv6 header, while
 *		*iph_len* contains **sizeof**\ (**struct iphdr**) or
 *		**sizeof**\ (**struct ip6hdr**).
 *
 *		*th* points to the start of the TCP header, while *th_len*
 *		contains the length of the TCP header.
 *
 *	Return
 *		On success, lower 32 bits hold the generated SYN cookie in
 *		followed by 16 bits which hold the MSS value for that cookie,
 *		and the top 16 bits are unused.
 *
 *		On failure, the returned value is one of the following:
 *
 *		**-EINVAL** SYN cookie cannot be issued due to error
 *
 *		**-ENOENT** SYN cookie should not be issued (no SYN flood)
 *
 *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
 *
 *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -2825,7 +2852,8 @@ union bpf_attr {
	FN(strtoul),			\
	FN(sk_storage_get),		\
	FN(sk_storage_delete),		\
	FN(send_signal),
	FN(send_signal),		\
	FN(tcp_gen_syncookie),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
+73 −0
Original line number Diff line number Diff line
@@ -5855,6 +5855,75 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
	.arg5_type	= ARG_CONST_SIZE,
};

BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
	   struct tcphdr *, th, u32, th_len)
{
#ifdef CONFIG_SYN_COOKIES
	u32 cookie;
	u16 mss;

	if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
		return -EINVAL;

	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
		return -EINVAL;

	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
		return -ENOENT;

	if (!th->syn || th->ack || th->fin || th->rst)
		return -EINVAL;

	if (unlikely(iph_len < sizeof(struct iphdr)))
		return -EINVAL;

	/* Both struct iphdr and struct ipv6hdr have the version field at the
	 * same offset so we can cast to the shorter header (struct iphdr).
	 */
	switch (((struct iphdr *)iph)->version) {
	case 4:
		if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
			return -EINVAL;

		mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
		break;

#if IS_BUILTIN(CONFIG_IPV6)
	case 6:
		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
			return -EINVAL;

		if (sk->sk_family != AF_INET6)
			return -EINVAL;

		mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
		break;
#endif /* CONFIG_IPV6 */

	default:
		return -EPROTONOSUPPORT;
	}
	if (mss <= 0)
		return -ENOENT;

	return cookie | ((u64)mss << 32);
#else
	return -EOPNOTSUPP;
#endif /* CONFIG_SYN_COOKIES */
}

static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
	.func		= bpf_tcp_gen_syncookie,
	.gpl_only	= true, /* __cookie_v*_init_sequence() is GPL */
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE,
};

#endif /* CONFIG_INET */

bool bpf_helper_changes_pkt_data(void *func)
@@ -6144,6 +6213,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
		return &bpf_tcp_check_syncookie_proto;
	case BPF_FUNC_skb_ecn_set_ce:
		return &bpf_skb_ecn_set_ce_proto;
	case BPF_FUNC_tcp_gen_syncookie:
		return &bpf_tcp_gen_syncookie_proto;
#endif
	default:
		return bpf_base_func_proto(func_id);
@@ -6183,6 +6254,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
		return &bpf_xdp_skc_lookup_tcp_proto;
	case BPF_FUNC_tcp_check_syncookie:
		return &bpf_tcp_check_syncookie_proto;
	case BPF_FUNC_tcp_gen_syncookie:
		return &bpf_tcp_gen_syncookie_proto;
#endif
	default:
		return bpf_base_func_proto(func_id);
+76 −5
Original line number Diff line number Diff line
@@ -3782,6 +3782,49 @@ static void smc_parse_options(const struct tcphdr *th,
#endif
}

/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
 * value on success.
 */
static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
{
	const unsigned char *ptr = (const unsigned char *)(th + 1);
	int length = (th->doff * 4) - sizeof(struct tcphdr);
	u16 mss = 0;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return mss;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			if (length < 2)
				return mss;
			opsize = *ptr++;
			if (opsize < 2) /* "silly options" */
				return mss;
			if (opsize > length)
				return mss;	/* fail on partial options */
			if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
				u16 in_mss = get_unaligned_be16(ptr);

				if (in_mss) {
					if (user_mss && user_mss < in_mss)
						in_mss = user_mss;
					mss = in_mss;
				}
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
	return mss;
}

/* Look for tcp options. Normally only called on SYN and SYNACK packets.
 * But, this can also be called on packets in the established flow when
 * the fast version below fails.
@@ -6422,9 +6465,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc);
/*
 * Return true if a syncookie should be sent
 */
static bool tcp_syn_flood_action(const struct sock *sk,
				 const struct sk_buff *skb,
				 const char *proto)
static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
{
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	const char *msg = "Dropping request";
@@ -6444,7 +6485,7 @@ static bool tcp_syn_flood_action(const struct sock *sk,
	    net->ipv4.sysctl_tcp_syncookies != 2 &&
	    xchg(&queue->synflood_warned, 1) == 0)
		net_info_ratelimited("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
				     proto, ntohs(tcp_hdr(skb)->dest), msg);
				     proto, sk->sk_num, msg);

	return want_cookie;
}
@@ -6466,6 +6507,36 @@ static void tcp_reqsk_record_syn(const struct sock *sk,
	}
}

/* If a SYN cookie is required and supported, returns a clamped MSS value to be
 * used for SYN cookie generation.
 */
u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
			  const struct tcp_request_sock_ops *af_ops,
			  struct sock *sk, struct tcphdr *th)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u16 mss;

	if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
	    !inet_csk_reqsk_queue_is_full(sk))
		return 0;

	if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
		return 0;

	if (sk_acceptq_is_full(sk)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		return 0;
	}

	mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
	if (!mss)
		mss = af_ops->mss_clamp;

	return mss;
}
EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);

int tcp_conn_request(struct request_sock_ops *rsk_ops,
		     const struct tcp_request_sock_ops *af_ops,
		     struct sock *sk, struct sk_buff *skb)
@@ -6487,7 +6558,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
	 */
	if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
		want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
		if (!want_cookie)
			goto drop;
	}
+15 −0
Original line number Diff line number Diff line
@@ -1515,6 +1515,21 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
	return sk;
}

u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp_request_sock_ops,
				    &tcp_request_sock_ipv4_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v4_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

/* The socket must have it's spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
Loading