Commit ad2f8eb0 authored by Martin KaFai Lau, committed by Alexei Starovoitov
Browse files

bpf: selftests: Tcp header options



This patch adds tests for the new bpf tcp header option feature.

test_tcp_hdr_options.c:
- It tests header option writing and parsing in 3WHS: regular
  connection establishment, fastopen, and syncookie.
- In syncookie mode, the passive side's bpf prog asks the active side
  to resend its bpf header option by setting a RESEND bit in the
  outgoing SYNACK. handle_active_estab() and write_nodata_opt() have
  some details.
- handle_passive_estab() has comments on fastopen.
- It also has test for header writing and parsing in FIN packet.
- Most of the tests write an experimental option 254 with magic 0xeB9F.
- The no_exprm_estab() also tests writing a regular TCP option
  without any magic.

test_misc_tcp_options.c:
- It is a one-directional test.  The active side writes the option and
  the passive side parses it.  The focus is to exercise
  the new helpers and API.
- Testing the new helper: bpf_load_hdr_opt() and bpf_store_hdr_opt().
- Testing the bpf_getsockopt(TCP_BPF_SYN).
- Negative tests for the above helpers.
- Testing the sock_ops->skb_data.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200820190117.2886749-1-kafai@fb.com
parent 8085e1dc
Loading
Loading
Loading
Loading
+622 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <linux/compiler.h>

#include "test_progs.h"
#include "cgroup_helpers.h"
#include "network_helpers.h"
#include "test_tcp_hdr_options.h"
#include "test_tcp_hdr_options.skel.h"
#include "test_misc_tcp_hdr_options.skel.h"

#define LO_ADDR6 "::eB9F"
#define CG_NAME "/tcpbpf-hdr-opt-test"

/* Expected values for the current subtest.  Each subtest fills these in
 * before connecting, prepare_out() copies the option expectations into the
 * bpf prog's "out" variables, and check_hdr_and_close_fds() compares them
 * with what the bpf prog recorded.  reset_test() clears them again.
 */
struct bpf_test_option exp_passive_estab_in;
struct bpf_test_option exp_active_estab_in;
struct bpf_test_option exp_passive_fin_in;
struct bpf_test_option exp_active_fin_in;
struct hdr_stg exp_passive_hdr_stg;
struct hdr_stg exp_active_hdr_stg = { .active = true, };

/* Skeletons loaded once in test_tcp_hdr_options() and shared by all subtests */
static struct test_misc_tcp_hdr_options *misc_skel;
static struct test_tcp_hdr_options *skel;
/* Map fds; every subtest re-resolves the ones it needs from its skeleton */
static int lport_linum_map_fd;
static int hdr_stg_map_fd;
/* Not referenced directly in this file; presumably consumed by the CHECK()
 * macro -- confirm in test_progs.h.
 */
static __u32 duration;
/* cgroup fd returned by test__join_cgroup(); the bpf progs are attached to it */
static int cg_fd;

/* The sockets of one test connection, filled in by sk_fds_connect().
 * On a failed connect every member is set to -1.
 */
struct sk_fds {
	int srv_fd;		/* listening socket */
	int passive_fd;		/* socket returned by accept(srv_fd) */
	int active_fd;		/* connecting (client) socket */
	int passive_lport;	/* local port of srv_fd, host byte order */
	int active_lport;	/* local port of active_fd, host byte order */
};

/* Add LO_ADDR6 as a host-scoped /128 address to the "lo" device by running
 * the "ip" command.
 *
 * Returns 0 on success and -1 if the command could not be built or if
 * system() returned non-zero.
 */
static int add_lo_addr(void)
{
	char cmd[256];
	int n;

	n = snprintf(cmd, sizeof(cmd),
		     "ip -6 addr add %s/128 dev lo scope host", LO_ADDR6);

	/* n >= sizeof(cmd) means snprintf() had to truncate the command */
	if (CHECK(n >= sizeof(cmd), "compile ip cmd",
		  "failed to add host addr %s to lo. ip cmdlen is too long\n",
		  LO_ADDR6))
		return -1;

	return CHECK(system(cmd), "run ip cmd",
		     "failed to add host addr %s to lo\n", LO_ADDR6) ? -1 : 0;
}

/* Move the calling process into a fresh network namespace, bring "lo" up
 * and add the LO_ADDR6 test address to it.
 *
 * Returns 0 on success, -1 on the first step that fails.
 */
static int create_netns(void)
{
	/* The message ends with '\n' like every other failure message in
	 * this file, so the following output starts on its own line.
	 */
	if (CHECK(unshare(CLONE_NEWNET), "create netns",
		  "unshare(CLONE_NEWNET): %s (%d)\n",
		  strerror(errno), errno))
		return -1;

	if (CHECK(system("ip link set dev lo up"), "run ip cmd",
		  "failed to bring lo link up\n"))
		return -1;

	if (add_lo_addr())
		return -1;

	return 0;
}

/* Write the string @value to the file at path @sysctl.
 *
 * Returns 0 if the whole string was written, -1 if the file could not be
 * opened or the write was short/failed.
 */
static int write_sysctl(const char *sysctl, const char *value)
{
	int fd, err, len, saved_errno;

	fd = open(sysctl, O_WRONLY);
	if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
		  sysctl, strerror(errno), errno))
		return -1;

	len = strlen(value);
	err = write(fd, value, len);
	/* close() may change errno, so remember the one left by write()
	 * before it is reported below.
	 */
	saved_errno = errno;
	close(fd);
	if (CHECK(err != len, "write sysctl",
		  "write(%s, %s): err:%d %s (%d)\n",
		  sysctl, value, err, strerror(saved_errno), saved_errno))
		return -1;

	return 0;
}

/* Dump the four flags of @hdr_stg to stderr on one line.
 * @prefix is printed first; NULL is treated as an empty prefix.
 */
static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix)
{
	const char *lead = prefix ? prefix : "";

	fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n",
		lead,
		hdr_stg->active,
		hdr_stg->resend_syn,
		hdr_stg->syncookie,
		hdr_stg->fastopen);
}

/* Dump the fields of @opt to stderr on one line.
 * @prefix is printed first; NULL is treated as an empty prefix.
 */
static void print_option(const struct bpf_test_option *opt, const char *prefix)
{
	const char *lead = prefix ? prefix : "";

	fprintf(stderr, "%s{flags:0x%x, max_delack_ms:%u, rand:0x%x}\n",
		lead, opt->flags, opt->max_delack_ms, opt->rand);
}

/* Close the listening, passive and active sockets of @sk_fds, in that
 * order.  Return values of close() are ignored.
 */
static void sk_fds_close(struct sk_fds *sk_fds)
{
	const int fds[] = {
		sk_fds->srv_fd,
		sk_fds->passive_fd,
		sk_fds->active_fd,
	};
	size_t i;

	for (i = 0; i < sizeof(fds) / sizeof(fds[0]); i++)
		close(fds[i]);
}

/* Half-close both directions of the connection, active side first.
 *
 * After each shutdown(SHUT_WR) the peer is read once and the read must
 * return 0 (end of stream).  Returns 0 on success, -1 if either read
 * returns anything else.
 */
static int sk_fds_shutdown(struct sk_fds *sk_fds)
{
	int nread, scratch;

	/* active -> passive direction */
	shutdown(sk_fds->active_fd, SHUT_WR);
	nread = read(sk_fds->passive_fd, &scratch, sizeof(scratch));
	if (CHECK(nread != 0, "read-after-shutdown(passive_fd):",
		  "ret:%d %s (%d)\n",
		  nread, strerror(errno), errno))
		return -1;

	/* passive -> active direction */
	shutdown(sk_fds->passive_fd, SHUT_WR);
	nread = read(sk_fds->active_fd, &scratch, sizeof(scratch));
	if (CHECK(nread != 0, "read-after-shutdown(active_fd):",
		  "ret:%d %s (%d)\n",
		  nread, strerror(errno), errno))
		return -1;

	return 0;
}

/* Set up one TCP connection over LO_ADDR6 and record its sockets and
 * local ports in @sk_fds.
 *
 * @fast_open: connect with fastopen_connect() and send "FAST!!!" together
 *	       with the connect; the passive side then has to read that
 *	       same data back.  Otherwise connect_to_fd() is used.
 *
 * Returns 0 on success.  On failure every socket opened so far is closed,
 * all members of @sk_fds are set to -1 and -1 is returned.
 */
static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
{
	const char fast[] = "FAST!!!";
	struct sockaddr_in6 addr6;
	socklen_t len;

	sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
	if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n",
		  strerror(errno), errno))
		goto error;

	if (fast_open)
		sk_fds->active_fd = fastopen_connect(sk_fds->srv_fd, fast,
						     sizeof(fast), 0);
	else
		sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0);

	if (CHECK_FAIL(sk_fds->active_fd == -1)) {
		/* Only srv_fd is open at this point */
		close(sk_fds->srv_fd);
		goto error;
	}

	len = sizeof(addr6);
	if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6,
			      &len), "getsockname(srv_fd)", "%s (%d)\n",
		  strerror(errno), errno))
		goto error_close;
	sk_fds->passive_lport = ntohs(addr6.sin6_port);

	len = sizeof(addr6);
	if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6,
			      &len), "getsockname(active_fd)", "%s (%d)\n",
		  strerror(errno), errno))
		goto error_close;
	sk_fds->active_lport = ntohs(addr6.sin6_port);

	/* The peer address is not needed */
	sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, NULL);
	if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n",
		  strerror(errno), errno))
		goto error_close;

	if (fast_open) {
		char bytes_in[sizeof(fast)];
		int ret;

		ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in));
		/* sizeof() yields a size_t, which is printed with %zu */
		if (CHECK(ret != sizeof(fast), "read fastopen syn data",
			  "expected=%zu actual=%d\n", sizeof(fast), ret)) {
			close(sk_fds->passive_fd);
			goto error_close;
		}
	}

	return 0;

error_close:
	close(sk_fds->active_fd);
	close(sk_fds->srv_fd);

error:
	/* All bytes 0xff: every int member reads back as -1 */
	memset(sk_fds, -1, sizeof(*sk_fds));
	return -1;
}

/* Byte-compare the expected option @exp with the actual option @act.
 *
 * On a mismatch the test is failed via CHECK(), both options are printed
 * to stderr and -1 is returned.  Returns 0 when they are identical.
 * @hdr_desc names the option in the failure message.
 */
static int check_hdr_opt(const struct bpf_test_option *exp,
			 const struct bpf_test_option *act,
			 const char *hdr_desc)
{
	if (!CHECK(memcmp(exp, act, sizeof(*exp)),
		   "expected-vs-actual", "unexpected %s\n", hdr_desc))
		return 0;

	print_option(exp, "expected: ");
	print_option(act, "  actual: ");

	return -1;
}

static int check_hdr_stg(const struct hdr_stg *exp, int fd,
			 const char *stg_desc)
{
	struct hdr_stg act;

	if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act),
		  "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n",
		  stg_desc, strerror(errno), errno))
		return -1;

	if (CHECK(memcmp(exp, &act, sizeof(*exp)),
		  "expected-vs-actual", "unexpected %s\n", stg_desc)) {
		print_hdr_stg(exp, "expected: ");
		print_hdr_stg(&act, "  actual: ");
		return -1;
	}

	return 0;
}

static int check_error_linum(const struct sk_fds *sk_fds)
{
	unsigned int nr_errors = 0;
	struct linum_err linum_err;
	int lport;

	lport = sk_fds->passive_lport;
	if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
		fprintf(stderr,
			"bpf prog error out at lport:passive(%d), linum:%u err:%d\n",
			lport, linum_err.linum, linum_err.err);
		nr_errors++;
	}

	lport = sk_fds->active_lport;
	if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
		fprintf(stderr,
			"bpf prog error out at lport:active(%d), linum:%u err:%d\n",
			lport, linum_err.linum, linum_err.err);
		nr_errors++;
	}

	return nr_errors;
}

/* Shut the connection down, then compare what the bpf prog recorded
 * (header storage of both sockets and the received estab/fin options)
 * against the exp_* expectations.  The checks run in a fixed order and
 * stop at the first failure.
 *
 * Regardless of the outcome, errors recorded by the bpf prog are reported
 * and all sockets in @sk_fds are closed before returning.
 */
static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
{
	bool ok;

	/* && short-circuits, so nothing after the first failure runs */
	ok = !sk_fds_shutdown(sk_fds) &&
	     !check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd,
			    "passive_hdr_stg") &&
	     !check_hdr_stg(&exp_active_hdr_stg, sk_fds->active_fd,
			    "active_hdr_stg") &&
	     !check_hdr_opt(&exp_passive_estab_in,
			    &skel->bss->passive_estab_in,
			    "passive_estab_in") &&
	     !check_hdr_opt(&exp_active_estab_in,
			    &skel->bss->active_estab_in,
			    "active_estab_in") &&
	     !check_hdr_opt(&exp_passive_fin_in,
			    &skel->bss->passive_fin_in,
			    "passive_fin_in");

	if (ok)
		check_hdr_opt(&exp_active_fin_in, &skel->bss->active_fin_in,
			      "active_fin_in");

	CHECK_FAIL(check_error_linum(sk_fds));
	sk_fds_close(sk_fds);
}

/* Copy the expectations into the bpf prog's outgoing options: what one
 * side is expected to receive is what the other side is told to send.
 */
static void prepare_out(void)
{
	/* Sent by the active side, expected in by the passive side */
	skel->bss->active_syn_out = exp_passive_estab_in;
	skel->bss->active_fin_out = exp_passive_fin_in;

	/* Sent by the passive side, expected in by the active side */
	skel->bss->passive_synack_out = exp_active_estab_in;
	skel->bss->passive_fin_out = exp_active_fin_in;
}

/* Restore the shared state to its defaults after a subtest: zero every
 * option in the bpf prog's bss and every exp_* expectation, select the
 * experimental option kind with magic 0xeB9F again, and delete all
 * entries from lport_linum_map_fd.
 */
static void reset_test(void)
{
	struct bpf_test_option *opts[] = {
		/* written and recorded by the bpf prog */
		&skel->bss->passive_synack_out,
		&skel->bss->passive_fin_out,
		&skel->bss->passive_estab_in,
		&skel->bss->passive_fin_in,
		&skel->bss->active_syn_out,
		&skel->bss->active_fin_out,
		&skel->bss->active_estab_in,
		&skel->bss->active_fin_in,
		/* expectations kept on the userspace side */
		&exp_passive_estab_in,
		&exp_active_estab_in,
		&exp_passive_fin_in,
		&exp_active_fin_in,
	};
	int lport, err;
	size_t i;

	/* memset() rather than assignment so that every byte is cleared;
	 * the options are later compared with memcmp().
	 */
	for (i = 0; i < sizeof(opts) / sizeof(opts[0]); i++)
		memset(opts[i], 0, sizeof(struct bpf_test_option));

	skel->data->test_kind = TCPOPT_EXP;
	skel->data->test_magic = 0xeB9F;

	memset(&exp_passive_hdr_stg, 0, sizeof(exp_passive_hdr_stg));
	memset(&exp_active_hdr_stg, 0, sizeof(exp_active_hdr_stg));
	exp_active_hdr_stg.active = true;

	/* Walk the map and delete each key as it is visited */
	for (err = bpf_map_get_next_key(lport_linum_map_fd, NULL, &lport);
	     !err;
	     err = bpf_map_get_next_key(lport_linum_map_fd, &lport, &lport))
		bpf_map_delete_elem(lport_linum_map_fd, &lport);
}

/* Subtest: option exchange while the connection is established with
 * TCP fastopen.  The passive side's header storage is expected to have
 * its fastopen flag set.
 */
static void fastopen_estab(void)
{
	struct sk_fds sk_fds;
	struct bpf_link *link;

	lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
	hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);

	/* What the active side is expected to receive */
	exp_active_estab_in.max_delack_ms = 22;
	exp_active_estab_in.rand = 0xce;
	exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;

	/* What the passive side is expected to receive */
	exp_passive_estab_in.max_delack_ms = 11;
	exp_passive_estab_in.rand = 0xfa;
	exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;

	exp_passive_hdr_stg.fastopen = true;

	prepare_out();

	/* Allow fastopen without fastopen cookie */
	if (write_sysctl("/proc/sys/net/ipv4/tcp_fastopen", "1543"))
		return;

	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
		  PTR_ERR(link)))
		return;

	/* The link is released whether or not the connection came up */
	if (!sk_fds_connect(&sk_fds, true))
		check_hdr_and_close_fds(&sk_fds);

	bpf_link__destroy(link);
}

/* Subtest: option exchange while the passive side is forced into
 * syncookie mode.  The active side is expected to receive the RESEND
 * flag and, as a result, to have resend_syn set in its header storage;
 * the passive side's header storage is expected to have syncookie set.
 */
static void syncookie_estab(void)
{
	struct bpf_link *link;
	struct sk_fds sk_fds;

	hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
	lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);

	exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
	exp_passive_estab_in.rand = 0xfa;
	exp_passive_estab_in.max_delack_ms = 11;

	exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS |
					OPTION_F_RESEND;
	exp_active_estab_in.rand = 0xce;
	exp_active_estab_in.max_delack_ms = 22;

	exp_passive_hdr_stg.syncookie = true;
	/* Terminated with ';' so that this is its own statement and is not
	 * joined to the prepare_out() call by the comma operator.
	 */
	exp_active_hdr_stg.resend_syn = true;

	prepare_out();

	/* Clear the RESEND to ensure the bpf prog can learn
	 * want_cookie and set the RESEND by itself.
	 */
	skel->bss->passive_synack_out.flags &= ~OPTION_F_RESEND;

	/* Enforce syncookie mode */
	if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
		return;

	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
		  PTR_ERR(link)))
		return;

	if (sk_fds_connect(&sk_fds, false)) {
		bpf_link__destroy(link);
		return;
	}

	check_hdr_and_close_fds(&sk_fds);
	bpf_link__destroy(link);
}

/* Subtest: only the fin options are expected to carry data (a rand value
 * in each direction); the estab expectations stay zeroed.
 */
static void fin(void)
{
	struct sk_fds sk_fds;
	struct bpf_link *link;

	lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
	hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);

	/* What the active side is expected to receive */
	exp_active_fin_in.rand = 0xce;
	exp_active_fin_in.flags = OPTION_F_RAND;

	/* What the passive side is expected to receive */
	exp_passive_fin_in.rand = 0xfa;
	exp_passive_fin_in.flags = OPTION_F_RAND;

	prepare_out();

	if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
		return;

	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
		  PTR_ERR(link)))
		return;

	/* The link is released whether or not the connection came up */
	if (!sk_fds_connect(&sk_fds, false))
		check_hdr_and_close_fds(&sk_fds);

	bpf_link__destroy(link);
}

/* Common body of the simple_estab and no_exprm_estab subtests: a regular
 * connection with options expected in both directions.
 *
 * @exprm: when false, the bpf prog is switched from the default option
 *	   kind/magic (see reset_test()) to kind 0xB9 with magic 0.
 */
static void __simple_estab(bool exprm)
{
	struct sk_fds sk_fds;
	struct bpf_link *link;

	lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
	hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);

	/* What the active side is expected to receive */
	exp_active_estab_in.max_delack_ms = 22;
	exp_active_estab_in.rand = 0xce;
	exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;

	/* What the passive side is expected to receive */
	exp_passive_estab_in.max_delack_ms = 11;
	exp_passive_estab_in.rand = 0xfa;
	exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;

	prepare_out();

	if (exprm == false) {
		skel->data->test_magic = 0;
		skel->data->test_kind = 0xB9;
	}

	if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
		return;

	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
		  PTR_ERR(link)))
		return;

	/* The link is released whether or not the connection came up */
	if (!sk_fds_connect(&sk_fds, false))
		check_hdr_and_close_fds(&sk_fds);

	bpf_link__destroy(link);
}

/* Subtest: __simple_estab() with option kind 0xB9 and no magic */
static void no_exprm_estab(void)
{
	__simple_estab(false);
}

/* Subtest: __simple_estab() with the default option kind and magic that
 * reset_test() selects.
 */
static void simple_estab(void)
{
	__simple_estab(true);
}

/* Subtest for the misc_estab bpf prog: connect, send "MISC!!!" twice from
 * the active to the passive side, shut down, and then check the packet
 * counters kept by the bpf prog (nr_syn, nr_data, nr_pure_ack, nr_fin).
 *
 * Errors recorded by the bpf prog are always reported once the connection
 * has been established.
 */
static void misc(void)
{
	const char send_msg[] = "MISC!!!";
	const unsigned int nr_data = 2;
	char recv_msg[sizeof(send_msg)];
	struct sk_fds sk_fds;
	struct bpf_link *link;
	int nbytes, i;

	lport_linum_map_fd = bpf_map__fd(misc_skel->maps.lport_linum_map);

	if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
		return;

	link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
	if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
		  PTR_ERR(link)))
		return;

	if (sk_fds_connect(&sk_fds, false)) {
		bpf_link__destroy(link);
		return;
	}

	/* Each message is read back before the next one is sent */
	for (i = 0; i < nr_data; i++) {
		/* MSG_EOR to ensure skb will not be combined */
		nbytes = send(sk_fds.active_fd, send_msg, sizeof(send_msg),
			      MSG_EOR);
		if (CHECK(nbytes != sizeof(send_msg), "send(msg)", "ret:%d\n",
			  nbytes))
			goto check_linum;

		nbytes = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg));
		if (CHECK(nbytes != sizeof(send_msg), "read(msg)", "ret:%d\n",
			  nbytes))
			goto check_linum;
	}

	if (sk_fds_shutdown(&sk_fds))
		goto check_linum;

	CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn",
	      "expected (1) != actual (%u)\n",
	      misc_skel->bss->nr_syn);

	CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data",
	      "expected (%u) != actual (%u)\n",
	      nr_data, misc_skel->bss->nr_data);

	/* The last ACK may have been delayed, so it is either 1 or 2. */
	CHECK(!(misc_skel->bss->nr_pure_ack == 1 ||
		misc_skel->bss->nr_pure_ack == 2),
	      "unexpected nr_pure_ack",
	      "expected (1 or 2) != actual (%u)\n",
	      misc_skel->bss->nr_pure_ack);

	CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin",
	      "expected (1) != actual (%u)\n",
	      misc_skel->bss->nr_fin);

check_linum:
	CHECK_FAIL(check_error_linum(&sk_fds));
	sk_fds_close(&sk_fds);
	bpf_link__destroy(link);
}

/* One subtest: the name passed to test__start_subtest() and the function
 * that runs it.
 */
struct test {
	const char *desc;
	void (*run)(void);
};

/* Build a struct test whose description is the function's own name */
#define DEF_TEST(name) { #name, name }
/* Subtests, in the order test_tcp_hdr_options() runs them */
static struct test tests[] = {
	DEF_TEST(simple_estab),
	DEF_TEST(no_exprm_estab),
	DEF_TEST(syncookie_estab),
	DEF_TEST(fastopen_estab),
	DEF_TEST(fin),
	DEF_TEST(misc),
};

/* Test entry point: load both bpf skeletons, join the test cgroup and
 * run every selected subtest from tests[].
 *
 * Each subtest gets a fresh network namespace from create_netns() and is
 * followed by reset_test().  If the namespace cannot be set up, the
 * remaining subtests are not run.
 */
void test_tcp_hdr_options(void)
{
	int i;

	/* The failure messages end with '\n' like the others in this file */
	skel = test_tcp_hdr_options__open_and_load();
	if (CHECK(!skel, "open and load skel", "failed\n"))
		return;

	misc_skel = test_misc_tcp_hdr_options__open_and_load();
	if (CHECK(!misc_skel, "open and load misc test skel", "failed\n"))
		goto skel_destroy;

	cg_fd = test__join_cgroup(CG_NAME);
	if (CHECK_FAIL(cg_fd < 0))
		goto skel_destroy;

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		if (!test__start_subtest(tests[i].desc))
			continue;

		if (create_netns())
			break;

		tests[i].run();

		reset_test();
	}

	close(cg_fd);
skel_destroy:
	/* misc_skel is NULL here when its load failed */
	test_misc_tcp_hdr_options__destroy(misc_skel);
	test_tcp_hdr_options__destroy(skel);
}
+325 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */

#include <stddef.h>
#include <errno.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/socket.h>
#include <linux/bpf.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#define BPF_PROG_TEST_TCP_HDR_OPTIONS
#include "test_tcp_hdr_options.h"

/* Last 16 bits expected in both the source and the destination address of
 * the saved SYN's IPv6 header, in network byte order.
 */
__u16 last_addr16_n = __bpf_htons(0xeB9F);
/* Local ports of the two sides; the _n variants are in network byte order
 * (compared with TCP header fields), the _h variants are compared with
 * skops->local_port.
 */
__u16 active_lport_n = 0;
__u16 active_lport_h = 0;
__u16 passive_lport_n = 0;
__u16 passive_lport_h = 0;

/* Counters of packets seen at the passive side */
unsigned int nr_pure_ack = 0;
unsigned int nr_data = 0;
unsigned int nr_syn = 0;
unsigned int nr_fin = 0;

/* Check the header received from the active side */
/* Check the header received from the active side.
 *
 * Runs a fixed sequence of bpf_load_hdr_opt() calls, each with an exact
 * expected return value, covering both failing and successful searches
 * for the regular option (kind 0xB9) and the experimental option (magic
 * 0xeB9F).
 *
 * @check_syn: search the SYN (BPF_LOAD_HDR_OPT_TCP_SYN) instead of using
 *	       load flags 0, and additionally verify the SYN headers
 *	       returned by bpf_getsockopt(TCP_BPF_SYN_IP / TCP_BPF_SYN).
 *
 * Returns CG_OK when every check matches.  Mismatches go through
 * RET_CG_ERR(), which is defined outside this file; callers here treat
 * the result as CG_ERR -- confirm in test_tcp_hdr_options.h.
 */
static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
{
	/* One scratch buffer that is reused for every lookup below */
	union {
		struct tcphdr th;
		struct ipv6hdr ip6;
		struct tcp_exprm_opt exprm_opt;
		struct tcp_opt reg_opt;
		__u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */
	} hdr = {};
	__u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
	struct tcphdr *pth;
	int ret;

	hdr.reg_opt.kind = 0xB9;

	/* The option is 4 bytes long instead of 2 bytes */
	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags);
	if (ret != -ENOSPC)
		RET_CG_ERR(ret);

	/* Test searching magic with regular kind */
	hdr.reg_opt.len = 4;
	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
			       load_flags);
	if (ret != -EINVAL)
		RET_CG_ERR(ret);

	/* Search by kind only: the whole option, including the two data
	 * bytes 0xfa 0xce, is expected to be loaded.
	 */
	hdr.reg_opt.len = 0;
	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
			       load_flags);
	if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 ||
	    hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce)
		RET_CG_ERR(ret);

	/* Test searching experimental option with invalid kind length */
	hdr.exprm_opt.kind = TCPOPT_EXP;
	hdr.exprm_opt.len = 5;
	hdr.exprm_opt.magic = 0;
	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
			       load_flags);
	if (ret != -EINVAL)
		RET_CG_ERR(ret);

	/* Test searching experimental option with 0 magic value */
	hdr.exprm_opt.len = 4;
	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
			       load_flags);
	if (ret != -ENOMSG)
		RET_CG_ERR(ret);

	/* The matching magic must find the experimental option */
	hdr.exprm_opt.magic = __bpf_htons(0xeB9F);
	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
			       load_flags);
	if (ret != 4 || hdr.exprm_opt.len != 4 ||
	    hdr.exprm_opt.kind != TCPOPT_EXP ||
	    hdr.exprm_opt.magic != __bpf_htons(0xeB9F))
		RET_CG_ERR(ret);

	if (!check_syn)
		return CG_OK;

	/* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV
	 *
	 * Test loading from tp->saved_syn for other sk_state.
	 */
	/* A buffer that only fits the IPv6 header must yield -ENOSPC ... */
	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6,
			     sizeof(hdr.ip6));
	if (ret != -ENOSPC)
		RET_CG_ERR(ret);

	/* ... but the IPv6 header itself is still expected to be filled in */
	if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n ||
	    hdr.ip6.daddr.s6_addr16[7] != last_addr16_n)
		RET_CG_ERR(0);

	/* With the full buffer the TCP header follows the IPv6 header */
	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr));
	if (ret < 0)
		RET_CG_ERR(ret);

	pth = (struct tcphdr *)(&hdr.ip6 + 1);
	if (pth->dest != passive_lport_n || pth->source != active_lport_n)
		RET_CG_ERR(0);

	/* TCP_BPF_SYN: the buffer is expected to start with the TCP header */
	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr));
	if (ret < 0)
		RET_CG_ERR(ret);

	if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n)
		RET_CG_ERR(0);

	return CG_OK;
}

/* Run __check_active_hdr_in() against the SYN, including the
 * bpf_getsockopt() checks on the SYN headers.
 */
static int check_active_syn_in(struct bpf_sock_ops *skops)
{
	return __check_active_hdr_in(skops, true);
}

/* Run __check_active_hdr_in() with check_syn == false, then classify the
 * packet at skops->skb_data into the nr_data, nr_fin and nr_pure_ack
 * counters.
 *
 * Returns CG_OK, or CG_ERR if the option checks fail.
 */
static int check_active_hdr_in(struct bpf_sock_ops *skops)
{
	struct tcphdr *th;

	if (__check_active_hdr_in(skops, false) == CG_ERR)
		return CG_ERR;

	/* Make sure a full struct tcphdr lies before skb_data_end before
	 * any of its fields is read.
	 */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	/* skb is longer than the TCP header: counted as a data packet */
	if (tcp_hdrlen(th) < skops->skb_len)
		nr_data++;

	if (th->fin)
		nr_fin++;

	/* ACK without FIN and nothing beyond the TCP header */
	if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len)
		nr_pure_ack++;

	return CG_OK;
}

/* Reserve 12 bytes of header option space for the active side.
 *
 * Returns CG_OK on success; a non-zero result from bpf_reserve_hdr_opt()
 * is handed to RET_CG_ERR().
 */
static int active_opt_len(struct bpf_sock_ops *skops)
{
	int rc;

	/* Reserve more than enough to allow the -EEXIST test in
	 * the write_active_opt().
	 */
	rc = bpf_reserve_hdr_opt(skops, 12, 0);
	if (rc != 0)
		RET_CG_ERR(rc);

	return CG_OK;
}

/* Write the active side's two options (experimental option with magic
 * 0xeB9F, and regular option 0xB9 carrying 0xfa 0xce) into the outgoing
 * header and exercise the store/load helpers along the way:
 *  - storing the same option a second time must fail with -EEXIST,
 *  - both options must be loadable again after being stored,
 *  - on a SYN, the window scale option must be loadable and storing it
 *    again must fail with -EEXIST.
 *
 * On a SYN this also records the active side's local port in
 * active_lport_h and active_lport_n.
 *
 * Returns CG_OK when every step behaves as expected; mismatches go
 * through RET_CG_ERR().
 */
static int write_active_opt(struct bpf_sock_ops *skops)
{
	struct tcp_exprm_opt exprm_opt = {};
	struct tcp_opt win_scale_opt = {};
	struct tcp_opt reg_opt = {};
	struct tcphdr *th;
	int err, ret;

	exprm_opt.kind = TCPOPT_EXP;
	exprm_opt.len = 4;
	exprm_opt.magic = __bpf_htons(0xeB9F);

	reg_opt.kind = 0xB9;
	reg_opt.len = 4;
	reg_opt.data[0] = 0xfa;
	reg_opt.data[1] = 0xce;

	/* Only the kind is set; used below to search for the option */
	win_scale_opt.kind = TCPOPT_WINDOW;

	err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
	if (err)
		RET_CG_ERR(err);

	/* Store the same exprm option */
	err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
	if (err != -EEXIST)
		RET_CG_ERR(err);

	/* Same pair of stores for the regular option */
	err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
	if (err)
		RET_CG_ERR(err);
	err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
	if (err != -EEXIST)
		RET_CG_ERR(err);

	/* Check the option has been written and can be searched */
	ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
	if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP ||
	    exprm_opt.magic != __bpf_htons(0xeB9F))
		RET_CG_ERR(ret);

	/* len is cleared before the search and must come back as 4 */
	reg_opt.len = 0;
	ret = bpf_load_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
	if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 ||
	    reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce)
		RET_CG_ERR(ret);

	/* Make sure a full struct tcphdr lies before skb_data_end before
	 * any of its fields is read.
	 */
	th = skops->skb_data;
	if (th + 1 > skops->skb_data_end)
		RET_CG_ERR(0);

	if (th->syn) {
		active_lport_h = skops->local_port;
		active_lport_n = th->source;

		/* Search the win scale option written by kernel
		 * in the SYN packet.
		 */
		ret = bpf_load_hdr_opt(skops, &win_scale_opt,
				       sizeof(win_scale_opt), 0);
		if (ret != 3 || win_scale_opt.len != 3 ||
		    win_scale_opt.kind != TCPOPT_WINDOW)
			RET_CG_ERR(ret);

		/* Write the win scale option that kernel
		 * has already written.
		 */
		err = bpf_store_hdr_opt(skops, &win_scale_opt,
					sizeof(win_scale_opt), 0);
		if (err != -EEXIST)
			RET_CG_ERR(err);
	}

	return CG_OK;
}

/* BPF_SOCK_OPS_HDR_OPT_LEN_CB handler.
 *
 * For a SYNACK the incoming SYN is checked and no space is reserved.
 * Otherwise the call must come from the active side, which reserves its
 * option space; a call on the passive listener's port is an error.
 */
static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
{
	__u8 flags = skops_tcp_flags(skops);
	bool is_synack = (flags & TCPHDR_SYNACK) == TCPHDR_SYNACK;

	/* Check the SYN from bpf_sock_ops_kern->syn_skb */
	if (is_synack)
		return check_active_syn_in(skops);

	/* Passive side should have cleared the write hdr cb by now */
	if (skops->local_port == passive_lport_h)
		RET_CG_ERR(0);

	return active_opt_len(skops);
}

/* BPF_SOCK_OPS_WRITE_HDR_OPT_CB handler: only the active side is expected
 * to write options, so a call on the passive port is an error.
 */
static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
{
	bool on_passive_port = skops->local_port == passive_lport_h;

	if (on_passive_port)
		RET_CG_ERR(0);

	return write_active_opt(skops);
}

/* BPF_SOCK_OPS_PARSE_HDR_OPT_CB handler: checks the header received from
 * the active side.  A call on the active port is an error.
 */
static int handle_parse_hdr(struct bpf_sock_ops *skops)
{
	bool on_active_port = skops->local_port == active_lport_h;

	/* Passive side is not writing any non-standard/unknown
	 * option, so the active side should never be called.
	 */
	if (on_active_port)
		RET_CG_ERR(0);

	return check_active_hdr_in(skops);
}

/* BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB handler: turn the write-header
 * callback off for this socket, re-check the SYN, count it in nr_syn and
 * then check the packet that completed the handshake.
 */
static int handle_passive_estab(struct bpf_sock_ops *skops)
{
	/* No more write hdr cb */
	bpf_sock_ops_cb_flags_set(skops,
				  skops->bpf_sock_ops_cb_flags &
				  ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);

	/* Recheck the SYN but check the tp->saved_syn this time */
	if (check_active_syn_in(skops) == CG_ERR)
		return CG_ERR;

	nr_syn++;

	/* The ack has header option written by the active side also */
	return check_active_hdr_in(skops);
}

/* sockops entry point of the misc test.
 *
 * On listen: remember the passive port, enable TCP_SAVE_SYN and the header
 * callbacks.  On connect: enable the header callbacks.  The header
 * callbacks and the passive-established event are dispatched to their
 * handle_*() functions.  Every other op returns CG_OK.
 */
SEC("sockops/misc_estab")
int misc_estab(struct bpf_sock_ops *skops)
{
	int enable = 1;

	switch (skops->op) {
	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
		return handle_parse_hdr(skops);
	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
		return handle_hdr_opt_len(skops);
	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
		return handle_write_hdr_opt(skops);
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		return handle_passive_estab(skops);
	case BPF_SOCK_OPS_TCP_LISTEN_CB:
		passive_lport_h = skops->local_port;
		passive_lport_n = __bpf_htons(passive_lport_h);
		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
			       &enable, sizeof(enable));
		set_hdr_cb_flags(skops);
		break;
	case BPF_SOCK_OPS_TCP_CONNECT_CB:
		set_hdr_cb_flags(skops);
		break;
	default:
		break;
	}

	return CG_OK;
}

/* License string placed in the object's "license" section */
char _license[] SEC("license") = "GPL";
+623 −0

File added.

Preview size limit exceeded, changes collapsed.

+151 −0

File added.

Preview size limit exceeded, changes collapsed.