selftests_bpf: add L2 encap to test_tc_tunnel (3ec61df8) · Commits · 戴 / test

tools/testing/selftests/bpf/config

+4 −0

Original line number	Diff line number	Diff line
		@@ -29,3 +29,7 @@ CONFIG_NET_FOU=m
		CONFIG_NET_FOU_IP_TUNNELS=y
		CONFIG_IPV6_FOU=m
		CONFIG_IPV6_FOU_TUNNEL=m
		CONFIG_MPLS=y
		CONFIG_NET_MPLS_GSO=m
		CONFIG_MPLS_ROUTING=m
		CONFIG_MPLS_IPTUNNEL=m

tools/testing/selftests/bpf/progs/test_tc_tunnel.c

+191 −28

Original line number	Diff line number	Diff line
		@@ -11,6 +11,7 @@
		#include <linux/in.h>
		#include <linux/ip.h>
		#include <linux/ipv6.h>
		#include <linux/mpls.h>
		#include <linux/tcp.h>
		#include <linux/udp.h>
		#include <linux/pkt_cls.h>
		@@ -22,7 +23,14 @@
		static const int cfg_port = 8000;

		static const int cfg_udp_src = 20000;
		static const int cfg_udp_dst = 5555;

		#define UDP_PORT 5555
		#define MPLS_OVER_UDP_PORT 6635
		#define ETH_OVER_UDP_PORT 7777

		/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
		static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
		MPLS_LS_S_MASK | 0xff);

		struct gre_hdr {
		__be16 flags;
		@@ -37,11 +45,13 @@ union l4hdr {
		struct v4hdr {
		struct iphdr ip;
		union l4hdr l4hdr;
		__u8 pad[16]; /* enough space for L2 header */
		} __attribute__((packed));

		struct v6hdr {
		struct ipv6hdr ip;
		union l4hdr l4hdr;
		__u8 pad[16]; /* enough space for L2 header */
		} __attribute__((packed));

		static __always_inline void set_ipv4_csum(struct iphdr *iph)
		@@ -59,13 +69,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
		iph->check = ~((csum & 0xffff) + (csum >> 16));
		}

		static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto)
		static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
		__u16 l2_proto)
		{
		__u16 udp_dst = UDP_PORT;
		struct iphdr iph_inner;
		struct v4hdr h_outer;
		struct tcphdr tcph;
		int olen, l2_len;
		__u64 flags;
		int olen;

		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
		sizeof(iph_inner)) < 0)
		@@ -83,23 +95,38 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto)
		return TC_ACT_OK;

		olen = sizeof(h_outer.ip);
		l2_len = 0;

		flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;

		switch (l2_proto) {
		case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
		case ETH_P_TEB:
		l2_len = ETH_HLEN;
		udp_dst = ETH_OVER_UDP_PORT;
		break;
		}
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

		switch (encap_proto) {
		case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(ETH_P_IP);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
		case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = __bpf_constant_htons(cfg_udp_dst);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		h_outer.l4hdr.udp.check = 0;
		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
		sizeof(h_outer.l4hdr.udp));
		sizeof(h_outer.l4hdr.udp) +
		l2_len);
		break;
		case IPPROTO_IPIP:
		break;
		@@ -107,6 +134,19 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto)
		return TC_ACT_OK;
		}

		/* add L2 encap (if specified) */
		switch (l2_proto) {
		case ETH_P_MPLS_UC:
		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
		break;
		case ETH_P_TEB:
		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
		ETH_HLEN))
		return TC_ACT_SHOT;
		break;
		}
		olen += l2_len;

		/* add room between mac and network header */
		if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;
		@@ -127,14 +167,16 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto)
		return TC_ACT_OK;
		}

		static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto)
		static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
		__u16 l2_proto)
		{
		__u16 udp_dst = UDP_PORT;
		struct ipv6hdr iph_inner;
		struct v6hdr h_outer;
		struct tcphdr tcph;
		int olen, l2_len;
		__u16 tot_len;
		__u64 flags;
		int olen;

		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
		sizeof(iph_inner)) < 0)
		@@ -149,20 +191,34 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto)
		return TC_ACT_OK;

		olen = sizeof(h_outer.ip);
		l2_len = 0;

		flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

		switch (l2_proto) {
		case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
		case ETH_P_TEB:
		l2_len = ETH_HLEN;
		udp_dst = ETH_OVER_UDP_PORT;
		break;
		}
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

		switch (encap_proto) {
		case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(ETH_P_IPV6);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
		case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = __bpf_constant_htons(cfg_udp_dst);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
		sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.check = 0;
		@@ -174,6 +230,19 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto)
		return TC_ACT_OK;
		}

		/* add L2 encap (if specified) */
		switch (l2_proto) {
		case ETH_P_MPLS_UC:
		*((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
		break;
		case ETH_P_TEB:
		if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
		ETH_HLEN))
		return TC_ACT_SHOT;
		break;
		}
		olen += l2_len;

		/* add room between mac and network header */
		if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;
		@@ -193,56 +262,128 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto)
		return TC_ACT_OK;
		}

		SEC("encap_ipip")
		int __encap_ipip(struct __sk_buff *skb)
		SEC("encap_ipip_none")
		int __encap_ipip_none(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
		else
		return TC_ACT_OK;
		}

		SEC("encap_gre_none")
		int __encap_gre_none(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
		else
		return TC_ACT_OK;
		}

		SEC("encap_gre_mpls")
		int __encap_gre_mpls(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
		else
		return TC_ACT_OK;
		}

		SEC("encap_gre_eth")
		int __encap_gre_eth(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
		else
		return TC_ACT_OK;
		}

		SEC("encap_udp_none")
		int __encap_udp_none(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_IPIP);
		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
		else
		return TC_ACT_OK;
		}

		SEC("encap_gre")
		int __encap_gre(struct __sk_buff *skb)
		SEC("encap_udp_mpls")
		int __encap_udp_mpls(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_GRE);
		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
		else
		return TC_ACT_OK;
		}

		SEC("encap_udp")
		int __encap_udp(struct __sk_buff *skb)
		SEC("encap_udp_eth")
		int __encap_udp_eth(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
		return encap_ipv4(skb, IPPROTO_UDP);
		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
		else
		return TC_ACT_OK;
		}

		SEC("encap_ip6tnl_none")
		int __encap_ip6tnl_none(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
		else
		return TC_ACT_OK;
		}

		SEC("encap_ip6gre_none")
		int __encap_ip6gre_none(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
		else
		return TC_ACT_OK;
		}

		SEC("encap_ip6tnl")
		int __encap_ip6tnl(struct __sk_buff *skb)
		SEC("encap_ip6gre_mpls")
		int __encap_ip6gre_mpls(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_IPV6);
		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
		else
		return TC_ACT_OK;
		}

		SEC("encap_ip6gre")
		int __encap_ip6gre(struct __sk_buff *skb)
		SEC("encap_ip6gre_eth")
		int __encap_ip6gre_eth(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_GRE);
		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
		else
		return TC_ACT_OK;
		}

		SEC("encap_ip6udp")
		int __encap_ip6udp(struct __sk_buff *skb)
		SEC("encap_ip6udp_none")
		int __encap_ip6udp_none(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_UDP);
		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
		else
		return TC_ACT_OK;
		}

		SEC("encap_ip6udp_mpls")
		int __encap_ip6udp_mpls(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
		else
		return TC_ACT_OK;
		}

		SEC("encap_ip6udp_eth")
		int __encap_ip6udp_eth(struct __sk_buff *skb)
		{
		if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
		else
		return TC_ACT_OK;
		}
		@@ -250,6 +391,8 @@ int __encap_ip6udp(struct __sk_buff *skb)
		static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
		{
		char buf[sizeof(struct v6hdr)];
		struct gre_hdr greh;
		struct udphdr udph;
		int olen = len;

		switch (proto) {
		@@ -258,9 +401,29 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
		break;
		case IPPROTO_GRE:
		olen += sizeof(struct gre_hdr);
		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
		return TC_ACT_OK;
		switch (bpf_ntohs(greh.protocol)) {
		case ETH_P_MPLS_UC:
		olen += sizeof(mpls_label);
		break;
		case ETH_P_TEB:
		olen += ETH_HLEN;
		break;
		}
		break;
		case IPPROTO_UDP:
		olen += sizeof(struct udphdr);
		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
		return TC_ACT_OK;
		switch (bpf_ntohs(udph.dest)) {
		case MPLS_OVER_UDP_PORT:
		olen += sizeof(mpls_label);
		break;
		case ETH_OVER_UDP_PORT:
		olen += ETH_HLEN;
		break;
		}
		break;
		default:
		return TC_ACT_OK;

tools/testing/selftests/bpf/test_tc_tunnel.sh

+82 −31

Original line number	Diff line number	Diff line
		@@ -17,6 +17,9 @@ readonly ns2_v6=fd::2

		# Must match port used by bpf program
		readonly udpport=5555
		# MPLSoverUDP
		readonly mplsudpport=6635
		readonly mplsproto=137

		readonly infile="$(mktemp)"
		readonly outfile="$(mktemp)"
		@@ -41,8 +44,8 @@ setup() {
		# clamp route to reserve room for tunnel headers
		ip -netns "${ns1}" -4 route flush table main
		ip -netns "${ns1}" -6 route flush table main
		ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1472 dev veth1
		ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1452 dev veth1
		ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
		ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1

		sleep 1

		@@ -89,42 +92,44 @@ set -e
		# no arguments: automated test, run all
		if [[ "$#" -eq "0" ]]; then
		echo "ipip"
		$0 ipv4 ipip 100
		$0 ipv4 ipip none 100

		echo "ip6ip6"
		$0 ipv6 ip6tnl 100
		$0 ipv6 ip6tnl none 100

		echo "ip gre"
		$0 ipv4 gre 100
		for mac in none mpls eth ; do
		echo "ip gre $mac"
		$0 ipv4 gre $mac 100

		echo "ip6 gre"
		$0 ipv6 ip6gre 100
		echo "ip6 gre $mac"
		$0 ipv6 ip6gre $mac 100

		echo "ip gre gso"
		$0 ipv4 gre 2000
		echo "ip gre $mac gso"
		$0 ipv4 gre $mac 2000

		echo "ip6 gre gso"
		$0 ipv6 ip6gre 2000
		echo "ip6 gre $mac gso"
		$0 ipv6 ip6gre $mac 2000

		echo "ip udp"
		$0 ipv4 udp 100
		echo "ip udp $mac"
		$0 ipv4 udp $mac 100

		echo "ip6 udp"
		$0 ipv6 ip6udp 100
		echo "ip6 udp $mac"
		$0 ipv6 ip6udp $mac 100

		echo "ip udp gso"
		$0 ipv4 udp 2000
		echo "ip udp $mac gso"
		$0 ipv4 udp $mac 2000

		echo "ip6 udp gso"
		$0 ipv6 ip6udp 2000
		echo "ip6 udp $mac gso"
		$0 ipv6 ip6udp $mac 2000
		done

		echo "OK. All tests passed"
		exit 0
		fi

		if [[ "$#" -ne "3" ]]; then
		if [[ "$#" -ne "4" ]]; then
		echo "Usage: $0"
		echo " or: $0 <ipv4|ipv6> <tuntype> <data_len>"
		echo " or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>"
		exit 1
		fi

		@@ -137,6 +142,8 @@ case "$1" in
		readonly foumod=fou
		readonly foutype=ipip
		readonly fouproto=4
		readonly fouproto_mpls=${mplsproto}
		readonly gretaptype=gretap
		;;
		"ipv6")
		readonly addr1="${ns1_v6}"
		@@ -146,6 +153,8 @@ case "$1" in
		readonly foumod=fou6
		readonly foutype=ip6tnl
		readonly fouproto="41 -6"
		readonly fouproto_mpls="${mplsproto} -6"
		readonly gretaptype=ip6gretap
		;;
		*)
		echo "unknown arg: $1"
		@@ -154,9 +163,10 @@ case "$1" in
		esac

		readonly tuntype=$2
		readonly datalen=$3
		readonly mac=$3
		readonly datalen=$4

		echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}"
		echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}"

		trap cleanup EXIT

		@@ -173,7 +183,7 @@ verify_data
		ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
		ip netns exec "${ns1}" tc filter add dev veth1 egress \
		bpf direct-action object-file ./test_tc_tunnel.o \
		section "encap_${tuntype}"
		section "encap_${tuntype}_${mac}"
		echo "test bpf encap without decap (expect failure)"
		server_listen
		! client_connect
		@@ -184,7 +194,18 @@ if [[ "$tuntype" =~ "udp" ]]; then
		targs="encap fou encap-sport auto encap-dport $udpport"
		# fou may be a module; allow this to fail.
		modprobe "${foumod}" ||true
		ip netns exec "${ns2}" ip fou add port 5555 ipproto ${fouproto}
		if [[ "$mac" == "mpls" ]]; then
		dport=${mplsudpport}
		dproto=${fouproto_mpls}
		tmode="mode any ttl 255"
		else
		dport=${udpport}
		dproto=${fouproto}
		fi
		ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto}
		targs="encap fou encap-sport auto encap-dport $dport"
		elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
		ttype=$gretaptype
		else
		ttype=$tuntype
		targs=""
		@@ -194,7 +215,31 @@ fi
		# server is still running
		# client can connect again
		ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
		remote "${addr1}" local "${addr2}" $targs
		${tmode} remote "${addr1}" local "${addr2}" $targs

		expect_tun_fail=0

		if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
		# No support for MPLS IPv6 fou tunnel; expect failure.
		expect_tun_fail=1
		elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
		# No support for TEB fou tunnel; expect failure.
		expect_tun_fail=1
		elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
		# Share ethernet address between tunnel/veth2 so L2 decap works.
		ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
		awk '/ether/ { print $2 }')
		ip netns exec "${ns2}" ip link set testtun0 address $ethaddr
		elif [[ "$mac" == "mpls" ]]; then
		modprobe mpls_iptunnel ||true
		modprobe mpls_gso ||true
		ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536
		ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo
		ip netns exec "${ns2}" ip link set lo up
		ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1
		ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0
		fi

		# Because packets are decapped by the tunnel they arrive on testtun0 from
		# the IP stack perspective. Ensure reverse path filtering is disabled
		# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
		@@ -204,16 +249,22 @@ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
		# selected as the max of the "all" and device-specific values.
		ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
		ip netns exec "${ns2}" ip link set dev testtun0 up
		if [[ "$expect_tun_fail" == 1 ]]; then
		# This tunnel mode is not supported, so we expect failure.
		echo "test bpf encap with tunnel device decap (expect failure)"
		! client_connect
		else
		echo "test bpf encap with tunnel device decap"
		client_connect
		verify_data
		server_listen
		fi

		# serverside, use BPF for decap
		ip netns exec "${ns2}" ip link del dev testtun0
		ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
		ip netns exec "${ns2}" tc filter add dev veth2 ingress \
		bpf direct-action object-file ./test_tc_tunnel.o section decap
		server_listen
		echo "test bpf encap with bpf decap"
		client_connect
		verify_data

Admin message