Commit a21b48a2 authored by Daniel Borkmann
Browse files

Merge branch 'bpf-proto-fixes'



Willem de Bruijn says:

====================
Expand the tc tunnel encap support with protocols that convert the
network layer protocol, such as 6in4. This is analogous to existing
support in bpf_skb_proto_6_to_4.

Patch 1 implements the straightforward logic.
Patch 2 tests it with a 6in4 tunnel.

Changes v1->v2
  - improve documentation in test
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents 2aad3261 f6ad6acc
Loading
Loading
Loading
Loading
+8 −0
Original line number Original line Diff line number Diff line
@@ -3081,6 +3081,14 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,


			skb_set_transport_header(skb, mac_len + nh_len);
			skb_set_transport_header(skb, mac_len + nh_len);
		}
		}

		/* Match skb->protocol to new outer l3 protocol */
		if (skb->protocol == htons(ETH_P_IP) &&
		    flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
			skb->protocol = htons(ETH_P_IPV6);
		else if (skb->protocol == htons(ETH_P_IPV6) &&
			 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
			skb->protocol = htons(ETH_P_IP);
	}
	}


	if (skb_is_gso(skb)) {
	if (skb_is_gso(skb)) {
+1 −0
Original line number Original line Diff line number Diff line
@@ -33,3 +33,4 @@ CONFIG_MPLS=y
CONFIG_NET_MPLS_GSO=m
CONFIG_NET_MPLS_GSO=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_IPV6_SIT=m
+60 −4
Original line number Original line Diff line number Diff line
@@ -77,17 +77,52 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
	struct v4hdr h_outer;
	struct v4hdr h_outer;
	struct tcphdr tcph;
	struct tcphdr tcph;
	int olen, l2_len;
	int olen, l2_len;
	int tcp_off;
	__u64 flags;
	__u64 flags;


	/* Most tests encapsulate a packet into a tunnel with the same
	 * network protocol, and derive the outer header fields from
	 * the inner header.
	 *
	 * The 6in4 case tests different inner and outer protocols. As
	 * the inner is ipv6, but the outer expects an ipv4 header as
	 * input, manually build a struct iphdr based on the ipv6hdr.
	 */
	if (encap_proto == IPPROTO_IPV6) {
		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
		struct ipv6hdr iph6_inner;

		/* Read the IPv6 header */
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
				       sizeof(iph6_inner)) < 0)
			return TC_ACT_OK;

		/* Derive the IPv4 header fields from the IPv6 header */
		memset(&iph_inner, 0, sizeof(iph_inner));
		iph_inner.version = 4;
		iph_inner.ihl = 5;
		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
				    bpf_ntohs(iph6_inner.payload_len));
		iph_inner.ttl = iph6_inner.hop_limit - 1;
		iph_inner.protocol = iph6_inner.nexthdr;
		iph_inner.saddr = __bpf_constant_htonl(saddr);
		iph_inner.daddr = __bpf_constant_htonl(daddr);

		tcp_off = sizeof(iph6_inner);
	} else {
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
				       sizeof(iph_inner)) < 0)
				       sizeof(iph_inner)) < 0)
			return TC_ACT_OK;
			return TC_ACT_OK;


		tcp_off = sizeof(iph_inner);
	}

	/* filter only packets we want */
	/* filter only packets we want */
	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
		return TC_ACT_OK;
		return TC_ACT_OK;


	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
			       &tcph, sizeof(tcph)) < 0)
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;
		return TC_ACT_OK;


@@ -129,6 +164,7 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
						  l2_len);
						  l2_len);
		break;
		break;
	case IPPROTO_IPIP:
	case IPPROTO_IPIP:
	case IPPROTO_IPV6:
		break;
		break;
	default:
	default:
		return TC_ACT_OK;
		return TC_ACT_OK;
@@ -164,6 +200,17 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
				BPF_F_INVALIDATE_HASH) < 0)
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;
		return TC_ACT_SHOT;


	/* if changing outer proto type, update eth->h_proto */
	if (encap_proto == IPPROTO_IPV6) {
		struct ethhdr eth;

		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
			return TC_ACT_SHOT;
		eth.h_proto = bpf_htons(ETH_P_IP);
		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
			return TC_ACT_SHOT;
	}

	return TC_ACT_OK;
	return TC_ACT_OK;
}
}


@@ -325,6 +372,15 @@ int __encap_udp_eth(struct __sk_buff *skb)
		return TC_ACT_OK;
		return TC_ACT_OK;
}
}


SEC("encap_sit_none")
int __encap_sit_none(struct __sk_buff *skb)
{
	/* Only IPv6 packets are candidates for 6in4 encapsulation;
	 * anything else is passed through untouched.
	 */
	if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
		return TC_ACT_OK;

	/* Wrap the IPv6 packet in an outer IPv4 header (no L2 encap) */
	return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
}

SEC("encap_ip6tnl_none")
SEC("encap_ip6tnl_none")
int __encap_ip6tnl_none(struct __sk_buff *skb)
int __encap_ip6tnl_none(struct __sk_buff *skb)
{
{
+19 −1
Original line number Original line Diff line number Diff line
@@ -97,6 +97,9 @@ if [[ "$#" -eq "0" ]]; then
	echo "ip6ip6"
	echo "ip6ip6"
	$0 ipv6 ip6tnl none 100
	$0 ipv6 ip6tnl none 100


	echo "sit"
	$0 ipv6 sit none 100

	for mac in none mpls eth ; do
	for mac in none mpls eth ; do
		echo "ip gre $mac"
		echo "ip gre $mac"
		$0 ipv4 gre $mac 100
		$0 ipv4 gre $mac 100
@@ -211,11 +214,20 @@ else
	targs=""
	targs=""
fi
fi


# Pick the tunnel endpoint addresses. For SIT the tunnel (outer) address
# family differs from the inner one, so the IPv4 namespace addresses are
# used; every other tunnel type reuses the inner addresses.
case "${tuntype}" in
sit)
	link_addr1="${ns1_v4}"
	link_addr2="${ns2_v4}"
	;;
*)
	link_addr1="${addr1}"
	link_addr2="${addr2}"
	;;
esac

# serverside, insert decap module
# serverside, insert decap module
# server is still running
# server is still running
# client can connect again
# client can connect again
ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
	${tmode} remote "${addr1}" local "${addr2}" $targs
	${tmode} remote "${link_addr1}" local "${link_addr2}" $targs


expect_tun_fail=0
expect_tun_fail=0


@@ -260,6 +272,12 @@ else
	server_listen
	server_listen
fi
fi


# bpf_skb_net_shrink does not take tunnel flags yet, cannot update L3.
# The BPF decap stage below is therefore skipped for SIT: report success
# and stop here.
case "${tuntype}" in
sit)
	echo OK
	exit 0
	;;
esac

# serverside, use BPF for decap
# serverside, use BPF for decap
ip netns exec "${ns2}" ip link del dev testtun0
ip netns exec "${ns2}" ip link del dev testtun0
ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
ip netns exec "${ns2}" tc qdisc add dev veth2 clsact