Commit ba452c9e authored by Toke Høiland-Jørgensen's avatar Toke Høiland-Jørgensen Committed by Daniel Borkmann
Browse files

bpf: Fix bpf_redirect_neigh helper api to support supplying nexthop

Based on the discussion in [0], update the bpf_redirect_neigh() helper to
accept an optional parameter specifying the nexthop information. This makes
it possible to combine bpf_fib_lookup() and bpf_redirect_neigh() without
incurring a duplicate FIB lookup - since the FIB lookup helper will return
the nexthop information even if no neighbour is present, this can simply
be passed on to bpf_redirect_neigh() if bpf_fib_lookup() returns
BPF_FIB_LKUP_RET_NO_NEIGH. Thus fix & extend it before helper API is frozen.

  [0] https://lore.kernel.org/bpf/393e17fc-d187-3a8d-2f0d-a627c7c63fca@iogearbox.net/



Signed-off-by: default avatarToke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Reviewed-by: default avatarDavid Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/bpf/160322915615.32199.1187570224032024535.stgit@toke.dk
parent c5eb48e8
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -607,12 +607,21 @@ struct bpf_skb_data_end {
	void *data_end;
};

struct bpf_nh_params {
	u32 nh_family;
	union {
		u32 ipv4_nh;
		struct in6_addr ipv6_nh;
	};
};

struct bpf_redirect_info {
	u32 flags;
	u32 tgt_index;
	void *tgt_value;
	struct bpf_map *map;
	u32 kern_flags;
	struct bpf_nh_params nh;
};

DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
+18 −4
Original line number Diff line number Diff line
@@ -3677,15 +3677,19 @@ union bpf_attr {
 * 	Return
 * 		The id is returned or 0 in case the id could not be retrieved.
 *
 * long bpf_redirect_neigh(u32 ifindex, u64 flags)
 * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
 * 	Description
 * 		Redirect the packet to another net device of index *ifindex*
 * 		and fill in L2 addresses from neighboring subsystem. This helper
 * 		is somewhat similar to **bpf_redirect**\ (), except that it
 * 		populates L2 addresses as well, meaning, internally, the helper
 * 		performs a FIB lookup based on the skb's networking header to
 * 		get the address of the next hop and then relies on the neighbor
 * 		lookup for the L2 address of the nexthop.
 * 		relies on the neighbor lookup for the L2 address of the nexthop.
 *
 * 		The helper will perform a FIB lookup based on the skb's
 * 		networking header to get the address of the next hop, unless
 * 		this is supplied by the caller in the *params* argument. The
 * 		*plen* argument indicates the len of *params* and should be set
 * 		to 0 if *params* is NULL.
 *
 * 		The *flags* argument is reserved and must be 0. The helper is
 * 		currently only supported for tc BPF program types, and enabled
@@ -4906,6 +4910,16 @@ struct bpf_fib_lookup {
	__u8	dmac[6];     /* ETH_ALEN */
};

struct bpf_redir_neigh {
	/* network family for lookup (AF_INET, AF_INET6) */
	__u32 nh_family;
	/* network address of nexthop; skips fib lookup to find gateway */
	union {
		__be32		ipv4_nh;
		__u32		ipv6_nh[4];  /* in6_addr; network order */
	};
};

enum bpf_task_fd_type {
	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
+99 −59
Original line number Diff line number Diff line
@@ -2165,12 +2165,12 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
}

#if IS_ENABLED(CONFIG_IPV6)
static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
			    struct net_device *dev, struct bpf_nh_params *nh)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	u32 hh_len = LL_RESERVED_SPACE(dev);
	const struct in6_addr *nexthop;
	struct dst_entry *dst = NULL;
	struct neighbour *neigh;

	if (dev_xmit_recursion()) {
@@ -2196,8 +2196,13 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
	}

	rcu_read_lock_bh();
	if (!nh) {
		dst = skb_dst(skb);
		nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
				      &ipv6_hdr(skb)->daddr);
	} else {
		nexthop = &nh->ipv6_nh;
	}
	neigh = ip_neigh_gw6(dev, nexthop);
	if (likely(!IS_ERR(neigh))) {
		int ret;
@@ -2210,6 +2215,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
		return ret;
	}
	rcu_read_unlock_bh();
	if (dst)
		IP6_INC_STATS(dev_net(dst->dev),
			      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
out_drop:
@@ -2217,11 +2223,14 @@ out_drop:
	return -ENETDOWN;
}

static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
				   struct bpf_nh_params *nh)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct net *net = dev_net(dev);
	int err, ret = NET_XMIT_DROP;

	if (!nh) {
		struct dst_entry *dst;
		struct flowi6 fl6 = {
			.flowi6_flags = FLOWI_FLAG_ANYSRC,
@@ -2238,8 +2247,11 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
			goto out_drop;

		skb_dst_set(skb, dst);
	} else if (nh->nh_family != AF_INET6) {
		goto out_drop;
	}

	err = bpf_out_neigh_v6(net, skb);
	err = bpf_out_neigh_v6(net, skb, dev, nh);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
@@ -2252,7 +2264,8 @@ out_xmit:
	return ret;
}
#else
static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
				   struct bpf_nh_params *nh)
{
	kfree_skb(skb);
	return NET_XMIT_DROP;
@@ -2260,11 +2273,9 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
#endif /* CONFIG_IPV6 */

#if IS_ENABLED(CONFIG_INET)
static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
			    struct net_device *dev, struct bpf_nh_params *nh)
{
	struct dst_entry *dst = skb_dst(skb);
	struct rtable *rt = container_of(dst, struct rtable, dst);
	struct net_device *dev = dst->dev;
	u32 hh_len = LL_RESERVED_SPACE(dev);
	struct neighbour *neigh;
	bool is_v6gw = false;
@@ -2292,7 +2303,21 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
	}

	rcu_read_lock_bh();
	if (!nh) {
		struct dst_entry *dst = skb_dst(skb);
		struct rtable *rt = container_of(dst, struct rtable, dst);

		neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
	} else if (nh->nh_family == AF_INET6) {
		neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
		is_v6gw = true;
	} else if (nh->nh_family == AF_INET) {
		neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
	} else {
		rcu_read_unlock_bh();
		goto out_drop;
	}

	if (likely(!IS_ERR(neigh))) {
		int ret;

@@ -2309,12 +2334,14 @@ out_drop:
	return -ENETDOWN;
}

static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
				   struct bpf_nh_params *nh)
{
	const struct iphdr *ip4h = ip_hdr(skb);
	struct net *net = dev_net(dev);
	int err, ret = NET_XMIT_DROP;
	struct rtable *rt;

	if (!nh) {
		struct flowi4 fl4 = {
			.flowi4_flags = FLOWI_FLAG_ANYSRC,
			.flowi4_mark  = skb->mark,
@@ -2324,6 +2351,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
			.daddr	      = ip4h->daddr,
			.saddr	      = ip4h->saddr,
		};
		struct rtable *rt;

		rt = ip_route_output_flow(net, &fl4, NULL);
		if (IS_ERR(rt))
@@ -2334,8 +2362,9 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
		}

		skb_dst_set(skb, &rt->dst);
	}

	err = bpf_out_neigh_v4(net, skb);
	err = bpf_out_neigh_v4(net, skb, dev, nh);
	if (unlikely(net_xmit_eval(err)))
		dev->stats.tx_errors++;
	else
@@ -2348,14 +2377,16 @@ out_xmit:
	return ret;
}
#else
static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
				   struct bpf_nh_params *nh)
{
	kfree_skb(skb);
	return NET_XMIT_DROP;
}
#endif /* CONFIG_INET */

static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
				struct bpf_nh_params *nh)
{
	struct ethhdr *ethh = eth_hdr(skb);

@@ -2370,9 +2401,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
	skb_reset_network_header(skb);

	if (skb->protocol == htons(ETH_P_IP))
		return __bpf_redirect_neigh_v4(skb, dev);
		return __bpf_redirect_neigh_v4(skb, dev, nh);
	else if (skb->protocol == htons(ETH_P_IPV6))
		return __bpf_redirect_neigh_v6(skb, dev);
		return __bpf_redirect_neigh_v6(skb, dev, nh);
out:
	kfree_skb(skb);
	return -ENOTSUPP;
@@ -2382,7 +2413,8 @@ out:
enum {
	BPF_F_NEIGH	= (1ULL << 1),
	BPF_F_PEER	= (1ULL << 2),
#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH | BPF_F_PEER)
	BPF_F_NEXTHOP	= (1ULL << 3),
#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
};

BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
@@ -2455,7 +2487,8 @@ int skb_do_redirect(struct sk_buff *skb)
		return -EAGAIN;
	}
	return flags & BPF_F_NEIGH ?
	       __bpf_redirect_neigh(skb, dev) :
	       __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ?
				    &ri->nh : NULL) :
	       __bpf_redirect(skb, dev, flags);
out_drop:
	kfree_skb(skb);
@@ -2504,16 +2537,21 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = {
	.arg2_type      = ARG_ANYTHING,
};

BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
	   int, plen, u64, flags)
{
	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);

	if (unlikely(flags))
	if (unlikely((plen && plen < sizeof(*params)) || flags))
		return TC_ACT_SHOT;

	ri->flags = BPF_F_NEIGH;
	ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
	ri->tgt_index = ifindex;

	BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
	if (plen)
		memcpy(&ri->nh, params, sizeof(ri->nh));

	return TC_ACT_REDIRECT;
}

@@ -2522,7 +2560,9 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = {
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_ANYTHING,
	.arg2_type      = ARG_PTR_TO_MEM_OR_NULL,
	.arg3_type      = ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
+1 −0
Original line number Diff line number Diff line
@@ -453,6 +453,7 @@ class PrinterHelpers(Printer):
            'struct bpf_perf_event_data',
            'struct bpf_perf_event_value',
            'struct bpf_pidns_info',
            'struct bpf_redir_neigh',
            'struct bpf_sk_lookup',
            'struct bpf_sock',
            'struct bpf_sock_addr',
+18 −4
Original line number Diff line number Diff line
@@ -3677,15 +3677,19 @@ union bpf_attr {
 * 	Return
 * 		The id is returned or 0 in case the id could not be retrieved.
 *
 * long bpf_redirect_neigh(u32 ifindex, u64 flags)
 * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
 * 	Description
 * 		Redirect the packet to another net device of index *ifindex*
 * 		and fill in L2 addresses from neighboring subsystem. This helper
 * 		is somewhat similar to **bpf_redirect**\ (), except that it
 * 		populates L2 addresses as well, meaning, internally, the helper
 * 		performs a FIB lookup based on the skb's networking header to
 * 		get the address of the next hop and then relies on the neighbor
 * 		lookup for the L2 address of the nexthop.
 * 		relies on the neighbor lookup for the L2 address of the nexthop.
 *
 * 		The helper will perform a FIB lookup based on the skb's
 * 		networking header to get the address of the next hop, unless
 * 		this is supplied by the caller in the *params* argument. The
 * 		*plen* argument indicates the len of *params* and should be set
 * 		to 0 if *params* is NULL.
 *
 * 		The *flags* argument is reserved and must be 0. The helper is
 * 		currently only supported for tc BPF program types, and enabled
@@ -4906,6 +4910,16 @@ struct bpf_fib_lookup {
	__u8	dmac[6];     /* ETH_ALEN */
};

struct bpf_redir_neigh {
	/* network family for lookup (AF_INET, AF_INET6) */
	__u32 nh_family;
	/* network address of nexthop; skips fib lookup to find gateway */
	union {
		__be32		ipv4_nh;
		__u32		ipv6_nh[4];  /* in6_addr; network order */
	};
};

enum bpf_task_fd_type {
	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
	BPF_FD_TYPE_TRACEPOINT,		/* tp name */