Commit 673e3752 authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'Follow-up BPF helper improvements'

Daniel Borkmann says:

====================

This series addresses most of the feedback [0] that was to be followed
up from the last series, that is, UAPI helper comment improvements and
getting rid of the ifindex obj file hacks in the selftest by using a
BPF map instead. The __sk_buff data/data_end pointer work, I'm planning
to do in a later round as well as the mem*() BPF improvements we have
in Cilium for libbpf. Next, the series adds two features, i) a helper
called redirect_peer() to improve latency on netns switch, and ii) to
allow map in map with dynamic inner array map sizes. Selftests for each
are added as well. For details, please check individual patches, thanks!

  [0] https://lore.kernel.org/bpf/cover.1601477936.git.daniel@iogearbox.net/



v5 -> v6:
  - Going with Andrii's suggestion to make the misconfigured verifier
    test more robust, and only probe on -EOPNOTSUPP (Andrii)
v4 -> v5:
  - Replace cnt == -EOPNOTSUPP check with cnt < 0; I've used < 0
    here as I think it's useful to keep the existing cnt == 0 ||
    cnt >= ARRAY_SIZE(insn_buf) for error detection (Andrii)
v3 -> v4:
  - Rename new array map flag to BPF_F_INNER_MAP (Alexei)
v2 -> v3:
  - Remove tab that slipped into uapi helper desc (Jakub)
  - Rework map in map for array to error from map_gen_lookup (Andrii)
v1 -> v2:
  - Fixed selftest comment wrt inner1/inner2 value (Yonghong)
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents ac53a0d3 9f4c53ca
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -420,6 +420,14 @@ static int veth_select_rxq(struct net_device *dev)
	return smp_processor_id() % dev->real_num_rx_queues;
}

static struct net_device *veth_peer_dev(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);

	/* Callers must be under RCU read side. */
	return rcu_dereference(priv->peer);
}

static int veth_xdp_xmit(struct net_device *dev, int n,
			 struct xdp_frame **frames,
			 u32 flags, bool ndo_xmit)
@@ -1224,6 +1232,7 @@ static const struct net_device_ops veth_netdev_ops = {
	.ndo_set_rx_headroom	= veth_set_rx_headroom,
	.ndo_bpf		= veth_xdp,
	.ndo_xdp_xmit		= veth_ndo_xdp_xmit,
	.ndo_get_peer_dev	= veth_peer_dev,
};

#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
+1 −1
Original line number Diff line number Diff line
@@ -82,7 +82,7 @@ struct bpf_map_ops {
	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
				int fd);
	void (*map_fd_put_ptr)(void *ptr);
	u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
	int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
	u32 (*map_fd_sys_lookup_elem)(void *ptr);
	void (*map_seq_show_elem)(struct bpf_map *map, void *key,
				  struct seq_file *m);
+4 −0
Original line number Diff line number Diff line
@@ -1277,6 +1277,9 @@ struct netdev_net_notifier {
 * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
 *			 int cmd);
 *	Add, change, delete or get information on an IPv4 tunnel.
 * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
 *	If a device is paired with a peer device, return the peer instance.
 *	The caller must be under RCU read context.
 */
struct net_device_ops {
	int			(*ndo_init)(struct net_device *dev);
@@ -1484,6 +1487,7 @@ struct net_device_ops {
	struct devlink_port *	(*ndo_get_devlink_port)(struct net_device *dev);
	int			(*ndo_tunnel_ctl)(struct net_device *dev,
						  struct ip_tunnel_parm *p, int cmd);
	struct net_device *	(*ndo_get_peer_dev)(struct net_device *dev);
};

/**
+27 −3
Original line number Diff line number Diff line
@@ -435,6 +435,9 @@ enum {

/* Share perf_event among processes */
	BPF_F_PRESERVE_ELEMS	= (1U << 11),

/* Create a map that is suitable to be an inner map with dynamic max entries */
	BPF_F_INNER_MAP		= (1U << 12),
};

/* Flags for BPF_PROG_QUERY. */
@@ -3679,10 +3682,14 @@ union bpf_attr {
 * 		Redirect the packet to another net device of index *ifindex*
 * 		and fill in L2 addresses from neighboring subsystem. This helper
 * 		is somewhat similar to **bpf_redirect**\ (), except that it
 * 		fills in e.g. MAC addresses based on the L3 information from
 * 		the packet. This helper is supported for IPv4 and IPv6 protocols.
 * 		populates L2 addresses as well, meaning, internally, the helper
 * 		performs a FIB lookup based on the skb's networking header to
 * 		get the address of the next hop and then relies on the neighbor
 * 		lookup for the L2 address of the nexthop.
 *
 * 		The *flags* argument is reserved and must be 0. The helper is
 * 		currently only supported for tc BPF program types.
 * 		currently only supported for tc BPF program types, and enabled
 * 		for IPv4 and IPv6 protocols.
 * 	Return
 * 		The helper returns **TC_ACT_REDIRECT** on success or
 * 		**TC_ACT_SHOT** on error.
@@ -3715,6 +3722,22 @@ union bpf_attr {
 *		never return NULL.
 *	Return
 *		A pointer pointing to the kernel percpu variable on this cpu.
 *
 * long bpf_redirect_peer(u32 ifindex, u64 flags)
 * 	Description
 * 		Redirect the packet to another net device of index *ifindex*.
 * 		This helper is somewhat similar to **bpf_redirect**\ (), except
 * 		that the redirection happens to the *ifindex*' peer device and
 * 		the netns switch takes place from ingress to ingress without
 * 		going through the CPU's backlog queue.
 *
 * 		The *flags* argument is reserved and must be 0. The helper is
 * 		currently only supported for tc BPF program types at the ingress
 * 		hook and for veth device types. The peer device must reside in a
 * 		different network namespace.
 * 	Return
 * 		The helper returns **TC_ACT_REDIRECT** on success or
 * 		**TC_ACT_SHOT** on error.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -3872,6 +3895,7 @@ union bpf_attr {
	FN(redirect_neigh),		\
	FN(bpf_per_cpu_ptr),            \
	FN(bpf_this_cpu_ptr),		\
	FN(redirect_peer),		\
	/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+11 −6
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
	 BPF_F_PRESERVE_ELEMS)
	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)

static void bpf_array_free_percpu(struct bpf_array *array)
{
@@ -62,7 +62,7 @@ int array_map_alloc_check(union bpf_attr *attr)
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
	    attr->map_flags & BPF_F_MMAPABLE)
	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
@@ -214,7 +214,7 @@ static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
@@ -223,6 +223,9 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	if (map->map_flags & BPF_F_INNER_MAP)
		return -EOPNOTSUPP;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
@@ -496,8 +499,10 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
static bool array_map_meta_equal(const struct bpf_map *meta0,
				 const struct bpf_map *meta1)
{
	return meta0->max_entries == meta1->max_entries &&
		bpf_map_meta_equal(meta0, meta1);
	if (!bpf_map_meta_equal(meta0, meta1))
		return false;
	return meta0->map_flags & BPF_F_INNER_MAP ? true :
	       meta0->max_entries == meta1->max_entries;
}

struct bpf_iter_seq_array_map_info {
@@ -1251,7 +1256,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
	return READ_ONCE(*inner_map);
}

static u32 array_of_map_gen_lookup(struct bpf_map *map,
static int array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
Loading