Commit 906312ca authored by Alexei Starovoitov's avatar Alexei Starovoitov
Browse files

Merge branch 'xdp_devmap'



David Ahern says:

====================
Implementation of Daniel's proposal for allowing DEVMAP entries to be
a device index, program fd pair.

Programs are run after XDP_REDIRECT and have access to both Rx device
and Tx device.

v4
- moved struct bpf_devmap_val from uapi to devmap.c, named the union
  and dropped the prefix from the elements - Jesper
- fixed 2 bugs in selftests

v3
- renamed struct to bpf_devmap_val
- used offsetofend to check for expected map size, modification of
  Toke's comment
- check for explicit value sizes
- adjusted switch statement in dev_map_run_prog per Andrii's comment
- changed SEC shortcut to xdp_devmap
- changed selftests to use skeleton and new map declaration

v2
- moved dev_map_ext_val definition to uapi to formalize the API for devmap
  extensions; add bpf_ prefix to the prog_fd and prog_id entries
- changed devmap code to handle struct in a way that it can support future
  extensions
- fixed subject in libbpf patch

v1
- fixed prog put on invalid program - Toke
- changed write value from id to fd per Toke's comments about capabilities
- add test cases
====================

Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents b36e62eb d39aec79
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -1250,6 +1250,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
		    struct net_device *dev_rx);
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
			     struct bpf_prog *xdp_prog);
bool dev_map_can_have_prog(struct bpf_map *map);

struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_flush(void);
@@ -1363,6 +1364,10 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map
{
	return NULL;
}
static inline bool dev_map_can_have_prog(struct bpf_map *map)
{
	return false;
}

static inline void __dev_flush(void)
{
+5 −0
Original line number Diff line number Diff line
@@ -61,12 +61,17 @@ struct xdp_rxq_info {
	struct xdp_mem_info mem;
} ____cacheline_aligned; /* perf critical, avoid false-sharing */

struct xdp_txq_info {
	struct net_device *dev;
};

struct xdp_buff {
	void *data;
	void *data_end;
	void *data_meta;
	void *data_hard_start;
	struct xdp_rxq_info *rxq;
	struct xdp_txq_info *txq;
	u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
};

+3 −0
Original line number Diff line number Diff line
@@ -225,6 +225,7 @@ enum bpf_attach_type {
	BPF_CGROUP_INET6_GETPEERNAME,
	BPF_CGROUP_INET4_GETSOCKNAME,
	BPF_CGROUP_INET6_GETSOCKNAME,
	BPF_XDP_DEVMAP,
	__MAX_BPF_ATTACH_TYPE
};

@@ -3706,6 +3707,8 @@ struct xdp_md {
	/* Below access go through struct xdp_rxq_info */
	__u32 ingress_ifindex; /* rxq->dev->ifindex */
	__u32 rx_queue_index;  /* rxq->queue_index  */

	__u32 egress_ifindex;  /* txq->dev->ifindex */
};

enum sk_action {
+112 −18
Original line number Diff line number Diff line
@@ -60,12 +60,23 @@ struct xdp_dev_bulk_queue {
	unsigned int count;
};

/* DEVMAP values */
struct bpf_devmap_val {
	u32 ifindex;   /* device index */
	union {
		int fd;  /* prog fd on map write */
		u32 id;  /* prog id on map read */
	} bpf_prog;
};

struct bpf_dtab_netdev {
	struct net_device *dev; /* must be first member, due to tracepoint */
	struct hlist_node index_hlist;
	struct bpf_dtab *dtab;
	struct bpf_prog *xdp_prog;
	struct rcu_head rcu;
	unsigned int idx;
	struct bpf_devmap_val val;
};

struct bpf_dtab {
@@ -105,12 +116,18 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,

static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
	u32 valsize = attr->value_size;
	u64 cost = 0;
	int err;

	/* check sanity of attributes */
	/* check sanity of attributes. 2 value sizes supported:
	 * 4 bytes: ifindex
	 * 8 bytes: ifindex + prog fd
	 */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
	    (valsize != offsetofend(struct bpf_devmap_val, ifindex) &&
	     valsize != offsetofend(struct bpf_devmap_val, bpf_prog.fd)) ||
	    attr->map_flags & ~DEV_CREATE_FLAG_MASK)
		return -EINVAL;

	/* Lookup returns a pointer straight to dev->ifindex, so make sure the
@@ -217,6 +234,8 @@ static void dev_map_free(struct bpf_map *map)

			hlist_for_each_entry_safe(dev, next, head, index_hlist) {
				hlist_del_rcu(&dev->index_hlist);
				if (dev->xdp_prog)
					bpf_prog_put(dev->xdp_prog);
				dev_put(dev->dev);
				kfree(dev);
			}
@@ -231,6 +250,8 @@ static void dev_map_free(struct bpf_map *map)
			if (!dev)
				continue;

			if (dev->xdp_prog)
				bpf_prog_put(dev->xdp_prog);
			dev_put(dev->dev);
			kfree(dev);
		}
@@ -317,6 +338,16 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
	return -ENOENT;
}

bool dev_map_can_have_prog(struct bpf_map *map)
{
	if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
	     map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
	    map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
		return true;

	return false;
}

static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
{
	struct net_device *dev = bq->dev;
@@ -441,6 +472,33 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
	return bq_enqueue(dev, xdpf, dev_rx);
}

static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
					 struct xdp_buff *xdp,
					 struct bpf_prog *xdp_prog)
{
	struct xdp_txq_info txq = { .dev = dev };
	u32 act;

	xdp->txq = &txq;

	act = bpf_prog_run_xdp(xdp_prog, xdp);
	switch (act) {
	case XDP_PASS:
		return xdp;
	case XDP_DROP:
		break;
	default:
		bpf_warn_invalid_xdp_action(act);
		fallthrough;
	case XDP_ABORTED:
		trace_xdp_exception(dev, xdp_prog, act);
		break;
	}

	xdp_return_buff(xdp);
	return NULL;
}

int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
		    struct net_device *dev_rx)
{
@@ -452,6 +510,11 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
{
	struct net_device *dev = dst->dev;

	if (dst->xdp_prog) {
		xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
		if (!xdp)
			return 0;
	}
	return __xdp_enqueue(dev, xdp, dev_rx);
}

@@ -472,18 +535,15 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
	struct net_device *dev = obj ? obj->dev : NULL;

	return dev ? &dev->ifindex : NULL;
	return obj ? &obj->val : NULL;
}

static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
								*(u32 *)key);
	struct net_device *dev = obj ? obj->dev : NULL;

	return dev ? &dev->ifindex : NULL;
	return obj ? &obj->val : NULL;
}

static void __dev_map_entry_free(struct rcu_head *rcu)
@@ -491,6 +551,8 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
	struct bpf_dtab_netdev *dev;

	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
	if (dev->xdp_prog)
		bpf_prog_put(dev->xdp_prog);
	dev_put(dev->dev);
	kfree(dev);
}
@@ -541,9 +603,10 @@ static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)

static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
						    struct bpf_dtab *dtab,
						    u32 ifindex,
						    struct bpf_devmap_val *val,
						    unsigned int idx)
{
	struct bpf_prog *prog = NULL;
	struct bpf_dtab_netdev *dev;

	dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
@@ -551,24 +614,46 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
	if (!dev)
		return ERR_PTR(-ENOMEM);

	dev->dev = dev_get_by_index(net, ifindex);
	if (!dev->dev) {
		kfree(dev);
		return ERR_PTR(-EINVAL);
	dev->dev = dev_get_by_index(net, val->ifindex);
	if (!dev->dev)
		goto err_out;

	if (val->bpf_prog.fd >= 0) {
		prog = bpf_prog_get_type_dev(val->bpf_prog.fd,
					     BPF_PROG_TYPE_XDP, false);
		if (IS_ERR(prog))
			goto err_put_dev;
		if (prog->expected_attach_type != BPF_XDP_DEVMAP)
			goto err_put_prog;
	}

	dev->idx = idx;
	dev->dtab = dtab;
	if (prog) {
		dev->xdp_prog = prog;
		dev->val.bpf_prog.id = prog->aux->id;
	} else {
		dev->xdp_prog = NULL;
		dev->val.bpf_prog.id = 0;
	}
	dev->val.ifindex = val->ifindex;

	return dev;
err_put_prog:
	bpf_prog_put(prog);
err_put_dev:
	dev_put(dev->dev);
err_out:
	kfree(dev);
	return ERR_PTR(-EINVAL);
}

static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
	struct bpf_dtab_netdev *dev, *old_dev;
	u32 ifindex = *(u32 *)value;
	u32 i = *(u32 *)key;

	if (unlikely(map_flags > BPF_EXIST))
@@ -578,10 +663,16 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
	if (unlikely(map_flags == BPF_NOEXIST))
		return -EEXIST;

	if (!ifindex) {
	/* already verified value_size <= sizeof val */
	memcpy(&val, value, map->value_size);

	if (!val.ifindex) {
		dev = NULL;
		/* can not specify fd if ifindex is 0 */
		if (val.bpf_prog.fd != -1)
			return -EINVAL;
	} else {
		dev = __dev_map_alloc_node(net, dtab, ifindex, i);
		dev = __dev_map_alloc_node(net, dtab, &val, i);
		if (IS_ERR(dev))
			return PTR_ERR(dev);
	}
@@ -608,13 +699,16 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
				     void *key, void *value, u64 map_flags)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
	struct bpf_dtab_netdev *dev, *old_dev;
	u32 ifindex = *(u32 *)value;
	u32 idx = *(u32 *)key;
	unsigned long flags;
	int err = -EEXIST;

	if (unlikely(map_flags > BPF_EXIST || !ifindex))
	/* already verified value_size <= sizeof val */
	memcpy(&val, value, map->value_size);

	if (unlikely(map_flags > BPF_EXIST || !val.ifindex))
		return -EINVAL;

	spin_lock_irqsave(&dtab->index_lock, flags);
@@ -623,7 +717,7 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
	if (old_dev && (map_flags & BPF_NOEXIST))
		goto out_err;

	dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
	dev = __dev_map_alloc_node(net, dtab, &val, idx);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		goto out_err;
+18 −0
Original line number Diff line number Diff line
@@ -5420,6 +5420,18 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
	struct bpf_prog *new = xdp->prog;
	int ret = 0;

	if (new) {
		u32 i;

		/* generic XDP does not work with DEVMAPs that can
		 * have a bpf_prog installed on an entry
		 */
		for (i = 0; i < new->aux->used_map_cnt; i++) {
			if (dev_map_can_have_prog(new->aux->used_maps[i]))
				return -EINVAL;
		}
	}

	switch (xdp->command) {
	case XDP_SETUP_PROG:
		rcu_assign_pointer(dev->xdp_prog, new);
@@ -8835,6 +8847,12 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
			return -EINVAL;
		}

		if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
			NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
			bpf_prog_put(prog);
			return -EINVAL;
		}

		/* prog->aux->id may be 0 for orphaned device-bound progs */
		if (prog->aux->id && prog->aux->id == prog_id) {
			bpf_prog_put(prog);
Loading