Commit b97e12e5 authored by Daniel Borkmann's avatar Daniel Borkmann
Browse files

Merge branch 'bpf-array-mmap'



Andrii Nakryiko says:

====================
This patch set adds ability to memory-map BPF array maps (single- and
multi-element). The primary use case is memory-mapping BPF array maps, created
to back global data variables, created by libbpf implicitly. This allows for
much better usability, along with avoiding syscalls to read or update data
completely.

Due to memory-mapping requirements, BPF array map that is supposed to be
memory-mapped, has to be created with special BPF_F_MMAPABLE attribute, which
triggers slightly different memory allocation strategy internally. See
patch 1 for details.

Libbpf is extended to detect kernel support for this flag, and if supported,
will specify it for all global data maps automatically.

Patch #1 refactors bpf_map_inc() and converts bpf_map's refcnt to atomic64_t
to make refcounting never fail. Patch #2 does similar refactoring for
bpf_prog_add()/bpf_prog_inc().

v5->v6:
- add back uref counting (Daniel);

v4->v5:
- change bpf_prog's refcnt to atomic64_t (Daniel);

v3->v4:
- add mmap's open() callback to fix refcounting (Johannes);
- switch to remap_vmalloc_pages() instead of custom fault handler (Johannes);
- converted bpf_map's refcnt/usercnt into atomic64_t;
- provide default bpf_map_default_vmops handling open/close properly;

v2->v3:
- change allocation strategy to avoid extra pointer dereference (Jakub);

v1->v2:
- fix map lookup code generation for BPF_F_MMAPABLE case;
- prevent BPF_F_MMAPABLE flag for all but plain array map type;
- centralize ref-counting in generic bpf_map_mmap();
- don't use uref counting (Alexei);
- use vfree() directly;
- print flags with %x (Song);
- extend tests to verify bpf_map_{lookup,update}_elem() logic as well.
====================

Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents 2893c996 5051b384
Loading
Loading
Loading
Loading
+2 −7
Original line number Diff line number Diff line
@@ -3171,13 +3171,8 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr)
	bnxt_init_rxbd_pages(ring, type);

	if (BNXT_RX_PAGE_MODE(bp) && bp->xdp_prog) {
		rxr->xdp_prog = bpf_prog_add(bp->xdp_prog, 1);
		if (IS_ERR(rxr->xdp_prog)) {
			int rc = PTR_ERR(rxr->xdp_prog);

			rxr->xdp_prog = NULL;
			return rc;
		}
		bpf_prog_add(bp->xdp_prog, 1);
		rxr->xdp_prog = bp->xdp_prog;
	}
	prod = rxr->rx_prod;
	for (i = 0; i < bp->rx_ring_size; i++) {
+2 −7
Original line number Diff line number Diff line
@@ -1876,13 +1876,8 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)

	if (nic->xdp_prog) {
		/* Attach BPF program */
		nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
		if (!IS_ERR(nic->xdp_prog)) {
		bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
		bpf_attached = true;
		} else {
			ret = PTR_ERR(nic->xdp_prog);
			nic->xdp_prog = NULL;
		}
	}

	/* Calculate Tx queues needed for XDP and network stack */
+2 −5
Original line number Diff line number Diff line
@@ -1807,11 +1807,8 @@ static int setup_xdp(struct net_device *dev, struct bpf_prog *prog)
	if (prog && !xdp_mtu_valid(priv, dev->mtu))
		return -EINVAL;

	if (prog) {
		prog = bpf_prog_add(prog, priv->num_channels);
		if (IS_ERR(prog))
			return PTR_ERR(prog);
	}
	if (prog)
		bpf_prog_add(prog, priv->num_channels);

	up = netif_running(dev);
	need_update = (!!priv->xdp_prog != !!prog);
+6 −18
Original line number Diff line number Diff line
@@ -2286,11 +2286,7 @@ int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
		lockdep_is_held(&priv->mdev->state_lock));

	if (xdp_prog && carry_xdp_prog) {
		xdp_prog = bpf_prog_add(xdp_prog, tmp->rx_ring_num);
		if (IS_ERR(xdp_prog)) {
			mlx4_en_free_resources(tmp);
			return PTR_ERR(xdp_prog);
		}
		bpf_prog_add(xdp_prog, tmp->rx_ring_num);
		for (i = 0; i < tmp->rx_ring_num; i++)
			rcu_assign_pointer(tmp->rx_ring[i]->xdp_prog,
					   xdp_prog);
@@ -2782,11 +2778,9 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
	 * program for a new one.
	 */
	if (priv->tx_ring_num[TX_XDP] == xdp_ring_num) {
		if (prog) {
			prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
			if (IS_ERR(prog))
				return PTR_ERR(prog);
		}
		if (prog)
			bpf_prog_add(prog, priv->rx_ring_num - 1);

		mutex_lock(&mdev->state_lock);
		for (i = 0; i < priv->rx_ring_num; i++) {
			old_prog = rcu_dereference_protected(
@@ -2807,13 +2801,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
	if (!tmp)
		return -ENOMEM;

	if (prog) {
		prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
		if (IS_ERR(prog)) {
			err = PTR_ERR(prog);
			goto out;
		}
	}
	if (prog)
		bpf_prog_add(prog, priv->rx_ring_num - 1);

	mutex_lock(&mdev->state_lock);
	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
@@ -2862,7 +2851,6 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)

unlock_out:
	mutex_unlock(&mdev->state_lock);
out:
	kfree(tmp);
	return err;
}
+5 −13
Original line number Diff line number Diff line
@@ -408,12 +408,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
		rq->stats = &c->priv->channel_stats[c->ix].rq;
	INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);

	rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
	if (IS_ERR(rq->xdp_prog)) {
		err = PTR_ERR(rq->xdp_prog);
		rq->xdp_prog = NULL;
		goto err_rq_wq_destroy;
	}
	if (params->xdp_prog)
		bpf_prog_inc(params->xdp_prog);
	rq->xdp_prog = params->xdp_prog;

	rq_xdp_ix = rq->ix;
	if (xsk)
@@ -4406,16 +4403,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
	/* no need for full reset when exchanging programs */
	reset = (!priv->channels.params.xdp_prog || !prog);

	if (was_opened && !reset) {
	if (was_opened && !reset)
		/* num_channels is invariant here, so we can take the
		 * batched reference right upfront.
		 */
		prog = bpf_prog_add(prog, priv->channels.num);
		if (IS_ERR(prog)) {
			err = PTR_ERR(prog);
			goto unlock;
		}
	}
		bpf_prog_add(prog, priv->channels.num);

	if (was_opened && reset) {
		struct mlx5e_channels new_channels = {};
Loading