Commit 39d6443c authored by Björn Töpel, committed by Alexei Starovoitov

mlx5, xsk: Migrate to new MEM_TYPE_XSK_BUFF_POOL



Use the new MEM_TYPE_XSK_BUFF_POOL API in lieu of MEM_TYPE_ZERO_COPY in
mlx5e. This allows dropping a lot of code from the driver (the XSK RX
frame allocation, DMA mapping and related plumbing is now common in the
AF_XDP core) and slightly improves performance (RX +0.8 Mpps, TX +0.4
Mpps).
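
In rough terms, the removed driver plumbing maps onto helpers that the
core now provides in <net/xdp_sock_drv.h>. A sketch of the
correspondence, based on this series (not exhaustive; left-hand side is
the old driver-facing API):

    xsk_umem_peek_addr_rq() + adjust + release  ->  xsk_buff_alloc(umem)
    xdp_umem_get_dma() / xdp_umem_get_data()    ->  xsk_buff_xdp_get_frame_dma(xdp) / xdp->data
    dma_sync_single_for_cpu(...)                ->  xsk_buff_dma_sync_for_cpu(xdp)
    Reuse Ring / zero-copy allocator recycling  ->  xsk_buff_free(xdp)
    MEM_TYPE_ZERO_COPY + zca registration       ->  MEM_TYPE_XSK_BUFF_POOL + xsk_buff_set_rxq_info()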

rfc->v1: Put back the sanity check for XSK params, use XSK API to get
         the total headroom size. (Maxim)

v1->v2: Fix DMA address handling, set XDP metadata to invalid. (Maxim)

v2->v3: Handle frame_sz, use xsk_buff_xdp_get_frame_dma, use xsk_buff
        API for DMA sync on TX, add performance numbers. (Maxim)

v3->v4: Remove unused variable num_xsk_frames. (Jakub)

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200520192103.355233-12-bjorn.topel@gmail.com
parent 7117132b

drivers/net/ethernet/mellanox/mlx5/core/en.h  +1 −6
@@ -407,10 +407,7 @@ struct mlx5e_dma_info {
 	dma_addr_t addr;
 	union {
 		struct page *page;
-		struct {
-			u64 handle;
-			void *data;
-		} xsk;
+		struct xdp_buff *xsk;
 	};
 };

@@ -623,7 +620,6 @@ struct mlx5e_rq {
 		} mpwqe;
 	};
 	struct {
-		u16            umem_headroom;
 		u16            headroom;
 		u32            frame0_sz;
 		u8             map_dir;   /* dma map direction */
@@ -656,7 +652,6 @@ struct mlx5e_rq {
 	struct page_pool      *page_pool;
 
 	/* AF_XDP zero-copy */
-	struct zero_copy_allocator zca;
 	struct xdp_umem       *umem;
 
 	struct work_struct     recover_work;

drivers/net/ethernet/mellanox/mlx5/core/en/params.c  +7 −6
@@ -12,15 +12,16 @@ static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
 u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
 				 struct mlx5e_xsk_param *xsk)
 {
-	u16 headroom = NET_IP_ALIGN;
+	u16 headroom;
 
-	if (mlx5e_rx_is_xdp(params, xsk)) {
-		headroom += XDP_PACKET_HEADROOM;
-		if (xsk)
-			headroom += xsk->headroom;
-	} else {
+	if (xsk)
+		return xsk->headroom;
+
+	headroom = NET_IP_ALIGN;
+	if (mlx5e_rx_is_xdp(params, xsk))
+		headroom += XDP_PACKET_HEADROOM;
+	else
 		headroom += MLX5_RX_HEADROOM;
-	}
 
 	return headroom;
 }
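
The simplification above works because xsk->headroom now carries the
total headroom for XSK queues. A minimal sketch of the setup side,
assuming the core's xsk_umem_get_headroom() helper from this series
(the actual assignment lives in en/xsk/setup.c, outside the hunks shown
on this page):

	/* Sketch: xsk_umem_get_headroom() already folds XDP_PACKET_HEADROOM
	 * into the user-configured umem headroom, which is why the separate
	 * umem_headroom field could be removed from en.h above.
	 */
	xsk->headroom = xsk_umem_get_headroom(umem);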

drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c  +7 −24
@@ -71,7 +71,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
 	xdptxd.data = xdpf->data;
 	xdptxd.len  = xdpf->len;
 
-	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) {
+	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
 		/* The xdp_buff was in the UMEM and was copied into a newly
 		 * allocated page. The UMEM page was returned via the ZCA, and
 		 * this new page has to be mapped at this point and has to be
@@ -119,50 +119,33 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
 
 /* returns true if packet was consumed by xdp */
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
-		      void *va, u16 *rx_headroom, u32 *len, bool xsk)
+		      u32 *len, struct xdp_buff *xdp)
 {
 	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
-	struct xdp_umem *umem = rq->umem;
-	struct xdp_buff xdp;
 	u32 act;
 	int err;
 
 	if (!prog)
 		return false;
 
-	xdp.data = va + *rx_headroom;
-	xdp_set_data_meta_invalid(&xdp);
-	xdp.data_end = xdp.data + *len;
-	xdp.data_hard_start = va;
-	if (xsk)
-		xdp.handle = di->xsk.handle;
-	xdp.rxq = &rq->xdp_rxq;
-	xdp.frame_sz = rq->buff.frame0_sz;
-
-	act = bpf_prog_run_xdp(prog, &xdp);
-	if (xsk) {
-		u64 off = xdp.data - xdp.data_hard_start;
-
-		xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off);
-	}
+	act = bpf_prog_run_xdp(prog, xdp);
 	switch (act) {
 	case XDP_PASS:
-		*rx_headroom = xdp.data - xdp.data_hard_start;
-		*len = xdp.data_end - xdp.data;
+		*len = xdp->data_end - xdp->data;
 		return false;
 	case XDP_TX:
-		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
+		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, xdp)))
 			goto xdp_abort;
 		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
 		return true;
 	case XDP_REDIRECT:
 		/* When XDP enabled then page-refcnt==1 here */
-		err = xdp_do_redirect(rq->netdev, &xdp, prog);
+		err = xdp_do_redirect(rq->netdev, xdp, prog);
 		if (unlikely(err))
 			goto xdp_abort;
 		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
 		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
-		if (!xsk)
+		if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
 			mlx5e_page_dma_unmap(rq, di);
 		rq->stats->xdp_redirect++;
 		return true;
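
mlx5e_xdp_handle() now expects a fully initialized xdp_buff from its
caller: XSK queues receive one ready-made from the buffer pool, while
the regular data path builds its own. Roughly, the fill helper this
patch adds to en_rx.c (past the point where this page is cut off)
performs the initialization deleted above:

	/* Approximate shape of the en_rx.c helper added by this patch;
	 * it does for the non-XSK path what mlx5e_xdp_handle() used to
	 * do internally.
	 */
	static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va,
					u16 headroom, u32 len,
					struct xdp_buff *xdp)
	{
		xdp->data_hard_start = va;
		xdp->data = va + headroom;
		xdp_set_data_meta_invalid(xdp);
		xdp->data_end = xdp->data + len;
		xdp->rxq = &rq->xdp_rxq;
		xdp->frame_sz = rq->buff.frame0_sz;
	}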

drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h  +1 −1
@@ -63,7 +63,7 @@
 struct mlx5e_xsk_param;
 int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
-		      void *va, u16 *rx_headroom, u32 *len, bool xsk);
+		      u32 *len, struct xdp_buff *xdp);
 void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);

drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c  +21 −92
@@ -3,71 +3,10 @@
 
 #include "rx.h"
 #include "en/xdp.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 
 /* RX data path */
 
-bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
-{
-	/* Check in advance that we have enough frames, instead of allocating
-	 * one-by-one, failing and moving frames to the Reuse Ring.
-	 */
-	return xsk_umem_has_addrs_rq(rq->umem, count);
-}
-
-int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
-			      struct mlx5e_dma_info *dma_info)
-{
-	struct xdp_umem *umem = rq->umem;
-	u64 handle;
-
-	if (!xsk_umem_peek_addr_rq(umem, &handle))
-		return -ENOMEM;
-
-	dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle,
-						      rq->buff.umem_headroom);
-	dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
-
-	/* No need to add headroom to the DMA address. In striding RQ case, we
-	 * just provide pages for UMR, and headroom is counted at the setup
-	 * stage when creating a WQE. In non-striding RQ case, headroom is
-	 * accounted in mlx5e_alloc_rx_wqe.
-	 */
-	dma_info->addr = xdp_umem_get_dma(umem, handle);
-
-	xsk_umem_release_addr_rq(umem);
-
-	dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
-				   DMA_BIDIRECTIONAL);
-
-	return 0;
-}
-
-static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle)
-{
-	xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask);
-}
-
-/* XSKRQ uses pages from UMEM, they must not be released. They are returned to
- * the userspace if possible, and if not, this function is called to reuse them
- * in the driver.
- */
-void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
-			    struct mlx5e_dma_info *dma_info)
-{
-	mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle);
-}
-
-/* Return a frame back to the hardware to fill in again. It is used by XDP when
- * the XDP program returns XDP_TX or XDP_REDIRECT not to an XSKMAP.
- */
-void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
-{
-	struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca);
-
-	mlx5e_xsk_recycle_frame(rq, handle);
-}
-
 static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
 					       u32 cqe_bcnt)
 {
@@ -90,11 +29,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 						    u32 head_offset,
 						    u32 page_idx)
 {
-	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
-	u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+	struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
 	u32 cqe_bcnt32 = cqe_bcnt;
-	void *va, *data;
-	u32 frag_size;
 	bool consumed;
 
 	/* Check packet size. Note LRO doesn't use linear SKB */
@@ -103,22 +39,20 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 		return NULL;
 	}
 
-	/* head_offset is not used in this function, because di->xsk.data and
-	 * di->addr point directly to the necessary place. Furthermore, in the
-	 * current implementation, UMR pages are mapped to XSK frames, so
+	/* head_offset is not used in this function, because xdp->data and the
+	 * DMA address point directly to the necessary place. Furthermore, in
+	 * the current implementation, UMR pages are mapped to XSK frames, so
 	 * head_offset should always be 0.
 	 */
 	WARN_ON_ONCE(head_offset);
 
-	va             = di->xsk.data;
-	data           = va + rx_headroom;
-	frag_size      = rq->buff.headroom + cqe_bcnt32;
-
-	dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
-	prefetch(data);
+	xdp->data_end = xdp->data + cqe_bcnt32;
+	xdp_set_data_meta_invalid(xdp);
+	xsk_buff_dma_sync_for_cpu(xdp);
+	prefetch(xdp->data);
 
 	rcu_read_lock();
-	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true);
+	consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp);
 	rcu_read_unlock();
 
 	/* Possible flows:
@@ -145,7 +79,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
 	 * frame. On SKB allocation failure, NULL is returned.
 	 */
-	return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32);
+	return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt32);
 }
 
 struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
@@ -153,25 +87,20 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 					      struct mlx5e_wqe_frag_info *wi,
 					      u32 cqe_bcnt)
 {
-	struct mlx5e_dma_info *di = wi->di;
-	u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
-	void *va, *data;
+	struct xdp_buff *xdp = wi->di->xsk;
 	bool consumed;
-	u32 frag_size;
 
-	/* wi->offset is not used in this function, because di->xsk.data and
-	 * di->addr point directly to the necessary place. Furthermore, in the
-	 * current implementation, one page = one packet = one frame, so
+	/* wi->offset is not used in this function, because xdp->data and the
+	 * DMA address point directly to the necessary place. Furthermore, the
+	 * XSK allocator allocates frames per packet, instead of pages, so
 	 * wi->offset should always be 0.
 	 */
 	WARN_ON_ONCE(wi->offset);
 
-	va             = di->xsk.data;
-	data           = va + rx_headroom;
-	frag_size      = rq->buff.headroom + cqe_bcnt;
-
-	dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
-	prefetch(data);
+	xdp->data_end = xdp->data + cqe_bcnt;
+	xdp_set_data_meta_invalid(xdp);
+	xsk_buff_dma_sync_for_cpu(xdp);
+	prefetch(xdp->data);
 
 	if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
 		rq->stats->wqe_err++;
@@ -179,7 +108,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 	}
 
 	rcu_read_lock();
-	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true);
+	consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp);
 	rcu_read_unlock();
 
 	if (likely(consumed))
@@ -189,5 +118,5 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 	 * will be handled by mlx5e_put_rx_frag.
 	 * On SKB allocation failure, NULL is returned.
 	 */
-	return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt);
+	return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt);
 }
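
The allocation and release helpers deleted at the top of this file are
replaced by thin wrappers over the XSK buffer pool, added by this patch
in en/xsk/rx.h (beyond where this page is cut off). Approximately:

	/* Sketch of the replacements: the AF_XDP core now owns frame
	 * allocation, DMA mapping and recycling.
	 */
	static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
						    struct mlx5e_dma_info *dma_info)
	{
		dma_info->xsk = xsk_buff_alloc(rq->umem);
		if (!dma_info->xsk)
			return -ENOMEM;

		/* Headroom is still accounted at WQE setup time, as the
		 * deleted comment above explains; only the frame DMA
		 * address needs to be stored.
		 */
		dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
		return 0;
	}

	static inline void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
						  struct mlx5e_dma_info *dma_info)
	{
		xsk_buff_free(dma_info->xsk);
	}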