Commit 89653987 authored by Lorenzo Bianconi, committed by Daniel Borkmann

net: xdp: Introduce bulking for xdp tx return path



The XDP bulk APIs introduce a defer/flush mechanism to return
pages belonging to the same xdp_mem_allocator object
(identified via the mem.id field) in bulk, optimizing I-cache
and D-cache usage, since xdp_return_frame is usually run
inside the driver NAPI tx completion loop.
The bulk queue size is set to 16 to align with how
XDP_REDIRECT bulking works. The bulk is flushed when it is
full or when mem.id changes.
xdp_frame_bulk is usually stored/allocated on the function
call-stack to avoid locking penalties.
The current implementation covers only the page_pool memory
model.
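
A minimal usage sketch, assuming a hypothetical driver (the mydrv_*
names and the ring helper are illustrative, not part of this patch;
only the xdp_* calls come from <net/xdp.h>):

/* NAPI tx completion handler using the bulk return API; the bulk
 * queue lives on the stack, as described above.
 */
static void mydrv_xdp_tx_complete(struct mydrv_tx_ring *ring)
{
	struct xdp_frame_bulk bq;
	struct xdp_frame *xdpf;

	xdp_frame_bulk_init(&bq);

	rcu_read_lock(); /* xdp_return_frame_bulk() needs the RCU read side */

	/* Each call defers the page release; the queue is flushed
	 * internally when it fills up or when mem.id changes.
	 */
	while ((xdpf = mydrv_next_completed_frame(ring)))
		xdp_return_frame_bulk(xdpf, &bq);

	xdp_flush_frame_bulk(&bq); /* return any frames still queued */

	rcu_read_unlock();
}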

Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com>
Co-developed-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Link: https://lore.kernel.org/bpf/e190c03eac71b20c8407ae0fc2c399eda7835f49.1605267335.git.lorenzo@kernel.org
parent 6f100640
include/net/xdp.h +16 −1
@@ -104,6 +104,18 @@ struct xdp_frame {
	struct net_device *dev_rx; /* used by cpumap */
};

#define XDP_BULK_QUEUE_SIZE	16
struct xdp_frame_bulk {
	int count;
	void *xa;
	void *q[XDP_BULK_QUEUE_SIZE];
};

static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
{
	/* bq->count will be zeroed when bq->xa gets updated */
	bq->xa = NULL;
}

static inline struct skb_shared_info *
xdp_get_shared_info_from_frame(struct xdp_frame *frame)
@@ -194,6 +206,9 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
			   struct xdp_frame_bulk *bq);

/* When sending xdp_frame into the network stack, then there is no
 * return point callback, which is needed to release e.g. DMA-mapping
@@ -245,6 +260,6 @@ bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
void xdp_attachment_setup(struct xdp_attachment_info *info,
			  struct netdev_bpf *bpf);

-#define DEV_MAP_BULK_SIZE 16
+#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE

#endif /* __LINUX_NET_XDP_H__ */
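
For context on the on-stack allocation: on a typical 64-bit build the
struct above is 4 bytes (count) + 4 bytes padding + 8 bytes (xa) +
16 * 8 bytes (q) = 144 bytes, small enough to keep on the driver's
call-stack rather than behind a lock.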
net/core/xdp.c +59 −0
@@ -380,6 +380,65 @@ void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);

/* XDP bulk APIs introduce a defer/flush mechanism to return
 * pages belonging to the same xdp_mem_allocator object
 * (identified via the mem.id field) in bulk to optimize
 * I-cache and D-cache.
 * The bulk queue size is set to 16 to be aligned to how
 * XDP_REDIRECT bulking works. The bulk is flushed when
 * it is full or when mem.id changes.
 * xdp_frame_bulk is usually stored/allocated on the function
 * call-stack to avoid locking penalties.
 */
void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
{
	struct xdp_mem_allocator *xa = bq->xa;
	int i;

	if (unlikely(!xa))
		return;

	for (i = 0; i < bq->count; i++) {
		struct page *page = virt_to_head_page(bq->q[i]);

		page_pool_put_full_page(xa->page_pool, page, false);
	}
	/* bq->xa is not cleared to save lookup, if mem.id same in next bulk */
	bq->count = 0;
}
EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk);

/* Must be called with rcu_read_lock held */
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
			   struct xdp_frame_bulk *bq)
{
	struct xdp_mem_info *mem = &xdpf->mem;
	struct xdp_mem_allocator *xa;

	if (mem->type != MEM_TYPE_PAGE_POOL) {
		__xdp_return(xdpf->data, &xdpf->mem, false);
		return;
	}

	xa = bq->xa;
	if (unlikely(!xa)) {
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		bq->count = 0;
		bq->xa = xa;
	}

	if (bq->count == XDP_BULK_QUEUE_SIZE)
		xdp_flush_frame_bulk(bq);

	if (unlikely(mem->id != xa->mem.id)) {
		xdp_flush_frame_bulk(bq);
		bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
	}

	bq->q[bq->count++] = xdpf->data;
}
EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);

void xdp_return_buff(struct xdp_buff *xdp)
{
	__xdp_return(xdp->data, &xdp->rxq->mem, true);
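
Design note: xdp_return_frame_bulk() can be called unconditionally; for
memory types other than MEM_TYPE_PAGE_POOL it simply falls back to an
immediate __xdp_return(), as the code above shows. Bulking pays off most
when consecutive completed frames share the same mem.id, since a stream
that alternates between two page_pools would trigger a flush on every
switch.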