Commit 193d0002 authored by Alexei Starovoitov

Merge branch 'bulk-cpumap-redirect'

Jesper Dangaard Brouer says:

====================
This patchset utilizes a number of different kernel bulk APIs to optimize the
performance of the XDP cpumap redirect feature.

Benchmark details are available here:
 https://github.com/xdp-project/xdp-project/blob/master/areas/cpumap/cpumap03-optimizations.org



The performance measurements should be considered micro-benchmarks, as they
measure packet drop rates (in packets per second) at different stages in the
network stack. Summary based on the above:

Baseline benchmarks
- baseline-redirect: UdpNoPorts: 3,180,074
- baseline-redirect: iptables-raw drop: 6,193,534

Patch1: bpf: cpumap use ptr_ring_consume_batched
- redirect: UdpNoPorts: 3,327,729
- redirect: iptables-raw drop: 6,321,540

Patch2: net: core: introduce build_skb_around
- redirect: UdpNoPorts: 3,221,303
- redirect: iptables-raw drop: 6,320,066

Patch3: bpf: cpumap do bulk allocation of SKBs
- redirect: UdpNoPorts: 3,290,563
- redirect: iptables-raw drop: 6,650,112

Patch4: bpf: cpumap memory prefetchw optimizations for struct page
- redirect: UdpNoPorts: 3,520,250
- redirect: iptables-raw drop: 7,649,604

In this V2 submission I have chosen to drop the SKB-list patch using
netif_receive_skb_list(), as it did not show a performance improvement for
these micro-benchmarks.
====================
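Taking the quoted figures at face value, the full series (Patch 4 versus baseline) improves the UdpNoPorts micro-benchmark by roughly 10.7% (3,520,250 vs 3,180,074 pps) and the iptables-raw drop micro-benchmark by roughly 23.5% (7,649,604 vs 6,193,534 pps).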

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 00967e84 86d23145
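As background for the diffs below, here is a minimal sketch, not part of this commit, of the XDP side of the cpumap redirect feature being optimized: an XDP program that enqueues frames into a cpumap, i.e. the producer side of the per-CPU ptr_ring that cpu_map_kthread_run() (kernel/bpf/cpumap.c below) drains. The map size, program/section names and the fixed destination CPU are illustrative only.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(max_entries, 64);	/* one slot per possible CPU */
	__type(key, __u32);		/* CPU number */
	__type(value, __u32);		/* per-CPU ptr_ring queue size */
} cpu_map SEC(".maps");

SEC("xdp")
int redirect_to_cpu(struct xdp_md *ctx)
{
	__u32 dest_cpu = 0;	/* real programs select a CPU, e.g. by RX hash */

	/* Enqueue the frame on cpu_map[dest_cpu]; the remote kthread
	 * builds an SKB from it and injects it into the network stack.
	 */
	return bpf_redirect_map(&cpu_map, dest_cpu, 0);
}

char _license[] SEC("license") = "GPL";

User space arms each cpumap slot with bpf_map_update_elem(), where the 32-bit value is the queue size; inserting an entry is what creates the per-CPU kthread whose receive loop the patches below rework.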
include/linux/skbuff.h +2 −0
@@ -1042,6 +1042,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
			    int node);
struct sk_buff *__build_skb(void *data, unsigned int frag_size);
struct sk_buff *build_skb(void *data, unsigned int frag_size);
struct sk_buff *build_skb_around(struct sk_buff *skb,
				 void *data, unsigned int frag_size);

/**
 * alloc_skb - allocate a network buffer
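The new build_skb_around() differs from build_skb() in that the caller supplies the struct sk_buff head, which lets those heads come from a bulk allocation. A minimal sketch of the intended calling pattern, mirroring the cpumap usage further down; frame_data[]/frame_len[] are placeholder inputs and error handling is elided:

	void *skbs[8];
	gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
	int i, n;

	/* __GFP_ZERO matters: build_skb_around() assumes the skb head
	 * was memset-cleared by the caller.
	 */
	n = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, 8, skbs);
	for (i = 0; i < n; i++) {
		struct sk_buff *skb;

		skb = build_skb_around(skbs[i], frame_data[i], frame_len[i]);
		if (unlikely(!skb))
			continue;	/* placeholder: free skbs[i] and the frame */
		/* skb now owns frame_data[i]; hand it to the stack */
	}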
kernel/bpf/cpumap.c +37 −16
@@ -160,12 +160,12 @@ static void cpu_map_kthread_stop(struct work_struct *work)
}

static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
-					 struct xdp_frame *xdpf)
+					 struct xdp_frame *xdpf,
+					 struct sk_buff *skb)
{
	unsigned int hard_start_headroom;
	unsigned int frame_size;
	void *pkt_data_start;
-	struct sk_buff *skb;

	/* Part of headroom was reserved to xdpf */
	hard_start_headroom = sizeof(struct xdp_frame) +  xdpf->headroom;
@@ -191,8 +191,8 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	pkt_data_start = xdpf->data - hard_start_headroom;
-	skb = build_skb(pkt_data_start, frame_size);
-	if (!skb)
+	skb = build_skb_around(skb, pkt_data_start, frame_size);
+	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, hard_start_headroom);
@@ -240,6 +240,8 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
	}
}

#define CPUMAP_BATCH 8

static int cpu_map_kthread_run(void *data)
{
	struct bpf_cpu_map_entry *rcpu = data;
@@ -252,8 +254,11 @@ static int cpu_map_kthread_run(void *data)
	 * kthread_stop signal until queue is empty.
	 */
	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
-		unsigned int processed = 0, drops = 0, sched = 0;
-		struct xdp_frame *xdpf;
+		unsigned int drops = 0, sched = 0;
+		void *frames[CPUMAP_BATCH];
+		void *skbs[CPUMAP_BATCH];
+		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
+		int i, n, m;

		/* Release CPU reschedule checks */
		if (__ptr_ring_empty(rcpu->queue)) {
@@ -269,18 +274,38 @@ static int cpu_map_kthread_run(void *data)
			sched = cond_resched();
		}

-		/* Process packets in rcpu->queue */
-		local_bh_disable();
		/*
		 * The bpf_cpu_map_entry is single consumer, with this
		 * kthread CPU pinned. Lockless access to ptr_ring
		 * consume side valid as no-resize allowed of queue.
		 */
-		while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
-			struct sk_buff *skb;
+		n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);

		for (i = 0; i < n; i++) {
			void *f = frames[i];
			struct page *page = virt_to_page(f);

			/* Bring struct page memory area to curr CPU. Read by
			 * build_skb_around via page_is_pfmemalloc(), and when
			 * freed written by page_frag_free call.
			 */
			prefetchw(page);
		}

		m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
		if (unlikely(m == 0)) {
			for (i = 0; i < n; i++)
				skbs[i] = NULL; /* effect: xdp_return_frame */
			drops = n;
		}

		local_bh_disable();
		for (i = 0; i < n; i++) {
			struct xdp_frame *xdpf = frames[i];
			struct sk_buff *skb = skbs[i];
			int ret;

-			skb = cpu_map_build_skb(rcpu, xdpf);
+			skb = cpu_map_build_skb(rcpu, xdpf, skb);
			if (!skb) {
				xdp_return_frame(xdpf);
				continue;
@@ -290,13 +315,9 @@ static int cpu_map_kthread_run(void *data)
			ret = netif_receive_skb_core(skb);
			if (ret == NET_RX_DROP)
				drops++;

-			/* Limit BH-disable period */
-			if (++processed == 8)
-				break;
		}
		/* Feedback loop via tracepoint */
-		trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched);
+		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);

		local_bh_enable(); /* resched point, may call do_softirq() */
	}
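Read together, the hunks above turn the per-packet loop into the following per-batch flow; this is a condensed editorial restatement of the new code (the m == 0 fallback and the scheduling logic are elided), not a verbatim copy:

	/* 1. Batched consume from the per-CPU ptr_ring (patch 1) */
	n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);

	/* 2. Prefetch each frame's struct page for write (patch 4) */
	for (i = 0; i < n; i++)
		prefetchw(virt_to_page(frames[i]));

	/* 3. Bulk-allocate zeroed skb heads (patch 3) */
	m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);

	/* 4. Wrap each frame via build_skb_around() (patch 2) and inject it */
	local_bh_disable();
	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		struct sk_buff *skb = cpu_map_build_skb(rcpu, xdpf, skbs[i]);

		if (!skb) {
			xdp_return_frame(xdpf);
			continue;
		}
		if (netif_receive_skb_core(skb) == NET_RX_DROP)
			drops++;
	}
	trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
	local_bh_enable();

Note that CPUMAP_BATCH is 8, so the BH-disable window stays bounded the same way the removed "++processed == 8" check bounded it in the old loop.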
net/core/skbuff.c +52 −19
@@ -258,6 +258,33 @@ nodata:
}
EXPORT_SYMBOL(__alloc_skb);

/* Caller must provide SKB that is memset cleared */
static struct sk_buff *__build_skb_around(struct sk_buff *skb,
					  void *data, unsigned int frag_size)
{
	struct skb_shared_info *shinfo;
	unsigned int size = frag_size ? : ksize(data);

	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	/* Assumes caller memset cleared SKB */
	skb->truesize = SKB_TRUESIZE(size);
	refcount_set(&skb->users, 1);
	skb->head = data;
	skb->data = data;
	skb_reset_tail_pointer(skb);
	skb->end = skb->tail + size;
	skb->mac_header = (typeof(skb->mac_header))~0U;
	skb->transport_header = (typeof(skb->transport_header))~0U;

	/* make sure we initialize shinfo sequentially */
	shinfo = skb_shinfo(skb);
	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
	atomic_set(&shinfo->dataref, 1);

	return skb;
}

/**
 * __build_skb - build a network buffer
 * @data: data buffer provided by caller
@@ -279,32 +306,15 @@ EXPORT_SYMBOL(__alloc_skb);
 */
struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
-	struct skb_shared_info *shinfo;
	struct sk_buff *skb;
-	unsigned int size = frag_size ? : ksize(data);

	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
-	if (!skb)
+	if (unlikely(!skb))
		return NULL;

-	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	memset(skb, 0, offsetof(struct sk_buff, tail));
-	skb->truesize = SKB_TRUESIZE(size);
-	refcount_set(&skb->users, 1);
-	skb->head = data;
-	skb->data = data;
-	skb_reset_tail_pointer(skb);
-	skb->end = skb->tail + size;
-	skb->mac_header = (typeof(skb->mac_header))~0U;
-	skb->transport_header = (typeof(skb->transport_header))~0U;
-
-	/* make sure we initialize shinfo sequentially */
-	shinfo = skb_shinfo(skb);
-	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
-	atomic_set(&shinfo->dataref, 1);
-
-	return skb;
+	return __build_skb_around(skb, data, frag_size);
}

/* build_skb() is wrapper over __build_skb(), that specifically
@@ -325,6 +335,29 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
}
EXPORT_SYMBOL(build_skb);

/**
 * build_skb_around - build a network buffer around provided skb
 * @skb: sk_buff provide by caller, must be memset cleared
 * @data: data buffer provided by caller
 * @frag_size: size of data, or 0 if head was kmalloced
 */
struct sk_buff *build_skb_around(struct sk_buff *skb,
				 void *data, unsigned int frag_size)
{
	if (unlikely(!skb))
		return NULL;

	skb = __build_skb_around(skb, data, frag_size);

	if (skb && frag_size) {
		skb->head_frag = 1;
		if (page_is_pfmemalloc(virt_to_head_page(data)))
			skb->pfmemalloc = 1;
	}
	return skb;
}
EXPORT_SYMBOL(build_skb_around);

#define NAPI_SKB_CACHE_SIZE	64

struct napi_alloc_cache {