i40e/i40evf: eliminate i40e_pull_tail() (9b37c937) · Commits · 戴 / test

drivers/net/ethernet/intel/i40e/i40e_txrx.c

+93 −76

Original line number	Diff line number	Diff line
		@@ -1425,45 +1425,6 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
		skb_record_rx_queue(skb, rx_ring->queue_index);
		}

		/**
		* i40e_pull_tail - i40e specific version of skb_pull_tail
		* @rx_ring: rx descriptor ring packet is being transacted on
		* @skb: pointer to current skb being adjusted
		*
		* This function is an i40e specific version of __pskb_pull_tail. The
		* main difference between this version and the original function is that
		* this function can make several assumptions about the state of things
		* that allow for significant optimizations versus the standard function.
		* As a result we can do things like drop a frag and maintain an accurate
		* truesize for the skb.
		*/
		static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb)
		{
		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
		unsigned char *va;
		unsigned int pull_len;

		/* it is valid to use page_address instead of kmap since we are
		* working with pages allocated out of the lomem pool per
		* alloc_page(GFP_ATOMIC)
		*/
		va = skb_frag_address(frag);

		/* we need the header to contain the greater of either ETH_HLEN or
		* 60 bytes if the skb->len is less than 60 for skb_pad.
		*/
		pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);

		/* align pull length to size of long to optimize memcpy performance */
		skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));

		/* update all of the pointers */
		skb_frag_size_sub(frag, pull_len);
		frag->page_offset += pull_len;
		skb->data_len -= pull_len;
		skb->tail += pull_len;
		}

		/**
		* i40e_cleanup_headers - Correct empty headers
		* @rx_ring: rx descriptor ring packet is being transacted on
		@@ -1479,10 +1440,6 @@ static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb)
		**/
		static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
		{
		/* place header in linear portion of buffer */
		if (skb_is_nonlinear(skb))
		i40e_pull_tail(rx_ring, skb);

		/* if eth_skb_pad returns an error the skb was freed */
		if (eth_skb_pad(skb))
		return true;
		@@ -1514,12 +1471,78 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
		}

		/**
		* i40e_page_is_reserved - check if reuse is possible
		* i40e_page_is_reusable - check if any reuse is possible
		* @page: page struct to check
		*
		* A page is not reusable if it was allocated under low memory
		* conditions, or it's not in the same NUMA node as this CPU.
		*/
		static inline bool i40e_page_is_reserved(struct page *page)
		static inline bool i40e_page_is_reusable(struct page *page)
		{
		return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
		return (page_to_nid(page) == numa_mem_id()) &&
		!page_is_pfmemalloc(page);
		}

		/**
		* i40e_can_reuse_rx_page - Determine if this page can be reused by
		* the adapter for another receive
		*
		* @rx_buffer: buffer containing the page
		* @page: page address from rx_buffer
		* @truesize: actual size of the buffer in this page
		*
		* If page is reusable, rx_buffer->page_offset is adjusted to point to
		* an unused region in the page.
		*
		* For small pages, @truesize will be a constant value, half the size
		* of the memory at page. We'll attempt to alternate between high and
		* low halves of the page, with one half ready for use by the hardware
		* and the other half being consumed by the stack. We use the page
		* ref count to determine whether the stack has finished consuming the
		* portion of this page that was passed up with a previous packet. If
		* the page ref count is >1, we'll assume the "other" half page is
		* still busy, and this page cannot be reused.
		*
		* For larger pages, @truesize will be the actual space used by the
		* received packet (adjusted upward to an even multiple of the cache
		* line size). This will advance through the page by the amount
		* actually consumed by the received packets while there is still
		* space for a buffer. Each region of larger pages will be used at
		* most once, after which the page will not be reused.
		*
		* In either case, if the page is reusable its refcount is increased.
		**/
		static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
		struct page *page,
		const unsigned int truesize)
		{
		#if (PAGE_SIZE >= 8192)
		unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
		#endif

		/* Is any reuse possible? */
		if (unlikely(!i40e_page_is_reusable(page)))
		return false;

		#if (PAGE_SIZE < 8192)
		/* if we are only owner of page we can reuse it */
		if (unlikely(page_count(page) != 1))
		return false;

		/* flip page offset to other buffer */
		rx_buffer->page_offset ^= truesize;
		#else
		/* move offset up to the next cache line */
		rx_buffer->page_offset += truesize;

		if (rx_buffer->page_offset > last_offset)
		return false;
		#endif

		/* Inc ref count on page before passing it up to the stack */
		get_page(page);

		return true;
		}

		/**
		@@ -1543,23 +1566,25 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
		struct sk_buff *skb)
		{
		struct page *page = rx_buffer->page;
		unsigned char *va = page_address(page) + rx_buffer->page_offset;
		#if (PAGE_SIZE < 8192)
		unsigned int truesize = I40E_RXBUFFER_2048;
		#else
		unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
		unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
		#endif
		unsigned int pull_len;

		if (unlikely(skb_is_nonlinear(skb)))
		goto add_tail_frag;

		/* will the data fit in the skb we allocated? if so, just
		* copy it as it is pretty small anyway
		*/
		if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) {
		unsigned char *va = page_address(page) + rx_buffer->page_offset;

		if (size <= I40E_RX_HDR_SIZE) {
		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));

		/* page is not reserved, we can reuse buffer as-is */
		if (likely(!i40e_page_is_reserved(page)))
		/* page is reusable, we can reuse buffer as-is */
		if (likely(i40e_page_is_reusable(page)))
		return true;

		/* this page cannot be reused so discard it */
		@@ -1567,34 +1592,26 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
		return false;
		}

		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
		rx_buffer->page_offset, size, truesize);

		/* avoid re-using remote pages */
		if (unlikely(i40e_page_is_reserved(page)))
		return false;

		#if (PAGE_SIZE < 8192)
		/* if we are only owner of page we can reuse it */
		if (unlikely(page_count(page) != 1))
		return false;
		/* we need the header to contain the greater of either
		* ETH_HLEN or 60 bytes if the skb->len is less than
		* 60 for skb_pad.
		*/
		pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);

		/* flip page offset to other buffer */
		rx_buffer->page_offset ^= truesize;
		#else
		/* move offset up to the next cache line */
		rx_buffer->page_offset += truesize;
		/* align pull length to size of long to optimize
		* memcpy performance
		*/
		memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));

		if (rx_buffer->page_offset > last_offset)
		return false;
		#endif
		/* update all of the pointers */
		va += pull_len;
		size -= pull_len;

		/* Even if we own the page, we are not allowed to use atomic_set()
		* This would break get_page_unless_zero() users.
		*/
		get_page(rx_buffer->page);
		add_tail_frag:
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
		(unsigned long)va & ~PAGE_MASK, size, truesize);

		return true;
		return i40e_can_reuse_rx_page(rx_buffer, page, truesize);
		}

		/**

drivers/net/ethernet/intel/i40evf/i40e_txrx.c

+93 −76

Original line number	Diff line number	Diff line
		@@ -903,45 +903,6 @@ void i40evf_process_skb_fields(struct i40e_ring *rx_ring,
		skb_record_rx_queue(skb, rx_ring->queue_index);
		}

		/**
		* i40e_pull_tail - i40e specific version of skb_pull_tail
		* @rx_ring: rx descriptor ring packet is being transacted on
		* @skb: pointer to current skb being adjusted
		*
		* This function is an i40e specific version of __pskb_pull_tail. The
		* main difference between this version and the original function is that
		* this function can make several assumptions about the state of things
		* that allow for significant optimizations versus the standard function.
		* As a result we can do things like drop a frag and maintain an accurate
		* truesize for the skb.
		*/
		static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb)
		{
		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
		unsigned char *va;
		unsigned int pull_len;

		/* it is valid to use page_address instead of kmap since we are
		* working with pages allocated out of the lomem pool per
		* alloc_page(GFP_ATOMIC)
		*/
		va = skb_frag_address(frag);

		/* we need the header to contain the greater of either ETH_HLEN or
		* 60 bytes if the skb->len is less than 60 for skb_pad.
		*/
		pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);

		/* align pull length to size of long to optimize memcpy performance */
		skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));

		/* update all of the pointers */
		skb_frag_size_sub(frag, pull_len);
		frag->page_offset += pull_len;
		skb->data_len -= pull_len;
		skb->tail += pull_len;
		}

		/**
		* i40e_cleanup_headers - Correct empty headers
		* @rx_ring: rx descriptor ring packet is being transacted on
		@@ -957,10 +918,6 @@ static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb)
		**/
		static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
		{
		/* place header in linear portion of buffer */
		if (skb_is_nonlinear(skb))
		i40e_pull_tail(rx_ring, skb);

		/* if eth_skb_pad returns an error the skb was freed */
		if (eth_skb_pad(skb))
		return true;
		@@ -992,12 +949,78 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
		}

		/**
		* i40e_page_is_reserved - check if reuse is possible
		* i40e_page_is_reusable - check if any reuse is possible
		* @page: page struct to check
		*
		* A page is not reusable if it was allocated under low memory
		* conditions, or it's not in the same NUMA node as this CPU.
		*/
		static inline bool i40e_page_is_reserved(struct page *page)
		static inline bool i40e_page_is_reusable(struct page *page)
		{
		return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
		return (page_to_nid(page) == numa_mem_id()) &&
		!page_is_pfmemalloc(page);
		}

		/**
		* i40e_can_reuse_rx_page - Determine if this page can be reused by
		* the adapter for another receive
		*
		* @rx_buffer: buffer containing the page
		* @page: page address from rx_buffer
		* @truesize: actual size of the buffer in this page
		*
		* If page is reusable, rx_buffer->page_offset is adjusted to point to
		* an unused region in the page.
		*
		* For small pages, @truesize will be a constant value, half the size
		* of the memory at page. We'll attempt to alternate between high and
		* low halves of the page, with one half ready for use by the hardware
		* and the other half being consumed by the stack. We use the page
		* ref count to determine whether the stack has finished consuming the
		* portion of this page that was passed up with a previous packet. If
		* the page ref count is >1, we'll assume the "other" half page is
		* still busy, and this page cannot be reused.
		*
		* For larger pages, @truesize will be the actual space used by the
		* received packet (adjusted upward to an even multiple of the cache
		* line size). This will advance through the page by the amount
		* actually consumed by the received packets while there is still
		* space for a buffer. Each region of larger pages will be used at
		* most once, after which the page will not be reused.
		*
		* In either case, if the page is reusable its refcount is increased.
		**/
		static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
		struct page *page,
		const unsigned int truesize)
		{
		#if (PAGE_SIZE >= 8192)
		unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
		#endif

		/* Is any reuse possible? */
		if (unlikely(!i40e_page_is_reusable(page)))
		return false;

		#if (PAGE_SIZE < 8192)
		/* if we are only owner of page we can reuse it */
		if (unlikely(page_count(page) != 1))
		return false;

		/* flip page offset to other buffer */
		rx_buffer->page_offset ^= truesize;
		#else
		/* move offset up to the next cache line */
		rx_buffer->page_offset += truesize;

		if (rx_buffer->page_offset > last_offset)
		return false;
		#endif

		/* Inc ref count on page before passing it up to the stack */
		get_page(page);

		return true;
		}

		/**
		@@ -1021,23 +1044,25 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
		struct sk_buff *skb)
		{
		struct page *page = rx_buffer->page;
		unsigned char *va = page_address(page) + rx_buffer->page_offset;
		#if (PAGE_SIZE < 8192)
		unsigned int truesize = I40E_RXBUFFER_2048;
		#else
		unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
		unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
		#endif
		unsigned int pull_len;

		if (unlikely(skb_is_nonlinear(skb)))
		goto add_tail_frag;

		/* will the data fit in the skb we allocated? if so, just
		* copy it as it is pretty small anyway
		*/
		if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) {
		unsigned char *va = page_address(page) + rx_buffer->page_offset;

		if (size <= I40E_RX_HDR_SIZE) {
		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));

		/* page is not reserved, we can reuse buffer as-is */
		if (likely(!i40e_page_is_reserved(page)))
		/* page is reusable, we can reuse buffer as-is */
		if (likely(i40e_page_is_reusable(page)))
		return true;

		/* this page cannot be reused so discard it */
		@@ -1045,34 +1070,26 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
		return false;
		}

		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
		rx_buffer->page_offset, size, truesize);

		/* avoid re-using remote pages */
		if (unlikely(i40e_page_is_reserved(page)))
		return false;

		#if (PAGE_SIZE < 8192)
		/* if we are only owner of page we can reuse it */
		if (unlikely(page_count(page) != 1))
		return false;
		/* we need the header to contain the greater of either
		* ETH_HLEN or 60 bytes if the skb->len is less than
		* 60 for skb_pad.
		*/
		pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);

		/* flip page offset to other buffer */
		rx_buffer->page_offset ^= truesize;
		#else
		/* move offset up to the next cache line */
		rx_buffer->page_offset += truesize;
		/* align pull length to size of long to optimize
		* memcpy performance
		*/
		memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));

		if (rx_buffer->page_offset > last_offset)
		return false;
		#endif
		/* update all of the pointers */
		va += pull_len;
		size -= pull_len;

		/* Even if we own the page, we are not allowed to use atomic_set()
		* This would break get_page_unless_zero() users.
		*/
		get_page(rx_buffer->page);
		add_tail_frag:
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
		(unsigned long)va & ~PAGE_MASK, size, truesize);

		return true;
		return i40e_can_reuse_rx_page(rx_buffer, page, truesize);
		}

		/**

Admin message