Commit be1222b5 authored by Björn Töpel, committed by Alexei Starovoitov

i40e: Separate kernel allocated rx_bi rings from AF_XDP rings



Continuing the path to support MEM_TYPE_XSK_BUFF_POOL, the AF_XDP
zero-copy/sk_buff rx_bi rings are now separate. Functions to properly
allocate the different rings are added as well.

v3->v4: Made i40e_fd_handle_status() static. (kbuild test robot)
v4->v5: Fix kdoc for i40e_clean_programming_status(). (Jakub)

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org
Link: https://lore.kernel.org/bpf/20200520192103.355233-8-bjorn.topel@gmail.com
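The gist of the change, as a minimal standalone sketch with stand-in names (the driver's real types live in i40e_txrx.h below): one buffer-info struct per Rx mode, and the ring keeps a single union pointer that is interpreted according to whether an AF_XDP umem is attached.

/* Sketch only: simplified stand-ins, not the i40e definitions. */
struct rx_buffer {			/* kernel-allocated, page-based Rx */
	void *page;
	unsigned int page_offset;
	unsigned short pagecnt_bias;
};

struct rx_buffer_zc {			/* AF_XDP zero-copy Rx */
	void *addr;
	unsigned long long handle;
};

struct ring {
	unsigned int count;
	union {				/* one pointer, two element types */
		struct rx_buffer *rx_bi;
		struct rx_buffer_zc *rx_bi_zc;
	};
};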
parent e1675f97
drivers/net/ethernet/intel/i40e/i40e_main.c +7 −0
@@ -3260,8 +3260,12 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	if (ring->vsi->type == I40E_VSI_MAIN)
 		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
 
+	kfree(ring->rx_bi);
 	ring->xsk_umem = i40e_xsk_umem(ring);
 	if (ring->xsk_umem) {
+		ret = i40e_alloc_rx_bi_zc(ring);
+		if (ret)
+			return ret;
 		ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
 				   XDP_PACKET_HEADROOM;
 		/* For AF_XDP ZC, we disallow packets to span on
@@ -3280,6 +3284,9 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 			 ring->queue_index);
 
 	} else {
+		ret = i40e_alloc_rx_bi(ring);
+		if (ret)
+			return ret;
 		ring->rx_buf_len = vsi->rx_buf_len;
 		if (ring->vsi->type == I40E_VSI_MAIN) {
 			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
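The hunk above frees any previously allocated array and then allocates the variant matching the ring's mode. A compilable userspace sketch of the same pattern, with free()/calloc() standing in for kfree()/kzalloc() (free(NULL), like kfree(NULL), is a no-op, so the first configuration pass needs no special case):

#include <errno.h>
#include <stdlib.h>

struct rx_buffer    { void *page; };	/* stand-in */
struct rx_buffer_zc { void *addr; };	/* stand-in */

struct ring {
	unsigned int count;
	void *xsk_umem;			/* non-NULL selects zero-copy */
	union {
		struct rx_buffer *rx_bi;
		struct rx_buffer_zc *rx_bi_zc;
	};
};

static int configure_rx_bufs(struct ring *ring)
{
	free(ring->rx_bi);		/* no-op when still NULL */
	if (ring->xsk_umem)
		ring->rx_bi_zc = calloc(ring->count, sizeof(*ring->rx_bi_zc));
	else
		ring->rx_bi = calloc(ring->count, sizeof(*ring->rx_bi));
	return ring->rx_bi ? 0 : -ENOMEM;	/* either union member tests the pointer */
}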
drivers/net/ethernet/intel/i40e/i40e_txrx.c +44 −75
@@ -521,28 +521,29 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
 /**
  * i40e_fd_handle_status - check the Programming Status for FD
  * @rx_ring: the Rx ring for this descriptor
- * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
+ * @qword0_raw: qword0
+ * @qword1: qword1 after le_to_cpu
  * @prog_id: the id originally used for programming
  *
  * This is used to verify if the FD programming or invalidation
  * requested by SW to the HW is successful or not and take actions accordingly.
  **/
-void i40e_fd_handle_status(struct i40e_ring *rx_ring,
-			   union i40e_rx_desc *rx_desc, u8 prog_id)
+static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+				  u64 qword1, u8 prog_id)
 {
 	struct i40e_pf *pf = rx_ring->vsi->back;
 	struct pci_dev *pdev = pf->pdev;
+	struct i40e_32b_rx_wb_qw0 *qw0;
 	u32 fcnt_prog, fcnt_avail;
 	u32 error;
-	u64 qw;
 
-	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
+	qw0 = (struct i40e_32b_rx_wb_qw0 *)&qword0_raw;
+	error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
 		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
 
 	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
-		pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
-		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
+		pf->fd_inv = le32_to_cpu(qw0->hi_dword.fd_id);
+		if (qw0->hi_dword.fd_id != 0 ||
 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
 			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
 				 pf->fd_inv);
@@ -560,7 +561,7 @@ void i40e_fd_handle_status(struct i40e_ring *rx_ring,
 		/* store the current atr filter count */
 		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
 
-		if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
+		if (qw0->hi_dword.fd_id == 0 &&
 		    test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) {
 			/* These set_bit() calls aren't atomic with the
 			 * test_bit() here, but worse case we potentially
@@ -589,7 +590,7 @@ void i40e_fd_handle_status(struct i40e_ring *rx_ring,
 	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
 			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
-				 rx_desc->wb.qword0.hi_dword.fd_id);
+				 qw0->hi_dword.fd_id);
 	}
 }

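Note how the rewritten i40e_fd_handle_status() takes the raw first qword by value and views it through the newly named struct i40e_32b_rx_wb_qw0 (see the i40e_type.h hunk below), instead of dereferencing the whole descriptor. A standalone illustration of that cast with a simplified stand-in layout; like the driver on little-endian hardware, it relies on the struct mirroring the wire format:

#include <stdint.h>
#include <stdio.h>

struct wb_qw0 {				/* simplified stand-in layout */
	uint16_t mirroring_status;
	uint16_t fcoe_ctx_id;
	uint32_t fd_id;			/* hi dword: flow-director filter id */
};

int main(void)
{
	uint64_t qword0_raw = (uint64_t)42 << 32;	/* fd_id = 42 */
	struct wb_qw0 *qw0 = (struct wb_qw0 *)&qword0_raw;

	printf("fd_id = %u\n", qw0->fd_id);	/* 42 on a little-endian host */
	return 0;
}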
@@ -1232,29 +1233,10 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
 }
 
 /**
- * i40e_rx_is_programming_status - check for programming status descriptor
- * @qw: qword representing status_error_len in CPU ordering
- *
- * The value of in the descriptor length field indicate if this
- * is a programming status descriptor for flow director or FCoE
- * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
- * it is a packet descriptor.
- **/
-static inline bool i40e_rx_is_programming_status(u64 qw)
-{
-	/* The Rx filter programming status and SPH bit occupy the same
-	 * spot in the descriptor. Since we don't support packet split we
-	 * can just reuse the bit as an indication that this is a
-	 * programming status descriptor.
-	 */
-	return qw & I40E_RXD_QW1_LENGTH_SPH_MASK;
-}
-
-/**
- * i40e_clean_programming_status - try clean the programming status descriptor
+ * i40e_clean_programming_status - clean the programming status descriptor
  * @rx_ring: the rx ring that has this descriptor
- * @rx_desc: the rx descriptor written back by HW
- * @qw: qword representing status_error_len in CPU ordering
+ * @qword0_raw: qword0
+ * @qword1: qword1 representing status_error_len in CPU ordering
  *
  * Flow director should handle FD_FILTER_STATUS to check its filter programming
  * status being successful or not and take actions accordingly. FCoE should
@@ -1262,34 +1244,16 @@ static inline bool i40e_rx_is_programming_status(u64 qw)
  *
- * Returns an i40e_rx_buffer to reuse if the cleanup occurred, otherwise NULL.
  **/
-struct i40e_rx_buffer *i40e_clean_programming_status(
-	struct i40e_ring *rx_ring,
-	union i40e_rx_desc *rx_desc,
-	u64 qw)
+void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+				   u64 qword1)
 {
-	struct i40e_rx_buffer *rx_buffer;
-	u32 ntc;
 	u8 id;
 
-	if (!i40e_rx_is_programming_status(qw))
-		return NULL;
-
-	ntc = rx_ring->next_to_clean;
-
-	/* fetch, update, and store next to clean */
-	rx_buffer = i40e_rx_bi(rx_ring, ntc++);
-	ntc = (ntc < rx_ring->count) ? ntc : 0;
-	rx_ring->next_to_clean = ntc;
-
-	prefetch(I40E_RX_DESC(rx_ring, ntc));
-
-	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
+	id = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
 		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
 
 	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
-		i40e_fd_handle_status(rx_ring, rx_desc, id);
-
-	return rx_buffer;
+		i40e_fd_handle_status(rx_ring, qword0_raw, qword1, id);
 }
 
 /**
@@ -1341,13 +1305,25 @@ err:
 	return -ENOMEM;
 }
 
+int i40e_alloc_rx_bi(struct i40e_ring *rx_ring)
+{
+	unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;
+
+	rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
+	return rx_ring->rx_bi ? 0 : -ENOMEM;
+}
+
+static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
+{
+	memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
+}
+
 /**
  * i40e_clean_rx_ring - Free Rx buffers
  * @rx_ring: ring to be cleaned
  **/
 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 {
-	unsigned long bi_size;
 	u16 i;
 
 	/* ring already cleared, nothing to do */
@@ -1393,8 +1369,10 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 	}
 
 skip_free:
-	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
-	memset(rx_ring->rx_bi, 0, bi_size);
+	if (rx_ring->xsk_umem)
+		i40e_clear_rx_bi_zc(rx_ring);
+	else
+		i40e_clear_rx_bi(rx_ring);
 
 	/* Zero out the descriptor ring */
 	memset(rx_ring->desc, 0, rx_ring->size);
@@ -1435,15 +1413,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 {
 	struct device *dev = rx_ring->dev;
-	int err = -ENOMEM;
-	int bi_size;
-
-	/* warn if we are about to overwrite the pointer */
-	WARN_ON(rx_ring->rx_bi);
-	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
-	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
-	if (!rx_ring->rx_bi)
-		goto err;
+	int err;
 
 	u64_stats_init(&rx_ring->syncp);
 
@@ -1456,7 +1426,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 	if (!rx_ring->desc) {
 		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
 			 rx_ring->size);
-		goto err;
+		return -ENOMEM;
 	}
 
 	rx_ring->next_to_alloc = 0;
@@ -1468,16 +1438,12 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 		err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
 				       rx_ring->queue_index);
 		if (err < 0)
-			goto err;
+			return err;
 	}
 
 	rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
 
 	return 0;
-err:
-	kfree(rx_ring->rx_bi);
-	rx_ring->rx_bi = NULL;
-	return err;
 }
 
 /**
@@ -2387,9 +2353,12 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 		 */
 		dma_rmb();
 
-		rx_buffer = i40e_clean_programming_status(rx_ring, rx_desc,
-							  qword);
-		if (unlikely(rx_buffer)) {
+		if (i40e_rx_is_programming_status(qword)) {
+			i40e_clean_programming_status(rx_ring,
+						      rx_desc->raw.qword[0],
+						      qword);
+			rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+			i40e_inc_ntc(rx_ring);
 			i40e_reuse_rx_page(rx_ring, rx_buffer);
 			cleaned_count++;
 			continue;
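After this hunk the hot loop owns descriptor consumption: it tests for a programming-status descriptor once, hands the helper only the raw qwords, then advances next_to_clean and recycles the buffer itself. A sketch of the predicate that gates the branch, with a stand-in mask (the driver uses I40E_RXD_QW1_LENGTH_SPH_MASK):

#include <stdbool.h>
#include <stdint.h>

/* stand-in for I40E_RXD_QW1_LENGTH_SPH_MASK */
#define LENGTH_SPH_MASK (1ULL << 63)

/* Packet split is unsupported, so the SPH bit can double as the
 * "this is a programming status descriptor" marker.
 */
static bool rx_is_programming_status(uint64_t qword1)
{
	return qword1 & LENGTH_SPH_MASK;
}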
drivers/net/ethernet/intel/i40e/i40e_txrx.h +11 −11
@@ -296,18 +296,16 @@ struct i40e_tx_buffer {
 
 struct i40e_rx_buffer {
 	dma_addr_t dma;
-	union {
-		struct {
-			struct page *page;
-			__u32 page_offset;
-			__u16 pagecnt_bias;
-		};
-		struct {
-			void *addr;
-			u64 handle;
-		};
-	};
+	struct page *page;
+	__u32 page_offset;
+	__u16 pagecnt_bias;
+};
+
+struct i40e_rx_buffer_zc {
+	dma_addr_t dma;
+	void *addr;
+	u64 handle;
 };
 
 struct i40e_queue_stats {
 	u64 packets;
@@ -358,6 +356,7 @@ struct i40e_ring {
 	union {
 		struct i40e_tx_buffer *tx_bi;
 		struct i40e_rx_buffer *rx_bi;
+		struct i40e_rx_buffer_zc *rx_bi_zc;
 	};
 	DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS);
 	u16 queue_index;		/* Queue number of ring */
@@ -495,6 +494,7 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
 int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		  u32 flags);
+int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);
 
 /**
  * i40e_get_head - Retrieve head from head writeback
drivers/net/ethernet/intel/i40e/i40e_txrx_common.h +34 −6
@@ -4,13 +4,9 @@
 #ifndef I40E_TXRX_COMMON_
 #define I40E_TXRX_COMMON_
 
-void i40e_fd_handle_status(struct i40e_ring *rx_ring,
-			   union i40e_rx_desc *rx_desc, u8 prog_id);
 int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring);
-struct i40e_rx_buffer *i40e_clean_programming_status(
-	struct i40e_ring *rx_ring,
-	union i40e_rx_desc *rx_desc,
-	u64 qw);
+void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+				   u64 qword1);
 void i40e_process_skb_fields(struct i40e_ring *rx_ring,
 			     union i40e_rx_desc *rx_desc, struct sk_buff *skb);
 void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring);
@@ -84,6 +80,38 @@ static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
 	}
 }
 
+/**
+ * i40e_rx_is_programming_status - check for programming status descriptor
+ * @qword1: qword1 representing status_error_len in CPU ordering
+ *
+ * The value of in the descriptor length field indicate if this
+ * is a programming status descriptor for flow director or FCoE
+ * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
+ * it is a packet descriptor.
+ **/
+static inline bool i40e_rx_is_programming_status(u64 qword1)
+{
+	/* The Rx filter programming status and SPH bit occupy the same
+	 * spot in the descriptor. Since we don't support packet split we
+	 * can just reuse the bit as an indication that this is a
+	 * programming status descriptor.
+	 */
+	return qword1 & I40E_RXD_QW1_LENGTH_SPH_MASK;
+}
+
+/**
+ * i40e_inc_ntc: Advance the next_to_clean index
+ * @rx_ring: Rx ring
+ **/
+static inline void i40e_inc_ntc(struct i40e_ring *rx_ring)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+	prefetch(I40E_RX_DESC(rx_ring, ntc));
+}
+
 void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring);
 void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
 bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi);
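i40e_inc_ntc() wraps next_to_clean with a compare instead of a modulo. A standalone check of the wrap-around (simplified ring type, prefetch omitted):

#include <assert.h>

struct ring {
	unsigned int count;
	unsigned int next_to_clean;
};

static void inc_ntc(struct ring *r)
{
	unsigned int ntc = r->next_to_clean + 1;

	r->next_to_clean = (ntc < r->count) ? ntc : 0;
}

int main(void)
{
	struct ring r = { .count = 4, .next_to_clean = 3 };

	inc_ntc(&r);
	assert(r.next_to_clean == 0);	/* wrapped at count */
	return 0;
}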
drivers/net/ethernet/intel/i40e/i40e_type.h +4 −1
@@ -689,7 +689,7 @@ union i40e_32byte_rx_desc {
 		__le64  rsvd2;
 	} read;
 	struct {
-		struct {
+		struct i40e_32b_rx_wb_qw0 {
 			struct {
 				union {
 					__le16 mirroring_status;
@@ -727,6 +727,9 @@ union i40e_32byte_rx_desc {
 			} hi_dword;
 		} qword3;
 	} wb;  /* writeback */
+	struct {
+		u64 qword[4];
+	} raw;
 };
 
 enum i40e_rx_desc_status_bits {
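The new raw member overlays the same 32 descriptor bytes as four u64 words; it is what i40e_clean_rx_irq() above passes as qword0_raw. A simplified stand-in showing the aliasing:

#include <assert.h>
#include <stdint.h>

union rx_desc {				/* simplified 32-byte descriptor */
	struct {
		uint64_t qword0;
		uint64_t status_error_len;	/* qword1 */
		uint64_t qword2;
		uint64_t qword3;
	} wb;				/* structured writeback view */
	struct {
		uint64_t qword[4];	/* untyped view of the same bytes */
	} raw;
};

int main(void)
{
	union rx_desc desc = { .wb.qword0 = 0xabcd };

	assert(desc.raw.qword[0] == 0xabcd);	/* raw aliases wb */
	return 0;
}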