Commit 20eb08b2 authored by David S. Miller's avatar David S. Miller
Browse files

Merge tag 'mlx5-updates-2019-04-22' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux



Saeed Mahameed says:

====================
mlx5-updates-2019-04-22

This series includes updates to mlx5e driver RX data path and some
significant XDP RX/TX improvements to overcome/mitigate HW and PCIE
bottlenecks.

From Tariq:
1) Some Enhancements in rq->flags
2) Stabilize RX packet rate (on Striding RQ) with
multiple outstanding UMR posts
In this patch, we add support for multiple outstanding UMR posts,
 to allow faster gap closure between consuming MPWQEs and reposting
them back into the WQ.

Performance test:
As expected, huge improvement in large-scale (48 cores).

xdp_redirect_map, 64B UDP multi-stream.
Redirect from ConnectX-5 100Gbps to ConnectX-6 100Gbps.
CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz.

Before: Unstable, 7 to 30 Mpps
After:  Stable,   at 70.5 Mpps

From Shay:
3) XDP, Inline small packets into the TX MPWQE in XDP xmit flow

Upon high packet rate with multiple CPUs TX workloads, much of the HCA's
resources are spent on prefetching TX descriptors, thus affecting
transmission rates.
This patch comes to mitigate this problem by moving some workload to the
CPU and reducing the HW data prefetch overhead for small packets (<= 256B).

When forwarding packets with XDP, a packet that is smaller
than a certain size (set to ~256 bytes) would be sent inline within
its WQE TX descriptor (mem-copied), when the hardware tx queue is congested
beyond a pre-defined water-mark.

Performance:
    Tested packet rate for UDP 64Byte multi-stream
    over two dual port ConnectX-5 100Gbps NICs.
    CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz

    * Tested with hyper-threading disabled

    XDP_TX:

    |          | before | after   |       |
    | 24 rings | 51Mpps | 116Mpps | +126% |
    | 1 ring   | 12Mpps | 12Mpps  | same  |

    XDP_REDIRECT:

    ** Below is the transmit rate, not the redirection rate
    which might be larger, and is not affected by this patch.

    |          | before  | after   |      |
    | 32 rings | 64Mpps  | 92Mpps  | +43% |
    | 1 ring   | 6.4Mpps | 6.4Mpps | same |

As we can see, the feature significantly improves scaling without
hurting single-ring performance.

From Maxim:
4) Some trivial refactoring and code improvements prior to a larger series
to support AF_XDP.
====================

Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 539b593d f8ebecf2
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -22,7 +22,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
#
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
		en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o
		en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \
		en/params.o

#
# Netdev extra
+13 −7
Original line number Diff line number Diff line
@@ -409,14 +409,17 @@ struct mlx5e_xdp_info_fifo {

struct mlx5e_xdp_wqe_info {
	u8 num_wqebbs;
	u8 num_ds;
	u8 num_pkts;
};

struct mlx5e_xdp_mpwqe {
	/* Current MPWQE session */
	struct mlx5e_tx_wqe *wqe;
	u8                   ds_count;
	u8                   pkt_count;
	u8                   max_ds_count;
	u8                   complete;
	u8                   inline_on;
};

struct mlx5e_xdpsq;
@@ -428,7 +431,6 @@ struct mlx5e_xdpsq {
	/* dirtied @completion */
	u32                        xdpi_fifo_cc;
	u16                        cc;
	bool                       redirect_flush;

	/* dirtied @xmit */
	u32                        xdpi_fifo_pc ____cacheline_aligned_in_smp;
@@ -461,10 +463,10 @@ struct mlx5e_xdpsq {

struct mlx5e_icosq {
	/* data path */
	u16                        cc;
	u16                        pc;

	/* dirtied @xmit */
	u16                        pc ____cacheline_aligned_in_smp;

	struct mlx5_wqe_ctrl_seg  *doorbell_cseg;
	struct mlx5e_cq            cq;

	/* write@xmit, read@completion */
@@ -531,7 +533,8 @@ typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);

enum mlx5e_rq_flag {
	MLX5E_RQ_FLAG_XDP_XMIT = BIT(0),
	MLX5E_RQ_FLAG_XDP_XMIT,
	MLX5E_RQ_FLAG_XDP_REDIRECT,
};

struct mlx5e_rq_frag_info {
@@ -562,8 +565,10 @@ struct mlx5e_rq {
			struct mlx5e_mpw_info *info;
			mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq;
			u16                    num_strides;
			u16                    actual_wq_head;
			u8                     log_stride_sz;
			bool                   umr_in_progress;
			u8                     umr_in_progress;
			u8                     umr_last_bulk;
		} mpwqe;
	};
	struct {
@@ -773,6 +778,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			  struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);

void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
void mlx5e_completion_event(struct mlx5_core_cq *mcq);
void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
int mlx5e_napi_poll(struct napi_struct *napi, int budget);
+104 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "en/params.h"

/*
 * Compute the fragment size used for a linear RX buffer.
 *
 * The size is MLX5_SKB_FRAG_SZ() applied to the headroom plus the HW MTU
 * derived from params->sw_mtu. When an XDP program is set, the result is
 * never smaller than PAGE_SIZE.
 */
u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params)
{
	u16 headroom;
	u16 mtu;
	u32 sz;

	/* Headroom depends on whether an XDP program is set. */
	if (params->xdp_prog)
		headroom = XDP_PACKET_HEADROOM;
	else
		headroom = MLX5_RX_HEADROOM;
	headroom += NET_IP_ALIGN;

	mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
	sz = MLX5_SKB_FRAG_SZ(headroom + mtu);

	/* Only the XDP case is rounded up to a full page. */
	if (!params->xdp_prog || sz >= PAGE_SIZE)
		return sz;

	return PAGE_SIZE;
}

/*
 * Return MLX5_MPWRQ_LOG_WQE_SZ minus the log2 (rounded up) of the linear
 * fragment size, i.e. the log2 of how many linear fragments fit in one
 * multi-packet WQE.
 */
u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params)
{
	u32 pkt_sz = mlx5e_rx_get_linear_frag_sz(params);
	int log_pkt_sz = order_base_2(pkt_sz);

	return MLX5_MPWRQ_LOG_WQE_SZ - log_pkt_sz;
}

/*
 * A linear SKB is possible only when LRO is disabled and the linear
 * fragment size does not exceed PAGE_SIZE.
 */
bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params)
{
	u32 sz = mlx5e_rx_get_linear_frag_sz(params);

	return !(params->lro_en || sz > PAGE_SIZE);
}

#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \
					  MLX5_MPWQE_LOG_STRIDE_SZ_BASE)
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
				  struct mlx5e_params *params)
{
	u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params);
	s8 signed_log_num_strides_param;
	u8 log_num_strides;

	if (!mlx5e_rx_is_linear_skb(params))
		return false;

	if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ)
		return false;

	if (MLX5_CAP_GEN(mdev, ext_stride_num_range))
		return true;

	log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz);
	signed_log_num_strides_param =
		(s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE;

	return signed_log_num_strides_param >= 0;
}

/*
 * Return the log RQ size for a striding RQ: params->log_rq_mtu_frames
 * reduced by the log packets-per-WQE, but never less than
 * MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW.
 */
u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params)
{
	u8 pkts_shift = mlx5e_mpwqe_log_pkts_per_wqe(params);

	/*
	 * The values are unsigned, so compare against a sum instead of
	 * subtracting first; this avoids underflow.
	 */
	if (params->log_rq_mtu_frames >=
	    pkts_shift + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW)
		return params->log_rq_mtu_frames - pkts_shift;

	return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
}

/*
 * Return the log stride size for a striding RQ: the log2 (rounded up) of
 * the linear fragment size when linear SKBs are usable, otherwise the
 * device default from MLX5_MPWRQ_DEF_LOG_STRIDE_SZ().
 */
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
				   struct mlx5e_params *params)
{
	if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params))
		return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);

	return order_base_2(mlx5e_rx_get_linear_frag_sz(params));
}

/*
 * Return the log number of strides per WQE: MLX5_MPWRQ_LOG_WQE_SZ minus
 * the log stride size chosen for these parameters.
 */
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
				   struct mlx5e_params *params)
{
	u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params);

	return MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz;
}

/*
 * Return the RX headroom for the configured RQ type.
 *
 * The linear-SKB check used depends on params->rq_wq_type (cyclic vs.
 * the MPWQE variant). Non-linear configurations get 0; linear ones get
 * XDP_PACKET_HEADROOM (XDP program set) or MLX5_RX_HEADROOM, plus
 * NET_IP_ALIGN.
 */
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
			  struct mlx5e_params *params)
{
	bool is_linear_skb;
	u16 headroom;

	if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
		is_linear_skb = mlx5e_rx_is_linear_skb(params);
	else
		is_linear_skb = mlx5e_rx_mpwqe_is_linear_skb(mdev, params);

	if (!is_linear_skb)
		return 0;

	if (params->xdp_prog)
		headroom = XDP_PACKET_HEADROOM;
	else
		headroom = MLX5_RX_HEADROOM;
	headroom += NET_IP_ALIGN;

	return headroom;
}
+22 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies. */

#ifndef __MLX5_EN_PARAMS_H__
#define __MLX5_EN_PARAMS_H__

#include "en.h"

/* Fragment size for a linear RX buffer; at least PAGE_SIZE when XDP is set. */
u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params);
/* MLX5_MPWRQ_LOG_WQE_SZ minus order_base_2() of the linear fragment size. */
u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params);
/* True when LRO is off and the linear fragment size is <= PAGE_SIZE. */
bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params);
/* Linear-SKB check for the MPWQE case; also consults device capabilities. */
bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev,
				  struct mlx5e_params *params);
/* Log RQ size, clamped below by MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW. */
u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params);
/* Log stride size: from the linear fragment size, or the device default. */
u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev,
				   struct mlx5e_params *params);
/* MLX5_MPWRQ_LOG_WQE_SZ minus the log stride size. */
u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
				   struct mlx5e_params *params);
/* RX headroom; 0 when the RQ configuration cannot use linear SKBs. */
u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
			  struct mlx5e_params *params);

#endif /* __MLX5_EN_PARAMS_H__ */
+18 −12
Original line number Diff line number Diff line
@@ -85,7 +85,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		rq->xdpsq.redirect_flush = true;
		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
		mlx5e_page_dma_unmap(rq, di);
		rq->stats->xdp_redirect++;
		return true;
@@ -105,6 +105,7 @@ xdp_abort:
static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
	struct mlx5_wq_cyc *wq = &sq->wq;
	u8  wqebbs;
	u16 pi;
@@ -113,6 +114,8 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)

	prefetchw(session->wqe->data);
	session->ds_count  = MLX5E_XDP_TX_EMPTY_DS_COUNT;
	session->pkt_count = 0;
	session->complete  = 0;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);

@@ -131,6 +134,10 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
		       MLX5E_XDP_MPW_MAX_WQEBBS);

	session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;

	mlx5e_xdp_update_inline_state(sq);

	stats->mpwqe++;
}

static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
@@ -147,7 +154,7 @@ static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
	wi->num_ds     = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT;
	wi->num_pkts   = session->pkt_count;

	sq->pc += wi->num_wqebbs;

@@ -162,11 +169,9 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;

	dma_addr_t dma_addr    = xdpi->dma_addr;
	struct xdp_frame *xdpf = xdpi->xdpf;
	unsigned int dma_len   = xdpf->len;

	if (unlikely(sq->hw_mtu < dma_len)) {
	if (unlikely(sq->hw_mtu < xdpf->len)) {
		stats->err++;
		return false;
	}
@@ -183,9 +188,10 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
		mlx5e_xdp_mpwqe_session_start(sq);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len);
	mlx5e_xdp_mpwqe_add_dseg(sq, xdpi, stats);

	if (unlikely(session->ds_count == session->max_ds_count))
	if (unlikely(session->complete ||
		     session->ds_count == session->max_ds_count))
		mlx5e_xdp_mpwqe_complete(sq);

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
@@ -299,7 +305,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)

			sqcc += wi->num_wqebbs;

			for (j = 0; j < wi->num_ds; j++) {
			for (j = 0; j < wi->num_pkts; j++) {
				struct mlx5e_xdp_info xdpi =
					mlx5e_xdpi_fifo_pop(xdpi_fifo);

@@ -340,7 +346,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)

		sq->cc += wi->num_wqebbs;

		for (i = 0; i < wi->num_ds; i++) {
		for (i = 0; i < wi->num_pkts; i++) {
			struct mlx5e_xdp_info xdpi =
				mlx5e_xdpi_fifo_pop(xdpi_fifo);

@@ -419,9 +425,9 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)

	mlx5e_xmit_xdp_doorbell(xdpsq);

	if (xdpsq->redirect_flush) {
	if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
		xdp_do_flush_map();
		xdpsq->redirect_flush = false;
		__clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
	}
}

Loading