Commit 8c40f3b2 authored by David S. Miller's avatar David S. Miller
Browse files

Merge tag 'mlx5-updates-2019-08-15' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux



Saeed Mahameed says:

====================
mlx5-updates-2019-08-15

This patchset introduces changes in mlx5 devlink health reporters.
The highlight of these changes is adding a new reporter: RX reporter

mlx5 RX reporter: reports and recovers from timeouts and RX completion
error.

1) Perform TX reporter cleanup. In order to maintain the
code flow as similar as possible between RX and TX reporters, start the
set with cleanup.

2) Prepare for code sharing, generalize and move shared
functionality.

3) Refactor and extend TX reporter diagnostics information
to align the TX reporter diagnostics output with the RX reporter's
diagnostics output.

4) Add helper functions Patch 11: Add RX reporter, initially
supports only the diagnostics call back.

5) Change ICOSQ (Internal Operations Send Queue) open/close flow to
avoid race between interface down and completion error recovery.

6) Introduce recovery flows for RX ring population timeout on ICOSQ,
and for completion errors on ICOSQ and on RQ (Regular receive queues).

7) Include RX reporters in mlx5 documentation.

8) Last two patches of this series, are trivial fixes for previously
submitted patches on this release cycle.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents ac2eb56e b1b9f97a
Loading
Loading
Loading
Loading
+31 −2
Original line number Diff line number Diff line
@@ -126,7 +126,7 @@ Devlink health reporters

tx reporter
-----------
The tx reporter is responsible of two error scenarios:
The tx reporter is responsible for reporting and recovering of the following two error scenarios:

- TX timeout
    Report on kernel tx timeout detection.
@@ -135,7 +135,7 @@ The tx reporter is responsible of two error scenarios:
    Report on error tx completion.
    Recover by flushing the TX queue and reset it.

TX reporter also support Diagnose callback, on which it provides
TX reporter also support on demand diagnose callback, on which it provides
real time information of its send queues status.

User commands examples:
@@ -144,11 +144,40 @@ User commands examples:

    $ devlink health diagnose pci/0000:82:00.0 reporter tx

NOTE: This command has valid output only when interface is up, otherwise the command has empty output.

- Show number of tx errors indicated, number of recover flows ended successfully,
  is autorecover enabled and graceful period from last recover::

    $ devlink health show pci/0000:82:00.0 reporter tx

rx reporter
-----------
The rx reporter is responsible for reporting and recovering of the following two error scenarios:

- RX queues initialization (population) timeout
    RX queues descriptors population on ring initialization is done in
    napi context via triggering an irq, in case of a failure to get
    the minimum amount of descriptors, a timeout would occur and it
    could be recoverable by polling the EQ (Event Queue).
- RX completions with errors (reported by HW on interrupt context)
    Report on rx completion error.
    Recover (if needed) by flushing the related queue and reset it.

RX reporter also supports on demand diagnose callback, on which it
provides real time information of its receive queues status.

- Diagnose rx queues status, and corresponding completion queue::

    $ devlink health diagnose pci/0000:82:00.0 reporter rx

NOTE: This command has valid output only when interface is up, otherwise the command has empty output.

- Show number of rx errors indicated, number of recover flows ended successfully,
  is autorecover enabled and graceful period from last recover::

    $ devlink health show pci/0000:82:00.0 reporter rx

fw reporter
-----------
The fw reporter implements diagnose and dump callbacks.
+3 −2
Original line number Diff line number Diff line
@@ -23,8 +23,9 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
#
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
		en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \
		en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o
		en_selftest.o en/port.o en/monitor_stats.o en/health.o \
		en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \
		en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o

#
# Netdev extra
+32 −0
Original line number Diff line number Diff line
@@ -300,6 +300,7 @@ struct mlx5e_dcbx_dp {

enum {
	MLX5E_RQ_STATE_ENABLED,
	MLX5E_RQ_STATE_RECOVERING,
	MLX5E_RQ_STATE_AM,
	MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
	MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */
@@ -551,6 +552,8 @@ struct mlx5e_icosq {
	/* control path */
	struct mlx5_wq_ctrl        wq_ctrl;
	struct mlx5e_channel      *channel;

	struct work_struct         recover_work;
} ____cacheline_aligned_in_smp;

struct mlx5e_wqe_frag_info {
@@ -670,6 +673,8 @@ struct mlx5e_rq {
	struct zero_copy_allocator zca;
	struct xdp_umem       *umem;

	struct work_struct     recover_work;

	/* control */
	struct mlx5_wq_ctrl    wq_ctrl;
	__be32                 mkey_be;
@@ -846,6 +851,7 @@ struct mlx5e_priv {
	struct mlx5e_tls          *tls;
#endif
	struct devlink_health_reporter *tx_reporter;
	struct devlink_health_reporter *rx_reporter;
	struct mlx5e_xsk           xsk;
};

@@ -887,6 +893,26 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);

static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
{
	switch (rq->wq_type) {
	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
		return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
	default:
		return mlx5_wq_cyc_get_size(&rq->wqe.wq);
	}
}

static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
{
	switch (rq->wq_type) {
	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
		return rq->mpwqe.wq.cur_sz;
	default:
		return rq->wqe.wq.cur_sz;
	}
}

bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
				struct mlx5e_params *params);
@@ -1005,6 +1031,12 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
			       struct mlx5e_params *params);
int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
void mlx5e_activate_rq(struct mlx5e_rq *rq);
void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
void mlx5e_activate_icosq(struct mlx5e_icosq *icosq);
void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);

int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
		    struct mlx5e_modify_sq_param *p);
+205 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Mellanox Technologies.

#include "health.h"
#include "lib/eq.h"

int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
	int err;

	err = devlink_fmsg_pair_nest_start(fmsg, name);
	if (err)
		return err;

	err = devlink_fmsg_obj_nest_start(fmsg);
	if (err)
		return err;

	return 0;
}

int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg)
{
	int err;

	err = devlink_fmsg_obj_nest_end(fmsg);
	if (err)
		return err;

	err = devlink_fmsg_pair_nest_end(fmsg);
	if (err)
		return err;

	return 0;
}

int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
{
	struct mlx5e_priv *priv = cq->channel->priv;
	u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
	u8 hw_status;
	void *cqc;
	int err;

	err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out, sizeof(out));
	if (err)
		return err;

	cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
	hw_status = MLX5_GET(cqc, cqc, status);

	err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
	if (err)
		return err;

	err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
	if (err)
		return err;

	err = mlx5e_reporter_named_obj_nest_end(fmsg);
	if (err)
		return err;

	return 0;
}

int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
{
	u8 cq_log_stride;
	u32 cq_sz;
	int err;

	cq_sz = mlx5_cqwq_get_size(&cq->wq);
	cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);

	err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
	if (err)
		return err;

	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
	if (err)
		return err;

	err = mlx5e_reporter_named_obj_nest_end(fmsg);
	if (err)
		return err;

	return 0;
}

int mlx5e_health_create_reporters(struct mlx5e_priv *priv)
{
	int err;

	err = mlx5e_reporter_tx_create(priv);
	if (err)
		return err;

	err = mlx5e_reporter_rx_create(priv);
	if (err)
		return err;

	return 0;
}

void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_rx_destroy(priv);
	mlx5e_reporter_tx_destroy(priv);
}

void mlx5e_health_channels_update(struct mlx5e_priv *priv)
{
	if (priv->tx_reporter)
		devlink_health_reporter_state_update(priv->tx_reporter,
						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
	if (priv->rx_reporter)
		devlink_health_reporter_state_update(priv->rx_reporter,
						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
}

int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
{
	struct mlx5_core_dev *mdev = channel->mdev;
	struct net_device *dev = channel->netdev;
	struct mlx5e_modify_sq_param msp = {};
	int err;

	msp.curr_state = MLX5_SQC_STATE_ERR;
	msp.next_state = MLX5_SQC_STATE_RST;

	err = mlx5e_modify_sq(mdev, sqn, &msp);
	if (err) {
		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
		return err;
	}

	memset(&msp, 0, sizeof(msp));
	msp.curr_state = MLX5_SQC_STATE_RST;
	msp.next_state = MLX5_SQC_STATE_RDY;

	err = mlx5e_modify_sq(mdev, sqn, &msp);
	if (err) {
		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
		return err;
	}

	return 0;
}

int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
{
	int err = 0;

	rtnl_lock();
	mutex_lock(&priv->state_lock);

	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
		goto out;

	err = mlx5e_safe_reopen_channels(priv);

out:
	mutex_unlock(&priv->state_lock);
	rtnl_unlock();

	return err;
}

int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel)
{
	u32 eqe_count;

	netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
		   eq->core.eqn, eq->core.cons_index, eq->core.irqn);

	eqe_count = mlx5_eq_poll_irq_disabled(eq);
	if (!eqe_count)
		return -EIO;

	netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n",
		   eqe_count, eq->core.eqn);

	channel->stats->eq_rearm++;
	return 0;
}

int mlx5e_health_report(struct mlx5e_priv *priv,
			struct devlink_health_reporter *reporter, char *err_str,
			struct mlx5e_err_ctx *err_ctx)
{
	if (!reporter) {
		netdev_err(priv->netdev, err_str);
		return err_ctx->recover(&err_ctx->ctx);
	}
	return devlink_health_report(reporter, err_str, err_ctx);
}
+53 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2019 Mellanox Technologies. */

#ifndef __MLX5E_EN_HEALTH_H
#define __MLX5E_EN_HEALTH_H

#include "en.h"

#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)

static inline bool cqe_syndrome_needs_recover(u8 syndrome)
{
	return syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR ||
	       syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
	       syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
	       syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
}

int mlx5e_reporter_tx_create(struct mlx5e_priv *priv);
void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv);
void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq);
int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq);

int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg);
int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name);
int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);

int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq);
void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);

#define MLX5E_REPORTER_PER_Q_MAX_LEN 256

struct mlx5e_err_ctx {
	int (*recover)(void *ctx);
	void *ctx;
};

int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn);
int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel);
int mlx5e_health_recover_channels(struct mlx5e_priv *priv);
int mlx5e_health_report(struct mlx5e_priv *priv,
			struct devlink_health_reporter *reporter, char *err_str,
			struct mlx5e_err_ctx *err_ctx);
int mlx5e_health_create_reporters(struct mlx5e_priv *priv);
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
void mlx5e_health_channels_update(struct mlx5e_priv *priv);


#endif
Loading