Commit 5f29458b authored by Aya Levin's avatar Aya Levin Committed by Saeed Mahameed
Browse files

net/mlx5e: Support dump callback in TX reporter



Add support for SQ's FW dump on TX reporter's events. Use Resource dump
API to retrieve the relevant data: SX slice, SQ dump and SQ buffer. Wrap
it in formatted messages and store the binary output in devlink core.

Example:
$ devlink health dump show pci/0000:00:0b.0 reporter tx
SX Slice:
   data:
     00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
     22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
     00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
     ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
     00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
     22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
     00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
     ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
     00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff
  SQs:
    SQ:
      index: 1511
      data:
        00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
        00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff
    SQ:
      index: 1516
      data:
        00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
        00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff

$ devlink health dump show pci/0000:00:0b.0 reporter tx -jp
{
    "SX Slice": {
    	"data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255],
    	},
    "SQs": [ {
            "SQ": {
                "index": 1511,
                "data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255]
            }
        },{
            "SQ": {
                "index": 1516,
                "data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255]
            }
        } ]
}

Signed-off-by: default avatarAya Levin <ayal@mellanox.com>
Reviewed-by: default avatarMoshe Shemesh <moshe@mellanox.com>
Acked-by: default avatarJiri Pirko <jiri@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
parent 0a56be3c
Loading
Loading
Loading
Loading
+105 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@

#include "health.h"
#include "lib/eq.h"
#include "lib/mlx5.h"

int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
@@ -204,3 +205,107 @@ int mlx5e_health_report(struct mlx5e_priv *priv,

	return devlink_health_report(reporter, err_str, err_ctx);
}

#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
					const void *value, u32 value_len)

{
	u32 data_size;
	u32 offset;
	int err;

	for (offset = 0; offset < value_len; offset += data_size) {
		data_size = value_len - offset;
		if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
			data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
		err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
		if (err)
			break;
	}
	return err;
}

int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
			       struct devlink_fmsg *fmsg)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_rsc_dump_cmd *cmd;
	struct page *page;
	int cmd_err, err;
	int end_err;
	int size;

	if (IS_ERR_OR_NULL(mdev->rsc_dump))
		return -EOPNOTSUPP;

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
	if (err)
		return err;

	cmd = mlx5_rsc_dump_cmd_create(mdev, key);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto free_page;
	}

	do {
		cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
		if (cmd_err < 0) {
			err = cmd_err;
			goto destroy_cmd;
		}

		err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
		if (err)
			goto destroy_cmd;

	} while (cmd_err > 0);

destroy_cmd:
	mlx5_rsc_dump_cmd_destroy(cmd);
	end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
	if (end_err)
		err = end_err;
free_page:
	__free_page(page);
	return err;
}

int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
			    int queue_idx, char *lbl)
{
	struct mlx5_rsc_key key = {};
	int err;

	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
	key.index1 = queue_idx;
	key.size = PAGE_SIZE;
	key.num_of_obj1 = 1;

	err = devlink_fmsg_obj_nest_start(fmsg);
	if (err)
		return err;

	err = mlx5e_reporter_named_obj_nest_start(fmsg, lbl);
	if (err)
		return err;

	err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
	if (err)
		return err;

	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	if (err)
		return err;

	err = mlx5e_reporter_named_obj_nest_end(fmsg);
	if (err)
		return err;

	return devlink_fmsg_obj_nest_end(fmsg);
}
+6 −2
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
#define __MLX5E_EN_HEALTH_H

#include "en.h"
#include "diag/rsc_dump.h"

#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)

@@ -36,6 +37,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);

struct mlx5e_err_ctx {
	int (*recover)(void *ctx);
	int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx);
	void *ctx;
};

@@ -48,6 +50,8 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
int mlx5e_health_create_reporters(struct mlx5e_priv *priv);
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
void mlx5e_health_channels_update(struct mlx5e_priv *priv);


int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
			       struct devlink_fmsg *fmsg);
int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
			    int queue_idx, char *lbl);
#endif
+123 −0
Original line number Diff line number Diff line
@@ -246,6 +246,126 @@ unlock:
	return err;
}

static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
				     void *ctx)
{
	struct mlx5_rsc_key key = {};
	struct mlx5e_txqsq *sq = ctx;
	int err;

	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
		return 0;

	err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice");
	if (err)
		return err;

	key.size = PAGE_SIZE;
	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	if (err)
		return err;

	err = mlx5e_reporter_named_obj_nest_end(fmsg);
	if (err)
		return err;

	err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ");
	if (err)
		return err;

	err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC");
	if (err)
		return err;

	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
	key.index1 = sq->sqn;
	key.num_of_obj1 = 1;

	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	if (err)
		return err;

	err = mlx5e_reporter_named_obj_nest_end(fmsg);
	if (err)
		return err;

	err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff");
	if (err)
		return err;

	key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	if (err)
		return err;

	err = mlx5e_reporter_named_obj_nest_end(fmsg);
	if (err)
		return err;

	return mlx5e_reporter_named_obj_nest_end(fmsg);
}

static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
					  struct devlink_fmsg *fmsg)
{
	struct mlx5_rsc_key key = {};
	int i, tc, err;

	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
		return 0;

	err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice");
	if (err)
		return err;

	key.size = PAGE_SIZE;
	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
	if (err)
		return err;

	err = mlx5e_reporter_named_obj_nest_end(fmsg);
	if (err)
		return err;

	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
	if (err)
		return err;

	for (i = 0; i < priv->channels.num; i++) {
		struct mlx5e_channel *c = priv->channels.c[i];

		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
			struct mlx5e_txqsq *sq = &c->sq[tc];

			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
			if (err)
				return err;
		}
	}
	return devlink_fmsg_arr_pair_nest_end(fmsg);
}

static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
					   struct mlx5e_err_ctx *err_ctx,
					   struct devlink_fmsg *fmsg)
{
	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
}

static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
				  struct devlink_fmsg *fmsg, void *context,
				  struct netlink_ext_ack *extack)
{
	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
	struct mlx5e_err_ctx *err_ctx = context;

	return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) :
			 mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);
}

void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
{
	struct mlx5e_priv *priv = sq->channel->priv;
@@ -254,6 +374,7 @@ void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)

	err_ctx.ctx = sq;
	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
	sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);

	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
@@ -267,6 +388,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)

	err_ctx.ctx = sq;
	err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
	sprintf(err_str,
		"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
		sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
@@ -279,6 +401,7 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
		.name = "tx",
		.recover = mlx5e_tx_reporter_recover,
		.diagnose = mlx5e_tx_reporter_diagnose,
		.dump = mlx5e_tx_reporter_dump,
};

#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500