Commit 1f99fc7f authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'bnxt_en-health-and-error-recovery'



Michael Chan says:

====================
bnxt_en: health and error recovery.

This patchset implements adapter health and error recovery.  The status
is reported through several devlink reporters and the driver will
initiate and complete the recovery process using the devlink infrastructure.

v2: Added 4 patches at the beginning of the patchset to clean up error code
    handling related to firmware messages and to convert to use standard
    error codes.

    Removed the dropping of rtnl_lock in bnxt_close().

    Broke up the patches some more for better patch organization and
    future bisection.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 19828bd3 acfb50e4
Loading
Loading
Loading
Loading
+779 −105

File changed.

Preview size limit exceeded, changes collapsed.

+91 −0
Original line number Original line Diff line number Diff line
@@ -472,6 +472,19 @@ struct rx_tpa_end_cmp_ext {
	((le32_to_cpu((rx_tpa_end_ext)->rx_tpa_end_cmp_dup_acks) &	\
	((le32_to_cpu((rx_tpa_end_ext)->rx_tpa_end_cmp_dup_acks) &	\
	 RX_TPA_END_CMP_AGG_BUFS_P5) >> RX_TPA_END_CMP_AGG_BUFS_SHIFT_P5)
	 RX_TPA_END_CMP_AGG_BUFS_P5) >> RX_TPA_END_CMP_AGG_BUFS_SHIFT_P5)


#define EVENT_DATA1_RESET_NOTIFY_FATAL(data1)				\
	(((data1) &							\
	  ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK) ==\
	 ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL)

#define EVENT_DATA1_RECOVERY_MASTER_FUNC(data1)				\
	!!((data1) &							\
	   ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC)

#define EVENT_DATA1_RECOVERY_ENABLED(data1)				\
	!!((data1) &							\
	   ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED)

struct nqe_cn {
struct nqe_cn {
	__le16	type;
	__le16	type;
	#define NQ_CN_TYPE_MASK           0x3fUL
	#define NQ_CN_TYPE_MASK           0x3fUL
@@ -632,6 +645,7 @@ struct nqe_cn {
#define BNXT_HWRM_MAX_REQ_LEN		(bp->hwrm_max_req_len)
#define BNXT_HWRM_MAX_REQ_LEN		(bp->hwrm_max_req_len)
#define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
#define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
#define DFLT_HWRM_CMD_TIMEOUT		500
#define DFLT_HWRM_CMD_TIMEOUT		500
#define SHORT_HWRM_CMD_TIMEOUT		20
#define HWRM_CMD_TIMEOUT		(bp->hwrm_cmd_timeout)
#define HWRM_CMD_TIMEOUT		(bp->hwrm_cmd_timeout)
#define HWRM_RESET_TIMEOUT		((HWRM_CMD_TIMEOUT) * 4)
#define HWRM_RESET_TIMEOUT		((HWRM_CMD_TIMEOUT) * 4)
#define HWRM_RESP_ERR_CODE_MASK		0xffff
#define HWRM_RESP_ERR_CODE_MASK		0xffff
@@ -1058,6 +1072,7 @@ struct bnxt_pf_info {
	u8	mac_addr[ETH_ALEN];
	u8	mac_addr[ETH_ALEN];
	u32	first_vf_id;
	u32	first_vf_id;
	u16	active_vfs;
	u16	active_vfs;
	u16	registered_vfs;
	u16	max_vfs;
	u16	max_vfs;
	u32	max_encap_records;
	u32	max_encap_records;
	u32	max_decap_records;
	u32	max_decap_records;
@@ -1217,6 +1232,9 @@ struct bnxt_test_info {
#define BNXT_GRCPF_REG_KONG_COMM		0xA00
#define BNXT_GRCPF_REG_KONG_COMM		0xA00
#define BNXT_GRCPF_REG_KONG_COMM_TRIGGER	0xB00
#define BNXT_GRCPF_REG_KONG_COMM_TRIGGER	0xB00


#define BNXT_GRC_BASE_MASK			0xfffff000
#define BNXT_GRC_OFFSET_MASK			0x00000ffc

struct bnxt_tc_flow_stats {
struct bnxt_tc_flow_stats {
	u64		packets;
	u64		packets;
	u64		bytes;
	u64		bytes;
@@ -1333,6 +1351,53 @@ struct bnxt_ctx_mem_info {
	struct bnxt_ctx_pg_info *tqm_mem[9];
	struct bnxt_ctx_pg_info *tqm_mem[9];
};
};


struct bnxt_fw_health {
	u32 flags;
	u32 polling_dsecs;
	u32 master_func_wait_dsecs;
	u32 normal_func_wait_dsecs;
	u32 post_reset_wait_dsecs;
	u32 post_reset_max_wait_dsecs;
	u32 regs[4];
	u32 mapped_regs[4];
#define BNXT_FW_HEALTH_REG		0
#define BNXT_FW_HEARTBEAT_REG		1
#define BNXT_FW_RESET_CNT_REG		2
#define BNXT_FW_RESET_INPROG_REG	3
	u32 fw_reset_inprog_reg_mask;
	u32 last_fw_heartbeat;
	u32 last_fw_reset_cnt;
	u8 enabled:1;
	u8 master:1;
	u8 tmr_multiplier;
	u8 tmr_counter;
	u8 fw_reset_seq_cnt;
	u32 fw_reset_seq_regs[16];
	u32 fw_reset_seq_vals[16];
	u32 fw_reset_seq_delay_msec[16];
	struct devlink_health_reporter	*fw_reporter;
	struct devlink_health_reporter *fw_reset_reporter;
	struct devlink_health_reporter *fw_fatal_reporter;
};

struct bnxt_fw_reporter_ctx {
	unsigned long sp_event;
};

#define BNXT_FW_HEALTH_REG_TYPE_MASK	3
#define BNXT_FW_HEALTH_REG_TYPE_CFG	0
#define BNXT_FW_HEALTH_REG_TYPE_GRC	1
#define BNXT_FW_HEALTH_REG_TYPE_BAR0	2
#define BNXT_FW_HEALTH_REG_TYPE_BAR1	3

#define BNXT_FW_HEALTH_REG_TYPE(reg)	((reg) & BNXT_FW_HEALTH_REG_TYPE_MASK)
#define BNXT_FW_HEALTH_REG_OFF(reg)	((reg) & ~BNXT_FW_HEALTH_REG_TYPE_MASK)

#define BNXT_FW_HEALTH_WIN_BASE		0x3000
#define BNXT_FW_HEALTH_WIN_MAP_OFF	8

#define BNXT_FW_STATUS_HEALTHY		0x8000

struct bnxt {
struct bnxt {
	void __iomem		*bar0;
	void __iomem		*bar0;
	void __iomem		*bar1;
	void __iomem		*bar1;
@@ -1555,6 +1620,10 @@ struct bnxt {
#define BNXT_STATE_OPEN		0
#define BNXT_STATE_OPEN		0
#define BNXT_STATE_IN_SP_TASK	1
#define BNXT_STATE_IN_SP_TASK	1
#define BNXT_STATE_READ_STATS	2
#define BNXT_STATE_READ_STATS	2
#define BNXT_STATE_FW_RESET_DET 3
#define BNXT_STATE_IN_FW_RESET	4
#define BNXT_STATE_ABORT_ERR	5
#define BNXT_STATE_FW_FATAL_COND	6


	struct bnxt_irq	*irq_tbl;
	struct bnxt_irq	*irq_tbl;
	int			total_irqs;
	int			total_irqs;
@@ -1579,6 +1648,7 @@ struct bnxt {
	#define BNXT_FW_CAP_KONG_MB_CHNL		0x00000080
	#define BNXT_FW_CAP_KONG_MB_CHNL		0x00000080
	#define BNXT_FW_CAP_OVS_64BIT_HANDLE		0x00000400
	#define BNXT_FW_CAP_OVS_64BIT_HANDLE		0x00000400
	#define BNXT_FW_CAP_TRUSTED_VF			0x00000800
	#define BNXT_FW_CAP_TRUSTED_VF			0x00000800
	#define BNXT_FW_CAP_ERROR_RECOVERY		0x00002000
	#define BNXT_FW_CAP_PKG_VER			0x00004000
	#define BNXT_FW_CAP_PKG_VER			0x00004000
	#define BNXT_FW_CAP_CFA_ADV_FLOW		0x00008000
	#define BNXT_FW_CAP_CFA_ADV_FLOW		0x00008000
	#define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX	0x00010000
	#define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX	0x00010000
@@ -1663,6 +1733,24 @@ struct bnxt {
#define BNXT_FLOW_STATS_SP_EVENT	15
#define BNXT_FLOW_STATS_SP_EVENT	15
#define BNXT_UPDATE_PHY_SP_EVENT	16
#define BNXT_UPDATE_PHY_SP_EVENT	16
#define BNXT_RING_COAL_NOW_SP_EVENT	17
#define BNXT_RING_COAL_NOW_SP_EVENT	17
#define BNXT_FW_RESET_NOTIFY_SP_EVENT	18
#define BNXT_FW_EXCEPTION_SP_EVENT	19

	struct delayed_work	fw_reset_task;
	int			fw_reset_state;
#define BNXT_FW_RESET_STATE_POLL_VF	1
#define BNXT_FW_RESET_STATE_RESET_FW	2
#define BNXT_FW_RESET_STATE_ENABLE_DEV	3
#define BNXT_FW_RESET_STATE_POLL_FW	4
#define BNXT_FW_RESET_STATE_OPENING	5

	u16			fw_reset_min_dsecs;
#define BNXT_DFLT_FW_RST_MIN_DSECS	20
	u16			fw_reset_max_dsecs;
#define BNXT_DFLT_FW_RST_MAX_DSECS	60
	unsigned long		fw_reset_timestamp;

	struct bnxt_fw_health	*fw_health;


	struct bnxt_hw_resc	hw_resc;
	struct bnxt_hw_resc	hw_resc;
	struct bnxt_pf_info	pf;
	struct bnxt_pf_info	pf;
@@ -1868,6 +1956,7 @@ extern const u16 bnxt_lhint_arr[];
int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
		       u16 prod, gfp_t gfp);
		       u16 prod, gfp_t gfp);
void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data);
void bnxt_reuse_rx_data(struct bnxt_rx_ring_info *rxr, u16 cons, void *data);
u32 bnxt_fw_health_readl(struct bnxt *bp, int reg_idx);
void bnxt_set_tpa_flags(struct bnxt *bp);
void bnxt_set_tpa_flags(struct bnxt *bp);
void bnxt_set_ring_params(struct bnxt *);
void bnxt_set_ring_params(struct bnxt *);
int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode);
@@ -1900,6 +1989,8 @@ int bnxt_open_nic(struct bnxt *, bool, bool);
int bnxt_half_open_nic(struct bnxt *bp);
int bnxt_half_open_nic(struct bnxt *bp);
void bnxt_half_close_nic(struct bnxt *bp);
void bnxt_half_close_nic(struct bnxt *bp);
int bnxt_close_nic(struct bnxt *, bool, bool);
int bnxt_close_nic(struct bnxt *, bool, bool);
void bnxt_fw_exception(struct bnxt *bp);
void bnxt_fw_reset(struct bnxt *bp);
int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
		     int tx_xdp);
		     int tx_xdp);
int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
+2 −4
Original line number Original line Diff line number Diff line
@@ -377,8 +377,6 @@ static int bnxt_hwrm_set_dcbx_app(struct bnxt *bp, struct dcb_app *app,
	set.data_len = cpu_to_le16(sizeof(*data) + sizeof(*fw_app) * n);
	set.data_len = cpu_to_le16(sizeof(*data) + sizeof(*fw_app) * n);
	set.hdr_cnt = 1;
	set.hdr_cnt = 1;
	rc = hwrm_send_message(bp, &set, sizeof(set), HWRM_CMD_TIMEOUT);
	rc = hwrm_send_message(bp, &set, sizeof(set), HWRM_CMD_TIMEOUT);
	if (rc)
		rc = -EIO;


set_app_exit:
set_app_exit:
	dma_free_coherent(&bp->pdev->dev, data_len, data, mapping);
	dma_free_coherent(&bp->pdev->dev, data_len, data, mapping);
@@ -391,6 +389,7 @@ static int bnxt_hwrm_queue_dscp_qcaps(struct bnxt *bp)
	struct hwrm_queue_dscp_qcaps_input req = {0};
	struct hwrm_queue_dscp_qcaps_input req = {0};
	int rc;
	int rc;


	bp->max_dscp_value = 0;
	if (bp->hwrm_spec_code < 0x10800 || BNXT_VF(bp))
	if (bp->hwrm_spec_code < 0x10800 || BNXT_VF(bp))
		return 0;
		return 0;


@@ -433,8 +432,6 @@ static int bnxt_hwrm_queue_dscp2pri_cfg(struct bnxt *bp, struct dcb_app *app,
	dscp2pri->pri = app->priority;
	dscp2pri->pri = app->priority;
	req.entry_cnt = cpu_to_le16(1);
	req.entry_cnt = cpu_to_le16(1);
	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
	if (rc)
		rc = -EIO;
	dma_free_coherent(&bp->pdev->dev, sizeof(*dscp2pri), dscp2pri,
	dma_free_coherent(&bp->pdev->dev, sizeof(*dscp2pri), dscp2pri,
			  mapping);
			  mapping);
	return rc;
	return rc;
@@ -722,6 +719,7 @@ static const struct dcbnl_rtnl_ops dcbnl_ops = {


void bnxt_dcb_init(struct bnxt *bp)
void bnxt_dcb_init(struct bnxt *bp)
{
{
	bp->dcbx_cap = 0;
	if (bp->hwrm_spec_code < 0x10501)
	if (bp->hwrm_spec_code < 0x10501)
		return;
		return;


+191 −6
Original line number Original line Diff line number Diff line
@@ -15,6 +15,192 @@
#include "bnxt_vfr.h"
#include "bnxt_vfr.h"
#include "bnxt_devlink.h"
#include "bnxt_devlink.h"


static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
				     struct devlink_fmsg *fmsg)
{
	struct bnxt *bp = devlink_health_reporter_priv(reporter);
	struct bnxt_fw_health *health = bp->fw_health;
	u32 val, health_status;
	int rc;

	if (!health || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
		return 0;

	val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
	health_status = val & 0xffff;

	if (health_status == BNXT_FW_STATUS_HEALTHY) {
		rc = devlink_fmsg_string_pair_put(fmsg, "FW status",
						  "Healthy;");
		if (rc)
			return rc;
	} else if (health_status < BNXT_FW_STATUS_HEALTHY) {
		rc = devlink_fmsg_string_pair_put(fmsg, "FW status",
						  "Not yet completed initialization;");
		if (rc)
			return rc;
	} else if (health_status > BNXT_FW_STATUS_HEALTHY) {
		rc = devlink_fmsg_string_pair_put(fmsg, "FW status",
						  "Encountered fatal error and cannot recover;");
		if (rc)
			return rc;
	}

	if (val >> 16) {
		rc = devlink_fmsg_u32_pair_put(fmsg, "Error", val >> 16);
		if (rc)
			return rc;
	}

	val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG);
	rc = devlink_fmsg_u32_pair_put(fmsg, "Reset count", val);
	if (rc)
		return rc;

	return 0;
}

static const struct devlink_health_reporter_ops bnxt_dl_fw_reporter_ops = {
	.name = "fw",
	.diagnose = bnxt_fw_reporter_diagnose,
};

static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter,
				 void *priv_ctx)
{
	struct bnxt *bp = devlink_health_reporter_priv(reporter);

	if (!priv_ctx)
		return -EOPNOTSUPP;

	bnxt_fw_reset(bp);
	return 0;
}

static const
struct devlink_health_reporter_ops bnxt_dl_fw_reset_reporter_ops = {
	.name = "fw_reset",
	.recover = bnxt_fw_reset_recover,
};

static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter,
				 void *priv_ctx)
{
	struct bnxt *bp = devlink_health_reporter_priv(reporter);
	struct bnxt_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
	unsigned long event;

	if (!priv_ctx)
		return -EOPNOTSUPP;

	event = fw_reporter_ctx->sp_event;
	if (event == BNXT_FW_RESET_NOTIFY_SP_EVENT)
		bnxt_fw_reset(bp);
	else if (event == BNXT_FW_EXCEPTION_SP_EVENT)
		bnxt_fw_exception(bp);

	return 0;
}

static const
struct devlink_health_reporter_ops bnxt_dl_fw_fatal_reporter_ops = {
	.name = "fw_fatal",
	.recover = bnxt_fw_fatal_recover,
};

static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
{
	struct bnxt_fw_health *health = bp->fw_health;

	if (!health)
		return;

	health->fw_reporter =
		devlink_health_reporter_create(bp->dl, &bnxt_dl_fw_reporter_ops,
					       0, false, bp);
	if (IS_ERR(health->fw_reporter)) {
		netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
			    PTR_ERR(health->fw_reporter));
		health->fw_reporter = NULL;
	}

	health->fw_reset_reporter =
		devlink_health_reporter_create(bp->dl,
					       &bnxt_dl_fw_reset_reporter_ops,
					       0, true, bp);
	if (IS_ERR(health->fw_reset_reporter)) {
		netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
			    PTR_ERR(health->fw_reset_reporter));
		health->fw_reset_reporter = NULL;
	}

	health->fw_fatal_reporter =
		devlink_health_reporter_create(bp->dl,
					       &bnxt_dl_fw_fatal_reporter_ops,
					       0, true, bp);
	if (IS_ERR(health->fw_fatal_reporter)) {
		netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
			    PTR_ERR(health->fw_fatal_reporter));
		health->fw_fatal_reporter = NULL;
	}
}

static void bnxt_dl_fw_reporters_destroy(struct bnxt *bp)
{
	struct bnxt_fw_health *health = bp->fw_health;

	if (!health)
		return;

	if (health->fw_reporter)
		devlink_health_reporter_destroy(health->fw_reporter);

	if (health->fw_reset_reporter)
		devlink_health_reporter_destroy(health->fw_reset_reporter);

	if (health->fw_fatal_reporter)
		devlink_health_reporter_destroy(health->fw_fatal_reporter);
}

void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event)
{
	struct bnxt_fw_health *fw_health = bp->fw_health;
	struct bnxt_fw_reporter_ctx fw_reporter_ctx;

	if (!fw_health)
		return;

	fw_reporter_ctx.sp_event = event;
	switch (event) {
	case BNXT_FW_RESET_NOTIFY_SP_EVENT:
		if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
			if (!fw_health->fw_fatal_reporter)
				return;

			devlink_health_report(fw_health->fw_fatal_reporter,
					      "FW fatal async event received",
					      &fw_reporter_ctx);
			return;
		}
		if (!fw_health->fw_reset_reporter)
			return;

		devlink_health_report(fw_health->fw_reset_reporter,
				      "FW non-fatal reset event received",
				      &fw_reporter_ctx);
		return;

	case BNXT_FW_EXCEPTION_SP_EVENT:
		if (!fw_health->fw_fatal_reporter)
			return;

		devlink_health_report(fw_health->fw_fatal_reporter,
				      "FW fatal error reported",
				      &fw_reporter_ctx);
		return;
	}
}

static const struct devlink_ops bnxt_dl_ops = {
static const struct devlink_ops bnxt_dl_ops = {
#ifdef CONFIG_BNXT_SRIOV
#ifdef CONFIG_BNXT_SRIOV
	.eswitch_mode_set = bnxt_dl_eswitch_mode_set,
	.eswitch_mode_set = bnxt_dl_eswitch_mode_set,
@@ -109,13 +295,9 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
		memcpy(buf, data_addr, bytesize);
		memcpy(buf, data_addr, bytesize);


	dma_free_coherent(&bp->pdev->dev, bytesize, data_addr, data_dma_addr);
	dma_free_coherent(&bp->pdev->dev, bytesize, data_addr, data_dma_addr);
	if (rc == HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED) {
	if (rc == -EACCES)
		netdev_err(bp->dev, "PF does not have admin privileges to modify NVM config\n");
		netdev_err(bp->dev, "PF does not have admin privileges to modify NVM config\n");
		return -EACCES;
	return rc;
	} else if (rc) {
		return -EIO;
	}
	return 0;
}
}


static int bnxt_dl_nvm_param_get(struct devlink *dl, u32 id,
static int bnxt_dl_nvm_param_get(struct devlink *dl, u32 id,
@@ -251,6 +433,8 @@ int bnxt_dl_register(struct bnxt *bp)


	devlink_params_publish(dl);
	devlink_params_publish(dl);


	bnxt_dl_fw_reporters_create(bp);

	return 0;
	return 0;


err_dl_port_unreg:
err_dl_port_unreg:
@@ -273,6 +457,7 @@ void bnxt_dl_unregister(struct bnxt *bp)
	if (!dl)
	if (!dl)
		return;
		return;


	bnxt_dl_fw_reporters_destroy(bp);
	devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params,
	devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params,
				       ARRAY_SIZE(bnxt_dl_port_params));
				       ARRAY_SIZE(bnxt_dl_port_params));
	devlink_port_unregister(&bp->dl_port);
	devlink_port_unregister(&bp->dl_port);
+1 −0
Original line number Original line Diff line number Diff line
@@ -55,6 +55,7 @@ struct bnxt_dl_nvm_param {
	u16 num_bits;
	u16 num_bits;
};
};


void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event);
int bnxt_dl_register(struct bnxt *bp);
int bnxt_dl_register(struct bnxt *bp);
void bnxt_dl_unregister(struct bnxt *bp);
void bnxt_dl_unregister(struct bnxt *bp);


Loading