Commit d639836a authored by Igor Russkikh's avatar Igor Russkikh Committed by David S. Miller
Browse files

net: qed: adding hw_err states and handling



Here we introduce qed device error tracking flags and error types.

qed_hw_err_notify is the entry point for reporting errors.
It notifies higher-level drivers (qede/qedr/etc.) so that they can handle
and recover from the error.

The list of possible errors comes from the hardware interfaces, but could be
extended in the future.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c8a867a3
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -1020,6 +1020,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
		   u32 input_len, u8 *input_buf,
		   u32 max_size, u8 *unzip_buf);
void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
/* Log the type of a HW error and pass it to the protocol driver's
 * schedule_hw_err_handler callback, when one is registered.
 */
void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
			   enum qed_hw_err_type err_type);
void qed_get_protocol_stats(struct qed_dev *cdev,
			    enum qed_mcp_protocol_type type,
			    union qed_mcp_protocol_stats *stats);
+32 −0
Original line number Diff line number Diff line
@@ -837,6 +837,38 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
	return rc;
}

/* Report a HW error.
 *
 * When @fmt is not NULL, the printf-style message it describes is formatted
 * into a local buffer (truncated to QED_HW_ERR_MAX_STR_SIZE - 1 characters)
 * and logged. The error is then passed on through qed_hw_error_occurred(),
 * unless a recovery is already in progress; a fan failure is always passed
 * on.
 *
 * @p_ptt is not used by this function.
 */
void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
		       struct qed_ptt *p_ptt,
		       enum qed_hw_err_type err_type, char *fmt, ...)
{
	char buf[QED_HW_ERR_MAX_STR_SIZE];
	va_list vl;

	if (fmt) {
		/* vsnprintf() bounds the write to the buffer size, so its
		 * return value (the untruncated length) is not needed here.
		 */
		va_start(vl, fmt);
		vsnprintf(buf, sizeof(buf), fmt, vl);
		va_end(vl);

		DP_NOTICE(p_hwfn, "%s", buf);
	}

	/* Fan failure cannot be masked by handling of another HW error */
	if (p_hwfn->cdev->recov_in_prog &&
	    err_type != QED_HW_ERR_FAN_FAIL) {
		DP_VERBOSE(p_hwfn,
			   NETIF_MSG_DRV,
			   "Recovery is in progress. Avoid notifying about HW error %d.\n",
			   err_type);
		return;
	}

	qed_hw_error_occurred(p_hwfn, err_type);
}

int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
		    struct qed_ptt *p_ptt, const char *phase)
{
+15 −0
Original line number Diff line number Diff line
@@ -315,4 +315,19 @@ int qed_init_fw_data(struct qed_dev *cdev,
int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
		    struct qed_ptt *p_ptt, const char *phase);

/* Size, in bytes, of the buffer qed_hw_err_notify() formats its message into */
#define QED_HW_ERR_MAX_STR_SIZE 256

/**
 * @brief qed_hw_err_notify - Log an optional message and report a HW error
 *	to the upper layer driver.
 *
 *	Errors other than QED_HW_ERR_FAN_FAIL are not reported while a
 *	recovery is in progress.
 *
 *	NOTE(review): no notification of the management FW is visible in the
 *	implementation reviewed with this header - confirm whether that is
 *	handled elsewhere.
 *
 * @param p_hwfn
 * @param p_ptt - not used by the implementation reviewed with this header
 * @param err_type - type of the error being reported
 * @param fmt - printf-style format of the message to log; may be NULL, in
 *	which case nothing is logged
 * @param ... - arguments for fmt
 */
void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
		       struct qed_ptt *p_ptt,
		       enum qed_hw_err_type err_type, char *fmt, ...);
#endif
+29 −0
Original line number Diff line number Diff line
@@ -2468,6 +2468,35 @@ void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
		ops->schedule_recovery_handler(cookie);
}

/* Human-readable description of each qed_hw_err_type, indexed by the enum
 * value. The QED_HW_ERR_LAST slot doubles as the "Unknown" fallback.
 *
 * The table is read-only, so both the pointers and the strings are const.
 * NOTE(review): made static on the assumption that no other file references
 * this table - confirm.
 */
static const char * const qed_hw_err_type_descr[] = {
	[QED_HW_ERR_FAN_FAIL]		= "Fan Failure",
	[QED_HW_ERR_MFW_RESP_FAIL]	= "MFW Response Failure",
	[QED_HW_ERR_HW_ATTN]		= "HW Attention",
	[QED_HW_ERR_DMAE_FAIL]		= "DMAE Failure",
	[QED_HW_ERR_RAMROD_FAIL]	= "Ramrod Failure",
	[QED_HW_ERR_FW_ASSERT]		= "FW Assertion",
	[QED_HW_ERR_LAST]		= "Unknown",
};

/* Log a HW error and hand it to the protocol driver.
 *
 * An @err_type above QED_HW_ERR_LAST is clamped to QED_HW_ERR_LAST, both for
 * the logged description and for the value passed to the callback. The
 * callback is invoked only when the common ops and their
 * schedule_hw_err_handler member are set.
 */
void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
			   enum qed_hw_err_type err_type)
{
	struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
	void *cookie = p_hwfn->cdev->ops_cookie;
	const char *err_str;

	/* Keep the table lookup in bounds; unknown types read as "Unknown" */
	if (err_type > QED_HW_ERR_LAST)
		err_type = QED_HW_ERR_LAST;
	err_str = qed_hw_err_type_descr[err_type];

	DP_NOTICE(p_hwfn, "HW error occurred [%s]\n", err_str);

	/* Call the HW error handler of the protocol driver
	 */
	if (ops && ops->schedule_hw_err_handler)
		ops->schedule_hw_err_handler(cookie, err_type);
}

static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
			    void *handle)
{
+12 −0
Original line number Diff line number Diff line
@@ -607,6 +607,16 @@ struct qed_sb_info {
	struct qed_dev *cdev;
};

/* Types of HW error that can be reported to the protocol driver.
 * Values are consecutive, starting at zero.
 */
enum qed_hw_err_type {
	QED_HW_ERR_FAN_FAIL = 0,	/* fan failure */
	QED_HW_ERR_MFW_RESP_FAIL,	/* MFW response failure */
	QED_HW_ERR_HW_ATTN,		/* HW attention */
	QED_HW_ERR_DMAE_FAIL,		/* DMAE failure */
	QED_HW_ERR_RAMROD_FAIL,		/* ramrod failure */
	QED_HW_ERR_FW_ASSERT,		/* FW assertion */
	QED_HW_ERR_LAST,		/* not a real error; keep last */
};

enum qed_dev_type {
	QED_DEV_TYPE_BB,
	QED_DEV_TYPE_AH,
@@ -814,6 +824,8 @@ struct qed_common_cb_ops {
	void	(*link_update)(void			*dev,
			       struct qed_link_output	*link);
	void (*schedule_recovery_handler)(void *dev);
	void (*schedule_hw_err_handler)(void *dev,
					enum qed_hw_err_type err_type);
	void	(*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
	void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
	void (*get_protocol_tlv_data)(void *dev, void *data);