Commit b5c7f857 authored by Evan Swanson's avatar Evan Swanson Committed by Jeff Kirsher
Browse files

ice: Handle critical FW error during admin queue initialization



A race condition between FW and SW can occur between admin queue setup and
the first command sent. A link event may occur and FW attempts to notify a
non-existent queue. FW will set the critical error bit and disable the
queue. When this happens retry queue setup.

Signed-off-by: default avatarEvan Swanson <evan.swanson@intel.com>
Signed-off-by: default avatarAnirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: default avatarTony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: default avatarAndrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: default avatarJeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 19608275
Loading
Loading
Loading
Loading
+72 −54
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ do { \
	(qinfo)->sq.bal = prefix##_ATQBAL;			\
	(qinfo)->sq.len_mask = prefix##_ATQLEN_ATQLEN_M;	\
	(qinfo)->sq.len_ena_mask = prefix##_ATQLEN_ATQENABLE_M;	\
	(qinfo)->sq.len_crit_mask = prefix##_ATQLEN_ATQCRIT_M;	\
	(qinfo)->sq.head_mask = prefix##_ATQH_ATQH_M;		\
	(qinfo)->rq.head = prefix##_ARQH;			\
	(qinfo)->rq.tail = prefix##_ARQT;			\
@@ -20,6 +21,7 @@ do { \
	(qinfo)->rq.bal = prefix##_ARQBAL;			\
	(qinfo)->rq.len_mask = prefix##_ARQLEN_ARQLEN_M;	\
	(qinfo)->rq.len_ena_mask = prefix##_ARQLEN_ARQENABLE_M;	\
	(qinfo)->rq.len_crit_mask = prefix##_ARQLEN_ARQCRIT_M;	\
	(qinfo)->rq.head_mask = prefix##_ARQH_ARQH_M;		\
} while (0)

@@ -641,6 +643,50 @@ init_ctrlq_free_sq:
	return ret_code;
}

/**
 * ice_shutdown_ctrlq - shutdown routine for any control queue
 * @hw: pointer to the hardware structure
 * @q_type: specific Control queue type
 *
 * NOTE: this function does not destroy the control queue locks.
 */
static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
{
	struct ice_ctl_q_info *cq;

	switch (q_type) {
	case ICE_CTL_Q_ADMIN:
		cq = &hw->adminq;
		if (ice_check_sq_alive(hw, cq))
			ice_aq_q_shutdown(hw, true);
		break;
	case ICE_CTL_Q_MAILBOX:
		cq = &hw->mailboxq;
		break;
	default:
		return;
	}

	ice_shutdown_sq(hw, cq);
	ice_shutdown_rq(hw, cq);
}

/**
 * ice_shutdown_all_ctrlq - shutdown routine for all control queues
 * @hw: pointer to the hardware structure
 *
 * NOTE: this function does not destroy the control queue locks. The driver
 * may call this at runtime to shutdown and later restart control queues, such
 * as in response to a reset event.
 */
void ice_shutdown_all_ctrlq(struct ice_hw *hw)
{
	/* Shutdown FW admin queue */
	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
	/* Shutdown PF-VF Mailbox */
	ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
}

/**
 * ice_init_all_ctrlq - main initialization routine for all control queues
 * @hw: pointer to the hardware structure
@@ -656,17 +702,27 @@ init_ctrlq_free_sq:
 */
enum ice_status ice_init_all_ctrlq(struct ice_hw *hw)
{
	enum ice_status ret_code;
	enum ice_status status;
	u32 retry = 0;

	/* Init FW admin queue */
	ret_code = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
	if (ret_code)
		return ret_code;
	do {
		status = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
		if (status)
			return status;

	ret_code = ice_init_check_adminq(hw);
	if (ret_code)
		return ret_code;
		status = ice_init_check_adminq(hw);
		if (status != ICE_ERR_AQ_FW_CRITICAL)
			break;

		ice_debug(hw, ICE_DBG_AQ_MSG,
			  "Retry Admin Queue init due to FW critical error\n");
		ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
		msleep(ICE_CTL_Q_ADMIN_INIT_MSEC);
	} while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT);

	if (status)
		return status;
	/* Init Mailbox queue */
	return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX);
}
@@ -707,50 +763,6 @@ enum ice_status ice_create_all_ctrlq(struct ice_hw *hw)
	return ice_init_all_ctrlq(hw);
}

/**
 * ice_shutdown_ctrlq - shutdown routine for any control queue
 * @hw: pointer to the hardware structure
 * @q_type: specific Control queue type
 *
 * NOTE: this function does not destroy the control queue locks.
 */
static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
{
	struct ice_ctl_q_info *cq;

	switch (q_type) {
	case ICE_CTL_Q_ADMIN:
		cq = &hw->adminq;
		if (ice_check_sq_alive(hw, cq))
			ice_aq_q_shutdown(hw, true);
		break;
	case ICE_CTL_Q_MAILBOX:
		cq = &hw->mailboxq;
		break;
	default:
		return;
	}

	ice_shutdown_sq(hw, cq);
	ice_shutdown_rq(hw, cq);
}

/**
 * ice_shutdown_all_ctrlq - shutdown routine for all control queues
 * @hw: pointer to the hardware structure
 *
 * NOTE: this function does not destroy the control queue locks. The driver
 * may call this at runtime to shutdown and later restart control queues, such
 * as in response to a reset event.
 */
void ice_shutdown_all_ctrlq(struct ice_hw *hw)
{
	/* Shutdown FW admin queue */
	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
	/* Shutdown PF-VF Mailbox */
	ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
}

/**
 * ice_destroy_ctrlq_locks - Destroy locks for a control queue
 * @cq: pointer to the control queue
@@ -1049,10 +1061,16 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,

	/* update the error if time out occurred */
	if (!cmd_completed) {
		if (rd32(hw, cq->rq.len) & cq->rq.len_crit_mask ||
		    rd32(hw, cq->sq.len) & cq->sq.len_crit_mask) {
			ice_debug(hw, ICE_DBG_AQ_MSG, "Critical FW error.\n");
			status = ICE_ERR_AQ_FW_CRITICAL;
		} else {
			ice_debug(hw, ICE_DBG_AQ_MSG,
				  "Control Send Queue Writeback timeout.\n");
			status = ICE_ERR_AQ_TIMEOUT;
		}
	}

sq_send_command_error:
	mutex_unlock(&cq->sq_lock);
+3 −0
Original line number Diff line number Diff line
@@ -34,6 +34,8 @@ enum ice_ctl_q {
/* Control Queue timeout settings - max delay 250ms */
#define ICE_CTL_Q_SQ_CMD_TIMEOUT	2500  /* Count 2500 times */
#define ICE_CTL_Q_SQ_CMD_USEC		100   /* Check every 100usec */
#define ICE_CTL_Q_ADMIN_INIT_TIMEOUT	10    /* Count 10 times */
#define ICE_CTL_Q_ADMIN_INIT_MSEC	100   /* Check every 100msec */

struct ice_ctl_q_ring {
	void *dma_head;			/* Virtual address to DMA head */
@@ -59,6 +61,7 @@ struct ice_ctl_q_ring {
	u32 bal;
	u32 len_mask;
	u32 len_ena_mask;
	u32 len_crit_mask;
	u32 head_mask;
};

+2 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
#define PF_MBX_ARQH_ARQH_M			ICE_M(0x3FF, 0)
#define PF_MBX_ARQLEN				0x0022E480
#define PF_MBX_ARQLEN_ARQLEN_M			ICE_M(0x3FF, 0)
#define PF_MBX_ARQLEN_ARQCRIT_M			BIT(30)
#define PF_MBX_ARQLEN_ARQENABLE_M		BIT(31)
#define PF_MBX_ARQT				0x0022E580
#define PF_MBX_ATQBAH				0x0022E180
@@ -47,6 +48,7 @@
#define PF_MBX_ATQH_ATQH_M			ICE_M(0x3FF, 0)
#define PF_MBX_ATQLEN				0x0022E200
#define PF_MBX_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
#define PF_MBX_ATQLEN_ATQCRIT_M			BIT(30)
#define PF_MBX_ATQLEN_ATQENABLE_M		BIT(31)
#define PF_MBX_ATQT				0x0022E300
#define PRTDCB_GENC				0x00083000
+2 −0
Original line number Diff line number Diff line
@@ -5207,6 +5207,8 @@ const char *ice_stat_str(enum ice_status stat_err)
		return "ICE_ERR_AQ_NO_WORK";
	case ICE_ERR_AQ_EMPTY:
		return "ICE_ERR_AQ_EMPTY";
	case ICE_ERR_AQ_FW_CRITICAL:
		return "ICE_ERR_AQ_FW_CRITICAL";
	}

	return "ICE_ERR_UNKNOWN";
+1 −0
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@ enum ice_status {
	ICE_ERR_AQ_FULL				= -102,
	ICE_ERR_AQ_NO_WORK			= -103,
	ICE_ERR_AQ_EMPTY			= -104,
	ICE_ERR_AQ_FW_CRITICAL			= -105,
};

#endif /* _ICE_STATUS_H_ */