Commit ea96ceac authored by Thomas Klein, committed by David S. Miller
Browse files

ehea: error handling improvement



Reset a port's resources only if they're actually in an error state

Signed-off-by: Thomas Klein <tklein@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent a1aa8822
Loading
Loading
Loading
Loading
+26 −5
Original line number Diff line number Diff line
@@ -791,12 +791,18 @@ static struct ehea_cqe *ehea_proc_cqes(struct ehea_port_res *pr, int my_quota)
		cqe_counter++;
		rmb();
		if (cqe->status & EHEA_CQE_STAT_ERR_MASK) {
			ehea_error("Send Completion Error: Resetting port");
			ehea_error("Bad send completion status=0x%04X",
				   cqe->status);

			if (netif_msg_tx_err(pr->port))
				ehea_dump(cqe, sizeof(*cqe), "Send CQE");

			if (cqe->status & EHEA_CQE_STAT_RESET_MASK) {
				ehea_error("Resetting port");
				ehea_schedule_port_reset(pr->port);
				break;
			}
		}

		if (netif_msg_tx_done(pr->port))
			ehea_dump(cqe, sizeof(*cqe), "CQE");
@@ -901,6 +907,8 @@ static irqreturn_t ehea_qp_aff_irq_handler(int irq, void *param)
	struct ehea_eqe *eqe;
	struct ehea_qp *qp;
	u32 qp_token;
	u64 resource_type, aer, aerr;
	int reset_port = 0;

	eqe = ehea_poll_eq(port->qp_eq);

@@ -910,11 +918,24 @@ static irqreturn_t ehea_qp_aff_irq_handler(int irq, void *param)
			   eqe->entry, qp_token);

		qp = port->port_res[qp_token].qp;
		ehea_error_data(port->adapter, qp->fw_handle);

		resource_type = ehea_error_data(port->adapter, qp->fw_handle,
						&aer, &aerr);

		if (resource_type == EHEA_AER_RESTYPE_QP) {
			if ((aer & EHEA_AER_RESET_MASK) ||
			    (aerr & EHEA_AERR_RESET_MASK))
				 reset_port = 1;
		} else
			reset_port = 1;   /* Reset in case of CQ or EQ error */

		eqe = ehea_poll_eq(port->qp_eq);
	}

	if (reset_port) {
		ehea_error("Resetting port");
		ehea_schedule_port_reset(port);
	}

	return IRQ_HANDLED;
}
+23 −20
Original line number Diff line number Diff line
@@ -229,14 +229,14 @@ u64 ehea_destroy_cq_res(struct ehea_cq *cq, u64 force)

int ehea_destroy_cq(struct ehea_cq *cq)
{
	u64 hret;
	u64 hret, aer, aerr;
	if (!cq)
		return 0;

	hcp_epas_dtor(&cq->epas);
	hret = ehea_destroy_cq_res(cq, NORMAL_FREE);
	if (hret == H_R_STATE) {
		ehea_error_data(cq->adapter, cq->fw_handle);
		ehea_error_data(cq->adapter, cq->fw_handle, &aer, &aerr);
		hret = ehea_destroy_cq_res(cq, FORCE_FREE);
	}

@@ -357,7 +357,7 @@ u64 ehea_destroy_eq_res(struct ehea_eq *eq, u64 force)

int ehea_destroy_eq(struct ehea_eq *eq)
{
	u64 hret;
	u64 hret, aer, aerr;
	if (!eq)
		return 0;

@@ -365,7 +365,7 @@ int ehea_destroy_eq(struct ehea_eq *eq)

	hret = ehea_destroy_eq_res(eq, NORMAL_FREE);
	if (hret == H_R_STATE) {
		ehea_error_data(eq->adapter, eq->fw_handle);
		ehea_error_data(eq->adapter, eq->fw_handle, &aer, &aerr);
		hret = ehea_destroy_eq_res(eq, FORCE_FREE);
	}

@@ -540,7 +540,7 @@ u64 ehea_destroy_qp_res(struct ehea_qp *qp, u64 force)

int ehea_destroy_qp(struct ehea_qp *qp)
{
	u64 hret;
	u64 hret, aer, aerr;
	if (!qp)
		return 0;

@@ -548,7 +548,7 @@ int ehea_destroy_qp(struct ehea_qp *qp)

	hret = ehea_destroy_qp_res(qp, NORMAL_FREE);
	if (hret == H_R_STATE) {
		ehea_error_data(qp->adapter, qp->fw_handle);
		ehea_error_data(qp->adapter, qp->fw_handle, &aer, &aerr);
		hret = ehea_destroy_qp_res(qp, FORCE_FREE);
	}

@@ -986,42 +986,45 @@ void print_error_data(u64 *data)
	if (length > EHEA_PAGESIZE)
		length = EHEA_PAGESIZE;

	if (type == 0x8) /* Queue Pair */
	if (type == EHEA_AER_RESTYPE_QP)
		ehea_error("QP (resource=%llX) state: AER=0x%llX, AERR=0x%llX, "
			   "port=%llX", resource, data[6], data[12], data[22]);

	if (type == 0x4) /* Completion Queue */
	else if (type == EHEA_AER_RESTYPE_CQ)
		ehea_error("CQ (resource=%llX) state: AER=0x%llX", resource,
			   data[6]);

	if (type == 0x3) /* Event Queue */
	else if (type == EHEA_AER_RESTYPE_EQ)
		ehea_error("EQ (resource=%llX) state: AER=0x%llX", resource,
			   data[6]);

	ehea_dump(data, length, "error data");
}

void ehea_error_data(struct ehea_adapter *adapter, u64 res_handle)
u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle,
		    u64 *aer, u64 *aerr)
{
	unsigned long ret;
	u64 *rblock;
	u64 type = 0;

	rblock = (void *)get_zeroed_page(GFP_KERNEL);
	if (!rblock) {
		ehea_error("Cannot allocate rblock memory.");
		return;
		goto out;
	}

	ret = ehea_h_error_data(adapter->handle,
				res_handle,
				rblock);
	ret = ehea_h_error_data(adapter->handle, res_handle, rblock);

	if (ret == H_R_STATE)
		ehea_error("No error data is available: %llX.", res_handle);
	else if (ret == H_SUCCESS)
	if (ret == H_SUCCESS) {
		type = EHEA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
		*aer = rblock[6];
		*aerr = rblock[12];
		print_error_data(rblock);
	else
	} else if (ret == H_R_STATE) {
		ehea_error("No error data available: %llX.", res_handle);
	} else
		ehea_error("Error data could not be fetched: %llX", res_handle);

	free_page((unsigned long)rblock);
out:
	return type;
}
+13 −1
Original line number Diff line number Diff line
@@ -154,6 +154,9 @@ struct ehea_rwqe {
#define EHEA_CQE_STAT_ERR_IP       0x2000
#define EHEA_CQE_STAT_ERR_CRC      0x1000

/* Defines which bad send cqe stati lead to a port reset */
#define EHEA_CQE_STAT_RESET_MASK   0x0002

struct ehea_cqe {
	u64 wr_id;		/* work request ID from WQE */
	u8 type;
@@ -187,6 +190,14 @@ struct ehea_cqe {
#define EHEA_EQE_SM_MECH_NUMBER  EHEA_BMASK_IBM(48, 55)
#define EHEA_EQE_SM_PORT_NUMBER  EHEA_BMASK_IBM(56, 63)

#define EHEA_AER_RESTYPE_QP  0x8
#define EHEA_AER_RESTYPE_CQ  0x4
#define EHEA_AER_RESTYPE_EQ  0x3

/* Defines which affiliated errors lead to a port reset */
#define EHEA_AER_RESET_MASK   0xFFFFFFFFFEFFFFFFULL
#define EHEA_AERR_RESET_MASK  0xFFFFFFFFFFFFFFFFULL

struct ehea_eqe {
	u64 entry;
};
@@ -379,7 +390,8 @@ int ehea_gen_smr(struct ehea_adapter *adapter, struct ehea_mr *old_mr,

int ehea_rem_mr(struct ehea_mr *mr);

void ehea_error_data(struct ehea_adapter *adapter, u64 res_handle);
u64 ehea_error_data(struct ehea_adapter *adapter, u64 res_handle,
		    u64 *aer, u64 *aerr);

int ehea_add_sect_bmap(unsigned long pfn, unsigned long nr_pages);
int ehea_rem_sect_bmap(unsigned long pfn, unsigned long nr_pages);