Commit 9e93e967 authored by Kaike Wan's avatar Kaike Wan Committed by Doug Ledford
Browse files

IB/hfi1: Add a function to receive TID RDMA ACK packet



This patch adds a function to receive TID RDMA ACK packet, which could
be an acknowledge to either a TID RDMA WRITE DATA packet or an TID
RDMA RESYNC packet. For an ACK to TID RDMA WRITE DATA packet, the
request segments are completed appropriately. For an ACK to a TID
RDMA RESYNC packet, any pending segment flow information is updated
accordingly.

Signed-off-by: default avatarMitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: default avatarMike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: default avatarAshutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: default avatarKaike Wan <kaike.wan@intel.com>
Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent 0f75e325
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -245,6 +245,9 @@ void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
	struct hfi1_qp_priv *priv = qp->priv;
	unsigned long flags;

	if (attr_mask & IB_QP_RETRY_CNT)
		priv->s_retry = attr->retry_cnt;

	spin_lock_irqsave(&priv->opfn.lock, flags);
	if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
		struct tid_rdma_params *local = &priv->tid_rdma.local;
+2 −0
Original line number Diff line number Diff line
@@ -65,6 +65,7 @@ extern const struct rvt_operation_params hfi1_post_parms[];
 * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
 * HFI1_S_WAIT_TID_SPACE - a QP is waiting for TID resource
 * HFI1_S_WAIT_TID_RESP - waiting for a TID RDMA WRITE response
 * HFI1_S_WAIT_HALT - halt the first leg send engine
 * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
 */
#define HFI1_S_AHG_VALID         0x80000000
@@ -72,6 +73,7 @@ extern const struct rvt_operation_params hfi1_post_parms[];
#define HFI1_S_WAIT_PIO_DRAIN    0x20000000
#define HFI1_S_WAIT_TID_SPACE    0x10000000
#define HFI1_S_WAIT_TID_RESP     0x08000000
#define HFI1_S_WAIT_HALT         0x04000000
#define HFI1_S_MIN_BIT_MASK      0x01000000

/*
+212 −0
Original line number Diff line number Diff line
@@ -319,6 +319,7 @@ int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
	qpriv->flow_state.index = RXE_NUM_TID_FLOWS;
	qpriv->flow_state.last_index = RXE_NUM_TID_FLOWS;
	qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
	qpriv->s_state = TID_OP(WRITE_RESP);
	qpriv->s_tid_cur = HFI1_QP_WQE_INVALID;
	qpriv->s_tid_head = HFI1_QP_WQE_INVALID;
	qpriv->s_tid_tail = HFI1_QP_WQE_INVALID;
@@ -327,6 +328,7 @@ int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
	qpriv->r_tid_tail = HFI1_QP_WQE_INVALID;
	qpriv->r_tid_ack = HFI1_QP_WQE_INVALID;
	qpriv->r_tid_alloc = HFI1_QP_WQE_INVALID;
	atomic_set(&qpriv->n_tid_requests, 0);
	timer_setup(&qpriv->s_tid_timer, hfi1_tid_timeout, 0);
	INIT_LIST_HEAD(&qpriv->tid_wait);

@@ -4318,3 +4320,213 @@ u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,

	return sizeof(ohdr->u.tid_rdma.ack) / sizeof(u32);
}

void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
{
	struct ib_other_headers *ohdr = packet->ohdr;
	struct rvt_qp *qp = packet->qp;
	struct hfi1_qp_priv *qpriv = qp->priv;
	struct rvt_swqe *wqe;
	struct tid_rdma_request *req;
	struct tid_rdma_flow *flow;
	u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn;
	bool is_fecn;
	unsigned long flags;
	u16 fidx;

	is_fecn = process_ecn(qp, packet);
	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
	aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth);
	req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn));
	resync_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.tid_flow_psn));

	spin_lock_irqsave(&qp->s_lock, flags);

	/* If we are waiting for an ACK to RESYNC, drop any other packets */
	if ((qp->s_flags & HFI1_S_WAIT_HALT) &&
	    cmp_psn(psn, qpriv->s_resync_psn))
		goto ack_op_err;

	ack_psn = req_psn;
	if (hfi1_tid_rdma_is_resync_psn(psn))
		ack_kpsn = resync_psn;
	else
		ack_kpsn = psn;
	if (aeth >> 29) {
		ack_psn--;
		ack_kpsn--;
	}

	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);

	if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
		goto ack_op_err;

	req = wqe_to_tid_req(wqe);
	flow = &req->flows[req->acked_tail];

	/* Drop stale ACK/NAK */
	if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0)
		goto ack_op_err;

	while (cmp_psn(ack_kpsn,
		       full_flow_psn(flow, flow->flow_state.lpsn)) >= 0 &&
	       req->ack_seg < req->cur_seg) {
		req->ack_seg++;
		/* advance acked segment pointer */
		req->acked_tail = CIRC_NEXT(req->acked_tail, MAX_FLOWS);
		req->r_last_acked = flow->flow_state.resp_ib_psn;
		if (req->ack_seg == req->total_segs) {
			req->state = TID_REQUEST_COMPLETE;
			wqe = do_rc_completion(qp, wqe,
					       to_iport(qp->ibqp.device,
							qp->port_num));
			atomic_dec(&qpriv->n_tid_requests);
			if (qp->s_acked == qp->s_tail)
				break;
			if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
				break;
			req = wqe_to_tid_req(wqe);
		}
		flow = &req->flows[req->acked_tail];
	}

	switch (aeth >> 29) {
	case 0:         /* ACK */
		if (qpriv->s_flags & RVT_S_WAIT_ACK)
			qpriv->s_flags &= ~RVT_S_WAIT_ACK;
		if (!hfi1_tid_rdma_is_resync_psn(psn)) {
			hfi1_schedule_send(qp);
		} else {
			u32 spsn, fpsn, last_acked, generation;
			struct tid_rdma_request *rptr;

			/* Allow new requests (see hfi1_make_tid_rdma_pkt) */
			qp->s_flags &= ~HFI1_S_WAIT_HALT;
			/*
			 * Clear RVT_S_SEND_ONE flag in case that the TID RDMA
			 * ACK is received after the TID retry timer is fired
			 * again. In this case, do not send any more TID
			 * RESYNC request or wait for any more TID ACK packet.
			 */
			qpriv->s_flags &= ~RVT_S_SEND_ONE;
			hfi1_schedule_send(qp);

			if ((qp->s_acked == qpriv->s_tid_tail &&
			     req->ack_seg == req->total_segs) ||
			    qp->s_acked == qp->s_tail) {
				qpriv->s_state = TID_OP(WRITE_DATA_LAST);
				goto done;
			}

			if (req->ack_seg == req->comp_seg) {
				qpriv->s_state = TID_OP(WRITE_DATA);
				goto done;
			}

			/*
			 * The PSN to start with is the next PSN after the
			 * RESYNC PSN.
			 */
			psn = mask_psn(psn + 1);
			generation = psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
			spsn = 0;

			/*
			 * Update to the correct WQE when we get an ACK(RESYNC)
			 * in the middle of a request.
			 */
			if (delta_psn(ack_psn, wqe->lpsn))
				wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
			req = wqe_to_tid_req(wqe);
			flow = &req->flows[req->acked_tail];
			/*
			 * RESYNC re-numbers the PSN ranges of all remaining
			 * segments. Also, PSN's start from 0 in the middle of a
			 * segment and the first segment size is less than the
			 * default number of packets. flow->resync_npkts is used
			 * to track the number of packets from the start of the
			 * real segment to the point of 0 PSN after the RESYNC
			 * in order to later correctly rewind the SGE.
			 */
			fpsn = full_flow_psn(flow, flow->flow_state.spsn);
			req->r_ack_psn = psn;
			flow->resync_npkts +=
				delta_psn(mask_psn(resync_psn + 1), fpsn);
			/*
			 * Renumber all packet sequence number ranges
			 * based on the new generation.
			 */
			last_acked = qp->s_acked;
			rptr = req;
			while (1) {
				/* start from last acked segment */
				for (fidx = rptr->acked_tail;
				     CIRC_CNT(rptr->setup_head, fidx,
					      MAX_FLOWS);
				     fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
					u32 lpsn;
					u32 gen;

					flow = &rptr->flows[fidx];
					gen = flow->flow_state.generation;
					if (WARN_ON(gen == generation &&
						    flow->flow_state.spsn !=
						     spsn))
						continue;
					lpsn = flow->flow_state.lpsn;
					lpsn = full_flow_psn(flow, lpsn);
					flow->npkts =
						delta_psn(lpsn,
							  mask_psn(resync_psn)
							  );
					flow->flow_state.generation =
						generation;
					flow->flow_state.spsn = spsn;
					flow->flow_state.lpsn =
						flow->flow_state.spsn +
						flow->npkts - 1;
					flow->pkt = 0;
					spsn += flow->npkts;
					resync_psn += flow->npkts;
				}
				if (++last_acked == qpriv->s_tid_cur + 1)
					break;
				if (last_acked == qp->s_size)
					last_acked = 0;
				wqe = rvt_get_swqe_ptr(qp, last_acked);
				rptr = wqe_to_tid_req(wqe);
			}
			req->cur_seg = req->ack_seg;
			qpriv->s_tid_tail = qp->s_acked;
			qpriv->s_state = TID_OP(WRITE_REQ);
		}
done:
		qpriv->s_retry = qp->s_retry_cnt;
		break;

	case 3:         /* NAK */
		switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
			IB_AETH_CREDIT_MASK) {
		case 0: /* PSN sequence error */
			flow = &req->flows[req->acked_tail];
			fspsn = full_flow_psn(flow, flow->flow_state.spsn);
			req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
			req->cur_seg = req->ack_seg;
			qpriv->s_tid_tail = qp->s_acked;
			qpriv->s_state = TID_OP(WRITE_REQ);
			qpriv->s_retry = qp->s_retry_cnt;
			break;

		default:
			break;
		}
		break;

	default:
		break;
	}

ack_op_err:
	spin_unlock_irqrestore(&qp->s_lock, flags);
}
+3 −0
Original line number Diff line number Diff line
@@ -101,6 +101,7 @@ struct tid_rdma_request {

	u32 seg_len;
	u32 total_len;
	u32 r_ack_psn;          /* next expected ack PSN */
	u32 r_flow_psn;         /* IB PSN of next segment start */
	u32 r_last_acked;       /* IB PSN of last ACK'ed packet */
	u32 s_next_psn;		/* IB PSN of next segment start for read */
@@ -285,4 +286,6 @@ u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
				  struct ib_other_headers *ohdr, u16 iflow,
				  u32 *bth1, u32 *bth2);

void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet);

#endif /* HFI1_TID_RDMA_H */
+4 −1
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
#define TID_READ_SENDER_PRN "[%s] qpn 0x%x newreq %u tid_r_reqs %u " \
			    "tid_r_comp %u pending_tid_r_segs %u " \
			    "s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \
			    "hw_flow_index %u generation 0x%x " \
			    "s_state 0x%x hw_flow_index %u generation 0x%x " \
			    "fpsn 0x%x flow_flags 0x%x"

#define TID_REQ_PRN "[%s] qpn 0x%x newreq %u opcode 0x%x psn 0x%x lpsn 0x%x " \
@@ -844,6 +844,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
		__field(u32, s_flags)
		__field(u32, ps_flags)
		__field(unsigned long, iow_flags)
		__field(u8, s_state)
		__field(u32, hw_flow_index)
		__field(u32, generation)
		__field(u32, fpsn)
@@ -861,6 +862,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
		__entry->s_flags = qp->s_flags;
		__entry->ps_flags = priv->s_flags;
		__entry->iow_flags = priv->s_iowait.flags;
		__entry->s_state = priv->s_state;
		__entry->hw_flow_index = priv->flow_state.index;
		__entry->generation = priv->flow_state.generation;
		__entry->fpsn = priv->flow_state.psn;
@@ -877,6 +879,7 @@ DECLARE_EVENT_CLASS(/* tid_read_sender */
		__entry->s_flags,
		__entry->ps_flags,
		__entry->iow_flags,
		__entry->s_state,
		__entry->hw_flow_index,
		__entry->generation,
		__entry->fpsn,
Loading