Commit 3ce5daa2 authored by Kaike Wan, committed by Doug Ledford

IB/hfi1: Add static trace for TID RDMA READ protocol



This patch makes the following changes to the static trace:
1. Adds the decoding of TID RDMA READ packets in IB header trace;
2. Tracks qpriv->s_flags and iow_flags in qpsleepwakeup trace;
3. Adds a new event to track RC ACK receiving;
4. Adds trace events for various stages of the TID RDMA READ
protocol. These events provide fine-grained control for monitoring
and debugging the hfi1 driver in the field (see the illustrative
sketch of the trace event pattern below).

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent f1ab4efa
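
For reference, here is a minimal illustrative sketch (not part of this
patch) of the ftrace DECLARE_EVENT_CLASS/DEFINE_EVENT pattern that the
new hfi1 events below follow. The names hfi1_example_template and
hfi1_example_event are hypothetical, and the TRACE_SYSTEM /
TRACE_INCLUDE_FILE boilerplate used by the real hfi1 trace headers is
omitted.

	#include <linux/tracepoint.h>

	DECLARE_EVENT_CLASS(hfi1_example_template,
		/* prototype and arguments of the trace call site */
		TP_PROTO(struct rvt_qp *qp, u32 psn),
		TP_ARGS(qp, psn),
		/* fields copied into the trace ring buffer */
		TP_STRUCT__entry(
			__field(u32, qpn)
			__field(u32, psn)
		),
		TP_fast_assign(
			__entry->qpn = qp->ibqp.qp_num;
			__entry->psn = psn;
		),
		/* how a record is rendered when the trace is read back */
		TP_printk("qpn 0x%x psn 0x%x", __entry->qpn, __entry->psn)
	);

	/* Each named event reuses the class; only the event name differs. */
	DEFINE_EVENT(hfi1_example_template, hfi1_example_event,
		TP_PROTO(struct rvt_qp *qp, u32 psn),
		TP_ARGS(qp, psn)
	);

A call site in the driver would then invoke trace_hfi1_example_event(qp, psn);
at runtime each such event can be enabled or disabled individually through the
kernel tracing interface, which is what provides the fine-grained control
mentioned in the commit message.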
+16 −0
@@ -121,6 +121,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
	bool last_pkt;
	u32 delta;

	trace_hfi1_rsp_make_rc_ack(qp, 0);
	lockdep_assert_held(&qp->s_lock);
	/* Don't send an ACK if we aren't supposed to. */
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
@@ -349,6 +350,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
	int delta;
	struct tid_rdma_flow *flow = NULL;

	trace_hfi1_sender_make_rc_req(qp);
	lockdep_assert_held(&qp->s_lock);
	ps->s_txreq = get_txreq(ps->dev, qp);
	if (!ps->s_txreq)
@@ -596,8 +598,13 @@ no_flow_control:
			break;

		case IB_WR_TID_RDMA_READ:
			trace_hfi1_tid_read_sender_make_req(qp, newreq);
			wpriv = wqe->priv;
			req = wqe_to_tid_req(wqe);
			trace_hfi1_tid_req_make_req_read(qp, newreq,
							 wqe->wr.opcode,
							 wqe->psn, wqe->lpsn,
							 req);
			delta = cmp_psn(qp->s_psn, wqe->psn);

			/*
@@ -892,6 +899,8 @@ no_flow_control:
		    ++qp->s_cur == qp->s_size)
			qp->s_cur = 0;
		qp->s_psn = req->s_next_psn;
		trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
						 wqe->psn, wqe->lpsn, req);
		break;
	case TID_OP(READ_REQ):
		req = wqe_to_tid_req(wqe);
@@ -933,6 +942,8 @@ no_flow_control:
		    ++qp->s_cur == qp->s_size)
			qp->s_cur = 0;
		qp->s_psn = req->s_next_psn;
		trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
						 wqe->psn, wqe->lpsn, req);
		break;
	}
	qp->s_sending_hpsn = bth2;
@@ -1341,6 +1352,7 @@ done:
	    (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
		qp->s_flags |= RVT_S_WAIT_PSN;
	qp->s_flags &= ~HFI1_S_AHG_VALID;
	trace_hfi1_sender_reset_psn(qp);
}

/*
@@ -1355,6 +1367,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)

	lockdep_assert_held(&qp->r_lock);
	lockdep_assert_held(&qp->s_lock);
	trace_hfi1_sender_restart_rc(qp);
	if (qp->s_retry == 0) {
		if (qp->s_mig_state == IB_MIG_ARMED) {
			hfi1_migrate_qp(qp);
@@ -1558,6 +1571,7 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
	 * completion if the SWQE is being resent until the send
	 * is finished.
	 */
	trace_hfi1_rc_completion(qp, wqe->lpsn);
	if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
		u32 s_last;
@@ -1742,6 +1756,8 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
			break;
	}

	trace_hfi1_rc_ack_do(qp, aeth, psn, wqe);
	trace_hfi1_sender_do_rc_ack(qp);
	switch (aeth >> IB_AETH_NAK_SHIFT) {
	case 0:         /* ACK */
		this_cpu_inc(*ibp->rvp.rc_acks);
+46 −5
@@ -1688,6 +1688,7 @@ u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,

	/* This is the IB psn used to send the request */
	*bth2 = mask_psn(flow->flow_state.ib_spsn + flow->pkt);
	trace_hfi1_tid_flow_build_read_pkt(qp, req->flow_idx, flow);

	/* TID Entries for TID RDMA READ payload */
	req_addr = &flow->tid_entry[flow->tid_idx];
@@ -1768,6 +1769,8 @@ u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
	bool retry = true;
	u32 npkts = rvt_div_round_up_mtu(qp, *len);

	trace_hfi1_tid_req_build_read_req(qp, 0, wqe->wr.opcode, wqe->psn,
					  wqe->lpsn, req);
	/*
	 * Check sync conditions. Make sure that there are no pending
	 * segments before freeing the flow.
@@ -1883,6 +1886,8 @@ static int tid_rdma_rcv_read_request(struct rvt_qp *qp,
	 */
	flow->npkts = rvt_div_round_up_mtu(qp, len);
	for (i = 0; i < flow->tidcnt; i++) {
		trace_hfi1_tid_entry_rcv_read_req(qp, i,
						  flow->tid_entry[i]);
		tlen = EXP_TID_GET(flow->tid_entry[i], LEN);
		if (!tlen)
			return 1;
@@ -1917,6 +1922,7 @@ static int tid_rdma_rcv_read_request(struct rvt_qp *qp,
	flow->flow_state.ib_spsn = psn;
	flow->flow_state.ib_lpsn = flow->flow_state.ib_spsn + flow->npkts - 1;

	trace_hfi1_tid_flow_rcv_read_req(qp, req->setup_head, flow);
	/* Set the initial flow index to the current flow. */
	req->flow_idx = req->setup_head;

@@ -1942,6 +1948,8 @@ static int tid_rdma_rcv_read_request(struct rvt_qp *qp,
	req->total_segs = 1;
	req->r_flow_psn = e->psn;

	trace_hfi1_tid_req_rcv_read_req(qp, 0, e->opcode, e->psn, e->lpsn,
					req);
	return 0;
}

@@ -1957,6 +1965,8 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
	u8 prev;
	bool old_req;

	trace_hfi1_rsp_tid_rcv_error(qp, psn);
	trace_hfi1_tid_rdma_rcv_err(qp, 0, psn, diff);
	if (diff > 0) {
		/* sequence error */
		if (!qp->r_nak_state) {
@@ -1977,7 +1987,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,

	req = ack_to_tid_req(e);
	req->r_flow_psn = psn;

	trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req);
	if (e->opcode == TID_OP(READ_REQ)) {
		struct ib_reth *reth;
		u32 offset;
@@ -2088,6 +2098,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)

	is_fecn = process_ecn(qp, packet);
	psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
	trace_hfi1_rsp_rcv_tid_read_req(qp, psn);

	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
		rvt_comm_est(qp);
@@ -2199,6 +2210,9 @@ u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
	next_offset = flow->tid_offset + *len;
	last_pkt = (flow->sent >= flow->length);

	trace_hfi1_tid_entry_build_read_resp(qp, flow->tid_idx, tidentry);
	trace_hfi1_tid_flow_build_read_resp(qp, req->clear_tail, flow);

	rcu_read_lock();
	remote = rcu_dereference(qpriv->tid_rdma.remote);
	if (!remote) {
@@ -2293,6 +2307,7 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
	unsigned long flags;
	u32 kpsn, ipsn;

	trace_hfi1_sender_rcv_tid_read_resp(qp);
	is_fecn = process_ecn(qp, packet);
	kpsn = mask_psn(be32_to_cpu(ohdr->bth[2]));
	aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth);
@@ -2322,6 +2337,12 @@ void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
		hfi1_schedule_send(qp);
	}

	trace_hfi1_ack(qp, ipsn);
	trace_hfi1_tid_req_rcv_read_resp(qp, 0, req->e.swqe->wr.opcode,
					 req->e.swqe->psn, req->e.swqe->lpsn,
					 req);
	trace_hfi1_tid_flow_rcv_read_resp(qp, req->clear_tail, flow);

	/* Release the tid resources */
	hfi1_kern_exp_rcv_clear(req);

@@ -2671,6 +2692,8 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
	unsigned long flags;
	bool ret = true;

	trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ",
					   packet->rhf);
	if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
		return ret;

@@ -2754,12 +2777,20 @@ void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
	if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
		*bth2 = mask_psn(qp->s_psn);
		flow = find_flow_ib(req, *bth2, &fidx);
-		if (!flow)
+		if (!flow) {
			trace_hfi1_msg_tid_restart_req(/* msg */
			   qp, "!!!!!! Could not find flow to restart: bth2 ",
			   (u64)*bth2);
			trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode,
						       wqe->psn, wqe->lpsn,
						       req);
			return;
		}
	} else {
		return;
	}

	trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
	diff = delta_psn(*bth2, flow->flow_state.ib_spsn);

	flow->sent = 0;
@@ -2794,6 +2825,9 @@ void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
	/* Move flow_idx to correct index */
	req->flow_idx = fidx;

	trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
	trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode, wqe->psn,
				       wqe->lpsn, req);
	req->state = TID_REQUEST_ACTIVE;
}

@@ -2868,14 +2902,17 @@ interlock:
}

/* Does @sge meet the alignment requirements for tid rdma? */
-static inline bool hfi1_check_sge_align(struct rvt_sge *sge, int num_sge)
+static inline bool hfi1_check_sge_align(struct rvt_qp *qp,
+					struct rvt_sge *sge, int num_sge)
{
	int i;

-	for (i = 0; i < num_sge; i++, sge++)
+	for (i = 0; i < num_sge; i++, sge++) {
		trace_hfi1_sge_check_align(qp, i, sge);
		if ((u64)sge->vaddr & ~PAGE_MASK ||
		    sge->sge_length & ~PAGE_MASK)
			return false;
	}
	return true;
}

@@ -2904,7 +2941,8 @@ void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
		goto exit;

	if (wqe->wr.opcode == IB_WR_RDMA_READ) {
-		if (hfi1_check_sge_align(&wqe->sg_list[0], wqe->wr.num_sge)) {
+		if (hfi1_check_sge_align(qp, &wqe->sg_list[0],
+					 wqe->wr.num_sge)) {
			new_opcode = IB_WR_TID_RDMA_READ;
			do_tid_rdma = true;
		}
@@ -2930,6 +2968,9 @@ void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
		priv->tid_req.comp_seg = 0;
		priv->tid_req.ack_seg = 0;
		priv->tid_req.state = TID_REQUEST_INACTIVE;
		trace_hfi1_tid_req_setup_tid_wqe(qp, 1, wqe->wr.opcode,
						 wqe->psn, wqe->lpsn,
						 &priv->tid_req);
	}
exit:
	rcu_read_unlock();
+36 −0
@@ -129,6 +129,10 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2)
#define IETH_PRN "ieth rkey:0x%.8x"
#define ATOMICACKETH_PRN "origdata:%llx"
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
#define TID_RDMA_KDETH "kdeth0 0x%x kdeth1 0x%x"
#define TID_RDMA_KDETH_DATA "kdeth0 0x%x: kver %u sh %u intr %u tidctrl %u tid %x offset %x kdeth1 0x%x: jkey %x"
#define TID_READ_REQ_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
#define TID_READ_RSP_PRN "verbs_qp 0x%x"

#define OP(transport, op) IB_OPCODE_## transport ## _ ## op

@@ -323,6 +327,38 @@ const char *parse_everbs_hdrs(
				 parse_syndrome(be32_to_cpu(eh->aeth) >> 24),
				 be32_to_cpu(eh->aeth) & IB_MSN_MASK);
		break;
	case OP(TID_RDMA, READ_REQ):
		trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
				 TID_READ_REQ_PRN,
				 le32_to_cpu(eh->tid_rdma.r_req.kdeth0),
				 le32_to_cpu(eh->tid_rdma.r_req.kdeth1),
				 ib_u64_get(&eh->tid_rdma.r_req.reth.vaddr),
				 be32_to_cpu(eh->tid_rdma.r_req.reth.rkey),
				 be32_to_cpu(eh->tid_rdma.r_req.reth.length),
				 be32_to_cpu(eh->tid_rdma.r_req.tid_flow_psn),
				 be32_to_cpu(eh->tid_rdma.r_req.tid_flow_qp),
				 be32_to_cpu(eh->tid_rdma.r_req.verbs_qp));
		break;
	case OP(TID_RDMA, READ_RESP):
		trace_seq_printf(p, TID_RDMA_KDETH_DATA " " AETH_PRN " "
				 TID_READ_RSP_PRN,
				 le32_to_cpu(eh->tid_rdma.r_rsp.kdeth0),
				 KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, KVER),
				 KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, SH),
				 KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, INTR),
				 KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TIDCTRL),
				 KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TID),
				 KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, OFFSET),
				 le32_to_cpu(eh->tid_rdma.r_rsp.kdeth1),
				 KDETH_GET(eh->tid_rdma.r_rsp.kdeth1, JKEY),
				 be32_to_cpu(eh->tid_rdma.r_rsp.aeth) >> 24,
				 parse_syndrome(/* aeth */
					 be32_to_cpu(eh->tid_rdma.r_rsp.aeth)
					 >> 24),
				 (be32_to_cpu(eh->tid_rdma.r_rsp.aeth) &
				  IB_MSN_MASK),
				 be32_to_cpu(eh->tid_rdma.r_rsp.verbs_qp));
		break;
	/* aeth + atomicacketh */
	case OP(RC, ATOMIC_ACKNOWLEDGE):
		trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN,
+2 −0
@@ -79,6 +79,8 @@ __print_symbolic(opcode, \
	ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE),             \
	ib_opcode_name(RC_COMPARE_SWAP),                   \
	ib_opcode_name(RC_FETCH_ADD),                      \
	ib_opcode_name(TID_RDMA_READ_REQ),	           \
	ib_opcode_name(TID_RDMA_READ_RESP),	           \
	ib_opcode_name(UC_SEND_FIRST),                     \
	ib_opcode_name(UC_SEND_MIDDLE),                    \
	ib_opcode_name(UC_SEND_LAST),                      \
+48 −0
@@ -109,6 +109,54 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
	     TP_ARGS(qp, psn)
);

DEFINE_EVENT(/* event */
	hfi1_rc_template, hfi1_rc_completion,
	TP_PROTO(struct rvt_qp *qp, u32 psn),
	TP_ARGS(qp, psn)
);

DECLARE_EVENT_CLASS(/* rc_ack */
	hfi1_rc_ack_template,
	TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
		 struct rvt_swqe *wqe),
	TP_ARGS(qp, aeth, psn, wqe),
	TP_STRUCT__entry(/* entry */
		DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
		__field(u32, qpn)
		__field(u32, aeth)
		__field(u32, psn)
		__field(u8, opcode)
		__field(u32, spsn)
		__field(u32, lpsn)
	),
	TP_fast_assign(/* assign */
		DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
		__entry->qpn = qp->ibqp.qp_num;
		__entry->aeth = aeth;
		__entry->psn = psn;
		__entry->opcode = wqe->wr.opcode;
		__entry->spsn = wqe->psn;
		__entry->lpsn = wqe->lpsn;
	),
	TP_printk(/* print */
		"[%s] qpn 0x%x aeth 0x%x psn 0x%x opcode 0x%x spsn 0x%x lpsn 0x%x",
		__get_str(dev),
		__entry->qpn,
		__entry->aeth,
		__entry->psn,
		__entry->opcode,
		__entry->spsn,
		__entry->lpsn
	)
);

DEFINE_EVENT(/* do_rc_ack */
	hfi1_rc_ack_template, hfi1_rc_ack_do,
	TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
		 struct rvt_swqe *wqe),
	TP_ARGS(qp, aeth, psn, wqe)
);

#endif /* __HFI1_TRACE_RC_H */

#undef TRACE_INCLUDE_PATH