Commit a0b34f75 authored by Kaike Wan, committed by Doug Ledford
Browse files

IB/hfi1: Add interlock between a TID RDMA request and other requests



This locking mechanism is designed to prevent various memory corruption
scenarios from occurring when requests are pipelined, especially when
RDMA READ/WRITE requests are interleaved with TID RDMA READ/WRITE
requests:
1. READ-AFTER-READ;
2. READ-AFTER-WRITE;
3. WRITE-AFTER-READ;
When memory corruption is likely, a request will be held back until
previous requests have been completed.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 24b11923
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -482,6 +482,15 @@ check_s_state:
		len = wqe->length;
		ss = &qp->s_sge;
		bth2 = mask_psn(qp->s_psn);

		/*
		 * Interlock between various IB requests and TID RDMA
		 * if necessary.
		 */
		if ((priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) ||
		    hfi1_tid_rdma_wqe_interlock(qp, wqe))
			goto bail;

		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
@@ -1321,6 +1330,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
		qp->s_state = OP(SEND_LAST);
	}
done:
	priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
	qp->s_psn = psn;
	/*
	 * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
@@ -1540,6 +1550,8 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
				  struct rvt_swqe *wqe,
				  struct hfi1_ibport *ibp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	lockdep_assert_held(&qp->s_lock);
	/*
	 * Don't decrement refcount and don't generate a
@@ -1608,6 +1620,10 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
			qp->s_draining = 0;
		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
	}
	if (priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) {
		priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
		hfi1_schedule_send(qp);
	}
	return wqe;
}

+37 −0
Original line number Diff line number Diff line
@@ -2829,3 +2829,40 @@ void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
		} while (!ret);
	}
}

/**
 * hfi1_tid_rdma_wqe_interlock - decide whether a send WQE must be held back
 * @qp: the QP whose send queue is being processed
 * @wqe: the work request that is about to be sent
 *
 * Looks at the send queue entry immediately preceding qp->s_cur (wrapping
 * around at qp->s_size) and compares its opcode with that of @wqe.
 * A TID RDMA READ that directly follows a regular RDMA READ is held back
 * for as long as qp->s_acked != qp->s_cur. Every other opcode handled
 * here never interlocks.
 *
 * When an interlock is required, HFI1_S_TID_WAIT_INTERLCK is set in the
 * QP private s_flags before returning.
 *
 * Return: true if @wqe must wait, false if it may be sent now.
 */
bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
{
	struct rvt_swqe *prev;
	struct hfi1_qp_priv *priv = qp->priv;
	u32 s_prev;

	/* Index of the slot just before s_cur; slot 0 wraps to the last one. */
	s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
	prev = rvt_get_swqe_ptr(qp, s_prev);

	switch (wqe->wr.opcode) {
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_IMM:
	case IB_WR_SEND_WITH_INV:
	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_READ:
		/* These requests are never interlocked here. */
		break;
	case IB_WR_TID_RDMA_READ:
		switch (prev->wr.opcode) {
		case IB_WR_RDMA_READ:
			/* READ-AFTER-READ: wait while s_acked trails s_cur. */
			if (qp->s_acked != qp->s_cur)
				goto interlock;
			break;
		default:
			break;
		}
		/*
		 * Terminate this case explicitly instead of silently falling
		 * through into default, so that adding a case below cannot
		 * change the control flow by accident.
		 */
		break;
	default:
		break;
	}
	return false;

interlock:
	priv->s_flags |= HFI1_S_TID_WAIT_INTERLCK;
	return true;
}
+11 −0
Original line number Diff line number Diff line
@@ -17,6 +17,16 @@
#define TID_RDMA_MAX_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES              (BIT(18) >> PAGE_SHIFT)

/*
 * Bit definitions for priv->s_flags.
 * These bit flags overload the bit flags defined for the QP's s_flags.
 * Due to the fact that these bit fields are used only for the QP priv
 * s_flags, there are no collisions.
 *
 * HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock.
 *	Set by hfi1_tid_rdma_wqe_interlock() when a request must be held
 *	back. Cleared in reset_psn() and in do_rc_completion(); the latter
 *	also calls hfi1_schedule_send() when it clears the flag.
 */
#define HFI1_S_TID_WAIT_INTERLCK  BIT(5)

struct tid_rdma_params {
	struct rcu_head rcu_head;
	u32 qp;
@@ -210,5 +220,6 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
			       u32 *bth2);
void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe);

#endif /* HFI1_TID_RDMA_H */
+3 −0
Original line number Diff line number Diff line
@@ -171,6 +171,9 @@ struct hfi1_qp_priv {
	u8 hdr_type; /* 9B or 16B */
	unsigned long tid_timer_timeout_jiffies;

	/* variables for the TID RDMA SE state machine */
	u32 s_flags;

	/* For TID RDMA READ */
	u32 tid_r_reqs;         /* Num of tid reads requested */
	u32 tid_r_comp;         /* Num of tid reads completed */