Commit 4a9ceb7d authored by Mike Marciniszyn, committed by Doug Ledford

IB/{rdmavt, qib, hfi1}: Convert to new completion API

Convert all completions to use the new completion routine, which fixes a
race between post send and completion where fields from an SWQE can be
read after the SWQE has been freed.
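
For reference, the old completion pattern looked roughly like this (a
simplified sketch, not the exact driver code): advancing qp->s_last is
what lets a concurrent post_send() reuse the slot, barrier() is a
compiler-only barrier with no inter-CPU ordering, yet the SWQE was still
dereferenced afterwards:

	qp->s_last = s_last;	/* slot now appears free to post_send() */
	/* see post_send() */
	barrier();		/* compiler barrier; no SMP ordering */
	rvt_put_qp_swqe(qp, wqe);
	/* the completion entry is built from a SWQE that may be reused */
	wc.wr_id = wqe->wr.wr_id;
	wc.byte_len = wqe->length;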

This patch also addresses issues reported in:
https://marc.info/?l=linux-kernel&m=155656897409107&w=2

The reserved operation path has no need for any barrier.

The barrier for the other path is provided by the smp_load_acquire() of
qp->s_last in rvt_qp_is_avail(); the pairing is sketched below.
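
A minimal sketch of that pairing, assuming the completer copies the SWQE
fields it needs before publishing the slot (the /* See
rvt_qp_complete_swqe() */ comment in the diff below marks the acquire
side; the store-release shown here is how rvt_qp_complete_swqe() is
expected to publish s_last):

	/* completer (sketch): read the SWQE fields first ... */
	wr_id = wqe->wr.wr_id;
	byte_len = wqe->length;
	/* ... then publish the slot; pairs with the acquire below */
	smp_store_release(&qp->s_last, last);

	/* poster, in rvt_qp_is_avail(): a slot observed as free
	 * via the acquire is guaranteed no longer being read above
	 */
	slast = smp_load_acquire(&qp->s_last);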

Cc: Andrea Parri <andrea.parri@amarulasolutions.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent f56044d6
drivers/infiniband/hw/hfi1/rc.c  +4 −22
@@ -1819,23 +1819,14 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
 	}
 
 	while (qp->s_last != qp->s_acked) {
-		u32 s_last;
-
 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
 		    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 			break;
 		trdma_clean_swqe(qp, wqe);
 		rvt_qp_wqe_unreserve(qp, wqe);
-		s_last = qp->s_last;
-		trace_hfi1_qp_send_completion(qp, wqe, s_last);
-		if (++s_last >= qp->s_size)
-			s_last = 0;
-		qp->s_last = s_last;
-		/* see post_send() */
-		barrier();
-		rvt_put_qp_swqe(qp, wqe);
-		rvt_qp_swqe_complete(qp,
+		trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
+		rvt_qp_complete_swqe(qp,
 				     wqe,
 				     ib_hfi1_wc_opcode[wqe->wr.opcode],
 				     IB_WC_SUCCESS);
@@ -1879,19 +1870,10 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 	trace_hfi1_rc_completion(qp, wqe->lpsn);
 	if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
 	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
-		u32 s_last;
-
 		trdma_clean_swqe(qp, wqe);
-		rvt_put_qp_swqe(qp, wqe);
 		rvt_qp_wqe_unreserve(qp, wqe);
-		s_last = qp->s_last;
-		trace_hfi1_qp_send_completion(qp, wqe, s_last);
-		if (++s_last >= qp->s_size)
-			s_last = 0;
-		qp->s_last = s_last;
-		/* see post_send() */
-		barrier();
-		rvt_qp_swqe_complete(qp,
+		trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
+		rvt_qp_complete_swqe(qp,
 				     wqe,
 				     ib_hfi1_wc_opcode[wqe->wr.opcode],
 				     IB_WC_SUCCESS);
drivers/infiniband/hw/qib/qib_rc.c  +4 −22
@@ -921,20 +921,11 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
 		rvt_add_retry_timer(qp);
 
 	while (qp->s_last != qp->s_acked) {
-		u32 s_last;
-
 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
 		    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 			break;
-		s_last = qp->s_last;
-		if (++s_last >= qp->s_size)
-			s_last = 0;
-		qp->s_last = s_last;
-		/* see post_send() */
-		barrier();
-		rvt_put_qp_swqe(qp, wqe);
-		rvt_qp_swqe_complete(qp,
+		rvt_qp_complete_swqe(qp,
 				     wqe,
 				     ib_qib_wc_opcode[wqe->wr.opcode],
 				     IB_WC_SUCCESS);
@@ -972,21 +963,12 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 	 * is finished.
 	 */
 	if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
-	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
-		u32 s_last;
-
-		rvt_put_qp_swqe(qp, wqe);
-		s_last = qp->s_last;
-		if (++s_last >= qp->s_size)
-			s_last = 0;
-		qp->s_last = s_last;
-		/* see post_send() */
-		barrier();
-		rvt_qp_swqe_complete(qp,
+	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0)
+		rvt_qp_complete_swqe(qp,
 				     wqe,
 				     ib_qib_wc_opcode[wqe->wr.opcode],
 				     IB_WC_SUCCESS);
-	} else
+	else
 		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
 
 	qp->s_retry = qp->s_retry_cnt;
drivers/infiniband/sw/rdmavt/qp.c  +9 −22
@@ -1853,10 +1853,9 @@ static inline int rvt_qp_is_avail(
 
 	/* see rvt_qp_wqe_unreserve() */
 	smp_mb__before_atomic();
-	reserved_used = atomic_read(&qp->s_reserved_used);
 	if (unlikely(reserved_op)) {
 		/* see rvt_qp_wqe_unreserve() */
-		smp_mb__before_atomic();
+		reserved_used = atomic_read(&qp->s_reserved_used);
 		if (reserved_used >= rdi->dparms.reserved_operations)
 			return -ENOMEM;
 		return 0;
@@ -1864,14 +1863,13 @@ static inline int rvt_qp_is_avail(
 	/* non-reserved operations */
 	if (likely(qp->s_avail))
 		return 0;
-	slast = READ_ONCE(qp->s_last);
+	/* See rvt_qp_complete_swqe() */
+	slast = smp_load_acquire(&qp->s_last);
 	if (qp->s_head >= slast)
 		avail = qp->s_size - (qp->s_head - slast);
 	else
 		avail = slast - qp->s_head;
 
-	/* see rvt_qp_wqe_unreserve() */
-	smp_mb__before_atomic();
 	reserved_used = atomic_read(&qp->s_reserved_used);
 	avail =  avail - 1 -
 		(rdi->dparms.reserved_operations - reserved_used);
@@ -2664,27 +2662,16 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 		       enum ib_wc_status status)
 {
 	u32 old_last, last;
-	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+	struct rvt_dev_info *rdi;
 
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
+	rdi = ib_to_rvt(qp->ibqp.device);
 
-	last = qp->s_last;
-	old_last = last;
-	trace_rvt_qp_send_completion(qp, wqe, last);
-	if (++last >= qp->s_size)
-		last = 0;
-	trace_rvt_qp_send_completion(qp, wqe, last);
-	qp->s_last = last;
-	/* See post_send() */
-	barrier();
-	rvt_put_qp_swqe(qp, wqe);
-
-	rvt_qp_swqe_complete(qp,
-			     wqe,
-			     rdi->wc_opcode[wqe->wr.opcode],
-			     status);
-
+	old_last = qp->s_last;
+	trace_rvt_qp_send_completion(qp, wqe, old_last);
+	last = rvt_qp_complete_swqe(qp, wqe, rdi->wc_opcode[wqe->wr.opcode],
+				    status);
 	if (qp->s_acked == old_last)
 		qp->s_acked = last;
 	if (qp->s_cur == old_last)
include/rdma/rdmavt_qp.h  +0 −36
@@ -565,42 +565,6 @@ static inline void rvt_qp_wqe_unreserve(
 
 extern const enum ib_wc_opcode ib_rvt_wc_opcode[];
 
-/**
- * rvt_qp_swqe_complete() - insert send completion
- * @qp - the qp
- * @wqe - the send wqe
- * @status - completion status
- *
- * Insert a send completion into the completion
- * queue if the qp indicates it should be done.
- *
- * See IBTA 10.7.3.1 for info on completion
- * control.
- */
-static inline void rvt_qp_swqe_complete(
-	struct rvt_qp *qp,
-	struct rvt_swqe *wqe,
-	enum ib_wc_opcode opcode,
-	enum ib_wc_status status)
-{
-	if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED))
-		return;
-	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
-	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
-	     status != IB_WC_SUCCESS) {
-		struct ib_wc wc;
-
-		memset(&wc, 0, sizeof(wc));
-		wc.wr_id = wqe->wr.wr_id;
-		wc.status = status;
-		wc.opcode = opcode;
-		wc.qp = &qp->ibqp;
-		wc.byte_len = wqe->length;
-		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
-			     status != IB_WC_SUCCESS);
-	}
-}
-
 /*
  * Compare the lower 24 bits of the msn values.
  * Returns an integer <, ==, or > than zero.