Commit d21a1240 authored by Bob Pearson's avatar Bob Pearson Committed by Jason Gunthorpe
Browse files

RDMA/rxe: Use acquire/release for memory ordering

Change work and completion queues to use smp_load_acquire() and
smp_store_release() to synchronize between driver and users.  This commit
goes with a matching series of commits in the rxe user space provider.

Link: https://lore.kernel.org/r/20201210174258.5234-1-rpearson@hpe.com


Signed-off-by: default avatarBob Pearson <rpearson@hpe.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent d8cc403b
Loading
Loading
Loading
Loading
+0 −5
Original line number Diff line number Diff line
@@ -123,11 +123,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)

	memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe));

	/* make sure all changes to the CQ are written before we update the
	 * producer pointer
	 */
	smp_wmb();

	advance_producer(cq->queue);
	spin_unlock_irqrestore(&cq->cq_lock, flags);

+60 −34
Original line number Diff line number Diff line
@@ -7,9 +7,11 @@
#ifndef RXE_QUEUE_H
#define RXE_QUEUE_H

/* for definition of shared struct rxe_queue_buf */
#include <uapi/rdma/rdma_user_rxe.h>

/* implements a simple circular buffer that can optionally be
 * shared between user space and the kernel and can be resized

 * the requested element size is rounded up to a power of 2
 * and the number of elements in the buffer is also rounded
 * up to a power of 2. Since the queue is empty when the
@@ -17,28 +19,6 @@
 * of the queue is one less than the number of element slots
 */

/* this data structure is shared between user space and kernel
 * space for those cases where the queue is shared. It contains
 * the producer and consumer indices. Is also contains a copy
 * of the queue size parameters for user space to use but the
 * kernel must use the parameters in the rxe_queue struct
 * this MUST MATCH the corresponding librxe struct
 * for performance reasons arrange to have producer and consumer
 * pointers in separate cache lines
 * the kernel should always mask the indices to avoid accessing
 * memory outside of the data area
 */
struct rxe_queue_buf {
	__u32			log2_elem_size;
	__u32			index_mask;
	__u32			pad_1[30];
	__u32			producer_index;
	__u32			pad_2[31];
	__u32			consumer_index;
	__u32			pad_3[31];
	__u8			data[];
};

struct rxe_queue {
	struct rxe_dev		*rxe;
	struct rxe_queue_buf	*buf;
@@ -46,7 +26,7 @@ struct rxe_queue {
	size_t			buf_size;
	size_t			elem_size;
	unsigned int		log2_elem_size;
	unsigned int		index_mask;
	u32			index_mask;
};

int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf,
@@ -76,26 +56,56 @@ static inline int next_index(struct rxe_queue *q, int index)

static inline int queue_empty(struct rxe_queue *q)
{
	return ((q->buf->producer_index - q->buf->consumer_index)
			& q->index_mask) == 0;
	u32 prod;
	u32 cons;

	/* make sure all changes to queue complete before
	 * testing queue empty
	 */
	prod = smp_load_acquire(&q->buf->producer_index);
	/* same */
	cons = smp_load_acquire(&q->buf->consumer_index);

	return ((prod - cons) & q->index_mask) == 0;
}

static inline int queue_full(struct rxe_queue *q)
{
	return ((q->buf->producer_index + 1 - q->buf->consumer_index)
			& q->index_mask) == 0;
	u32 prod;
	u32 cons;

	/* make sure all changes to queue complete before
	 * testing queue full
	 */
	prod = smp_load_acquire(&q->buf->producer_index);
	/* same */
	cons = smp_load_acquire(&q->buf->consumer_index);

	return ((prod + 1 - cons) & q->index_mask) == 0;
}

static inline void advance_producer(struct rxe_queue *q)
{
	q->buf->producer_index = (q->buf->producer_index + 1)
			& q->index_mask;
	u32 prod;

	prod = (q->buf->producer_index + 1) & q->index_mask;

	/* make sure all changes to queue complete before
	 * changing producer index
	 */
	smp_store_release(&q->buf->producer_index, prod);
}

static inline void advance_consumer(struct rxe_queue *q)
{
	q->buf->consumer_index = (q->buf->consumer_index + 1)
			& q->index_mask;
	u32 cons;

	cons = (q->buf->consumer_index + 1) & q->index_mask;

	/* make sure all changes to queue complete before
	 * changing consumer index
	 */
	smp_store_release(&q->buf->consumer_index, cons);
}

static inline void *producer_addr(struct rxe_queue *q)
@@ -112,12 +122,28 @@ static inline void *consumer_addr(struct rxe_queue *q)

static inline unsigned int producer_index(struct rxe_queue *q)
{
	return q->buf->producer_index;
	u32 index;

	/* make sure all changes to queue
	 * complete before getting producer index
	 */
	index = smp_load_acquire(&q->buf->producer_index);
	index &= q->index_mask;

	return index;
}

static inline unsigned int consumer_index(struct rxe_queue *q)
{
	return q->buf->consumer_index;
	u32 index;

	/* make sure all changes to queue
	 * complete before getting consumer index
	 */
	index = smp_load_acquire(&q->buf->consumer_index);
	index &= q->index_mask;

	return index;
}

static inline void *addr_from_index(struct rxe_queue *q, unsigned int index)
+0 −11
Original line number Diff line number Diff line
@@ -244,11 +244,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
	recv_wqe->dma.cur_sge		= 0;
	recv_wqe->dma.sge_offset	= 0;

	/* make sure all changes to the work queue are written before we
	 * update the producer pointer
	 */
	smp_wmb();

	advance_producer(rq->queue);
	return 0;

@@ -633,12 +628,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
	if (unlikely(err))
		goto err1;

	/*
	 * make sure all changes to the work queue are
	 * written before we update the producer pointer
	 */
	smp_wmb();

	advance_producer(sq->queue);
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

+21 −0
Original line number Diff line number Diff line
@@ -181,4 +181,25 @@ struct rxe_modify_srq_cmd {
	__aligned_u64 mmap_info_addr;
};

/* This data structure is stored at the base of work and
 * completion queues shared between user space and kernel space.
 * It contains the producer and consumer indices. Is also
 * contains a copy of the queue size parameters for user space
 * to use but the kernel must use the parameters in the
 * rxe_queue struct. For performance reasons arrange to have
 * producer and consumer indices in separate cache lines
 * the kernel should always mask the indices to avoid accessing
 * memory outside of the data area
 */
struct rxe_queue_buf {
	__u32			log2_elem_size;
	__u32			index_mask;
	__u32			pad_1[30];
	__u32			producer_index;
	__u32			pad_2[31];
	__u32			consumer_index;
	__u32			pad_3[31];
	__u8			data[];
};

#endif /* RDMA_USER_RXE_H */