Commit 287f329e authored by Yamin Friedman's avatar Yamin Friedman Committed by Christoph Hellwig
Browse files

nvme-rdma: use new shared CQ mechanism



Has the driver use shared CQs providing ~10%-20% improvement as seen in
the patch introducing shared CQs. Instead of opening a CQ for each QP
per controller connected, a CQ for each QP will be provided by the RDMA
core driver that will be shared between the QPs on that core reducing
interrupt overhead.

Signed-off-by: default avatarYamin Friedman <yaminf@mellanox.com>
Signed-off-by: default avatarMax Gurtovoy <maxg@mellanox.com>
Reviewed-by: default avatarOr Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: default avatarSagi Grimberg <sagi@grimberg.me>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
parent df4f9bc4
Loading
Loading
Loading
Loading
+51 −26
Original line number Diff line number Diff line
@@ -96,6 +96,7 @@ struct nvme_rdma_queue {
	int			cm_error;
	struct completion	cm_done;
	bool			pi_support;
	int			cq_size;
};

struct nvme_rdma_ctrl {
@@ -275,6 +276,7 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
	init_attr.recv_cq = queue->ib_cq;
	if (queue->pi_support)
		init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
	init_attr.qp_context = queue;

	ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);

@@ -409,6 +411,14 @@ out_err:
	return NULL;
}

static void nvme_rdma_free_cq(struct nvme_rdma_queue *queue)
{
	if (nvme_rdma_poll_queue(queue))
		ib_free_cq(queue->ib_cq);
	else
		ib_cq_pool_put(queue->ib_cq, queue->cq_size);
}

static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
{
	struct nvme_rdma_device *dev;
@@ -430,7 +440,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
	 * the destruction of the QP shouldn't use rdma_cm API.
	 */
	ib_destroy_qp(queue->qp);
	ib_free_cq(queue->ib_cq);
	nvme_rdma_free_cq(queue);

	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
			sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@@ -450,13 +460,42 @@ static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
	return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
}

static int nvme_rdma_create_cq(struct ib_device *ibdev,
		struct nvme_rdma_queue *queue)
{
	int ret, comp_vector, idx = nvme_rdma_queue_idx(queue);
	enum ib_poll_context poll_ctx;

	/*
	 * Spread I/O queues completion vectors according their queue index.
	 * Admin queues can always go on completion vector 0.
	 */
	comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;

	/* Polling queues need direct cq polling context */
	if (nvme_rdma_poll_queue(queue)) {
		poll_ctx = IB_POLL_DIRECT;
		queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size,
					   comp_vector, poll_ctx);
	} else {
		poll_ctx = IB_POLL_SOFTIRQ;
		queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size,
					      comp_vector, poll_ctx);
	}

	if (IS_ERR(queue->ib_cq)) {
		ret = PTR_ERR(queue->ib_cq);
		return ret;
	}

	return 0;
}

static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
{
	struct ib_device *ibdev;
	const int send_wr_factor = 3;			/* MR, SEND, INV */
	const int cq_factor = send_wr_factor + 1;	/* + RECV */
	int comp_vector, idx = nvme_rdma_queue_idx(queue);
	enum ib_poll_context poll_ctx;
	int ret, pages_per_mr;

	queue->device = nvme_rdma_find_get_device(queue->cm_id);
@@ -467,26 +506,12 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
	}
	ibdev = queue->device->dev;

	/*
	 * Spread I/O queues completion vectors according their queue index.
	 * Admin queues can always go on completion vector 0.
	 */
	comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;

	/* Polling queues need direct cq polling context */
	if (nvme_rdma_poll_queue(queue))
		poll_ctx = IB_POLL_DIRECT;
	else
		poll_ctx = IB_POLL_SOFTIRQ;

	/* +1 for ib_stop_cq */
	queue->ib_cq = ib_alloc_cq(ibdev, queue,
				cq_factor * queue->queue_size + 1,
				comp_vector, poll_ctx);
	if (IS_ERR(queue->ib_cq)) {
		ret = PTR_ERR(queue->ib_cq);
	queue->cq_size = cq_factor * queue->queue_size + 1;

	ret = nvme_rdma_create_cq(ibdev, queue);
	if (ret)
		goto out_put_dev;
	}

	ret = nvme_rdma_create_qp(queue, send_wr_factor);
	if (ret)
@@ -512,7 +537,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
	if (ret) {
		dev_err(queue->ctrl->ctrl.device,
			"failed to initialize MR pool sized %d for QID %d\n",
			queue->queue_size, idx);
			queue->queue_size, nvme_rdma_queue_idx(queue));
		goto out_destroy_ring;
	}

@@ -523,7 +548,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
		if (ret) {
			dev_err(queue->ctrl->ctrl.device,
				"failed to initialize PI MR pool sized %d for QID %d\n",
				queue->queue_size, idx);
				queue->queue_size, nvme_rdma_queue_idx(queue));
			goto out_destroy_mr_pool;
		}
	}
@@ -540,7 +565,7 @@ out_destroy_ring:
out_destroy_qp:
	rdma_destroy_qp(queue->cm_id);
out_destroy_ib_cq:
	ib_free_cq(queue->ib_cq);
	nvme_rdma_free_cq(queue);
out_put_dev:
	nvme_rdma_dev_put(queue->device);
	return ret;
@@ -1163,7 +1188,7 @@ static void nvme_rdma_end_request(struct nvme_rdma_request *req)
static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
		const char *op)
{
	struct nvme_rdma_queue *queue = cq->cq_context;
	struct nvme_rdma_queue *queue = wc->qp->qp_context;
	struct nvme_rdma_ctrl *ctrl = queue->ctrl;

	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
@@ -1706,7 +1731,7 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct nvme_rdma_qe *qe =
		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
	struct nvme_rdma_queue *queue = cq->cq_context;
	struct nvme_rdma_queue *queue = wc->qp->qp_context;
	struct ib_device *ibdev = queue->device->dev;
	struct nvme_completion *cqe = qe->data;
	const size_t len = sizeof(struct nvme_completion);