Commit 21f90243 authored by Israel Rukshin, committed by Christoph Hellwig

nvmet-rdma: fix double free of rdma queue

In case RDMA accept fails at nvmet_rdma_queue_connect(), the release work is
scheduled. Later on, a new RDMA CM event may arrive, since we didn't destroy
the cm_id, and nvmet_rdma_queue_connect_fail() is called, which schedules
another release work. This results in calling nvmet_rdma_free_queue() twice.
To fix this, destroy the cm_id implicitly by returning a non-zero ret code
from the CM event handler, which guarantees that no new rdma_cm events will
arrive afterwards. Also add a qp pointer to the nvmet_rdma_queue structure,
so it can be used when the cm_id pointer is NULL or has already been
destroyed.
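
For reference, the shape of the fixed error path in nvmet_rdma_queue_connect()
is sketched below. This is a condensed illustration based on the hunks in this
patch; surrounding locals, allocation and accept logic are abbreviated and it
is not a verbatim copy of the function.

	/*
	 * Condensed sketch of the fixed error path (see the hunks below).
	 */
	static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
			struct rdma_cm_event *event)
	{
		/* ... device lookup and queue allocation elided ... */

		ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
		if (ret) {
			/*
			 * Returning non-zero from the CM event handler makes
			 * the RDMA CM core destroy cm_id, so no further events
			 * can arrive for this queue.  Clear the pointer so the
			 * free path does not touch it, and free the queue here
			 * instead of scheduling the release work.
			 */
			queue->cm_id = NULL;
			goto free_queue;
		}

		/* ... add the queue to nvmet_rdma_queue_list ... */
		return 0;

	free_queue:
		nvmet_rdma_free_queue(queue);
	put_device:
		kref_put(&ndev->ref, nvmet_rdma_free_dev);
		return ret;
	}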

Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 8c5c6605
18 additions, 12 deletions
@@ -78,6 +78,7 @@ enum nvmet_rdma_queue_state {

 struct nvmet_rdma_queue {
 	struct rdma_cm_id	*cm_id;
+	struct ib_qp		*qp;
 	struct nvmet_port	*port;
 	struct ib_cq		*cq;
 	atomic_t		sq_wr_avail;
@@ -474,7 +475,7 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
 	if (ndev->srq)
 		ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
 	else
-		ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL);
+		ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
 
 	if (unlikely(ret))
 		pr_err("post_recv cmd failed\n");
@@ -513,7 +514,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
 	atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
 
 	if (rsp->n_rdma) {
-		rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp,
+		rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
 				queue->cm_id->port_num, rsp->req.sg,
 				rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
 	}
@@ -597,7 +598,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)

 	WARN_ON(rsp->n_rdma <= 0);
 	atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
-	rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp,
+	rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
 			queue->cm_id->port_num, rsp->req.sg,
 			rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
 	rsp->n_rdma = 0;
@@ -752,7 +753,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
 	}
 
 	if (nvmet_rdma_need_data_in(rsp)) {
-		if (rdma_rw_ctx_post(&rsp->rw, queue->cm_id->qp,
+		if (rdma_rw_ctx_post(&rsp->rw, queue->qp,
 				queue->cm_id->port_num, &rsp->read_cqe, NULL))
 			nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
 	} else {
@@ -1038,6 +1039,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
 		pr_err("failed to create_qp ret= %d\n", ret);
 		goto err_destroy_cq;
 	}
+	queue->qp = queue->cm_id->qp;
 
 	atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr);

@@ -1066,11 +1068,10 @@ err_destroy_cq:

 static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
 {
-	struct ib_qp *qp = queue->cm_id->qp;
-
-	ib_drain_qp(qp);
-	rdma_destroy_id(queue->cm_id);
-	ib_destroy_qp(qp);
+	ib_drain_qp(queue->qp);
+	if (queue->cm_id)
+		rdma_destroy_id(queue->cm_id);
+	ib_destroy_qp(queue->qp);
 	ib_free_cq(queue->cq);
 }

@@ -1305,9 +1306,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,

 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
 	if (ret) {
-		schedule_work(&queue->release_work);
-		/* Destroying rdma_cm id is not needed here */
-		return 0;
+		/*
+		 * Don't destroy the cm_id in free path, as we implicitly
+		 * destroy the cm_id here with non-zero ret code.
+		 */
+		queue->cm_id = NULL;
+		goto free_queue;
 	}
 
 	mutex_lock(&nvmet_rdma_queue_mutex);
@@ -1316,6 +1320,8 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,

 	return 0;
 
+free_queue:
+	nvmet_rdma_free_queue(queue);
 put_device:
 	kref_put(&ndev->ref, nvmet_rdma_free_dev);