Commit cc23267a authored by Xi Wang's avatar Xi Wang Committed by Jason Gunthorpe
Browse files

RDMA/hns: Optimize hns buffer allocation flow

When the value of nbufs is 1, the buffer is in direct mode, which may cause
confusion. So optimizes current codes to make it easier to maintain and
understand.

Link: https://lore.kernel.org/r/1586779091-51410-3-git-send-email-liweihang@huawei.com


Signed-off-by: default avatarXi Wang <wangxi11@huawei.com>
Signed-off-by: default avatarLang Cheng <chenglang@huawei.com>
Signed-off-by: default avatarWeihang Li <liweihang@huawei.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent 3c873161
Loading
Loading
Loading
Loading
+47 −56
Original line number Diff line number Diff line
@@ -157,84 +157,78 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap)
	kfree(bitmap->table);
}

void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
		       struct hns_roce_buf *buf)
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf)
{
	int i;
	struct device *dev = hr_dev->dev;
	u32 size = buf->size;
	int i;

	if (size == 0)
		return;

	if (buf->nbufs == 1) {
	buf->size = 0;

	if (hns_roce_buf_is_direct(buf)) {
		dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map);
	} else {
		for (i = 0; i < buf->nbufs; ++i)
		for (i = 0; i < buf->npages; ++i)
			if (buf->page_list[i].buf)
				dma_free_coherent(dev, 1 << buf->page_shift,
						  buf->page_list[i].buf,
						  buf->page_list[i].map);
		kfree(buf->page_list);
		buf->page_list = NULL;
	}
}

int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
		       struct hns_roce_buf *buf, u32 page_shift)
{
	int i = 0;
	dma_addr_t t;
	struct hns_roce_buf_list *buf_list;
	struct device *dev = hr_dev->dev;
	u32 page_size = 1 << page_shift;
	u32 order;
	u32 page_size;
	int i;

	/* SQ/RQ buf lease than one page, SQ + RQ = 8K */
	/* The minimum shift of the page accessed by hw is PAGE_ADDR_SHIFT */
	buf->page_shift = max_t(int, PAGE_ADDR_SHIFT, page_shift);

	page_size = 1 << buf->page_shift;
	buf->npages = DIV_ROUND_UP(size, page_size);

	/* required size is not bigger than one trunk size */
	if (size <= max_direct) {
		buf->nbufs = 1;
		/* Npages calculated by page_size */
		order = get_order(size);
		if (order <= page_shift - PAGE_SHIFT)
			order = 0;
		else
			order -= page_shift - PAGE_SHIFT;
		buf->npages = 1 << order;
		buf->page_shift = page_shift;
		/* MTT PA must be recorded in 4k alignment, t is 4k aligned */
		buf->direct.buf = dma_alloc_coherent(dev, size, &t,
		buf->page_list = NULL;
		buf->direct.buf = dma_alloc_coherent(dev, size,
						     &buf->direct.map,
						     GFP_KERNEL);
		if (!buf->direct.buf)
			return -ENOMEM;

		buf->direct.map = t;

		while (t & ((1 << buf->page_shift) - 1)) {
			--buf->page_shift;
			buf->npages *= 2;
		}
	} else {
		buf->nbufs = (size + page_size - 1) / page_size;
		buf->npages = buf->nbufs;
		buf->page_shift = page_shift;
		buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
					 GFP_KERNEL);

		if (!buf->page_list)
		buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL);
		if (!buf_list)
			return -ENOMEM;

		for (i = 0; i < buf->nbufs; ++i) {
			buf->page_list[i].buf = dma_alloc_coherent(dev,
								   page_size,
								   &t,
		for (i = 0; i < buf->npages; i++) {
			buf_list[i].buf = dma_alloc_coherent(dev, page_size,
							     &buf_list[i].map,
							     GFP_KERNEL);
			if (!buf_list[i].buf)
				break;
		}

			if (!buf->page_list[i].buf)
				goto err_free;

			buf->page_list[i].map = t;
		if (i != buf->npages && i > 0) {
			while (i-- > 0)
				dma_free_coherent(dev, page_size,
						  buf_list[i].buf,
						  buf_list[i].map);
			kfree(buf_list);
			return -ENOMEM;
		}
		buf->page_list = buf_list;
	}
	buf->size = size;

	return 0;

err_free:
	hns_roce_buf_free(hr_dev, size, buf);
	return -ENOMEM;
}

int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
@@ -246,18 +240,14 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
	end = start + buf_cnt;
	if (end > buf->npages) {
		dev_err(hr_dev->dev,
			"invalid kmem region,offset %d,buf_cnt %d,total %d!\n",
			"Failed to check kmem bufs, end %d + %d total %d!\n",
			start, buf_cnt, buf->npages);
		return -EINVAL;
	}

	total = 0;
	for (i = start; i < end; i++)
		if (buf->nbufs == 1)
			bufs[total++] = buf->direct.map +
					((dma_addr_t)i << buf->page_shift);
		else
			bufs[total++] = buf->page_list[i].map;
		bufs[total++] = hns_roce_buf_page(buf, i);

	return total;
}
@@ -271,8 +261,9 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
	int idx = 0;
	u64 addr;

	if (page_shift < PAGE_SHIFT) {
		dev_err(hr_dev->dev, "invalid page shift %d!\n", page_shift);
	if (page_shift < PAGE_ADDR_SHIFT) {
		dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n",
			page_shift);
		return -EINVAL;
	}

+9 −9
Original line number Diff line number Diff line
@@ -157,13 +157,12 @@ static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
		       struct hns_roce_ib_create_cq ucmd,
		       struct ib_udata *udata)
{
	struct hns_roce_buf *buf = &hr_cq->buf;
	struct hns_roce_mtt *mtt = &hr_cq->mtt;
	struct ib_umem **umem = &hr_cq->umem;
	u32 npages;
	int ret;

	*umem = ib_umem_get(&hr_dev->ib_dev, ucmd.buf_addr, buf->size,
	*umem = ib_umem_get(&hr_dev->ib_dev, ucmd.buf_addr, hr_cq->buf_size,
			    IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(*umem))
		return PTR_ERR(*umem);
@@ -175,7 +174,7 @@ static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,

	npages = DIV_ROUND_UP(ib_umem_page_count(*umem),
			      1 << hr_dev->caps.cqe_buf_pg_sz);
	ret = hns_roce_mtt_init(hr_dev, npages, buf->page_shift, mtt);
	ret = hns_roce_mtt_init(hr_dev, npages, hr_cq->page_shift, mtt);
	if (ret)
		goto err_buf;

@@ -199,8 +198,9 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
	struct hns_roce_mtt *mtt = &hr_cq->mtt;
	int ret;

	ret = hns_roce_buf_alloc(hr_dev, buf->size, (1 << buf->page_shift) * 2,
				 buf, buf->page_shift);
	ret = hns_roce_buf_alloc(hr_dev, hr_cq->buf_size,
				 (1 << hr_cq->page_shift) * 2,
				 buf, hr_cq->page_shift);
	if (ret)
		goto out;

@@ -223,7 +223,7 @@ err_mtt:
	hns_roce_mtt_cleanup(hr_dev, mtt);

err_buf:
	hns_roce_buf_free(hr_dev, buf->size, buf);
	hns_roce_buf_free(hr_dev, buf);

out:
	return ret;
@@ -231,7 +231,7 @@ out:

static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
	hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf);
	hns_roce_buf_free(hr_dev, &hr_cq->buf);
}

static int create_user_cq(struct hns_roce_dev *hr_dev,
@@ -367,8 +367,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
	hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */
	hr_cq->cq_depth = cq_entries;
	hr_cq->vector = vector;
	hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz;
	hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
	hr_cq->buf_size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz;
	hr_cq->page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
	spin_lock_init(&hr_cq->lock);
	INIT_LIST_HEAD(&hr_cq->sq_list);
	INIT_LIST_HEAD(&hr_cq->rq_list);
+24 −8
Original line number Diff line number Diff line
@@ -468,7 +468,6 @@ struct hns_roce_buf_list {
struct hns_roce_buf {
	struct hns_roce_buf_list	direct;
	struct hns_roce_buf_list	*page_list;
	int				nbufs;
	u32				npages;
	u32				size;
	int				page_shift;
@@ -510,6 +509,8 @@ struct hns_roce_cq {
	u8				db_en;
	spinlock_t			lock;
	struct ib_umem			*umem;
	u32				buf_size;
	int				page_shift;
	u32				cq_depth;
	u32				cons_index;
	u32				*set_ci_db;
@@ -549,6 +550,8 @@ struct hns_roce_srq {
	struct hns_roce_buf	buf;
	u64		       *wrid;
	struct ib_umem	       *umem;
	u32			buf_size;
	int			page_shift;
	struct hns_roce_mtt	mtt;
	struct hns_roce_idx_que idx_que;
	spinlock_t		lock;
@@ -1124,15 +1127,29 @@ static inline struct hns_roce_qp
	return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1));
}

static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf)
{
	if (buf->page_list)
		return false;

	return true;
}

static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset)
{
	u32 page_size = 1 << buf->page_shift;
	if (hns_roce_buf_is_direct(buf))
		return (char *)(buf->direct.buf) + (offset & (buf->size - 1));

	if (buf->nbufs == 1)
		return (char *)(buf->direct.buf) + offset;
	else
	return (char *)(buf->page_list[offset >> buf->page_shift].buf) +
		       (offset & (page_size - 1));
	       (offset & ((1 << buf->page_shift) - 1));
}

static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx)
{
	if (hns_roce_buf_is_direct(buf))
		return buf->direct.map + ((dma_addr_t)idx << buf->page_shift);
	else
		return buf->page_list[idx].map;
}

static inline u64 to_hr_hw_page_addr(u64 addr)
@@ -1240,8 +1257,7 @@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
				struct ib_udata *udata);
int hns_roce_dealloc_mw(struct ib_mw *ibmw);

void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
		       struct hns_roce_buf *buf);
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf);
int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
		       struct hns_roce_buf *buf, u32 page_shift);

+1 −1
Original line number Diff line number Diff line
@@ -3666,7 +3666,7 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
	ib_umem_release(hr_cq->umem);
	if (!udata) {
		/* Free the buff of stored cq */
		hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf);
		hns_roce_buf_free(hr_dev, &hr_cq->buf);
	}
}

+18 −25
Original line number Diff line number Diff line
@@ -4989,24 +4989,14 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
	hns_roce_write64(hr_dev, doorbell, eq->doorbell);
}

static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset)
{
	u32 buf_chk_sz;

	buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
	if (eq->buf.nbufs == 1)
		return eq->buf.direct.buf + offset % buf_chk_sz;
	else
		return eq->buf.page_list[offset / buf_chk_sz].buf +
		       offset % buf_chk_sz;
}

static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
{
	struct hns_roce_aeqe *aeqe;

	aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
	aeqe = hns_roce_buf_offset(&eq->buf,
				   (eq->cons_index & (eq->entries - 1)) *
				   HNS_ROCE_AEQ_ENTRY_SIZE);

	return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
		!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
}
@@ -5103,7 +5093,8 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
{
	struct hns_roce_ceqe *ceqe;

	ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) *
	ceqe = hns_roce_buf_offset(&eq->buf,
				   (eq->cons_index & (eq->entries - 1)) *
				   HNS_ROCE_CEQ_ENTRY_SIZE);
	return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
		(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
@@ -5265,7 +5256,7 @@ static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
{
	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0)
		hns_roce_mtr_cleanup(hr_dev, &eq->mtr);
	hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf);
	hns_roce_buf_free(hr_dev, &eq->buf);
}

static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
@@ -5290,7 +5281,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
	eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
	eq->shift = ilog2((unsigned int)eq->entries);

	/* if not muti-hop, eqe buffer only use one trunk */
	/* if not multi-hop, eqe buffer only use one trunk */
	eq->eqe_buf_pg_sz = eq->buf.page_shift - PAGE_ADDR_SHIFT;
	if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) {
		eq->eqe_ba = eq->buf.direct.map;
		eq->cur_eqe_ba = eq->eqe_ba;
@@ -5432,7 +5424,7 @@ static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
		goto done;
	}

	hns_roce_mtr_init(&eq->mtr, PAGE_SHIFT + hr_dev->caps.eqe_ba_pg_sz,
	hns_roce_mtr_init(&eq->mtr, PAGE_ADDR_SHIFT + hr_dev->caps.eqe_ba_pg_sz,
			  page_shift);
	ret = hns_roce_mtr_attach(hr_dev, &eq->mtr, &buf_list, &region, 1);
	if (ret)
@@ -5454,23 +5446,24 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
	u32 page_shift;
	u32 mhop_num;
	u32 max_size;
	u32 buf_size;
	int ret;

	page_shift = PAGE_SHIFT + hr_dev->caps.eqe_buf_pg_sz;
	page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.eqe_buf_pg_sz;
	mhop_num = hr_dev->caps.eqe_hop_num;
	if (!mhop_num) {
		max_size = 1 << page_shift;
		buf->size = max_size;
		buf_size = max_size;
	} else if (mhop_num == HNS_ROCE_HOP_NUM_0) {
		max_size = eq->entries * eq->eqe_size;
		buf->size = max_size;
		buf_size = max_size;
	} else {
		max_size = 1 << page_shift;
		buf->size = PAGE_ALIGN(eq->entries * eq->eqe_size);
		buf_size = round_up(eq->entries * eq->eqe_size, max_size);
		is_mhop = true;
	}

	ret = hns_roce_buf_alloc(hr_dev, buf->size, max_size, buf, page_shift);
	ret = hns_roce_buf_alloc(hr_dev, buf_size, max_size, buf, page_shift);
	if (ret) {
		dev_err(hr_dev->dev, "alloc eq buf error\n");
		return ret;
@@ -5486,7 +5479,7 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)

	return 0;
err_alloc:
	hns_roce_buf_free(hr_dev, buf->size, buf);
	hns_roce_buf_free(hr_dev, buf);
	return ret;
}

Loading