Commit 38f8ff5b authored by Jason Gunthorpe
Browse files

RDMA/mlx5: Reorganize mlx5_ib_reg_user_mr()

This function handles an ODP and regular MR flow all mushed together, even
though the two flows are quite different. Split them into two dedicated
functions.

Link: https://lore.kernel.org/r/20201130075839.278575-5-leon@kernel.org


Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 6e0954b1
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -1340,7 +1340,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
			       enum ib_uverbs_advise_mr_advice advice,
			       u32 flags, struct ib_sge *sg_list, u32 num_sge);
int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable);
int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@@ -1362,7 +1362,7 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
{
	return -EOPNOTSUPP;
}
static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
	return -EOPNOTSUPP;
}
+130 −119
Original line number Diff line number Diff line
@@ -56,6 +56,10 @@ enum {

static void
create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
				     struct ib_umem *umem, u64 iova,
				     int access_flags, unsigned int page_size,
				     bool populate);

static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
					  struct ib_pd *pd)
@@ -875,32 +879,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
	return MLX5_MAX_UMR_SHIFT;
}

/*
 * Obtain the ib_umem for the user memory described by start/length, choosing
 * between the ODP and the regular umem getter based on access_flags.
 *
 * Returns the umem on success. On failure a debug message is logged and the
 * ERR_PTR produced by ib_umem_odp_get()/ib_umem_get() is returned.
 */
static struct ib_umem *mr_umem_get(struct mlx5_ib_dev *dev, u64 start,
				   u64 length, int access_flags)
{
	struct ib_umem *u;

	/* On-demand-paging requests go through the ODP getter. */
	if (access_flags & IB_ACCESS_ON_DEMAND) {
		struct ib_umem_odp *odp;

		odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
				      &mlx5_mn_ops);
		if (IS_ERR(odp)) {
			mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
				    PTR_ERR(odp));
			return ERR_CAST(odp);
		}
		/* Callers get the struct ib_umem embedded in the ODP umem. */
		return &odp->umem;
	}

	u = ib_umem_get(&dev->ib_dev, start, length, access_flags);
	if (IS_ERR(u)) {
		/* Failure is only logged here; the ERR_PTR is passed through. */
		mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
		return u;
	}
	return u;
}

static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
@@ -957,7 +935,16 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
	return &cache->ent[order];
}

static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
/*
 * Fill in the bookkeeping fields of an MR: both lkey and rkey are set to the
 * key of the MR's mkey, and the length and access flags are recorded.
 * @dev is not referenced in the body.
 */
static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			  u64 length, int access_flags)
{
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = length;
	mr->access_flags = access_flags;
}

static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
					     struct ib_umem *umem, u64 iova,
					     int access_flags)
{
@@ -971,16 +958,26 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
		return ERR_PTR(-EINVAL);
	ent = mr_cache_ent_from_order(
		dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
	if (!ent)
		return ERR_PTR(-E2BIG);

	/* Matches access in alloc_cache_mr() */
	if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
		return ERR_PTR(-EOPNOTSUPP);
	/*
	 * Matches access in alloc_cache_mr(). If the MR can't come from the
	 * cache then synchronously create an uncached one.
	 */
	if (!ent || ent->limit == 0 ||
	    !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(NULL, pd, umem, iova, access_flags, page_size,
				false);
		mutex_unlock(&dev->slow_path_mutex);
		return mr;
	}

	mr = get_cache_mr(ent);
	if (!mr) {
		mr = create_cache_mr(ent);
		/*
		 * The above already tried to do the same stuff as reg_create(),
		 * no reason to try it again.
		 */
		if (IS_ERR(mr))
			return mr;
	}
@@ -993,6 +990,8 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
	mr->mmkey.size = umem->length;
	mr->mmkey.pd = to_mpd(pd)->pdn;
	mr->page_shift = order_base_2(page_size);
	mr->umem = umem;
	set_mr_fields(dev, mr, umem->length, access_flags);

	return mr;
}
@@ -1279,10 +1278,10 @@ err:
 */
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
				     struct ib_umem *umem, u64 iova,
				     int access_flags, bool populate)
				     int access_flags, unsigned int page_size,
				     bool populate)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	unsigned int page_size;
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
@@ -1291,11 +1290,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	page_size =
		mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
	if (WARN_ON(!page_size))
	if (!page_size) {
		page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
						     0, iova);
		if (!page_size)
			return ERR_PTR(-EINVAL);

	}
	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);
@@ -1352,6 +1352,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->desc_size = sizeof(struct mlx5_mtt);
	mr->dev = dev;
	mr->umem = umem;
	set_mr_fields(dev, mr, umem->length, access_flags);
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
@@ -1368,15 +1370,6 @@ err_1:
	return ERR_PTR(err);
}

/*
 * Fill in the bookkeeping fields of an MR: both lkey and rkey are set to the
 * key of the MR's mkey, and the length and access flags are recorded.
 * @dev is not referenced in the body.
 */
static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			  u64 length, int access_flags)
{
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = length;
	mr->access_flags = access_flags;
}

static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
				       u64 length, int acc, int mode)
{
@@ -1471,70 +1464,32 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
				 attr->access_flags, mode);
}

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
				    u64 iova, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	bool xlt_with_umr;
	struct ib_umem *umem;
	int err;

	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
		return ERR_PTR(-EOPNOTSUPP);

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);

	xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length);
	/* ODP requires xlt update via umr to work. */
	if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND))
		return ERR_PTR(-EINVAL);

	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
	    length == U64_MAX) {
		if (virt_addr != start)
			return ERR_PTR(-EINVAL);
		if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
			return ERR_PTR(-EINVAL);

		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
		if (IS_ERR(mr))
			return ERR_CAST(mr);
		return &mr->ibmr;
	}

	umem = mr_umem_get(dev, start, length, access_flags);
	if (IS_ERR(umem))
		return ERR_CAST(umem);

	xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
	if (xlt_with_umr) {
		mr = alloc_mr_from_cache(pd, umem, virt_addr, access_flags);
		if (IS_ERR(mr))
			mr = NULL;
	}

	if (!mr) {
		mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
	} else {
		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(NULL, pd, umem, virt_addr, access_flags,
				!xlt_with_umr);
		mr = reg_create(NULL, pd, umem, iova, access_flags, 0, true);
		mutex_unlock(&dev->slow_path_mutex);
	}

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
		ib_umem_release(umem);
		return ERR_CAST(mr);
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	mr->umem = umem;
	atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
	set_mr_fields(dev, mr, length, access_flags);
	atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);

	if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) {
	if (xlt_with_umr) {
		/*
		 * If the MR was created with reg_create then it will be
		 * configured properly but left disabled. It is safe to go ahead
@@ -1546,30 +1501,86 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
			return ERR_PTR(err);
		}
	}
	return &mr->ibmr;
}

	if (is_odp_mr(mr)) {
		to_ib_umem_odp(mr->umem)->private = mr;
static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
					u64 iova, int access_flags,
					struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem_odp *odp;
	struct mlx5_ib_mr *mr;
	int err;

	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
		return ERR_PTR(-EOPNOTSUPP);

	if (!start && length == U64_MAX) {
		if (iova != 0)
			return ERR_PTR(-EINVAL);
		if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
			return ERR_PTR(-EINVAL);

		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
		if (IS_ERR(mr))
			return ERR_CAST(mr);
		return &mr->ibmr;
	}

	/* ODP requires xlt update via umr to work. */
	if (!mlx5_ib_can_load_pas_with_umr(dev, length))
		return ERR_PTR(-EINVAL);

	odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
			      &mlx5_mn_ops);
	if (IS_ERR(odp))
		return ERR_CAST(odp);

	mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
	if (IS_ERR(mr)) {
		ib_umem_release(&odp->umem);
		return ERR_CAST(mr);
	}

	odp->private = mr;
	init_waitqueue_head(&mr->q_deferred_work);
	atomic_set(&mr->num_deferred_work, 0);
		err = xa_err(xa_store(&dev->odp_mkeys,
				      mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
				      GFP_KERNEL));
		if (err) {
			dereg_mr(dev, mr);
			return ERR_PTR(err);
		}
	err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
			      &mr->mmkey, GFP_KERNEL));
	if (err)
		goto err_dereg_mr;

		err = mlx5_ib_init_odp_mr(mr, xlt_with_umr);
		if (err) {
	err = mlx5_ib_init_odp_mr(mr);
	if (err)
		goto err_dereg_mr;
	return &mr->ibmr;

err_dereg_mr:
	dereg_mr(dev, mr);
	return ERR_PTR(err);
}
	}

	return &mr->ibmr;
error:
	ib_umem_release(umem);
	return ERR_PTR(err);
/*
 * Register a user memory region.
 *
 * Returns ERR_PTR(-EOPNOTSUPP) when CONFIG_INFINIBAND_USER_MEM is disabled.
 * Requests carrying IB_ACCESS_ON_DEMAND are handed to create_user_odp_mr();
 * all other requests obtain a umem via ib_umem_get() and are completed by
 * create_real_mr(). An ERR_PTR from ib_umem_get() is returned to the caller
 * as an ib_mr error pointer.
 */
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 iova, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem *umem;

	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
		return ERR_PTR(-EOPNOTSUPP);

	mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, iova, length, access_flags);

	/* The ODP flow gets its own umem, so none is acquired here for it. */
	if (access_flags & IB_ACCESS_ON_DEMAND)
		return create_user_odp_mr(pd, start, length, iova, access_flags,
					  udata);
	umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
	if (IS_ERR(umem))
		return ERR_CAST(umem);
	return create_real_mr(pd, umem, iova, access_flags);
}

/**
@@ -1661,7 +1672,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
		atomic_sub(ib_umem_num_pages(mr->umem),
			   &dev->mdev->priv.reg_pages);
		ib_umem_release(mr->umem);
		mr->umem = mr_umem_get(dev, addr, len, access_flags);
		mr->umem = ib_umem_get(&dev->ib_dev, addr, len, access_flags);
		if (IS_ERR(mr->umem)) {
			err = PTR_ERR(mr->umem);
			mr->umem = NULL;
@@ -1685,7 +1696,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
		if (err)
			goto err;

		mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, true);
		mr = reg_create(ib_mr, pd, mr->umem, addr, access_flags, 0, true);
		if (IS_ERR(mr)) {
			err = PTR_ERR(mr);
			mr = to_mmr(ib_mr);
+8 −8
Original line number Diff line number Diff line
@@ -536,6 +536,10 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
	struct mlx5_ib_mr *imr;
	int err;

	if (!mlx5_ib_can_load_pas_with_umr(dev,
					   MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
		return ERR_PTR(-EOPNOTSUPP);

	umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
	if (IS_ERR(umem_odp))
		return ERR_CAST(umem_odp);
@@ -831,17 +835,13 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
				     flags);
}

int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
{
	u32 flags = MLX5_PF_FLAGS_SNAPSHOT;
	int ret;

	if (enable)
		flags |= MLX5_PF_FLAGS_ENABLE;

	ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem),
				mr->umem->address, mr->umem->length, NULL,
				flags);
	ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address,
				mr->umem->length, NULL,
				MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE);
	return ret >= 0 ? 0 : ret;
}