Commit b91e1751 authored by Jason Gunthorpe

RDMA/mlx5: Simplify how the MR cache bucket is located

There are many bad APIs here that accept a cache bucket index instead of a
bucket pointer. Many of the callers already have a bucket pointer, so this
results in a lot of confusing uses of order2idx().
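
For illustration, the shape of the change at a typical call site (a minimal
sketch distilled from the size_write() hunk below, not a complete function):

	/* Before: the caller re-derives an index just to name its own bucket */
	c = order2idx(dev, ent->order);
	err = add_keys(dev, c, var - ent->total_mrs);

	/* After: the caller passes the bucket pointer it already holds */
	err = add_keys(ent, var - ent->total_mrs);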

Pass the struct mlx5_cache_ent into add_keys(), remove_keys(), and
alloc_cached_mr().

Once the MR is in the cache, store the cache bucket pointer directly in
the MR, replacing the 'bool allocated_from_cache'.
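
Cache membership is then tested by pointer rather than by flag; a NULL
cache_ent means the MR was not allocated from the cache. The pattern, as it
appears in dereg_mr() below:

	/* Before */
	if (mr->allocated_from_cache)
		mlx5_mr_cache_free(dev, mr);
	else
		kfree(mr);

	/* After */
	if (mr->cache_ent)
		mlx5_mr_cache_free(dev, mr);
	else
		kfree(mr);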

In the end there is only one place that needs to form a cache index from an
order, alloc_mr_from_cache(). Increase the safety of this function by
disallowing it from accessing cache entries in the ODP special area.
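
As a worked example of the index math in mr_cache_ent_from_order() (assuming
the first standard bucket holds order-2 MRs, i.e. cache->ent[0].order == 2, as
suggested by the old "order %d" prints of i + 2): order 5 maps to index
5 - 2 = 3, orders below 2 round up to bucket 0, and anything past
MR_CACHE_LAST_STD_ENTRY, where the ODP special entries live, yields NULL:

	if (order < cache->ent[0].order)
		return &cache->ent[0];		/* round small orders up */
	order = order - cache->ent[0].order;	/* e.g. order 5 -> index 3 */
	if (order > MR_CACHE_LAST_STD_ENTRY)	/* ODP special area is off limits */
		return NULL;
	return &cache->ent[order];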

Link: https://lore.kernel.org/r/20200310082238.239865-7-leon@kernel.org


Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 7c8691a3
drivers/infiniband/hw/mlx5/mlx5_ib.h +4 −3
@@ -617,8 +617,8 @@ struct mlx5_ib_mr {
 	struct ib_umem	       *umem;
 	struct mlx5_shared_mr_info	*smr_info;
 	struct list_head	list;
-	int			order;
-	bool			allocated_from_cache;
+	unsigned int		order;
+	struct mlx5_cache_ent  *cache_ent;
 	int			npages;
 	struct mlx5_ib_dev     *dev;
 	u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
@@ -1274,7 +1274,8 @@ int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+				       unsigned int entry);
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr);
 
drivers/infiniband/hw/mlx5/mr.c +66 −94
@@ -99,16 +99,6 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 }
 
-static int order2idx(struct mlx5_ib_dev *dev, int order)
-{
-	struct mlx5_mr_cache *cache = &dev->cache;
-
-	if (order < cache->ent[0].order)
-		return 0;
-	else
-		return order - cache->ent[0].order;
-}
-
 static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
 {
 	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
@@ -120,9 +110,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
 	struct mlx5_ib_mr *mr =
 		container_of(context, struct mlx5_ib_mr, cb_work);
 	struct mlx5_ib_dev *dev = mr->dev;
-	struct mlx5_mr_cache *cache = &dev->cache;
-	int c = order2idx(dev, mr->order);
-	struct mlx5_cache_ent *ent = &cache->ent[c];
+	struct mlx5_cache_ent *ent = mr->cache_ent;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ent->lock, flags);
@@ -140,7 +128,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
 	mr->mmkey.key |= mlx5_idx_to_mkey(
 		MLX5_GET(create_mkey_out, mr->out, mkey_index));
 
-	cache->last_add = jiffies;
+	dev->cache.last_add = jiffies;
 
 	spin_lock_irqsave(&ent->lock, flags);
 	list_add_tail(&mr->list, &ent->head);
@@ -152,10 +140,8 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
 		complete(&ent->compl);
 }
 
-static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+static int add_keys(struct mlx5_cache_ent *ent, int num)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent = &cache->ent[c];
 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	struct mlx5_ib_mr *mr;
 	void *mkc;
@@ -180,8 +166,8 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 			break;
 		}
 		mr->order = ent->order;
-		mr->allocated_from_cache = true;
-		mr->dev = dev;
+		mr->cache_ent = ent;
+		mr->dev = ent->dev;
 
 		MLX5_SET(mkc, mkc, free, 1);
 		MLX5_SET(mkc, mkc, umr_en, 1);
@@ -196,15 +182,15 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 		spin_lock_irq(&ent->lock);
 		ent->pending++;
 		spin_unlock_irq(&ent->lock);
-		err = mlx5_ib_create_mkey_cb(dev, &mr->mmkey,
-					       &dev->async_ctx, in, inlen,
+		err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey,
+					     &ent->dev->async_ctx, in, inlen,
 					     mr->out, sizeof(mr->out),
 					     &mr->cb_work);
 		if (err) {
 			spin_lock_irq(&ent->lock);
 			ent->pending--;
 			spin_unlock_irq(&ent->lock);
-			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
+			mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
 			kfree(mr);
 			break;
 		}
@@ -214,10 +200,8 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 	return err;
 }
 
-static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
+static void remove_keys(struct mlx5_cache_ent *ent, int num)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent = &cache->ent[c];
 	struct mlx5_ib_mr *tmp_mr;
 	struct mlx5_ib_mr *mr;
 	LIST_HEAD(del_list);
@@ -234,7 +218,7 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 		ent->available_mrs--;
 		ent->total_mrs--;
 		spin_unlock_irq(&ent->lock);
-		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+		mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
 	}
 
 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
@@ -247,18 +231,14 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
 			  size_t count, loff_t *pos)
 {
 	struct mlx5_cache_ent *ent = filp->private_data;
-	struct mlx5_ib_dev *dev = ent->dev;
 	char lbuf[20] = {0};
 	u32 var;
 	int err;
-	int c;
 
 	count = min(count, sizeof(lbuf) - 1);
 	if (copy_from_user(lbuf, buf, count))
 		return -EFAULT;
 
-	c = order2idx(dev, ent->order);
-
 	if (sscanf(lbuf, "%u", &var) != 1)
 		return -EINVAL;
 
@@ -267,14 +247,14 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
 
 	if (var > ent->total_mrs) {
 		do {
-			err = add_keys(dev, c, var - ent->total_mrs);
+			err = add_keys(ent, var - ent->total_mrs);
 			if (err && err != -EAGAIN)
 				return err;
 
 			usleep_range(3000, 5000);
 		} while (err);
 	} else if (var < ent->total_mrs) {
-		remove_keys(dev, c, ent->total_mrs - var);
+		remove_keys(ent, ent->total_mrs - var);
 	}
 
 	return count;
@@ -305,18 +285,14 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
 			   size_t count, loff_t *pos)
 {
 	struct mlx5_cache_ent *ent = filp->private_data;
-	struct mlx5_ib_dev *dev = ent->dev;
 	char lbuf[20] = {0};
 	u32 var;
 	int err;
-	int c;
 
 	count = min(count, sizeof(lbuf) - 1);
 	if (copy_from_user(lbuf, buf, count))
 		return -EFAULT;
 
-	c = order2idx(dev, ent->order);
-
 	if (sscanf(lbuf, "%u", &var) != 1)
 		return -EINVAL;
 
@@ -326,7 +302,7 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
 	ent->limit = var;
 
 	if (ent->available_mrs < ent->limit) {
-		err = add_keys(dev, c, 2 * ent->limit - ent->available_mrs);
+		err = add_keys(ent, 2 * ent->limit - ent->available_mrs);
 		if (err)
 			return err;
 	}
@@ -371,24 +347,22 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 {
 	struct mlx5_ib_dev *dev = ent->dev;
 	struct mlx5_mr_cache *cache = &dev->cache;
-	int i = order2idx(dev, ent->order);
 	int err;
 
 	if (cache->stopped)
 		return;
 
-	ent = &dev->cache.ent[i];
 	if (ent->available_mrs < 2 * ent->limit && !dev->fill_delay) {
-		err = add_keys(dev, i, 1);
+		err = add_keys(ent, 1);
 		if (ent->available_mrs < 2 * ent->limit) {
 			if (err == -EAGAIN) {
 				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
-					    i + 2);
+					    ent->order);
 				queue_delayed_work(cache->wq, &ent->dwork,
 						   msecs_to_jiffies(3));
 			} else if (err) {
 				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
-					     i + 2, err);
+					     ent->order, err);
 				queue_delayed_work(cache->wq, &ent->dwork,
 						   msecs_to_jiffies(1000));
 			} else {
@@ -410,7 +384,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 		 */
 		if (!need_resched() && !someone_adding(cache) &&
 		    time_after(jiffies, cache->last_add + 300 * HZ)) {
-			remove_keys(dev, i, 1);
+			remove_keys(ent, 1);
 			if (ent->available_mrs > ent->limit)
 				queue_work(cache->wq, &ent->work);
 		} else {
@@ -435,17 +409,18 @@ static void cache_work_func(struct work_struct *work)
 	__cache_work_func(ent);
 }
 
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
+/* Allocate a special entry from the cache */
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+				       unsigned int entry)
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent;
 	struct mlx5_ib_mr *mr;
 	int err;
 
-	if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
-		mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
+	if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY ||
+		    entry >= ARRAY_SIZE(cache->ent)))
 		return ERR_PTR(-EINVAL);
-	}
 
 	ent = &cache->ent[entry];
 	while (1) {
@@ -453,7 +428,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
 		if (list_empty(&ent->head)) {
 			spin_unlock_irq(&ent->lock);
 
-			err = add_keys(dev, entry, 1);
+			err = add_keys(ent, 1);
 			if (err && err != -EAGAIN)
 				return ERR_PTR(err);
 
@@ -471,26 +446,16 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
 	}
 }
 
-static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
+static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_cache_ent *req_ent)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
+	struct mlx5_ib_dev *dev = req_ent->dev;
 	struct mlx5_ib_mr *mr = NULL;
-	struct mlx5_cache_ent *ent;
-	int last_umr_cache_entry;
-	int c;
-	int i;
-
-	c = order2idx(dev, order);
-	last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
-	if (c < 0 || c > last_umr_cache_entry) {
-		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
-		return NULL;
-	}
+	struct mlx5_cache_ent *ent = req_ent;
 
-	for (i = c; i <= last_umr_cache_entry; i++) {
-		ent = &cache->ent[i];
-
-		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
+	/* Try larger MR pools from the cache to satisfy the allocation */
+	for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) {
+		mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order,
+			    ent - dev->cache.ent);
 
 		spin_lock_irq(&ent->lock);
 		if (!list_empty(&ent->head)) {
@@ -500,43 +465,36 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 			ent->available_mrs--;
 			spin_unlock_irq(&ent->lock);
 			if (ent->available_mrs < ent->limit)
-				queue_work(cache->wq, &ent->work);
+				queue_work(dev->cache.wq, &ent->work);
 			break;
 		}
 		spin_unlock_irq(&ent->lock);
 
-		queue_work(cache->wq, &ent->work);
+		queue_work(dev->cache.wq, &ent->work);
 	}
 
 	if (!mr)
-		cache->ent[c].miss++;
+		req_ent->miss++;
 
 	return mr;
 }
 
 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	struct mlx5_mr_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent;
+	struct mlx5_cache_ent *ent = mr->cache_ent;
 	int shrink = 0;
-	int c;
 
-	if (!mr->allocated_from_cache)
+	if (!ent)
 		return;
 
-	c = order2idx(dev, mr->order);
-	WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);
-
 	if (mlx5_mr_cache_invalidate(mr)) {
-		mr->allocated_from_cache = false;
+		mr->cache_ent = NULL;
 		destroy_mkey(dev, mr);
-		ent = &cache->ent[c];
 		if (ent->available_mrs < ent->limit)
-			queue_work(cache->wq, &ent->work);
+			queue_work(dev->cache.wq, &ent->work);
 		return;
 	}
 
-	ent = &cache->ent[c];
 	spin_lock_irq(&ent->lock);
 	list_add_tail(&mr->list, &ent->head);
 	ent->available_mrs++;
@@ -545,7 +503,7 @@ void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	spin_unlock_irq(&ent->lock);
 
 	if (shrink)
-		queue_work(cache->wq, &ent->work);
+		queue_work(dev->cache.wq, &ent->work);
 }
 
 static void clean_keys(struct mlx5_ib_dev *dev, int c)
@@ -872,22 +830,38 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 	return err;
 }
 
-static struct mlx5_ib_mr *alloc_mr_from_cache(
-				  struct ib_pd *pd, struct ib_umem *umem,
-				  u64 virt_addr, u64 len, int npages,
-				  int page_shift, int order, int access_flags)
+static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
+						      unsigned int order)
 {
+	struct mlx5_mr_cache *cache = &dev->cache;
+
+	if (order < cache->ent[0].order)
+		return &cache->ent[0];
+	order = order - cache->ent[0].order;
+	if (order > MR_CACHE_LAST_STD_ENTRY)
+		return NULL;
+	return &cache->ent[order];
+}
+
+static struct mlx5_ib_mr *
+alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr,
+		    u64 len, int npages, int page_shift, unsigned int order,
+		    int access_flags)
+{
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_cache_ent *ent = mr_cache_ent_from_order(dev, order);
 	struct mlx5_ib_mr *mr;
 	int err = 0;
 	int i;
 
+	if (!ent)
+		return ERR_PTR(-E2BIG);
 	for (i = 0; i < 1; i++) {
-		mr = alloc_cached_mr(dev, order);
+		mr = alloc_cached_mr(ent);
 		if (mr)
 			break;
 
-		err = add_keys(dev, order2idx(dev, order), 1);
+		err = add_keys(ent, 1);
 		if (err && err != -EAGAIN) {
 			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
 			break;
@@ -1470,7 +1444,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		/*
 		 * UMR can't be used - MKey needs to be replaced.
 		 */
-		if (mr->allocated_from_cache)
+		if (mr->cache_ent)
 			err = mlx5_mr_cache_invalidate(mr);
 		else
 			err = destroy_mkey(dev, mr);
@@ -1486,7 +1460,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 			goto err;
 		}
 
-		mr->allocated_from_cache = false;
+		mr->cache_ent = NULL;
 	} else {
 		/*
 		 * Send a UMR WQE
@@ -1573,8 +1547,6 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
 
 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	int allocated_from_cache = mr->allocated_from_cache;
-
 	if (mr->sig) {
 		if (mlx5_core_destroy_psv(dev->mdev,
 					  mr->sig->psv_memory.psv_idx))
@@ -1589,7 +1561,7 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		mr->sig = NULL;
 	}
 
-	if (!allocated_from_cache) {
+	if (!mr->cache_ent) {
 		destroy_mkey(dev, mr);
 		mlx5_free_priv_descs(mr);
 	}
@@ -1606,7 +1578,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 	else
 		clean_mr(dev, mr);
 
-	if (mr->allocated_from_cache)
+	if (mr->cache_ent)
 		mlx5_mr_cache_free(dev, mr);
 	else
 		kfree(mr);
drivers/infiniband/hw/mlx5/odp.c +1 −1
@@ -197,7 +197,7 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
 	odp->private = NULL;
 	mutex_unlock(&odp->umem_mutex);
 
-	if (!mr->allocated_from_cache) {
+	if (!mr->cache_ent) {
 		mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey);
 		WARN_ON(mr->descs);
 	}