Commit 3889551d authored by Jason Gunthorpe's avatar Jason Gunthorpe
Browse files

RDMA/hfi1: Use mmu_interval_notifier_insert for user_exp_rcv

This converts one of the two users of mmu_notifiers to use the new API.
The conversion is fairly straightforward, however the existing use of
notifiers here seems to be racey.

Link: https://lore.kernel.org/r/20191112202231.3856-7-jgg@ziepe.ca


Tested-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent f25a546e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1138,7 +1138,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
			HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
			HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
	/* adjust flag if this fd is not able to cache */
	if (!fd->handler)
	if (!fd->use_mn)
		cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */

	cinfo.num_active = hfi1_count_active_units();
+1 −1
Original line number Diff line number Diff line
@@ -1444,7 +1444,7 @@ struct hfi1_filedata {
	/* for cpu affinity; -1 if none */
	int rec_cpu_num;
	u32 tid_n_pinned;
	struct mmu_rb_handler *handler;
	bool use_mn;
	struct tid_rb_node **entry_to_rb;
	spinlock_t tid_lock; /* protect tid_[limit,used] counters */
	u32 tid_limit;
+56 −90
Original line number Diff line number Diff line
@@ -59,11 +59,11 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
			      struct tid_user_buf *tbuf,
			      u32 rcventry, struct tid_group *grp,
			      u16 pageidx, unsigned int npages);
static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
				    struct tid_rb_node *tnode);
static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
			      const struct mmu_notifier_range *range,
			      unsigned long cur_seq);
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
			    struct tid_group *grp,
			    unsigned int start, u16 count,
@@ -73,10 +73,8 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
			      struct tid_group **grp);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);

static struct mmu_rb_ops tid_rb_ops = {
	.insert = tid_rb_insert,
	.remove = tid_rb_remove,
	.invalidate = tid_rb_invalidate
static const struct mmu_interval_notifier_ops tid_mn_ops = {
	.invalidate = tid_rb_invalidate,
};

/*
@@ -87,7 +85,6 @@ static struct mmu_rb_ops tid_rb_ops = {
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt)
{
	struct hfi1_devdata *dd = uctxt->dd;
	int ret = 0;

	spin_lock_init(&fd->tid_lock);
@@ -109,20 +106,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
			fd->entry_to_rb = NULL;
			return -ENOMEM;
		}

		/*
		 * Register MMU notifier callbacks. If the registration
		 * fails, continue without TID caching for this context.
		 */
		ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
					   dd->pport->hfi1_wq,
					   &fd->handler);
		if (ret) {
			dd_dev_info(dd,
				    "Failed MMU notifier registration %d\n",
				    ret);
			ret = 0;
		}
		fd->use_mn = true;
	}

	/*
@@ -139,7 +123,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
	 * init.
	 */
	spin_lock(&fd->tid_lock);
	if (uctxt->subctxt_cnt && fd->handler) {
	if (uctxt->subctxt_cnt && fd->use_mn) {
		u16 remainder;

		fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -158,18 +142,10 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;

	/*
	 * The notifier would have been removed when the process'es mm
	 * was freed.
	 */
	if (fd->handler) {
		hfi1_mmu_rb_unregister(fd->handler);
	} else {
	if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
		unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
	if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
		unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
	}

	kfree(fd->invalid_tids);
	fd->invalid_tids = NULL;
@@ -201,7 +177,7 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd,

	if (mapped) {
		pci_unmap_single(dd->pcidev, node->dma_addr,
				 node->mmu.len, PCI_DMA_FROMDEVICE);
				 node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
		pages = &node->pages[idx];
	} else {
		pages = &tidbuf->pages[idx];
@@ -777,8 +753,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
		return -EFAULT;
	}

	node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
	node->mmu.len = npages * PAGE_SIZE;
	node->fdata = fd;
	node->phys = page_to_phys(pages[0]);
	node->npages = npages;
	node->rcventry = rcventry;
@@ -787,24 +762,36 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
	node->freed = false;
	memcpy(node->pages, pages, sizeof(struct page *) * npages);

	if (!fd->handler)
		ret = tid_rb_insert(fd, &node->mmu);
	else
		ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
	if (fd->use_mn) {
		ret = mmu_interval_notifier_insert(
			&node->notifier, fd->mm,
			tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
			&tid_mn_ops);
		if (ret)
			goto out_unmap;
		/*
		 * FIXME: This is in the wrong order, the notifier should be
		 * established before the pages are pinned by pin_rcv_pages.
		 */
		mmu_interval_read_begin(&node->notifier);
	}
	fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;

	if (ret) {
	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
	trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
			       node->notifier.interval_tree.start, node->phys,
			       phys);
	return 0;

out_unmap:
	hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
			  node->rcventry, node->mmu.addr, node->phys, ret);
		  node->rcventry, node->notifier.interval_tree.start,
		  node->phys, ret);
	pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
			 PCI_DMA_FROMDEVICE);
	kfree(node);
	return -EFAULT;
}
	hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
	trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
			       node->mmu.addr, node->phys, phys);
	return 0;
}

static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
			      struct tid_group **grp)
@@ -833,10 +820,9 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
	if (grp)
		*grp = node->grp;

	if (!fd->handler)
	if (fd->use_mn)
		mmu_interval_notifier_remove(&node->notifier);
	cacheless_tid_rb_remove(fd, node);
	else
		hfi1_mmu_rb_remove(fd->handler, &node->mmu);

	return 0;
}
@@ -847,7 +833,8 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
	struct hfi1_devdata *dd = uctxt->dd;

	trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
				 node->npages, node->mmu.addr, node->phys,
				 node->npages,
				 node->notifier.interval_tree.start, node->phys,
				 node->dma_addr);

	/*
@@ -894,30 +881,29 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
				if (!node || node->rcventry != rcventry)
					continue;

				if (fd->use_mn)
					mmu_interval_notifier_remove(
						&node->notifier);
				cacheless_tid_rb_remove(fd, node);
			}
		}
	}
}

/*
 * Always return 0 from this function.  A non-zero return indicates that the
 * remove operation will be called and that memory should be unpinned.
 * However, the driver cannot unpin out from under PSM.  Instead, retain the
 * memory (by returning 0) and inform PSM that the memory is going away.  PSM
 * will call back later when it has removed the memory from its list.
 */
static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
			      const struct mmu_notifier_range *range,
			      unsigned long cur_seq)
{
	struct hfi1_filedata *fdata = arg;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct tid_rb_node *node =
		container_of(mnode, struct tid_rb_node, mmu);
		container_of(mni, struct tid_rb_node, notifier);
	struct hfi1_filedata *fdata = node->fdata;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;

	if (node->freed)
		return 0;
		return true;

	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
	trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
				 node->notifier.interval_tree.start,
				 node->rcventry, node->npages, node->dma_addr);
	node->freed = true;

@@ -946,18 +932,7 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
		fdata->invalid_tid_idx++;
	}
	spin_unlock(&fdata->invalid_lock);
	return 0;
}

static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
{
	struct hfi1_filedata *fdata = arg;
	struct tid_rb_node *tnode =
		container_of(node, struct tid_rb_node, mmu);
	u32 base = fdata->uctxt->expected_base;

	fdata->entry_to_rb[tnode->rcventry - base] = tnode;
	return 0;
	return true;
}

static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
@@ -968,12 +943,3 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
	fdata->entry_to_rb[tnode->rcventry - base] = NULL;
	clear_tid_node(fdata, tnode);
}

static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
{
	struct hfi1_filedata *fdata = arg;
	struct tid_rb_node *tnode =
		container_of(node, struct tid_rb_node, mmu);

	cacheless_tid_rb_remove(fdata, tnode);
}
+2 −1
Original line number Diff line number Diff line
@@ -65,7 +65,8 @@ struct tid_user_buf {
};

struct tid_rb_node {
	struct mmu_rb_node mmu;
	struct mmu_interval_notifier notifier;
	struct hfi1_filedata *fdata;
	unsigned long phys;
	struct tid_group *grp;
	u32 rcventry;