Commit b86deba9 authored by Michal Kalderon's avatar Michal Kalderon Committed by Jason Gunthorpe
Browse files

RDMA/core: Move core content from ib_uverbs to ib_core

Move functionality that is called by the driver, which is
related to umap, to a new file that will be linked in ib_core.
This is a first step in later enabling ib_uverbs to be optional.
vm_ops is now initialized in ib_uverbs_mmap instead of
priv_init to avoid having to move all the rdma_umap functions
as well.

Link: https://lore.kernel.org/r/20191030094417.16866-2-michal.kalderon@marvell.com


Suggested-by: default avatarJason Gunthorpe <jgg@mellanox.com>
Signed-off-by: default avatarAriel Elior <ariel.elior@marvell.com>
Signed-off-by: default avatarMichal Kalderon <michal.kalderon@marvell.com>
Reviewed-by: default avatarJason Gunthorpe <jgg@mellanox.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent 11f552e2
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
				device.o fmr_pool.o cache.o netlink.o \
				roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
				multicast.o mad.o smi.o agent.o mad_rmpp.o \
				nldev.o restrack.o counters.o
				nldev.o restrack.o counters.o ib_core_uverbs.o

ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
+9 −0
Original line number Diff line number Diff line
@@ -387,4 +387,13 @@ int ib_device_set_netns_put(struct sk_buff *skb,

int rdma_nl_net_init(struct rdma_dev_net *rnet);
void rdma_nl_net_exit(struct rdma_dev_net *rnet);

struct rdma_umap_priv {
	struct vm_area_struct *vma;
	struct list_head list;
};

void rdma_umap_priv_init(struct rdma_umap_priv *priv,
			 struct vm_area_struct *vma);

#endif /* _CORE_PRIV_H */
+73 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
 * Copyright 2019 Marvell. All rights reserved.
 */
#include <linux/xarray.h>
#include "uverbs.h"
#include "core_priv.h"

/*
 * Each time we map IO memory into user space this keeps track of the mapping.
 * When the device is hot-unplugged we 'zap' the mmaps in user space to point
 * to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
void rdma_umap_priv_init(struct rdma_umap_priv *priv,
			 struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;

	priv->vma = vma;
	vma->vm_private_data = priv;
	/* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */

	mutex_lock(&ufile->umap_lock);
	list_add(&priv->list, &ufile->umaps);
	mutex_unlock(&ufile->umap_lock);
}
EXPORT_SYMBOL(rdma_umap_priv_init);

/*
 * Map IO memory into a process. This is to be called by drivers as part of
 * their mmap() functions if they wish to send something like PCI-E BAR memory
 * to userspace.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	if (vma->vm_end - vma->vm_start != size)
		return -EINVAL;

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return -EINVAL;
	lockdep_assert_held(&ufile->device->disassociate_srcu);

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
+3 −71
Original line number Diff line number Diff line
@@ -772,6 +772,8 @@ out_unlock:
	return (ret) ? : count;
}

static const struct vm_operations_struct rdma_umap_ops;

static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ib_uverbs_file *file = filp->private_data;
@@ -785,45 +787,13 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
		ret = PTR_ERR(ucontext);
		goto out;
	}

	vma->vm_ops = &rdma_umap_ops;
	ret = ucontext->device->ops.mmap(ucontext, vma);
out:
	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
	return ret;
}

/*
 * Each time we map IO memory into user space this keeps track of the mapping.
 * When the device is hot-unplugged we 'zap' the mmaps in user space to point
 * to the zero page and allow the hot unplug to proceed.
 *
 * This is necessary for cases like PCI physical hot unplug as the actual BAR
 * memory may vanish after this and access to it from userspace could MCE.
 *
 * RDMA drivers supporting disassociation must have their user space designed
 * to cope in some way with their IO pages going to the zero page.
 */
struct rdma_umap_priv {
	struct vm_area_struct *vma;
	struct list_head list;
};

static const struct vm_operations_struct rdma_umap_ops;

static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
				struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;

	priv->vma = vma;
	vma->vm_private_data = priv;
	vma->vm_ops = &rdma_umap_ops;

	mutex_lock(&ufile->umap_lock);
	list_add(&priv->list, &ufile->umaps);
	mutex_unlock(&ufile->umap_lock);
}

/*
 * The VMA has been dup'd, initialize the vm_private_data with a new tracking
 * struct
@@ -931,44 +901,6 @@ static const struct vm_operations_struct rdma_umap_ops = {
	.fault = rdma_umap_fault,
};

/*
 * Map IO memory into a process. This is to be called by drivers as part of
 * their mmap() functions if they wish to send something like PCI-E BAR memory
 * to userspace.
 */
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	if (vma->vm_end - vma->vm_start != size)
		return -EINVAL;

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file ||
		    vma->vm_file->private_data != ufile))
		return -EINVAL;
	lockdep_assert_held(&ufile->device->disassociate_srcu);

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	rdma_umap_priv_init(priv, vma);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);

void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
	struct rdma_umap_priv *priv, *next_priv;