Commit 48219795 authored by Alex Williamson's avatar Alex Williamson
Browse files

Merge branches 'v5.7/vfio/alex-sriov-v3' and 'v5.7/vfio/yan-dma-rw-v4' into v5.7/vfio/next

Loading
Loading
Loading
Loading
+366 −24
Original line number Diff line number Diff line
@@ -9,7 +9,6 @@
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#define dev_fmt pr_fmt

#include <linux/device.h>
#include <linux/eventfd.h>
@@ -54,6 +53,12 @@ module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(disable_idle_d3,
		 "Disable using the PCI D3 low power state for idle, unused devices");

/*
 * Opt-in switch for SR-IOV configuration through vfio-pci.  The variable
 * always exists, but it is only exposed as a module parameter when the
 * kernel is built with CONFIG_PCI_IOV; otherwise it keeps its default of
 * false.
 */
static bool enable_sriov;
#ifdef CONFIG_PCI_IOV
module_param(enable_sriov, bool, 0644);
MODULE_PARM_DESC(enable_sriov, "Enable support for SR-IOV configuration.  Enabling SR-IOV on a PF typically requires support of the userspace PF driver, enabling VFs without such support may result in non-functional VFs or PF.");
#endif

static inline bool vfio_vga_disabled(void)
{
#ifdef CONFIG_VFIO_PCI_VGA
@@ -466,6 +471,44 @@ out:
		vfio_pci_set_power_state(vdev, PCI_D3hot);
}

static struct pci_driver vfio_pci_driver;

/*
 * Find the vfio-pci device data of the PF that owns the VF @vdev.
 *
 * On success the PF's vfio_pci_device is returned and *@pf_dev holds a
 * referenced vfio_device which the caller must release with
 * vfio_device_put().  NULL is returned, with no reference left held, when
 * @vdev is not a VF, when the PF is not a vfio device, or when the PF is
 * bound to a driver other than vfio-pci.  *@pf_dev must not be used after a
 * NULL return.
 */
static struct vfio_pci_device *get_pf_vdev(struct vfio_pci_device *vdev,
					   struct vfio_device **pf_dev)
{
	struct pci_dev *pf = pci_physfn(vdev->pdev);
	struct vfio_device *device;

	if (!vdev->pdev->is_virtfn)
		return NULL;

	device = vfio_device_get_from_dev(&pf->dev);
	*pf_dev = device;
	if (!device)
		return NULL;

	if (pci_dev_driver(pf) == &vfio_pci_driver)
		return vfio_device_data(device);

	/* PF is a vfio device but not driven by vfio-pci: drop our reference */
	vfio_device_put(device);
	return NULL;
}

/*
 * Add @val to the VF user count kept in the PF's VF token.
 *
 * Does nothing unless @vdev is a VF whose PF is bound to vfio-pci (see
 * get_pf_vdev()).  The count is updated under the token lock and a warning
 * is raised if it ever drops below zero.
 */
static void vfio_pci_vf_token_user_add(struct vfio_pci_device *vdev, int val)
{
	struct vfio_device *pf_dev;
	struct vfio_pci_device *pf_vdev;

	pf_vdev = get_pf_vdev(vdev, &pf_dev);
	if (pf_vdev) {
		mutex_lock(&pf_vdev->vf_token->lock);
		pf_vdev->vf_token->users += val;
		WARN_ON(pf_vdev->vf_token->users < 0);
		mutex_unlock(&pf_vdev->vf_token->lock);

		/* Release the reference taken by get_pf_vdev() */
		vfio_device_put(pf_dev);
	}
}

static void vfio_pci_release(void *device_data)
{
	struct vfio_pci_device *vdev = device_data;
@@ -473,6 +516,7 @@ static void vfio_pci_release(void *device_data)
	mutex_lock(&vdev->reflck->lock);

	if (!(--vdev->refcnt)) {
		vfio_pci_vf_token_user_add(vdev, -1);
		vfio_spapr_pci_eeh_release(vdev->pdev);
		vfio_pci_disable(vdev);
	}
@@ -498,6 +542,7 @@ static int vfio_pci_open(void *device_data)
			goto error;

		vfio_spapr_pci_eeh_open(vdev->pdev);
		vfio_pci_vf_token_user_add(vdev, 1);
	}
	vdev->refcnt++;
error:
@@ -1140,6 +1185,65 @@ hot_reset_release:

		return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
					  ioeventfd.data, count, ioeventfd.fd);
	} else if (cmd == VFIO_DEVICE_FEATURE) {
		struct vfio_device_feature feature;
		uuid_t uuid;

		minsz = offsetofend(struct vfio_device_feature, flags);

		if (copy_from_user(&feature, (void __user *)arg, minsz))
			return -EFAULT;

		if (feature.argsz < minsz)
			return -EINVAL;

		/* Check unknown flags */
		if (feature.flags & ~(VFIO_DEVICE_FEATURE_MASK |
				      VFIO_DEVICE_FEATURE_SET |
				      VFIO_DEVICE_FEATURE_GET |
				      VFIO_DEVICE_FEATURE_PROBE))
			return -EINVAL;

		/* GET & SET are mutually exclusive except with PROBE */
		if (!(feature.flags & VFIO_DEVICE_FEATURE_PROBE) &&
		    (feature.flags & VFIO_DEVICE_FEATURE_SET) &&
		    (feature.flags & VFIO_DEVICE_FEATURE_GET))
			return -EINVAL;

		switch (feature.flags & VFIO_DEVICE_FEATURE_MASK) {
		case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
			if (!vdev->vf_token)
				return -ENOTTY;

			/*
			 * We do not support GET of the VF Token UUID as this
			 * could expose the token of the previous device user.
			 */
			if (feature.flags & VFIO_DEVICE_FEATURE_GET)
				return -EINVAL;

			if (feature.flags & VFIO_DEVICE_FEATURE_PROBE)
				return 0;

			/* Don't SET unless told to do so */
			if (!(feature.flags & VFIO_DEVICE_FEATURE_SET))
				return -EINVAL;

			if (feature.argsz < minsz + sizeof(uuid))
				return -EINVAL;

			if (copy_from_user(&uuid, (void __user *)(arg + minsz),
					   sizeof(uuid)))
				return -EFAULT;

			mutex_lock(&vdev->vf_token->lock);
			uuid_copy(&vdev->vf_token->uuid, &uuid);
			mutex_unlock(&vdev->vf_token->lock);

			return 0;
		default:
			return -ENOTTY;
		}
	}

	return -ENOTTY;
@@ -1278,6 +1382,150 @@ static void vfio_pci_request(void *device_data, unsigned int count)
	mutex_unlock(&vdev->igate);
}

/*
 * Check the VF token supplied (or omitted) when a user opens @vdev.
 *
 * @vdev:     device being matched
 * @vf_token: true when the user supplied a token
 * @uuid:     the supplied token; only read when @vf_token is true
 *
 * Returns 0 when access is allowed, -EACCES when a required token is missing
 * or wrong, and -EINVAL when a token was supplied but cannot apply.
 */
static int vfio_pci_validate_vf_token(struct vfio_pci_device *vdev,
				      bool vf_token, uuid_t *uuid)
{
	struct pci_dev *pdev = vdev->pdev;

	/*
	 * An SR-IOV PF and its VFs always share some degree of trust: at the
	 * very least the PF hosts the SR-IOV capability and can disrupt VFs
	 * with a reset, and frequently it can deny service to a VF or observe
	 * data passing through it.  A shared VF token (UUID) is therefore
	 * required as an explicit opt-in to that trust.  It keeps a VF driver
	 * from assuming the PF driver is a trusted in-kernel driver, and keeps
	 * a PF driver from being swapped for a rogue one behind the back of
	 * VF drivers that are already running.
	 *
	 * For a VF whose PF is a vfio device bound to vfio-pci, the user must
	 * pass the token by appending vf_token to the device name, e.g.:
	 *
	 * "0000:04:10.0 vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
	 *
	 * For a PF with VFs in use, the user must likewise present the current
	 * token to prove collaboration with the existing VF users.  With no
	 * VFs in use, a token given for the PF becomes the new VF token.
	 *
	 * Supplying a token where none can be used is an error.
	 */
	if (pdev->is_virtfn) {
		struct vfio_device *pf_dev;
		struct vfio_pci_device *pf_vdev = get_pf_vdev(vdev, &pf_dev);
		bool same;

		if (!pf_vdev) {
			if (vf_token) {
				pci_info_ratelimited(vdev->pdev,
					"VF token incorrectly provided, PF not bound to vfio-pci\n");
				return -EINVAL;
			}

			return 0; /* PF is not vfio-pci, no VF token */
		}

		if (!vf_token) {
			vfio_device_put(pf_dev);
			pci_info_ratelimited(vdev->pdev,
				"VF token required to access device\n");
			return -EACCES;
		}

		/* Compare under the PF's token lock, then drop the PF ref */
		mutex_lock(&pf_vdev->vf_token->lock);
		same = uuid_equal(uuid, &pf_vdev->vf_token->uuid);
		mutex_unlock(&pf_vdev->vf_token->lock);

		vfio_device_put(pf_dev);

		if (same)
			return 0;

		pci_info_ratelimited(vdev->pdev,
			"Incorrect VF token provided for device\n");
		return -EACCES;
	}

	if (!vdev->vf_token) {
		if (!vf_token)
			return 0; /* No VF token provided or required */

		pci_info_ratelimited(vdev->pdev,
			"VF token incorrectly provided, not a PF or VF\n");
		return -EINVAL;
	}

	/* From here on @vdev is a PF that carries a VF token */
	mutex_lock(&vdev->vf_token->lock);

	if (!vdev->vf_token->users) {
		/* No VF users yet: a supplied token simply replaces ours */
		if (vf_token)
			uuid_copy(&vdev->vf_token->uuid, uuid);

		mutex_unlock(&vdev->vf_token->lock);
		return 0;
	}

	if (!vf_token) {
		mutex_unlock(&vdev->vf_token->lock);
		pci_info_ratelimited(vdev->pdev,
			"VF token required to access device\n");
		return -EACCES;
	}

	if (!uuid_equal(uuid, &vdev->vf_token->uuid)) {
		mutex_unlock(&vdev->vf_token->lock);
		pci_info_ratelimited(vdev->pdev,
			"Incorrect VF token provided for device\n");
		return -EACCES;
	}

	mutex_unlock(&vdev->vf_token->lock);

	return 0;
}

#define VF_TOKEN_ARG "vf_token="

/*
 * vfio_device_ops.match callback: decide whether the user-supplied string
 * @buf names this device.
 *
 * @buf must begin with the PCI name of the device.  It may be followed by
 * space-separated options; the only option understood is a single
 * "vf_token=<uuid>".
 *
 * Returns 0 for no match, 1 for a match, or a negative errno when the name
 * matched but the options are malformed, unknown, duplicated, or the VF
 * token is rejected by vfio_pci_validate_vf_token().
 */
static int vfio_pci_match(void *device_data, char *buf)
{
	struct vfio_pci_device *vdev = device_data;
	const char *name = pci_name(vdev->pdev);
	size_t name_len = strlen(name);
	size_t arg_len = strlen(VF_TOKEN_ARG);
	bool vf_token = false;
	uuid_t uuid;
	int ret;

	if (strncmp(name, buf, name_len))
		return 0; /* No match */

	/* The name is a prefix of @buf; see what, if anything, follows it */
	buf += name_len;
	if (*buf && *buf != ' ')
		return 0; /* No match: non-whitespace after name */

	while (*buf) {
		if (*buf == ' ') {
			buf++;
			continue;
		}

		/* Unknown/duplicate option */
		if (vf_token || strncmp(buf, VF_TOKEN_ARG, arg_len))
			return -EINVAL;

		buf += arg_len;

		if (strlen(buf) < UUID_STRING_LEN)
			return -EINVAL;

		ret = uuid_parse(buf, &uuid);
		if (ret)
			return ret;

		vf_token = true;
		buf += UUID_STRING_LEN;
	}

	ret = vfio_pci_validate_vf_token(vdev, vf_token, &uuid);

	return ret ? ret : 1; /* 1: Match */
}

static const struct vfio_device_ops vfio_pci_ops = {
	.name		= "vfio-pci",
	.open		= vfio_pci_open,
@@ -1287,10 +1535,40 @@ static const struct vfio_device_ops vfio_pci_ops = {
	.write		= vfio_pci_write,
	.mmap		= vfio_pci_mmap,
	.request	= vfio_pci_request,
	.match		= vfio_pci_match,
};

static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
static struct pci_driver vfio_pci_driver;

/*
 * PCI bus notifier registered by a PF bound to vfio-pci.
 *
 * Events for devices that are not VFs of this PF are ignored.  When one of
 * our VFs is added, its driver_override is set to the vfio-pci ops name.
 * When one of our VFs gets bound to a driver other than vfio-pci, a warning
 * is logged.  Always returns 0.
 */
static int vfio_pci_bus_notifier(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct vfio_pci_device *vdev = container_of(nb,
						    struct vfio_pci_device, nb);
	struct device *dev = data;
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_driver *drv;

	/* Only VFs belonging to this PF are of interest */
	if (!pdev->is_virtfn || pci_physfn(pdev) != vdev->pdev)
		return 0;

	switch (action) {
	case BUS_NOTIFY_ADD_DEVICE:
		pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n",
			 pci_name(pdev));
		pdev->driver_override = kasprintf(GFP_KERNEL, "%s",
						  vfio_pci_ops.name);
		break;
	case BUS_NOTIFY_BOUND_DRIVER:
		drv = pci_dev_driver(pdev);
		if (drv && drv != &vfio_pci_driver)
			pci_warn(vdev->pdev,
				 "VF %s bound to driver %s while PF bound to vfio-pci\n",
				 pci_name(pdev), drv->name);
		break;
	default:
		break;
	}

	return 0;
}

static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
@@ -1302,12 +1580,12 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
		return -EINVAL;

	/*
	 * Prevent binding to PFs with VFs enabled, this too easily allows
	 * userspace instance with VFs and PFs from the same device, which
	 * cannot work.  Disabling SR-IOV here would initiate removing the
	 * VFs, which would unbind the driver, which is prone to blocking
	 * if that VF is also in use by vfio-pci.  Just reject these PFs
	 * and let the user sort it out.
	 * Prevent binding to PFs with VFs enabled, the VFs might be in use
	 * by the host or other users.  We cannot capture the VFs if they
	 * already exist, nor can we track VF users.  Disabling SR-IOV here
	 * would initiate removing the VFs, which would unbind the driver,
	 * which is prone to blocking if that VF is also in use by vfio-pci.
	 * Just reject these PFs and let the user sort it out.
	 */
	if (pci_num_vf(pdev)) {
		pci_warn(pdev, "Cannot bind to PF with SR-IOV enabled\n");
@@ -1320,8 +1598,8 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)

	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
	if (!vdev) {
		vfio_iommu_group_put(group, &pdev->dev);
		return -ENOMEM;
		ret = -ENOMEM;
		goto out_group_put;
	}

	vdev->pdev = pdev;
@@ -1332,18 +1610,27 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	INIT_LIST_HEAD(&vdev->ioeventfds_list);

	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
	if (ret) {
		vfio_iommu_group_put(group, &pdev->dev);
		kfree(vdev);
		return ret;
	}
	if (ret)
		goto out_free;

	ret = vfio_pci_reflck_attach(vdev);
	if (ret) {
		vfio_del_group_dev(&pdev->dev);
		vfio_iommu_group_put(group, &pdev->dev);
		kfree(vdev);
		return ret;
	if (ret)
		goto out_del_group_dev;

	if (pdev->is_physfn) {
		vdev->vf_token = kzalloc(sizeof(*vdev->vf_token), GFP_KERNEL);
		if (!vdev->vf_token) {
			ret = -ENOMEM;
			goto out_reflck;
		}

		mutex_init(&vdev->vf_token->lock);
		uuid_gen(&vdev->vf_token->uuid);

		vdev->nb.notifier_call = vfio_pci_bus_notifier;
		ret = bus_register_notifier(&pci_bus_type, &vdev->nb);
		if (ret)
			goto out_vf_token;
	}

	if (vfio_pci_is_vga(pdev)) {
@@ -1369,16 +1656,39 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	}

	return ret;

out_vf_token:
	kfree(vdev->vf_token);
out_reflck:
	vfio_pci_reflck_put(vdev->reflck);
out_del_group_dev:
	vfio_del_group_dev(&pdev->dev);
out_free:
	kfree(vdev);
out_group_put:
	vfio_iommu_group_put(group, &pdev->dev);
	return ret;
}

static void vfio_pci_remove(struct pci_dev *pdev)
{
	struct vfio_pci_device *vdev;

	pci_disable_sriov(pdev);

	vdev = vfio_del_group_dev(&pdev->dev);
	if (!vdev)
		return;

	if (vdev->vf_token) {
		WARN_ON(vdev->vf_token->users);
		mutex_destroy(&vdev->vf_token->lock);
		kfree(vdev->vf_token);
	}

	if (vdev->nb.notifier_call)
		bus_unregister_notifier(&pci_bus_type, &vdev->nb);

	vfio_pci_reflck_put(vdev->reflck);

	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
@@ -1427,6 +1737,37 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
	return PCI_ERS_RESULT_CAN_RECOVER;
}

/*
 * pci_driver.sriov_configure callback: enable @nr_virtfn VFs on @pdev, or
 * disable SR-IOV when @nr_virtfn is 0.
 *
 * Returns @nr_virtfn on success, -ENOENT when the enable_sriov module
 * option is off, -ENODEV when @pdev has no vfio device or device data, or
 * the negative error from pci_enable_sriov().
 */
static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
{
	struct vfio_device *device;
	int ret;

	might_sleep();

	if (!enable_sriov)
		return -ENOENT;

	/* Hold a device reference while SR-IOV state is being changed */
	device = vfio_device_get_from_dev(&pdev->dev);
	if (!device)
		return -ENODEV;

	if (!vfio_device_data(device)) {
		ret = -ENODEV;
		goto out_put;
	}

	if (nr_virtfn) {
		ret = pci_enable_sriov(pdev, nr_virtfn);
		if (ret >= 0)
			ret = nr_virtfn;
	} else {
		pci_disable_sriov(pdev);
		ret = nr_virtfn;
	}

out_put:
	vfio_device_put(device);

	return ret;
}

/* PCI error handlers for vfio-pci; only error_detected is provided. */
static const struct pci_error_handlers vfio_err_handlers = {
	.error_detected = vfio_pci_aer_err_detected,
};
@@ -1436,6 +1777,7 @@ static struct pci_driver vfio_pci_driver = {
	.id_table		= NULL, /* only dynamic ids */
	.probe			= vfio_pci_probe,
	.remove			= vfio_pci_remove,
	.sriov_configure	= vfio_pci_sriov_configure,
	.err_handler		= &vfio_err_handlers,
};

+10 −0
Original line number Diff line number Diff line
@@ -12,6 +12,8 @@
#include <linux/pci.h>
#include <linux/irqbypass.h>
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/notifier.h>

#ifndef VFIO_PCI_PRIVATE_H
#define VFIO_PCI_PRIVATE_H
@@ -84,6 +86,12 @@ struct vfio_pci_reflck {
	struct mutex		lock;
};

/*
 * VF token state, allocated for a PF when it is probed by vfio-pci.
 * The uuid is the shared secret that users of the PF and its VFs must
 * present; users counts the currently open VFs of this PF.
 */
struct vfio_pci_vf_token {
	struct mutex		lock;	/* taken around accesses to uuid and users */
	uuid_t			uuid;	/* current VF token */
	int			users;	/* adjusted by +1/-1 on VF open/release */
};

struct vfio_pci_device {
	struct pci_dev		*pdev;
	void __iomem		*barmap[PCI_STD_NUM_BARS];
@@ -122,6 +130,8 @@ struct vfio_pci_device {
	struct list_head	dummy_resources_list;
	struct mutex		ioeventfds_lock;
	struct list_head	ioeventfds_list;
	struct vfio_pci_vf_token	*vf_token;
	struct notifier_block	nb;
};

#define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
+194 −4
Original line number Diff line number Diff line
@@ -875,11 +875,23 @@ EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = NULL;
	struct vfio_device *it, *device = ERR_PTR(-ENODEV);

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		if (!strcmp(dev_name(it->dev), buf)) {
		int ret;

		if (it->ops->match) {
			ret = it->ops->match(it->device_data, buf);
			if (ret < 0) {
				device = ERR_PTR(ret);
				break;
			}
		} else {
			ret = !strcmp(dev_name(it->dev), buf);
		}

		if (ret) {
			device = it;
			vfio_device_get(device);
			break;
@@ -1430,8 +1442,8 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
		return -EPERM;

	device = vfio_device_get_from_name(group, buf);
	if (!device)
		return -ENODEV;
	if (IS_ERR(device))
		return PTR_ERR(device);

	ret = device->ops->open(device->device_data);
	if (ret) {
@@ -1720,6 +1732,44 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep)
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);

/**
 * External user API, exported by symbols to be linked dynamically.
 * The external user passes in a device pointer
 * to verify that:
 *	- A VFIO group is associated with the device;
 *	- IOMMU is set for the group.
 * If both checks passed, vfio_group_get_external_user_from_dev()
 * increments the container user counter to prevent the VFIO group
 * from disposal before external user exits and returns the pointer
 * to the VFIO group.
 *
 * When the external user finishes using the VFIO group, it calls
 * vfio_group_put_external_user() to release the VFIO group and
 * decrement the container user counter.
 *
 * @dev [in]	: device
 * Return error PTR or pointer to VFIO group.
 */

struct vfio_group *vfio_group_get_external_user_from_dev(struct device *dev)
{
	struct vfio_group *group = vfio_group_get_from_dev(dev);
	int err;

	if (!group)
		return ERR_PTR(-ENODEV);

	err = vfio_group_add_container_user(group);
	if (!err)
		return group;

	/* Could not become a container user: give the group reference back */
	vfio_group_put(group);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user_from_dev);

void vfio_group_put_external_user(struct vfio_group *group)
{
	vfio_group_try_dissolve_container(group);
@@ -1961,6 +2011,146 @@ err_unpin_pages:
}
EXPORT_SYMBOL(vfio_unpin_pages);

/*
 * Pin a set of guest IOVA PFNs and return their associated host PFNs for a
 * VFIO group.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * so as to prevent the VFIO group from disposal in the middle of the call.
 * But it can keep the reference to the VFIO group for several calls into
 * this interface.
 * After finishing using of the VFIO group, the caller needs to release the
 * VFIO group by calling vfio_group_put_external_user().
 *
 * @group [in]		: VFIO group
 * @user_iova_pfn [in]	: array of user/guest IOVA PFNs to be pinned.
 * @npage [in]		: count of elements in user_iova_pfn array.
 *			  This count should not be greater than
 *			  VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]		: protection flags
 * @phys_pfn [out]	: array of host PFNs
 * Return error or number of pages pinned.
 */
int vfio_group_pin_pages(struct vfio_group *group,
			 unsigned long *user_iova_pfn, int npage,
			 int prot, unsigned long *phys_pfn)
{
	struct vfio_iommu_driver *driver;
	struct vfio_container *container;

	/* Reject missing arguments and an empty request */
	if (!group || !user_iova_pfn || !phys_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	container = group->container;
	driver = container->iommu_driver;

	/* Hand the request to the IOMMU backend if it can pin pages */
	if (likely(driver && driver->ops->pin_pages))
		return driver->ops->pin_pages(container->iommu_data,
					      user_iova_pfn, npage,
					      prot, phys_pfn);

	return -ENOTTY;
}
EXPORT_SYMBOL(vfio_group_pin_pages);

/*
 * Unpin a set of guest IOVA PFNs for a VFIO group.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * so as to prevent the VFIO group from disposal in the middle of the call.
 * But it can keep the reference to the VFIO group for several calls into
 * this interface.
 * After finishing using of the VFIO group, the caller needs to release the
 * VFIO group by calling vfio_group_put_external_user().
 *
 * @group [in]		: vfio group
 * @user_iova_pfn [in]	: array of user/guest IOVA PFNs to be unpinned.
 * @npage [in]		: count of elements in user_iova_pfn array.
 *			  This count should not be greater than
 *			  VFIO_PIN_PAGES_MAX_ENTRIES.
 * Return error or number of pages unpinned.
 */
int vfio_group_unpin_pages(struct vfio_group *group,
			   unsigned long *user_iova_pfn, int npage)
{
	struct vfio_iommu_driver *driver;
	struct vfio_container *container;

	/* Reject missing arguments and an empty request */
	if (!group || !user_iova_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	container = group->container;
	driver = container->iommu_driver;

	/* Hand the request to the IOMMU backend if it can unpin pages */
	if (likely(driver && driver->ops->unpin_pages))
		return driver->ops->unpin_pages(container->iommu_data,
						user_iova_pfn, npage);

	return -ENOTTY;
}
EXPORT_SYMBOL(vfio_group_unpin_pages);


/*
 * This interface allows the CPUs to perform some sort of virtual DMA on
 * behalf of the device.
 *
 * CPUs read/write from/into a range of IOVAs pointing to user space memory
 * into/from a kernel buffer.
 *
 * As the read/write of user space memory is conducted via the CPUs and is
 * not a real device DMA, it is not necessary to pin the user space memory.
 *
 * The caller needs to call vfio_group_get_external_user() or
 * vfio_group_get_external_user_from_dev() prior to calling this interface,
 * so as to prevent the VFIO group from disposal in the middle of the call.
 * But it can keep the reference to the VFIO group for several calls into
 * this interface.
 * After finishing using of the VFIO group, the caller needs to release the
 * VFIO group by calling vfio_group_put_external_user().
 *
 * @group [in]		: VFIO group
 * @user_iova [in]	: base IOVA of a user space buffer
 * @data [in]		: pointer to kernel buffer
 * @len [in]		: kernel buffer length
 * @write [in]		: true to copy @data into the user space buffer,
 *			  false to copy from the user space buffer into @data
 * Return error code on failure or 0 on success.
 */
int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova,
		void *data, size_t len, bool write)
{
	struct vfio_iommu_driver *driver;
	struct vfio_container *container;

	/* A group, a kernel buffer and a non-zero length are all required */
	if (!group || !data || !len)
		return -EINVAL;

	container = group->container;
	driver = container->iommu_driver;

	/* Hand the request to the IOMMU backend if it implements dma_rw */
	if (likely(driver && driver->ops->dma_rw))
		return driver->ops->dma_rw(container->iommu_data,
					   user_iova, data, len, write);

	return -ENOTTY;
}
EXPORT_SYMBOL(vfio_dma_rw);

static int vfio_register_iommu_notifier(struct vfio_group *group,
					unsigned long *events,
					struct notifier_block *nb)
+76 −0
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/rbtree.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
@@ -2305,6 +2306,80 @@ static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
	return blocking_notifier_chain_unregister(&iommu->notifier, nb);
}

/*
 * Copy one chunk between the kernel buffer @data and the user memory that
 * backs @user_iova.  A chunk never crosses the end of the vfio_dma mapping
 * that contains @user_iova; the caller loops to cover longer ranges.
 *
 * @iommu:     container IOMMU state
 * @user_iova: IOVA at which the chunk starts
 * @data:      kernel buffer
 * @count:     bytes requested; clamped to what remains of the mapping
 * @write:     true copies @data to user memory, false copies user memory
 *             to @data
 * @copied:    set to the number of bytes transferred (0 on any failure)
 *
 * Returns 0 when a non-zero number of bytes was copied, -EINVAL when no
 * mapping covers @user_iova, -EPERM when the mapping's protection does not
 * allow the access or the owning task has no mm, and -EFAULT when nothing
 * could be copied (including a caller running with an unrelated mm).
 */
static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
					 dma_addr_t user_iova, void *data,
					 size_t count, bool write,
					 size_t *copied)
{
	struct mm_struct *mm;
	unsigned long vaddr;
	struct vfio_dma *dma;
	bool kthread = current->mm == NULL;
	size_t offset;

	*copied = 0;

	dma = vfio_find_dma(iommu, user_iova, 1);
	if (!dma)
		return -EINVAL;

	if ((write && !(dma->prot & IOMMU_WRITE)) ||
			!(dma->prot & IOMMU_READ))
		return -EPERM;

	mm = get_task_mm(dma->task);

	if (!mm)
		return -EPERM;

	/*
	 * A kernel thread has no mm of its own, so temporarily adopt the
	 * mapping owner's.  Any other caller must already be running with
	 * that mm; otherwise bail out with nothing copied.
	 */
	if (kthread)
		use_mm(mm);
	else if (current->mm != mm)
		goto out;

	offset = user_iova - dma->iova;

	/* Do not run past the end of this mapping */
	if (count > dma->size - offset)
		count = dma->size - offset;

	vaddr = dma->vaddr + offset;

	/*
	 * dma->vaddr is a user-supplied address and nothing in this function
	 * runs access_ok() on it, so use the checking copy helpers rather
	 * than the __copy_*_user() variants, which require the caller to
	 * have validated the range.  A partial copy counts as a failure.
	 */
	if (write)
		*copied = copy_to_user((void __user *)vaddr, data,
				       count) ? 0 : count;
	else
		*copied = copy_from_user(data, (void __user *)vaddr,
					 count) ? 0 : count;
	if (kthread)
		unuse_mm(mm);
out:
	mmput(mm);
	return *copied ? 0 : -EFAULT;
}

/*
 * dma_rw backend op: transfer @count bytes between the kernel buffer @data
 * and the user memory mapped at @user_iova, one chunk at a time, with the
 * iommu lock held throughout.
 *
 * Returns 0 once everything has been transferred, or the error of the first
 * chunk that fails (earlier chunks remain transferred).
 */
static int vfio_iommu_type1_dma_rw(void *iommu_data, dma_addr_t user_iova,
				   void *data, size_t count, bool write)
{
	struct vfio_iommu *iommu = iommu_data;
	size_t chunk;
	int ret = 0;

	mutex_lock(&iommu->lock);

	/* Advance all three cursors by however much the last chunk moved */
	for (; count; count -= chunk, data += chunk, user_iova += chunk) {
		ret = vfio_iommu_type1_dma_rw_chunk(iommu, user_iova, data,
						    count, write, &chunk);
		if (ret)
			break;
	}

	mutex_unlock(&iommu->lock);
	return ret;
}

static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
	.name			= "vfio-iommu-type1",
	.owner			= THIS_MODULE,
@@ -2317,6 +2392,7 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
	.unpin_pages		= vfio_iommu_type1_unpin_pages,
	.register_notifier	= vfio_iommu_type1_register_notifier,
	.unregister_notifier	= vfio_iommu_type1_unregister_notifier,
	.dma_rw			= vfio_iommu_type1_dma_rw,
};

static int __init vfio_iommu_type1_init(void)
+17 −0
Original line number Diff line number Diff line
@@ -26,6 +26,9 @@
 *         operations documented below
 * @mmap: Perform mmap(2) on a region of the device file descriptor
 * @request: Request for the bus driver to release the device
 * @match: Optional device name match callback (return: 0 for no-match, >0 for
 *         match, -errno for abort (ex. match with insufficient or incorrect
 *         additional args))
 */
struct vfio_device_ops {
	char	*name;
@@ -39,6 +42,7 @@ struct vfio_device_ops {
			 unsigned long arg);
	int	(*mmap)(void *device_data, struct vm_area_struct *vma);
	void	(*request)(void *device_data, unsigned int count);
	int	(*match)(void *device_data, char *buf);
};

extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
@@ -82,6 +86,8 @@ struct vfio_iommu_driver_ops {
					     struct notifier_block *nb);
	int		(*unregister_notifier)(void *iommu_data,
					       struct notifier_block *nb);
	int		(*dma_rw)(void *iommu_data, dma_addr_t user_iova,
				  void *data, size_t count, bool write);
};

extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
@@ -94,6 +100,8 @@ extern void vfio_unregister_iommu_driver(
 */
extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
extern void vfio_group_put_external_user(struct vfio_group *group);
extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device
								*dev);
extern bool vfio_external_group_match_file(struct vfio_group *group,
					   struct file *filep);
extern int vfio_external_user_iommu_id(struct vfio_group *group);
@@ -107,6 +115,15 @@ extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
			    int npage);

/*
 * Group-based variants: the caller passes a VFIO group instead of a
 * device.  Return the number of pages (un)pinned or a negative errno.
 */
extern int vfio_group_pin_pages(struct vfio_group *group,
				unsigned long *user_iova_pfn, int npage,
				int prot, unsigned long *phys_pfn);
extern int vfio_group_unpin_pages(struct vfio_group *group,
				  unsigned long *user_iova_pfn, int npage);

/*
 * CPU copy between a kernel buffer and user memory at an IOVA.
 * Returns 0 on success or a negative errno.
 */
extern int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova,
		       void *data, size_t len, bool write);

/* each type has independent events */
enum vfio_notify_type {
	VFIO_IOMMU_NOTIFY = 0,
Loading