Commit e6c5d727 authored by Alex Williamson

Merge branches 'v5.4/vfio/alexey-tce-memory-free-v1', 'v5.4/vfio/connie-re-arrange-v2', 'v5.4/vfio/hexin-pci-reset-v3', 'v5.4/vfio/parav-mtty-uuid-v2' and 'v5.4/vfio/shameer-iova-list-v8' into v5.4/vfio/next
drivers/vfio/pci/vfio_pci.c: +13 −4
@@ -438,11 +438,20 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

	/*
	 * Try to reset the device.  The success of this is dependent on
	 * being able to lock the device, which is not always possible.
	 * Try to get the locks ourselves to prevent a deadlock. The
	 * success of this is dependent on being able to lock the device,
	 * which is not always possible.
	 * We cannot use the "try" reset interface here, as it would
	 * overwrite the previously restored configuration information.
	 */
	if (vdev->reset_works && !pci_try_reset_function(pdev))
	if (vdev->reset_works && pci_cfg_access_trylock(pdev)) {
		if (device_trylock(&pdev->dev)) {
			if (!__pci_reset_function_locked(pdev))
				vdev->needs_reset = false;
			device_unlock(&pdev->dev);
		}
		pci_cfg_access_unlock(pdev);
	}

	pci_restore_state(pdev);
out:
drivers/vfio/vfio_iommu_spapr_tce.c: +5 −4
@@ -1234,7 +1234,7 @@ release_exit:
static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	int ret = 0;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp = NULL;
@@ -1287,13 +1287,13 @@ static int tce_iommu_attach_group(void *iommu_data,
			!table_group->ops->release_ownership) {
		if (container->v2) {
			ret = -EPERM;
			goto unlock_exit;
			goto free_exit;
		}
		ret = tce_iommu_take_ownership(container, table_group);
	} else {
		if (!container->v2) {
			ret = -EPERM;
			goto unlock_exit;
			goto free_exit;
		}
		ret = tce_iommu_take_ownership_ddw(container, table_group);
		if (!tce_groups_attached(container) && !container->tables[0])
@@ -1305,10 +1305,11 @@ static int tce_iommu_attach_group(void *iommu_data,
		list_add(&tcegrp->next, &container->group_list);
	}

unlock_exit:
free_exit:
	if (ret && tcegrp)
		kfree(tcegrp);

unlock_exit:
	mutex_unlock(&container->lock);

	return ret;
drivers/vfio/vfio_iommu_type1.c: +506 −12
@@ -62,6 +62,7 @@ MODULE_PARM_DESC(dma_entry_limit,

struct vfio_iommu {
	struct list_head	domain_list;
	struct list_head	iova_list;
	struct vfio_domain	*external_domain; /* domain for external user */
	struct mutex		lock;
	struct rb_root		dma_list;
@@ -97,6 +98,12 @@ struct vfio_group {
	bool			mdev_group;	/* An mdev group */
};

struct vfio_iova {
	struct list_head	list;
	dma_addr_t		start;
	dma_addr_t		end;
};

/*
 * Guest RAM pinning working set or DMA target
 */
@@ -1031,6 +1038,27 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
	return ret;
}

/*
 * Check that a dma map request falls within a valid iova range
 */
static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
				      dma_addr_t start, dma_addr_t end)
{
	struct list_head *iova = &iommu->iova_list;
	struct vfio_iova *node;

	list_for_each_entry(node, iova, list) {
		if (start >= node->start && end <= node->end)
			return true;
	}

	/*
	 * Check for list_empty() as well since a container with
	 * a single mdev device will have an empty list.
	 */
	return list_empty(iova);
}
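From userspace, this check surfaces through VFIO_IOMMU_MAP_DMA: a mapping whose [iova, iova + size - 1] span falls outside every node on iova_list now fails with EINVAL. A minimal sketch, assuming an already-configured container fd and an invented iova value:

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vfio.h>

static int map_one_page(int container_fd, __u64 iova)
{
	struct vfio_iommu_type1_dma_map map = {
		.argsz = sizeof(map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.iova  = iova,	/* validated against the iova list */
		.size  = 4096,
	};
	void *buf = mmap(NULL, map.size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return -1;
	map.vaddr = (__u64)(unsigned long)buf;

	/* Fails with errno == EINVAL if the span is not in a valid node */
	return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
}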

static int vfio_dma_do_map(struct vfio_iommu *iommu,
			   struct vfio_iommu_type1_dma_map *map)
{
@@ -1074,6 +1102,11 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
		goto out_unlock;
	}

	if (!vfio_iommu_iova_dma_valid(iommu, iova, iova + size - 1)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	dma = kzalloc(sizeof(*dma), GFP_KERNEL);
	if (!dma) {
		ret = -ENOMEM;
@@ -1263,15 +1296,13 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
	return NULL;
}

static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
				  phys_addr_t *base)
{
	struct list_head group_resv_regions;
	struct iommu_resv_region *region, *next;
	struct iommu_resv_region *region;
	bool ret = false;

	INIT_LIST_HEAD(&group_resv_regions);
	iommu_get_group_resv_regions(group, &group_resv_regions);
	list_for_each_entry(region, &group_resv_regions, list) {
	list_for_each_entry(region, group_resv_regions, list) {
		/*
		 * The presence of any 'real' MSI regions should take
		 * precedence over the software-managed one if the
@@ -1287,8 +1318,7 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
			ret = true;
		}
	}
	list_for_each_entry_safe(region, next, &group_resv_regions, list)
		kfree(region);

	return ret;
}

@@ -1388,6 +1418,228 @@ static int vfio_mdev_iommu_device(struct device *dev, void *data)
	return 0;
}

/*
 * This is a helper function to insert an address range into the iova list.
 * The list is initially created with a single entry corresponding to
 * the IOMMU domain geometry to which the device group is attached.
 * The list aperture gets modified when a new domain is added to the
 * container if the new aperture doesn't conflict with the current one
 * or with any existing dma mappings. The list is also modified to
 * exclude any reserved regions associated with the device group.
 */
static int vfio_iommu_iova_insert(struct list_head *head,
				  dma_addr_t start, dma_addr_t end)
{
	struct vfio_iova *region;

	region = kmalloc(sizeof(*region), GFP_KERNEL);
	if (!region)
		return -ENOMEM;

	INIT_LIST_HEAD(&region->list);
	region->start = start;
	region->end = end;

	list_add_tail(&region->list, head);
	return 0;
}

/*
 * Check whether the new iommu aperture conflicts with the existing
 * aperture or with any existing dma mappings.
 */
static bool vfio_iommu_aper_conflict(struct vfio_iommu *iommu,
				     dma_addr_t start, dma_addr_t end)
{
	struct vfio_iova *first, *last;
	struct list_head *iova = &iommu->iova_list;

	if (list_empty(iova))
		return false;

	/* Disjoint sets, return conflict */
	first = list_first_entry(iova, struct vfio_iova, list);
	last = list_last_entry(iova, struct vfio_iova, list);
	if (start > last->end || end < first->start)
		return true;

	/* Check for any existing dma mappings below the new start */
	if (start > first->start) {
		if (vfio_find_dma(iommu, first->start, start - first->start))
			return true;
	}

	/* Check for any existing dma mappings beyond the new end */
	if (end < last->end) {
		if (vfio_find_dma(iommu, end + 1, last->end - end))
			return true;
	}

	return false;
}

/*
 * Resize the iommu iova aperture window. This is called only if the new
 * aperture has no conflict with the existing aperture or dma mappings.
 */
static int vfio_iommu_aper_resize(struct list_head *iova,
				  dma_addr_t start, dma_addr_t end)
{
	struct vfio_iova *node, *next;

	if (list_empty(iova))
		return vfio_iommu_iova_insert(iova, start, end);

	/* Adjust iova list start */
	list_for_each_entry_safe(node, next, iova, list) {
		if (start < node->start)
			break;
		if (start >= node->start && start < node->end) {
			node->start = start;
			break;
		}
		/* Delete nodes before new start */
		list_del(&node->list);
		kfree(node);
	}

	/* Adjust iova list end */
	list_for_each_entry_safe(node, next, iova, list) {
		if (end > node->end)
			continue;
		if (end > node->start && end <= node->end) {
			node->end = end;
			continue;
		}
		/* Delete nodes after new end */
		list_del(&node->list);
		kfree(node);
	}

	return 0;
}
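As a worked example of the clamping above, re-implemented standalone with arrays instead of list_head and with invented values: resizing the two-node list {[0x0, 0xffff], [0x20000, 0xfffff]} to the aperture [0x1000, 0x8ffff] trims the first node's start and clamps the second node's end.

#include <stdio.h>
#include <stdint.h>

struct range { uint64_t start, end; int live; };

int main(void)
{
	struct range list[] = { { 0x0, 0xffff, 1 }, { 0x20000, 0xfffff, 1 } };
	uint64_t ap_start = 0x1000, ap_end = 0x8ffff;
	int i;

	for (i = 0; i < 2; i++) {
		/* drop nodes entirely outside the new aperture */
		if (list[i].end < ap_start || list[i].start > ap_end) {
			list[i].live = 0;
			continue;
		}
		/* clamp nodes straddling the new limits */
		if (list[i].start < ap_start)
			list[i].start = ap_start;
		if (list[i].end > ap_end)
			list[i].end = ap_end;
	}

	/* prints [0x1000, 0xffff] and [0x20000, 0x8ffff] */
	for (i = 0; i < 2; i++)
		if (list[i].live)
			printf("[0x%llx, 0x%llx]\n",
			       (unsigned long long)list[i].start,
			       (unsigned long long)list[i].end);
	return 0;
}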

/*
 * Check whether any reserved region conflicts with existing dma mappings
 */
static bool vfio_iommu_resv_conflict(struct vfio_iommu *iommu,
				     struct list_head *resv_regions)
{
	struct iommu_resv_region *region;

	/* Check for conflict with existing dma mappings */
	list_for_each_entry(region, resv_regions, list) {
		if (region->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		if (vfio_find_dma(iommu, region->start, region->length))
			return true;
	}

	return false;
}

/*
 * Check iova regions for overlap with reserved regions and
 * exclude them from the valid iommu iova range
 */
static int vfio_iommu_resv_exclude(struct list_head *iova,
				   struct list_head *resv_regions)
{
	struct iommu_resv_region *resv;
	struct vfio_iova *n, *next;

	list_for_each_entry(resv, resv_regions, list) {
		phys_addr_t start, end;

		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
			continue;

		start = resv->start;
		end = resv->start + resv->length - 1;

		list_for_each_entry_safe(n, next, iova, list) {
			int ret = 0;

			/* No overlap */
			if (start > n->end || end < n->start)
				continue;
			/*
			 * Insert a new node if the current node overlaps with
			 * the reserved region, to exclude that range from the
			 * valid iova space. Note that the new node is inserted
			 * before the current node and finally the current node
			 * is deleted, keeping the list updated and sorted.
			 */
			if (start > n->start)
				ret = vfio_iommu_iova_insert(&n->list, n->start,
							     start - 1);
			if (!ret && end < n->end)
				ret = vfio_iommu_iova_insert(&n->list, end + 1,
							     n->end);
			if (ret)
				return ret;

			list_del(&n->list);
			kfree(n);
		}
	}

	if (list_empty(iova))
		return -EINVAL;

	return 0;
}
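The interesting case above is a reserved window landing in the middle of a node, which splits it in two. A standalone illustration of that interval arithmetic, using the x86 MSI window as an invented example value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t node_start = 0x0, node_end = 0xffffffff;	/* valid iova node */
	uint64_t resv_start = 0xfee00000, resv_end = 0xfeefffff;	/* reserved */

	if (resv_start > node_end || resv_end < node_start) {
		printf("no overlap, node unchanged\n");
		return 0;
	}
	if (resv_start > node_start)	/* keep the part below the window */
		printf("low node:  [0x%llx, 0x%llx]\n",
		       (unsigned long long)node_start,
		       (unsigned long long)(resv_start - 1));
	if (resv_end < node_end)	/* keep the part above the window */
		printf("high node: [0x%llx, 0x%llx]\n",
		       (unsigned long long)(resv_end + 1),
		       (unsigned long long)node_end);
	/* the original node is then deleted, keeping the list sorted */
	return 0;
}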

static void vfio_iommu_resv_free(struct list_head *resv_regions)
{
	struct iommu_resv_region *n, *next;

	list_for_each_entry_safe(n, next, resv_regions, list) {
		list_del(&n->list);
		kfree(n);
	}
}

static void vfio_iommu_iova_free(struct list_head *iova)
{
	struct vfio_iova *n, *next;

	list_for_each_entry_safe(n, next, iova, list) {
		list_del(&n->list);
		kfree(n);
	}
}

static int vfio_iommu_iova_get_copy(struct vfio_iommu *iommu,
				    struct list_head *iova_copy)
{
	struct list_head *iova = &iommu->iova_list;
	struct vfio_iova *n;
	int ret;

	list_for_each_entry(n, iova, list) {
		ret = vfio_iommu_iova_insert(iova_copy, n->start, n->end);
		if (ret)
			goto out_free;
	}

	return 0;

out_free:
	vfio_iommu_iova_free(iova_copy);
	return ret;
}

static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
					struct list_head *iova_copy)
{
	struct list_head *iova = &iommu->iova_list;

	vfio_iommu_iova_free(iova);

	list_splice_tail(iova_copy, iova);
}

static int vfio_iommu_type1_attach_group(void *iommu_data,
					 struct iommu_group *iommu_group)
{
@@ -1398,6 +1650,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
	int ret;
	bool resv_msi, msi_remap;
	phys_addr_t resv_msi_base;
	struct iommu_domain_geometry geo;
	LIST_HEAD(iova_copy);
	LIST_HEAD(group_resv_regions);

	mutex_lock(&iommu->lock);

@@ -1474,7 +1729,43 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
	if (ret)
		goto out_domain;

	resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base);
	/* Get aperture info */
	iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);

	if (vfio_iommu_aper_conflict(iommu, geo.aperture_start,
				     geo.aperture_end)) {
		ret = -EINVAL;
		goto out_detach;
	}

	ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions);
	if (ret)
		goto out_detach;

	if (vfio_iommu_resv_conflict(iommu, &group_resv_regions)) {
		ret = -EINVAL;
		goto out_detach;
	}

	/*
	 * We don't want to work on the original iova list as the list
	 * gets modified and in case of failure we have to retain the
	 * original list. Get a copy here.
	 */
	ret = vfio_iommu_iova_get_copy(iommu, &iova_copy);
	if (ret)
		goto out_detach;

	ret = vfio_iommu_aper_resize(&iova_copy, geo.aperture_start,
				     geo.aperture_end);
	if (ret)
		goto out_detach;

	ret = vfio_iommu_resv_exclude(&iova_copy, &group_resv_regions);
	if (ret)
		goto out_detach;

	resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base);

	INIT_LIST_HEAD(&domain->group_list);
	list_add(&group->next, &domain->group_list);
@@ -1507,8 +1798,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
				list_add(&group->next, &d->group_list);
				iommu_domain_free(domain->domain);
				kfree(domain);
				mutex_unlock(&iommu->lock);
				return 0;
				goto done;
			}

			ret = vfio_iommu_attach_group(domain, group);
@@ -1531,8 +1821,11 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
	}

	list_add(&domain->next, &iommu->domain_list);

done:
	/* Delete the old iova list and insert the new one */
	vfio_iommu_iova_insert_copy(iommu, &iova_copy);
	mutex_unlock(&iommu->lock);
	vfio_iommu_resv_free(&group_resv_regions);

	return 0;

@@ -1540,6 +1833,8 @@ out_detach:
	vfio_iommu_detach_group(domain, group);
out_domain:
	iommu_domain_free(domain->domain);
	vfio_iommu_iova_free(&iova_copy);
	vfio_iommu_resv_free(&group_resv_regions);
out_free:
	kfree(domain);
	kfree(group);
@@ -1595,12 +1890,93 @@ static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu)
	WARN_ON(iommu->notifier.head);
}

/*
 * Called when a domain is removed in detach. It is possible that
 * the removed domain decided the iova aperture window. Modify the
 * iova aperture to the smallest window among the remaining domains.
 */
static void vfio_iommu_aper_expand(struct vfio_iommu *iommu,
				   struct list_head *iova_copy)
{
	struct vfio_domain *domain;
	struct iommu_domain_geometry geo;
	struct vfio_iova *node;
	dma_addr_t start = 0;
	dma_addr_t end = (dma_addr_t)~0;

	if (list_empty(iova_copy))
		return;

	list_for_each_entry(domain, &iommu->domain_list, next) {
		iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY,
				      &geo);
		if (geo.aperture_start > start)
			start = geo.aperture_start;
		if (geo.aperture_end < end)
			end = geo.aperture_end;
	}

	/* Modify aperture limits. The new aperture is either the same or bigger */
	node = list_first_entry(iova_copy, struct vfio_iova, list);
	node->start = start;
	node = list_last_entry(iova_copy, struct vfio_iova, list);
	node->end = end;
}
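For instance (invented geometries): with two domains remaining whose apertures are [0x1000, 0xffffffff] and [0x0, 0x7fffffff], the loop above computes their intersection, sketched standalone below; removing a third, more restrictive domain can only leave this window the same or wider.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* apertures of the domains still attached */
	uint64_t starts[] = { 0x1000, 0x0 };
	uint64_t ends[]   = { 0xffffffffULL, 0x7fffffffULL };
	uint64_t start = 0, end = ~0ULL;
	int i;

	for (i = 0; i < 2; i++) {
		if (starts[i] > start)
			start = starts[i];
		if (ends[i] < end)
			end = ends[i];
	}
	/* prints "aperture: [0x1000, 0x7fffffff]" */
	printf("aperture: [0x%llx, 0x%llx]\n",
	       (unsigned long long)start, (unsigned long long)end);
	return 0;
}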

/*
 * Called when a group is detached. The reserved regions for that
 * group can become part of the valid iova space again. But since
 * reserved regions may be duplicated among groups, populate the
 * valid iova regions list again.
 */
static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu,
				   struct list_head *iova_copy)
{
	struct vfio_domain *d;
	struct vfio_group *g;
	struct vfio_iova *node;
	dma_addr_t start, end;
	LIST_HEAD(resv_regions);
	int ret;

	if (list_empty(iova_copy))
		return -EINVAL;

	list_for_each_entry(d, &iommu->domain_list, next) {
		list_for_each_entry(g, &d->group_list, next) {
			ret = iommu_get_group_resv_regions(g->iommu_group,
							   &resv_regions);
			if (ret)
				goto done;
		}
	}

	node = list_first_entry(iova_copy, struct vfio_iova, list);
	start = node->start;
	node = list_last_entry(iova_copy, struct vfio_iova, list);
	end = node->end;

	/* Purge the iova list and create a new one */
	vfio_iommu_iova_free(iova_copy);

	ret = vfio_iommu_aper_resize(iova_copy, start, end);
	if (ret)
		goto done;

	/* Exclude current reserved regions from iova ranges */
	ret = vfio_iommu_resv_exclude(iova_copy, &resv_regions);
done:
	vfio_iommu_resv_free(&resv_regions);
	return ret;
}

static void vfio_iommu_type1_detach_group(void *iommu_data,
					  struct iommu_group *iommu_group)
{
	struct vfio_iommu *iommu = iommu_data;
	struct vfio_domain *domain;
	struct vfio_group *group;
	LIST_HEAD(iova_copy);

	mutex_lock(&iommu->lock);

@@ -1623,6 +1999,13 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
		}
	}

	/*
	 * Get a copy of the iova list. This will be used to update
	 * and then replace the current one later. Note that we will
	 * leave the original list as it is if the update fails.
	 */
	vfio_iommu_iova_get_copy(iommu, &iova_copy);

	list_for_each_entry(domain, &iommu->domain_list, next) {
		group = find_iommu_group(domain, iommu_group);
		if (!group)
@@ -1648,10 +2031,16 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
			iommu_domain_free(domain->domain);
			list_del(&domain->next);
			kfree(domain);
			vfio_iommu_aper_expand(iommu, &iova_copy);
		}
		break;
	}

	if (!vfio_iommu_resv_refresh(iommu, &iova_copy))
		vfio_iommu_iova_insert_copy(iommu, &iova_copy);
	else
		vfio_iommu_iova_free(&iova_copy);

detach_group_done:
	mutex_unlock(&iommu->lock);
}
@@ -1679,6 +2068,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
	}

	INIT_LIST_HEAD(&iommu->domain_list);
	INIT_LIST_HEAD(&iommu->iova_list);
	iommu->dma_list = RB_ROOT;
	iommu->dma_avail = dma_entry_limit;
	mutex_init(&iommu->lock);
@@ -1722,6 +2112,9 @@ static void vfio_iommu_type1_release(void *iommu_data)
		list_del(&domain->next);
		kfree(domain);
	}

	vfio_iommu_iova_free(&iommu->iova_list);

	kfree(iommu);
}

@@ -1742,6 +2135,73 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
	return ret;
}

static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps,
		 struct vfio_iommu_type1_info_cap_iova_range *cap_iovas,
		 size_t size)
{
	struct vfio_info_cap_header *header;
	struct vfio_iommu_type1_info_cap_iova_range *iova_cap;

	header = vfio_info_cap_add(caps, size,
				   VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1);
	if (IS_ERR(header))
		return PTR_ERR(header);

	iova_cap = container_of(header,
				struct vfio_iommu_type1_info_cap_iova_range,
				header);
	iova_cap->nr_iovas = cap_iovas->nr_iovas;
	memcpy(iova_cap->iova_ranges, cap_iovas->iova_ranges,
	       cap_iovas->nr_iovas * sizeof(*cap_iovas->iova_ranges));
	return 0;
}

static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
				      struct vfio_info_cap *caps)
{
	struct vfio_iommu_type1_info_cap_iova_range *cap_iovas;
	struct vfio_iova *iova;
	size_t size;
	int iovas = 0, i = 0, ret;

	mutex_lock(&iommu->lock);

	list_for_each_entry(iova, &iommu->iova_list, list)
		iovas++;

	if (!iovas) {
		/*
		 * Return 0 as a container with a single mdev device
		 * will have an empty list
		 */
		ret = 0;
		goto out_unlock;
	}

	size = sizeof(*cap_iovas) + (iovas * sizeof(*cap_iovas->iova_ranges));

	cap_iovas = kzalloc(size, GFP_KERNEL);
	if (!cap_iovas) {
		ret = -ENOMEM;
		goto out_unlock;
	}

	cap_iovas->nr_iovas = iovas;

	list_for_each_entry(iova, &iommu->iova_list, list) {
		cap_iovas->iova_ranges[i].start = iova->start;
		cap_iovas->iova_ranges[i].end = iova->end;
		i++;
	}

	ret = vfio_iommu_iova_add_cap(caps, cap_iovas, size);

	kfree(cap_iovas);
out_unlock:
	mutex_unlock(&iommu->lock);
	return ret;
}

static long vfio_iommu_type1_ioctl(void *iommu_data,
				   unsigned int cmd, unsigned long arg)
{
@@ -1763,19 +2223,53 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
		}
	} else if (cmd == VFIO_IOMMU_GET_INFO) {
		struct vfio_iommu_type1_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		unsigned long capsz;
		int ret;

		minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);

		/* For backward compatibility, cannot require this */
		capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		if (info.argsz >= capsz) {
			minsz = capsz;
			info.cap_offset = 0; /* output, no-recopy necessary */
		}

		info.flags = VFIO_IOMMU_INFO_PGSIZES;

		info.iova_pgsizes = vfio_pgsize_bitmap(iommu);

		ret = vfio_iommu_iova_build_caps(iommu, &caps);
		if (ret)
			return ret;

		if (caps.size) {
			info.flags |= VFIO_IOMMU_INFO_CAPS;

			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						sizeof(info), caps.buf,
						caps.size)) {
					kfree(caps.buf);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

include/uapi/linux/vfio.h: +51 −20
@@ -295,15 +295,38 @@ struct vfio_region_info_cap_type {
	__u32 subtype;	/* type specific */
};

/*
 * List of region types, global per bus driver.
 * If you introduce a new type, please add it here.
 */

/* PCI region type containing a PCI vendor part */
#define VFIO_REGION_TYPE_PCI_VENDOR_TYPE	(1 << 31)
#define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
#define VFIO_REGION_TYPE_GFX                    (1)
#define VFIO_REGION_TYPE_CCW			(2)

/* sub-types for VFIO_REGION_TYPE_PCI_* */

/* 8086 Vendor sub-types */
/* 8086 vendor PCI sub-types */
#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)

#define VFIO_REGION_TYPE_GFX                    (1)
/* 10de vendor PCI sub-types */
/*
 * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
 */
#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM	(1)

/* 1014 vendor PCI sub-types */
/*
 * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
 * to do TLB invalidation on a GPU.
 */
#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD	(1)

/* sub-types for VFIO_REGION_TYPE_GFX */
#define VFIO_REGION_SUBTYPE_GFX_EDID            (1)

/**
@@ -353,25 +376,9 @@ struct vfio_region_gfx_edid {
#define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
};

#define VFIO_REGION_TYPE_CCW			(2)
/* ccw sub-types */
/* sub-types for VFIO_REGION_TYPE_CCW */
#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)

/*
 * 10de vendor sub-type
 *
 * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
 */
#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM	(1)

/*
 * 1014 vendor sub-type
 *
 * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
 * to do TLB invalidation on a GPU.
 */
#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD	(1)

/*
 * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
 * which allows direct access to non-MSIX registers which happened to be within
@@ -714,7 +721,31 @@ struct vfio_iommu_type1_info {
	__u32	argsz;
	__u32	flags;
#define VFIO_IOMMU_INFO_PGSIZES (1 << 0)	/* supported page sizes info */
#define VFIO_IOMMU_INFO_CAPS	(1 << 1)	/* Info supports caps */
	__u64	iova_pgsizes;	/* Bitmap of supported page sizes */
	__u32   cap_offset;	/* Offset within info struct of first cap */
};

/*
 * The IOVA capability allows reporting the valid IOVA range(s)
 * excluding any non-relaxable reserved regions exposed by
 * devices attached to the container. Any DMA map attempt
 * outside the valid iova ranges will return an error.
 *
 * The structures below define version 1 of this capability.
 */
#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE  1

struct vfio_iova_range {
	__u64	start;
	__u64	end;
};

struct vfio_iommu_type1_info_cap_iova_range {
	struct	vfio_info_cap_header header;
	__u32	nr_iovas;
	__u32	reserved;
	struct	vfio_iova_range iova_ranges[];
};

#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
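On the consumer side, userspace discovers these ranges by calling VFIO_IOMMU_GET_INFO twice (once to learn the required argsz, once with a large enough buffer) and walking the capability chain. A minimal sketch, assuming an already-configured container fd and a linux/vfio.h that carries this capability:

#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static void print_valid_iovas(int container_fd)
{
	struct vfio_iommu_type1_info probe = { .argsz = sizeof(probe) };
	struct vfio_iommu_type1_info *info;
	struct vfio_info_cap_header *hdr;

	/* First call reports the argsz needed to hold the cap chain */
	if (ioctl(container_fd, VFIO_IOMMU_GET_INFO, &probe))
		return;

	info = calloc(1, probe.argsz);
	if (!info)
		return;
	info->argsz = probe.argsz;

	if (ioctl(container_fd, VFIO_IOMMU_GET_INFO, info) ||
	    !(info->flags & VFIO_IOMMU_INFO_CAPS) || !info->cap_offset)
		goto out;

	/* cap_offset and each header's .next are offsets from &info */
	for (hdr = (void *)info + info->cap_offset; ;
	     hdr = (void *)info + hdr->next) {
		if (hdr->id == VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE) {
			struct vfio_iommu_type1_info_cap_iova_range *cap =
				(void *)hdr;
			__u32 i;

			for (i = 0; i < cap->nr_iovas; i++)
				printf("valid iova: [0x%llx, 0x%llx]\n",
				       (unsigned long long)cap->iova_ranges[i].start,
				       (unsigned long long)cap->iova_ranges[i].end);
		}
		if (!hdr->next)
			break;
	}
out:
	free(info);
}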
samples/vfio-mdev/mtty.c: +8 −31
@@ -152,20 +152,9 @@ static const struct file_operations vd_fops = {

/* function prototypes */

static int mtty_trigger_interrupt(const guid_t *uuid);
static int mtty_trigger_interrupt(struct mdev_state *mdev_state);

/* Helper functions */
static struct mdev_state *find_mdev_state_by_uuid(const guid_t *uuid)
{
	struct mdev_state *mds;

	list_for_each_entry(mds, &mdev_devices_list, next) {
		if (guid_equal(mdev_uuid(mds->mdev), uuid))
			return mds;
	}

	return NULL;
}

static void dump_buffer(u8 *buf, uint32_t count)
{
@@ -337,8 +326,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
				pr_err("Serial port %d: Fifo level trigger\n",
					index);
#endif
				mtty_trigger_interrupt(
						mdev_uuid(mdev_state->mdev));
				mtty_trigger_interrupt(mdev_state);
			}
		} else {
#if defined(DEBUG_INTR)
@@ -352,8 +340,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
			 */
			if (mdev_state->s[index].uart_reg[UART_IER] &
								UART_IER_RLSI)
				mtty_trigger_interrupt(
						mdev_uuid(mdev_state->mdev));
				mtty_trigger_interrupt(mdev_state);
		}
		mutex_unlock(&mdev_state->rxtx_lock);
		break;
@@ -372,8 +359,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
				pr_err("Serial port %d: IER_THRI write\n",
					index);
#endif
				mtty_trigger_interrupt(
						mdev_uuid(mdev_state->mdev));
				mtty_trigger_interrupt(mdev_state);
			}

			mutex_unlock(&mdev_state->rxtx_lock);
@@ -444,7 +430,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
#if defined(DEBUG_INTR)
			pr_err("Serial port %d: MCR_OUT2 write\n", index);
#endif
			mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev));
			mtty_trigger_interrupt(mdev_state);
		}

		if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) &&
@@ -452,7 +438,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
#if defined(DEBUG_INTR)
			pr_err("Serial port %d: MCR RTS/DTR write\n", index);
#endif
			mtty_trigger_interrupt(mdev_uuid(mdev_state->mdev));
			mtty_trigger_interrupt(mdev_state);
		}
		break;

@@ -503,8 +489,7 @@ static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state,
#endif
			if (mdev_state->s[index].uart_reg[UART_IER] &
							 UART_IER_THRI)
				mtty_trigger_interrupt(
					mdev_uuid(mdev_state->mdev));
				mtty_trigger_interrupt(mdev_state);
		}
		mutex_unlock(&mdev_state->rxtx_lock);

@@ -1028,17 +1013,9 @@ static int mtty_set_irqs(struct mdev_device *mdev, uint32_t flags,
	return ret;
}

static int mtty_trigger_interrupt(const guid_t *uuid)
static int mtty_trigger_interrupt(struct mdev_state *mdev_state)
{
	int ret = -1;
	struct mdev_state *mdev_state;

	mdev_state = find_mdev_state_by_uuid(uuid);

	if (!mdev_state) {
		pr_info("%s: mdev not found\n", __func__);
		return -EINVAL;
	}

	if ((mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX) &&
	    (!mdev_state->msi_evtfd))