Commit c2dc4c07 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull vhost fixes from Michael Tsirkin:
 "Fixes all over the place.

  A new UAPI is borderline: can also be considered a new feature but
  also seems to be the only way we could come up with to fix addressing
  for userspace - and it seems important to switch to it now before
  userspace making assumptions about addressing ability of devices is
  set in stone"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vdpasim: allow to assign a MAC address
  vdpasim: fix MAC address configuration
  vdpa: handle irq bypass register failure case
  vdpa_sim: Fix DMA mask
  Revert "vhost-vdpa: fix page pinning leakage in error path"
  vdpa/mlx5: Fix error return in map_direct_mr()
  vhost_vdpa: Return -EFAULT if copy_from_user() fails
  vdpa_sim: implement get_iova_range()
  vhost: vdpa: report iova range
  vdpa: introduce config op to get valid iova range
parents 53760f9b 0c86d774
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -239,7 +239,6 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
	u64 paend;
	struct scatterlist *sg;
	struct device *dma = mvdev->mdev->device;
	int ret;

	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
	     map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
@@ -277,8 +276,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
done:
	mr->log_size = log_entity_size;
	mr->nsg = nsg;
	ret = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	if (!ret)
	err = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
	if (!err)
		goto err_map;

	err = create_direct_mr(mvdev, mr);
+30 −3
Original line number Diff line number Diff line
@@ -38,6 +38,10 @@ static int batch_mapping = 1;
module_param(batch_mapping, int, 0444);
MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");

static char *macaddr;
module_param(macaddr, charp, 0);
MODULE_PARM_DESC(macaddr, "Ethernet MAC address");

struct vdpasim_virtqueue {
	struct vringh vring;
	struct vringh_kiov iov;
@@ -60,7 +64,8 @@ struct vdpasim_virtqueue {

static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) |
			      (1ULL << VIRTIO_F_VERSION_1)  |
			      (1ULL << VIRTIO_F_ACCESS_PLATFORM);
			      (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
			      (1ULL << VIRTIO_NET_F_MAC);

/* State of each vdpasim device */
struct vdpasim {
@@ -361,7 +366,9 @@ static struct vdpasim *vdpasim_create(void)
	spin_lock_init(&vdpasim->iommu_lock);

	dev = &vdpasim->vdpa.dev;
	dev->coherent_dma_mask = DMA_BIT_MASK(64);
	dev->dma_mask = &dev->coherent_dma_mask;
	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
		goto err_iommu;
	set_dma_ops(dev, &vdpasim_dma_ops);

	vdpasim->iommu = vhost_iotlb_alloc(2048, 0);
@@ -372,7 +379,15 @@ static struct vdpasim *vdpasim_create(void)
	if (!vdpasim->buffer)
		goto err_iommu;

	if (macaddr) {
		mac_pton(macaddr, vdpasim->config.mac);
		if (!is_valid_ether_addr(vdpasim->config.mac)) {
			ret = -EADDRNOTAVAIL;
			goto err_iommu;
		}
	} else {
		eth_random_addr(vdpasim->config.mac);
	}

	vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu);
	vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu);
@@ -574,6 +589,16 @@ static u32 vdpasim_get_generation(struct vdpa_device *vdpa)
	return vdpasim->generation;
}

static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa)
{
	struct vdpa_iova_range range = {
		.first = 0ULL,
		.last = ULLONG_MAX,
	};

	return range;
}

static int vdpasim_set_map(struct vdpa_device *vdpa,
			   struct vhost_iotlb *iotlb)
{
@@ -657,6 +682,7 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = {
	.get_config             = vdpasim_get_config,
	.set_config             = vdpasim_set_config,
	.get_generation         = vdpasim_get_generation,
	.get_iova_range         = vdpasim_get_iova_range,
	.dma_map                = vdpasim_dma_map,
	.dma_unmap              = vdpasim_dma_unmap,
	.free                   = vdpasim_free,
@@ -683,6 +709,7 @@ static const struct vdpa_config_ops vdpasim_net_batch_config_ops = {
	.get_config             = vdpasim_get_config,
	.set_config             = vdpasim_set_config,
	.get_generation         = vdpasim_get_generation,
	.get_iova_range         = vdpasim_get_iova_range,
	.set_map                = vdpasim_set_map,
	.free                   = vdpasim_free,
};
+97 −76
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@ struct vhost_vdpa {
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
	struct vdpa_iova_range range;
};

static DEFINE_IDA(vhost_vdpa_ida);
@@ -103,6 +104,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(ret))
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret =  %d\n",
			 qid, vq->call_ctx.producer.token, ret);
}

static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
@@ -337,6 +341,16 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
	return 0;
}

static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	return copy_to_user(argp, &range, sizeof(range));
}

static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
@@ -421,12 +435,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r;
	long r = 0;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		r = copy_from_user(&features, featurep, sizeof(features));
		if (r)
			return r;
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
@@ -469,7 +482,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		r = copy_to_user(featurep, &features, sizeof(features));
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_IOVA_RANGE:
		r = vhost_vdpa_get_iova_range(v, argp);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
@@ -588,19 +605,25 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct page **page_list;
	struct vm_area_struct **vmas;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long map_pfn, last_pfn = 0;
	unsigned long npages, lock_limit;
	unsigned long i, nmap = 0;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long locked, lock_limit, pinned, i;
	u64 iova = msg->iova;
	long pinned;
	int ret = 0;

	if (msg->iova < v->range.first ||
	    msg->iova + msg->size - 1 > v->range.last)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (msg->perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

@@ -608,86 +631,61 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
	if (!npages)
		return -EINVAL;

	page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
	vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
			      GFP_KERNEL);
	if (!page_list || !vmas) {
		ret = -ENOMEM;
		goto free;
	}

	mmap_read_lock(dev->mm);

	locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
				page_list, vmas);
	if (npages != pinned) {
		if (pinned < 0) {
			ret = pinned;
		} else {
			unpin_user_pages(page_list, pinned);
	if (locked > lock_limit) {
		ret = -ENOMEM;
		}
		goto unlock;
		goto out;
	}

	cur_base = msg->uaddr & PAGE_MASK;
	iova &= PAGE_MASK;

	while (npages) {
		pinned = min_t(unsigned long, npages, list_size);
		ret = pin_user_pages(cur_base, pinned,
				     gup_flags, page_list, NULL);
		if (ret != pinned)
			goto out;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

	/* One more iteration to avoid extra vdpa_map() call out of loop. */
	for (i = 0; i <= npages; i++) {
		unsigned long this_pfn;
		for (i = 0; i < ret; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

		/* The last chunk may have no valid PFN next to it */
		this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;

		if (last_pfn && (this_pfn == -1UL ||
				 this_pfn != last_pfn + 1)) {
			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Pin a contiguous chunk of memory */
			csize = last_pfn - map_pfn + 1;
			ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
				if (vhost_vdpa_map(v, iova, csize,
						   map_pfn << PAGE_SHIFT,
					     msg->perm);
			if (ret) {
				/*
				 * Unpin the rest chunks of memory on the
				 * flight with no corresponding vdpa_map()
				 * calls having been made yet. On the other
				 * hand, vdpa_unmap() in the failure path
				 * is in charge of accounting the number of
				 * pinned pages for its own.
				 * This asymmetrical pattern of accounting
				 * is for efficiency to pin all pages at
				 * once, while there is no other callsite
				 * of vdpa_map() than here above.
				 */
				unpin_user_pages(&page_list[nmap],
						 npages - nmap);
						   msg->perm))
					goto out;
			}
			atomic64_add(csize, &dev->mm->pinned_vm);
			nmap += csize;
			iova += csize << PAGE_SHIFT;
				map_pfn = this_pfn;
				iova += csize;
			}

			last_pfn = this_pfn;
		}

	WARN_ON(nmap != npages);
		cur_base += ret << PAGE_SHIFT;
		npages -= ret;
	}

	/* Pin the rest chunk */
	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
			     map_pfn << PAGE_SHIFT, msg->perm);
out:
	if (ret)
	if (ret) {
		vhost_vdpa_unmap(v, msg->iova, msg->size);
unlock:
		atomic64_sub(npages, &dev->mm->pinned_vm);
	}
	mmap_read_unlock(dev->mm);
free:
	kvfree(vmas);
	kvfree(page_list);
	free_page((unsigned long)page_list);
	return ret;
}

@@ -783,6 +781,27 @@ static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
	v->domain = NULL;
}

static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_iova_range *range = &v->range;
	struct iommu_domain_geometry geo;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->get_iova_range) {
		*range = ops->get_iova_range(vdpa);
	} else if (v->domain &&
		   !iommu_domain_get_attr(v->domain,
		   DOMAIN_ATTR_GEOMETRY, &geo) &&
		   geo.force_aperture) {
		range->first = geo.aperture_start;
		range->last = geo.aperture_end;
	} else {
		range->first = 0;
		range->last = ULLONG_MAX;
	}
}

static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
@@ -823,6 +842,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
	if (r)
		goto err_init_iotlb;

	vhost_vdpa_set_iova_range(v);

	filep->private_data = v;

	return 0;
+15 −0
Original line number Diff line number Diff line
@@ -52,6 +52,16 @@ struct vdpa_device {
	int nvqs;
};

/**
 * vDPA IOVA range - the IOVA range support by the device
 * @first: start of the IOVA range
 * @last: end of the IOVA range
 */
struct vdpa_iova_range {
	u64 first;
	u64 last;
};

/**
 * vDPA_config_ops - operations for configuring a vDPA device.
 * Note: vDPA device drivers are required to implement all of the
@@ -151,6 +161,10 @@ struct vdpa_device {
 * @get_generation:		Get device config generation (optional)
 *				@vdev: vdpa device
 *				Returns u32: device generation
 * @get_iova_range:		Get supported iova range (optional)
 *				@vdev: vdpa device
 *				Returns the iova range supported by
 *				the device.
 * @set_map:			Set device memory mapping (optional)
 *				Needed for device that using device
 *				specific DMA translation (on-chip IOMMU)
@@ -216,6 +230,7 @@ struct vdpa_config_ops {
	void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
			   const void *buf, unsigned int len);
	u32 (*get_generation)(struct vdpa_device *vdev);
	struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev);

	/* DMA ops */
	int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
+4 −0
Original line number Diff line number Diff line
@@ -146,4 +146,8 @@

/* Set event fd for config interrupt*/
#define VHOST_VDPA_SET_CONFIG_CALL	_IOW(VHOST_VIRTIO, 0x77, int)

/* Get the valid iova range */
#define VHOST_VDPA_GET_IOVA_RANGE	_IOR(VHOST_VIRTIO, 0x78, \
					     struct vhost_vdpa_iova_range)
#endif
Loading