Commit 2a5a3148 authored by Jean-Philippe Brucker's avatar Jean-Philippe Brucker Committed by Michael S. Tsirkin
Browse files

iommu/virtio: Add probe request



When the device offers the probe feature, send a probe request for each
device managed by the IOMMU. Extract RESV_MEM information. When we
encounter a MSI doorbell region, set it up as a IOMMU_RESV_MSI region.
This will tell other subsystems that there is no need to map the MSI
doorbell in the virtio-iommu, because MSIs bypass it.

Tested-by: default avatarBharat Bhushan <bharat.bhushan@nxp.com>
Tested-by: default avatarEric Auger <eric.auger@redhat.com>
Reviewed-by: default avatarEric Auger <eric.auger@redhat.com>
Signed-off-by: default avatarJean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: default avatarMichael S. Tsirkin <mst@redhat.com>
parent edcd69ab
Loading
Loading
Loading
Loading
+151 −6
Original line number Diff line number Diff line
@@ -46,6 +46,7 @@ struct viommu_dev {
	struct iommu_domain_geometry	geometry;
	u64				pgsize_bitmap;
	u8				domain_bits;
	u32				probe_size;
};

struct viommu_mapping {
@@ -67,8 +68,10 @@ struct viommu_domain {
};

struct viommu_endpoint {
	struct device			*dev;
	struct viommu_dev		*viommu;
	struct viommu_domain		*vdomain;
	struct list_head		resv_regions;
};

struct viommu_request {
@@ -119,6 +122,9 @@ static off_t viommu_get_write_desc_offset(struct viommu_dev *viommu,
{
	size_t tail_size = sizeof(struct virtio_iommu_req_tail);

	if (req->type == VIRTIO_IOMMU_T_PROBE)
		return len - viommu->probe_size - tail_size;

	return len - tail_size;
}

@@ -393,6 +399,110 @@ static int viommu_replay_mappings(struct viommu_domain *vdomain)
	return ret;
}

static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
			       struct virtio_iommu_probe_resv_mem *mem,
			       size_t len)
{
	size_t size;
	u64 start64, end64;
	phys_addr_t start, end;
	struct iommu_resv_region *region = NULL;
	unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	start = start64 = le64_to_cpu(mem->start);
	end = end64 = le64_to_cpu(mem->end);
	size = end64 - start64 + 1;

	/* Catch any overflow, including the unlikely end64 - start64 + 1 = 0 */
	if (start != start64 || end != end64 || size < end64 - start64)
		return -EOVERFLOW;

	if (len < sizeof(*mem))
		return -EINVAL;

	switch (mem->subtype) {
	default:
		dev_warn(vdev->dev, "unknown resv mem subtype 0x%x\n",
			 mem->subtype);
		/* Fall-through */
	case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
		region = iommu_alloc_resv_region(start, size, 0,
						 IOMMU_RESV_RESERVED);
		break;
	case VIRTIO_IOMMU_RESV_MEM_T_MSI:
		region = iommu_alloc_resv_region(start, size, prot,
						 IOMMU_RESV_MSI);
		break;
	}
	if (!region)
		return -ENOMEM;

	list_add(&vdev->resv_regions, &region->list);
	return 0;
}

static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device *dev)
{
	int ret;
	u16 type, len;
	size_t cur = 0;
	size_t probe_len;
	struct virtio_iommu_req_probe *probe;
	struct virtio_iommu_probe_property *prop;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct viommu_endpoint *vdev = fwspec->iommu_priv;

	if (!fwspec->num_ids)
		return -EINVAL;

	probe_len = sizeof(*probe) + viommu->probe_size +
		    sizeof(struct virtio_iommu_req_tail);
	probe = kzalloc(probe_len, GFP_KERNEL);
	if (!probe)
		return -ENOMEM;

	probe->head.type = VIRTIO_IOMMU_T_PROBE;
	/*
	 * For now, assume that properties of an endpoint that outputs multiple
	 * IDs are consistent. Only probe the first one.
	 */
	probe->endpoint = cpu_to_le32(fwspec->ids[0]);

	ret = viommu_send_req_sync(viommu, probe, probe_len);
	if (ret)
		goto out_free;

	prop = (void *)probe->properties;
	type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;

	while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
	       cur < viommu->probe_size) {
		len = le16_to_cpu(prop->length) + sizeof(*prop);

		switch (type) {
		case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
			ret = viommu_add_resv_mem(vdev, (void *)prop, len);
			break;
		default:
			dev_err(dev, "unknown viommu prop 0x%x\n", type);
		}

		if (ret)
			dev_err(dev, "failed to parse viommu prop 0x%x\n", type);

		cur += len;
		if (cur >= viommu->probe_size)
			break;

		prop = (void *)probe->properties + cur;
		type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
	}

out_free:
	kfree(probe);
	return ret;
}

/* IOMMU API */

static struct iommu_domain *viommu_domain_alloc(unsigned type)
@@ -614,15 +724,34 @@ static void viommu_iotlb_sync(struct iommu_domain *domain)

static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
{
	struct iommu_resv_region *region;
	struct iommu_resv_region *entry, *new_entry, *msi = NULL;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct viommu_endpoint *vdev = fwspec->iommu_priv;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH, prot,
					 IOMMU_RESV_SW_MSI);
	if (!region)
	list_for_each_entry(entry, &vdev->resv_regions, list) {
		if (entry->type == IOMMU_RESV_MSI)
			msi = entry;

		new_entry = kmemdup(entry, sizeof(*entry), GFP_KERNEL);
		if (!new_entry)
			return;
		list_add_tail(&new_entry->list, head);
	}

	/*
	 * If the device didn't register any bypass MSI window, add a
	 * software-mapped region.
	 */
	if (!msi) {
		msi = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					      prot, IOMMU_RESV_SW_MSI);
		if (!msi)
			return;

	list_add_tail(&region->list, head);
		list_add_tail(&msi->list, head);
	}

	iommu_dma_get_resv_regions(dev, head);
}

@@ -670,9 +799,18 @@ static int viommu_add_device(struct device *dev)
	if (!vdev)
		return -ENOMEM;

	vdev->dev = dev;
	vdev->viommu = viommu;
	INIT_LIST_HEAD(&vdev->resv_regions);
	fwspec->iommu_priv = vdev;

	if (viommu->probe_size) {
		/* Get additional information for this endpoint */
		ret = viommu_probe_endpoint(viommu, dev);
		if (ret)
			goto err_free_dev;
	}

	ret = iommu_device_link(&viommu->iommu, dev);
	if (ret)
		goto err_free_dev;
@@ -694,6 +832,7 @@ static int viommu_add_device(struct device *dev)
err_unlink_dev:
	iommu_device_unlink(&viommu->iommu, dev);
err_free_dev:
	viommu_put_resv_regions(dev, &vdev->resv_regions);
	kfree(vdev);

	return ret;
@@ -711,6 +850,7 @@ static void viommu_remove_device(struct device *dev)

	iommu_group_remove_device(dev);
	iommu_device_unlink(&vdev->viommu->iommu, dev);
	viommu_put_resv_regions(dev, &vdev->resv_regions);
	kfree(vdev);
}

@@ -810,6 +950,10 @@ static int viommu_probe(struct virtio_device *vdev)
			     struct virtio_iommu_config, domain_bits,
			     &viommu->domain_bits);

	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
			     struct virtio_iommu_config, probe_size,
			     &viommu->probe_size);

	viommu->geometry = (struct iommu_domain_geometry) {
		.aperture_start	= input_start,
		.aperture_end	= input_end,
@@ -891,6 +1035,7 @@ static unsigned int features[] = {
	VIRTIO_IOMMU_F_MAP_UNMAP,
	VIRTIO_IOMMU_F_DOMAIN_BITS,
	VIRTIO_IOMMU_F_INPUT_RANGE,
	VIRTIO_IOMMU_F_PROBE,
};

static struct virtio_device_id id_table[] = {
+36 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#define VIRTIO_IOMMU_F_DOMAIN_BITS		1
#define VIRTIO_IOMMU_F_MAP_UNMAP		2
#define VIRTIO_IOMMU_F_BYPASS			3
#define VIRTIO_IOMMU_F_PROBE			4

struct virtio_iommu_range {
	__u64					start;
@@ -37,6 +38,7 @@ struct virtio_iommu_config {
#define VIRTIO_IOMMU_T_DETACH			0x02
#define VIRTIO_IOMMU_T_MAP			0x03
#define VIRTIO_IOMMU_T_UNMAP			0x04
#define VIRTIO_IOMMU_T_PROBE			0x05

/* Status types */
#define VIRTIO_IOMMU_S_OK			0x00
@@ -103,4 +105,38 @@ struct virtio_iommu_req_unmap {
	struct virtio_iommu_req_tail		tail;
};

#define VIRTIO_IOMMU_PROBE_T_NONE		0
#define VIRTIO_IOMMU_PROBE_T_RESV_MEM		1

#define VIRTIO_IOMMU_PROBE_T_MASK		0xfff

struct virtio_iommu_probe_property {
	__le16					type;
	__le16					length;
};

#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED	0
#define VIRTIO_IOMMU_RESV_MEM_T_MSI		1

struct virtio_iommu_probe_resv_mem {
	struct virtio_iommu_probe_property	head;
	__u8					subtype;
	__u8					reserved[3];
	__le64					start;
	__le64					end;
};

struct virtio_iommu_req_probe {
	struct virtio_iommu_req_head		head;
	__le32					endpoint;
	__u8					reserved[64];

	__u8					properties[];

	/*
	 * Tail follows the variable-length properties array. No padding,
	 * property lengths are all aligned on 8 bytes.
	 */
};

#endif