Commit 18f18376 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull IOMMU updates from Joerg Roedel:
 "Nothing big this time. In particular:

   - Debugging code for Tegra-GART

   - Improvement in Intel VT-d fault printing to prevent soft-lockups
     when on fault storms

   - Improvements in AMD IOMMU event reporting

   - NUMA aware allocation in io-pgtable code for ARM

   - Various other small fixes and cleanups all over the place"

* tag 'iommu-updates-v4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu:
  iommu/io-pgtable-arm: Make allocations NUMA-aware
  iommu/amd: Prevent possible null pointer dereference and infinite loop
  iommu/amd: Fix grammar of comments
  iommu: Clean up the comments for iommu_group_alloc
  iommu/vt-d: Remove unnecessary parentheses
  iommu/vt-d: Clean up pasid quirk for pre-production devices
  iommu/vt-d: Clean up unused variable in find_or_alloc_domain
  iommu/vt-d: Fix iotlb psi missing for mappings
  iommu/vt-d: Introduce __mapping_notify_one()
  iommu: Remove extra NULL check when call strtobool()
  iommu/amd: Update logging information for new event type
  iommu/amd: Update the PASID information printed to the system log
  iommu/tegra: gart: Fix gart_iommu_unmap()
  iommu/tegra: gart: Add debugging facility
  iommu/io-pgtable-arm: Use for_each_set_bit to simplify code
  iommu/qcom: Simplify getting .drvdata
  iommu: Remove depends on HAS_DMA in case of platform dependency
  iommu/vt-d: Ratelimit each dmar fault printing
parents f4e70c2e 1f568357
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -23,7 +23,7 @@ config IOMMU_IO_PGTABLE
config IOMMU_IO_PGTABLE_LPAE
	bool "ARMv7/v8 Long Descriptor Format"
	select IOMMU_IO_PGTABLE
	depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64))
	depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)
	help
	  Enable support for the ARM long descriptor pagetable format.
	  This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
@@ -42,7 +42,7 @@ config IOMMU_IO_PGTABLE_LPAE_SELFTEST
config IOMMU_IO_PGTABLE_ARMV7S
	bool "ARMv7/v8 Short Descriptor Format"
	select IOMMU_IO_PGTABLE
	depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST)
	depends on ARM || ARM64 || COMPILE_TEST
	help
	  Enable support for the ARM Short-descriptor pagetable format.
	  This supports 32-bit virtual and physical addresses mapped using
@@ -377,7 +377,6 @@ config QCOM_IOMMU
	# Note: iommu drivers cannot (yet?) be built as modules
	bool "Qualcomm IOMMU Support"
	depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64)
	depends on HAS_DMA
	select IOMMU_API
	select IOMMU_IO_PGTABLE_LPAE
	select ARM_DMA_USE_IOMMU
+35 −34
Original line number Diff line number Diff line
@@ -547,7 +547,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
{
	struct device *dev = iommu->iommu.dev;
	int type, devid, domid, flags;
	int type, devid, pasid, flags, tag;
	volatile u32 *event = __evt;
	int count = 0;
	u64 address;
@@ -555,7 +555,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
retry:
	type    = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
	devid   = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
	domid   = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
	pasid   = PPR_PASID(*(u64 *)&event[0]);
	flags   = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
	address = (u64)(((u64)event[3]) << 32) | event[2];

@@ -570,7 +570,7 @@ retry:
	}

	if (type == EVENT_TYPE_IO_FAULT) {
		amd_iommu_report_page_fault(devid, domid, address, flags);
		amd_iommu_report_page_fault(devid, pasid, address, flags);
		return;
	} else {
		dev_err(dev, "AMD-Vi: Event logged [");
@@ -578,10 +578,9 @@ retry:

	switch (type) {
	case EVENT_TYPE_ILL_DEV:
		dev_err(dev, "ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
			"address=0x%016llx flags=0x%04x]\n",
		dev_err(dev, "ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n",
			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
			address, flags);
			pasid, address, flags);
		dump_dte_entry(devid);
		break;
	case EVENT_TYPE_DEV_TAB_ERR:
@@ -591,34 +590,38 @@ retry:
			address, flags);
		break;
	case EVENT_TYPE_PAGE_TAB_ERR:
		dev_err(dev, "PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
			"domain=0x%04x address=0x%016llx flags=0x%04x]\n",
		dev_err(dev, "PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n",
			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
			domid, address, flags);
			pasid, address, flags);
		break;
	case EVENT_TYPE_ILL_CMD:
		dev_err(dev, "ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
		dump_command(address);
		break;
	case EVENT_TYPE_CMD_HARD_ERR:
		dev_err(dev, "COMMAND_HARDWARE_ERROR address=0x%016llx "
			"flags=0x%04x]\n", address, flags);
		dev_err(dev, "COMMAND_HARDWARE_ERROR address=0x%016llx flags=0x%04x]\n",
			address, flags);
		break;
	case EVENT_TYPE_IOTLB_INV_TO:
		dev_err(dev, "IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
			"address=0x%016llx]\n",
		dev_err(dev, "IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%016llx]\n",
			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
			address);
		break;
	case EVENT_TYPE_INV_DEV_REQ:
		dev_err(dev, "INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
			"address=0x%016llx flags=0x%04x]\n",
		dev_err(dev, "INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n",
			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
			address, flags);
			pasid, address, flags);
		break;
	case EVENT_TYPE_INV_PPR_REQ:
		pasid = ((event[0] >> 16) & 0xFFFF)
			| ((event[1] << 6) & 0xF0000);
		tag = event[1] & 0x03FF;
		dev_err(dev, "INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n",
			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
			pasid, address, flags);
		break;
	default:
		dev_err(dev, KERN_ERR "UNKNOWN event[0]=0x%08x event[1]=0x%08x "
			"event[2]=0x%08x event[3]=0x%08x\n",
		dev_err(dev, "UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
			event[0], event[1], event[2], event[3]);
	}

@@ -1914,15 +1917,6 @@ static void do_detach(struct iommu_dev_data *dev_data)
	struct amd_iommu *iommu;
	u16 alias;

	/*
	 * First check if the device is still attached. It might already
	 * be detached from its domain because the generic
	 * iommu_detach_group code detached it and we try again here in
	 * our alias handling.
	 */
	if (!dev_data->domain)
		return;

	iommu = amd_iommu_rlookup_table[dev_data->devid];
	alias = dev_data->alias;

@@ -1942,8 +1936,8 @@ static void do_detach(struct iommu_dev_data *dev_data)
}

/*
 * If a device is not yet associated with a domain, this function does
 * assigns it visible for the hardware
 * If a device is not yet associated with a domain, this function makes the
 * device visible in the domain
 */
static int __attach_device(struct iommu_dev_data *dev_data,
			   struct protection_domain *domain)
@@ -2064,8 +2058,8 @@ static bool pci_pri_tlp_required(struct pci_dev *pdev)
}

/*
 * If a device is not yet associated with a domain, this function
 * assigns it visible for the hardware
 * If a device is not yet associated with a domain, this function makes the
 * device visible in the domain
 */
static int attach_device(struct device *dev,
			 struct protection_domain *domain)
@@ -2127,9 +2121,6 @@ static void __detach_device(struct iommu_dev_data *dev_data)
	 */
	WARN_ON(!irqs_disabled());

	if (WARN_ON(!dev_data->domain))
		return;

	domain = dev_data->domain;

	spin_lock(&domain->lock);
@@ -2151,6 +2142,15 @@ static void detach_device(struct device *dev)
	dev_data = get_dev_data(dev);
	domain   = dev_data->domain;

	/*
	 * First check if the device is still attached. It might already
	 * be detached from its domain because the generic
	 * iommu_detach_group code detached it and we try again here in
	 * our alias handling.
	 */
	if (WARN_ON(!dev_data->domain))
		return;

	/* lock device table */
	spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
	__detach_device(dev_data);
@@ -2796,6 +2796,7 @@ static void cleanup_domain(struct protection_domain *domain)
	while (!list_empty(&domain->dev_list)) {
		entry = list_first_entry(&domain->dev_list,
					 struct iommu_dev_data, list);
		BUG_ON(!entry->domain);
		__detach_device(entry);
	}

+1 −0
Original line number Diff line number Diff line
@@ -133,6 +133,7 @@
#define EVENT_TYPE_CMD_HARD_ERR	0x6
#define EVENT_TYPE_IOTLB_INV_TO	0x7
#define EVENT_TYPE_INV_DEV_REQ	0x8
#define EVENT_TYPE_INV_PPR_REQ	0x9
#define EVENT_DEVID_MASK	0xffff
#define EVENT_DEVID_SHIFT	0
#define EVENT_DOMID_MASK	0xffff
+3 −5
Original line number Diff line number Diff line
@@ -1618,17 +1618,13 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
	int reg, fault_index;
	u32 fault_status;
	unsigned long flag;
	bool ratelimited;
	static DEFINE_RATELIMIT_STATE(rs,
				      DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	/* Disable printing, simply clear the fault when ratelimited */
	ratelimited = !__ratelimit(&rs);

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault_status && !ratelimited)
	if (fault_status && __ratelimit(&rs))
		pr_err("DRHD: handling fault status reg %x\n", fault_status);

	/* TBD: ignore advanced fault log currently */
@@ -1638,6 +1634,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
	fault_index = dma_fsts_fault_record_index(fault_status);
	reg = cap_fault_reg_offset(iommu->cap);
	while (1) {
		/* Disable printing, simply clear the fault when ratelimited */
		bool ratelimited = !__ratelimit(&rs);
		u8 fault_reason;
		u16 source_id;
		u64 guest_addr;
+49 −52
Original line number Diff line number Diff line
@@ -485,37 +485,14 @@ static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_ecs = 1;
static int intel_iommu_pasid28;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

/* Broadwell and Skylake have broken ECS support — normal so-called "second
 * level" translation of DMA requests-without-PASID doesn't actually happen
 * unless you also set the NESTE bit in an extended context-entry. Which of
 * course means that SVM doesn't work because it's trying to do nested
 * translation of the physical addresses it finds in the process page tables,
 * through the IOVA->phys mapping found in the "second level" page tables.
 *
 * The VT-d specification was retroactively changed to change the definition
 * of the capability bits and pretend that Broadwell/Skylake never happened...
 * but unfortunately the wrong bit was changed. It's ECS which is broken, but
 * for some reason it was the PASID capability bit which was redefined (from
 * bit 28 on BDW/SKL to bit 40 in future).
 *
 * So our test for ECS needs to eschew those implementations which set the old
 * PASID capabiity bit 28, since those are the ones on which ECS is broken.
 * Unless we are working around the 'pasid28' limitations, that is, by putting
 * the device into passthrough mode for normal DMA and thus masking the bug.
 */
#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
			    (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
/* PASID support is thus enabled if ECS is enabled and *either* of the old
 * or new capability bits are set. */
#define pasid_enabled(iommu) (ecs_enabled(iommu) &&			\
			      (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
#define ecs_enabled(iommu)	(intel_iommu_ecs && ecap_ecs(iommu->ecap))
#define pasid_enabled(iommu)	(ecs_enabled(iommu) && ecap_pasid(iommu->ecap))

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
@@ -578,11 +555,6 @@ static int __init intel_iommu_setup(char *str)
			printk(KERN_INFO
				"Intel-IOMMU: disable extended context table support\n");
			intel_iommu_ecs = 0;
		} else if (!strncmp(str, "pasid28", 7)) {
			printk(KERN_INFO
				"Intel-IOMMU: enable pre-production PASID support\n");
			intel_iommu_pasid28 = 1;
			iommu_identity_mapping |= IDENTMAP_GFX;
		} else if (!strncmp(str, "tboot_noforce", 13)) {
			printk(KERN_INFO
				"Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
@@ -1606,6 +1578,18 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
		iommu_flush_dev_iotlb(domain, addr, mask);
}

/* Notification for newly created mappings */
static inline void __mapping_notify_one(struct intel_iommu *iommu,
					struct dmar_domain *domain,
					unsigned long pfn, unsigned int pages)
{
	/* It's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
	else
		iommu_flush_write_buffer(iommu);
}

static void iommu_flush_iova(struct iova_domain *iovad)
{
	struct dmar_domain *domain;
@@ -2340,18 +2324,47 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
	return 0;
}

static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                         struct scatterlist *sg, unsigned long phys_pfn,
                         unsigned long nr_pages, int prot)
{
       int ret;
       struct intel_iommu *iommu;

       /* Do the real mapping first */
       ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
       if (ret)
               return ret;

       /* Notify about the new mapping */
       if (domain_type_is_vm(domain)) {
	       /* VM typed domains can have more than one IOMMUs */
	       int iommu_id;
	       for_each_domain_iommu(iommu_id, domain) {
		       iommu = g_iommus[iommu_id];
		       __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
	       }
       } else {
	       /* General domains only have one IOMMU */
	       iommu = domain_get_iommu(domain);
	       __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
       }

       return 0;
}

static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
	return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}

static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				     unsigned long phys_pfn, unsigned long nr_pages,
				     int prot)
{
	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
	return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
}

static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
@@ -2534,7 +2547,7 @@ static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
	struct device_domain_info *info = NULL;
	struct dmar_domain *domain = NULL;
	struct intel_iommu *iommu;
	u16 req_id, dma_alias;
	u16 dma_alias;
	unsigned long flags;
	u8 bus, devfn;

@@ -2542,8 +2555,6 @@ static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
	if (!iommu)
		return NULL;

	req_id = ((u16)bus << 8) | devfn;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

@@ -2657,8 +2668,8 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
	 */
	dma_pte_clear_range(domain, first_vpfn, last_vpfn);

	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
				  last_vpfn - first_vpfn + 1,
	return __domain_mapping(domain, first_vpfn, NULL,
				first_vpfn, last_vpfn - first_vpfn + 1,
				DMA_PTE_READ|DMA_PTE_WRITE);
}

@@ -3626,14 +3637,6 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
	if (ret)
		goto error;

	/* it's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain,
				      mm_to_dma_pfn(iova_pfn),
				      size, 0, 1);
	else
		iommu_flush_write_buffer(iommu);

	start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
	start_paddr += paddr & ~PAGE_MASK;
	return start_paddr;
@@ -3820,12 +3823,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
		return 0;
	}

	/* it's a non-present to present mapping. Only flush if caching mode */
	if (cap_caching_mode(iommu->cap))
		iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
	else
		iommu_flush_write_buffer(iommu);

	return nelems;
}

Loading