Commit 37e91bd4 authored by Lu Baolu, committed by Joerg Roedel
Browse files

iommu/vt-d: Disable non-recoverable fault processing before unbind



When a PASID is used for SVA by the device, it's possible that the PASID
entry is cleared before the device flushes all ongoing DMA requests. The
IOMMU should tolerate and ignore the non-recoverable faults caused by the
untranslated requests from this device.

For example, when an exception happens, the process terminates before the
device driver stops DMA and calls the IOMMU driver to unbind the PASID. The
flow of process exit is as follows:

do_exit() {
     exit_mm() {
             mm_put();
             exit_mmap() {
                     intel_invalidate_range() //mmu notifier
                     tlb_finish_mmu()
                     mmu_notifier_release(mm) {
                             intel_iommu_release() {
[2]                                  intel_iommu_teardown_pasid();
                                     intel_iommu_flush_tlbs();
                             }
                     }
                     unmap_vmas();
                     free_pgtables();
             };
     }
     exit_files(tsk) {
             close_files() {
                     dsa_close();
[1]                  dsa_stop_dma();
                     intel_svm_unbind_pasid();
             }
     }
}

Care must be taken on VT-d to avoid unrecoverable faults between the time
window of [1] and [2]. [Process exit flow was contributed by Jacob Pan.]

Intel VT-d provides such a function through the FPD bit of the PASID entry.
This patch sets the FPD bit when the PASID entry changes from present to
non-present in the mm notifier, and clears it when the PASID is unbound.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Link: https://lore.kernel.org/r/20200516062101.29541-15-baolu.lu@linux.intel.com


Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent 4c0fa5bf
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -5005,7 +5005,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
	if (info->dev) {
		if (dev_is_pci(info->dev) && sm_supported(iommu))
			intel_pasid_tear_down_entry(iommu, info->dev,
					PASID_RID2PASID);
					PASID_RID2PASID, false);

		iommu_disable_dev_iotlb(info);
		domain_context_clear(iommu, info->dev);
@@ -5234,7 +5234,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain,
	auxiliary_unlink_device(domain, dev);

	spin_lock(&iommu->lock);
	intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid);
	intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false);
	domain_detach_iommu(domain, iommu);
	spin_unlock(&iommu->lock);

+21 −5
Original line number Diff line number Diff line
@@ -292,7 +292,20 @@ static inline void pasid_clear_entry(struct pasid_entry *pe)
	WRITE_ONCE(pe->val[7], 0);
}

static void intel_pasid_clear_entry(struct device *dev, int pasid)
/*
 * Wipe a PASID entry while leaving the FPD bit set.
 *
 * The first 64-bit word is overwritten with PASID_PTE_FPD only, so the
 * present bit (PASID_PTE_PRESENT) ends up clear and FPD ends up set in a
 * single store. The remaining seven words are then zeroed in ascending
 * order, matching the store order of the plain clear helper.
 */
static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe)
{
	int idx;

	/* Word 0 first: entry becomes non-present with FPD in one write. */
	WRITE_ONCE(pe->val[0], PASID_PTE_FPD);

	/* Words 1..7 carry no state once the entry is non-present. */
	for (idx = 1; idx <= 7; idx++)
		WRITE_ONCE(pe->val[idx], 0);
}

static void
intel_pasid_clear_entry(struct device *dev, int pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

@@ -300,6 +313,9 @@ static void intel_pasid_clear_entry(struct device *dev, int pasid)
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

@@ -473,8 +489,8 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
	qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}

void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
				 struct device *dev, int pasid)
void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 int pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did;
@@ -484,7 +500,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
		return;

	did = pasid_get_domain_id(pte);
	intel_pasid_clear_entry(dev, pasid);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));
+3 −1
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#define PASID_MAX			0x100000
#define PASID_PTE_MASK			0x3F
#define PASID_PTE_PRESENT		1
#define PASID_PTE_FPD			2
#define PDE_PFN_MASK			PAGE_MASK
#define PASID_PDE_SHIFT			6
#define MAX_NR_PASID_BITS		20
@@ -120,7 +121,8 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu,
			     struct iommu_gpasid_bind_data_vtd *pasid_data,
			     struct dmar_domain *domain, int addr_width);
void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
				 struct device *dev, int pasid);
				 struct device *dev, int pasid,
				 bool fault_ignore);
int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid);
void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid);
#endif /* __INTEL_PASID_H */
+6 −3
Original line number Diff line number Diff line
@@ -207,7 +207,8 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list) {
		intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
		intel_pasid_tear_down_entry(svm->iommu, sdev->dev,
					    svm->pasid, true);
		intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
	}
	rcu_read_unlock();
@@ -396,7 +397,8 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid)
			sdev->users--;
		if (!sdev->users) {
			list_del_rcu(&sdev->list);
			intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
			intel_pasid_tear_down_entry(iommu, dev,
						    svm->pasid, false);
			intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
			/* TODO: Drain in flight PRQ for the PASID since it
			 * may get reused soon, we don't want to
@@ -639,7 +641,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
			 * to use. We have a *shared* PASID table, because it's
			 * large and has to be physically contiguous. So it's
			 * hard to be as defensive as we might like. */
			intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
			intel_pasid_tear_down_entry(iommu, dev,
						    svm->pasid, false);
			intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
			kfree_rcu(sdev, rcu);