Commit 6d674e28 authored by Marc Zyngier's avatar Marc Zyngier
Browse files

KVM: arm/arm64: Properly handle faulting of device mappings



A device mapping is normally always mapped at Stage-2, since there
is very little gain in having it faulted in.

Nonetheless, it is possible to end-up in a situation where the device
mapping has been removed from Stage-2 (userspace munmaped the VFIO
region, and the MMU notifier did its job), but present in a userspace
mapping (userpace has mapped it back at the same address). In such
a situation, the device mapping will be demand-paged as the guest
performs memory accesses.

This requires to be careful when dealing with mapping size, cache
management, and to handle potential execution of a device mapping.

Reported-by: default avatarAlexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: default avatarMarc Zyngier <maz@kernel.org>
Tested-by: default avatarAlexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: default avatarJames Morse <james.morse@arm.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20191211165651.7889-2-maz@kernel.org
parent 1ce74e96
Loading
Loading
Loading
Loading
+17 −4
Original line number Diff line number Diff line
@@ -38,6 +38,11 @@ static unsigned long io_map_base;
#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)

static bool is_iomap(unsigned long flags)
{
	return flags & KVM_S2PTE_FLAG_IS_IOMAP;
}

static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
@@ -1698,6 +1703,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,

	vma_pagesize = vma_kernel_pagesize(vma);
	if (logging_active ||
	    (vma->vm_flags & VM_PFNMAP) ||
	    !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
		force_pte = true;
		vma_pagesize = PAGE_SIZE;
@@ -1760,6 +1766,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			writable = false;
	}

	if (exec_fault && is_iomap(flags))
		return -ENOEXEC;

	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;
@@ -1781,7 +1790,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	if (writable)
		kvm_set_pfn_dirty(pfn);

	if (fault_status != FSC_PERM)
	if (fault_status != FSC_PERM && !is_iomap(flags))
		clean_dcache_guest_page(pfn, vma_pagesize);

	if (exec_fault)
@@ -1948,9 +1957,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
		if (is_iabt) {
			/* Prefetch Abort on I/O address */
			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
			ret = -ENOEXEC;
			goto out;
		}

		/*
@@ -1992,6 +2000,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	if (ret == 0)
		ret = 1;
out:
	if (ret == -ENOEXEC) {
		kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
		ret = 1;
	}
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;