Commit 71233d05 authored by Will Deacon, committed by Marc Zyngier
Browse files

KVM: arm64: Add support for creating kernel-agnostic stage-2 page tables



Introduce alloc() and free() functions to the generic page-table code
for guest stage-2 page-tables and plumb these into the existing KVM
page-table allocator. Subsequent patches will convert other operations
within the KVM allocator over to the generic code.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Quentin Perret <qperret@google.com>
Link: https://lore.kernel.org/r/20200911132529.19844-6-will@kernel.org
parent 0f9d09b8
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -81,6 +81,7 @@ struct kvm_s2_mmu {
	 */
	pgd_t		*pgd;
	phys_addr_t	pgd_phys;
	struct kvm_pgtable *pgt;

	/* The last vcpu id that ran on each physical CPU */
	int __percpu *last_vcpu_ran;
+18 −0
Original line number Diff line number Diff line
@@ -122,6 +122,24 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);
int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot);

/**
 * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
 * @pgt:	Uninitialised page-table structure to initialise.
 * @kvm:	KVM structure representing the guest virtual machine.
 *
 * Allocates a zeroed PGD for @pgt, sized from the stage-2 configuration
 * (VTCR) recorded in @kvm, and records the input address size and start
 * level in @pgt. The PGD is released by kvm_pgtable_stage2_destroy().
 *
 * Return: 0 on success, negative error code on failure (-ENOMEM if the PGD
 * cannot be allocated).
 */
int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm);

/**
 * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
 *
 * Frees the table pages referenced by valid table entries and then the PGD
 * itself, leaving @pgt->pgd set to NULL. The kvm_pgtable structure pointed to
 * by @pgt is not freed; that remains the caller's responsibility.
 *
 * The page-table is assumed to be unreachable by any hardware walkers prior
 * to freeing and therefore no TLB invalidation is performed.
 */
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);

/**
 * kvm_pgtable_walk() - Walk a page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_*_init().
+54 −0
Original line number Diff line number Diff line
@@ -416,3 +416,57 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
	free_page((unsigned long)pgt->pgd);
	pgt->pgd = NULL;
}

int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
{
	size_t pgd_sz;
	u64 vtcr = kvm->arch.vtcr;
	u32 ia_bits = VTCR_EL2_IPA(vtcr);
	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
	pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL | __GFP_ZERO);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= ia_bits;
	pgt->start_level	= start_level;
	pgt->mmu		= &kvm->arch.mmu;

	/* Ensure zeroed PGD pages are visible to the hardware walker */
	dsb(ishst);
	return 0;
}

/*
 * Page-table walker callback used when tearing down a stage-2 table.
 *
 * Invalid entries are ignored. For every valid entry, one reference is
 * dropped on the page that contains the entry; if the entry is a table
 * entry at @level, the next-level table page it points to is freed as well.
 *
 * Always returns 0, so the callback itself never reports an error to the walk.
 */
static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	const kvm_pte_t entry = *ptep;

	if (kvm_pte_valid(entry)) {
		/* Drop a reference on the page holding this entry. */
		put_page(virt_to_page(ptep));

		/* A table entry owns the page it points to: release it. */
		if (kvm_pte_table(entry, level))
			free_page((unsigned long)kvm_pte_follow(entry));
	}

	return 0;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	size_t pgd_sz;
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF |
			  KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
	free_pages_exact(pgt->pgd, pgd_sz);
	pgt->pgd = NULL;
}
+29 −26
Original line number Diff line number Diff line
@@ -668,47 +668,49 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
 * @kvm:	The pointer to the KVM structure
 * @mmu:	The pointer to the s2 MMU structure
 *
 * Allocates only the stage-2 HW PGD level table(s) of size defined by
 * stage2_pgd_size(mmu->kvm).
 *
 * Allocates only the stage-2 HW PGD level table(s).
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
{
	phys_addr_t pgd_phys;
	pgd_t *pgd;
	int cpu;
	int cpu, err;
	struct kvm_pgtable *pgt;

	if (mmu->pgd != NULL) {
	if (mmu->pgt != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	/* Allocate the HW PGD, making sure that each page gets its own refcount */
	pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO);
	if (!pgd)
	pgt = kzalloc(sizeof(*pgt), GFP_KERNEL);
	if (!pgt)
		return -ENOMEM;

	pgd_phys = virt_to_phys(pgd);
	if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm)))
		return -EINVAL;
	err = kvm_pgtable_stage2_init(pgt, kvm);
	if (err)
		goto out_free_pgtable;

	mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
	if (!mmu->last_vcpu_ran) {
		free_pages_exact(pgd, stage2_pgd_size(kvm));
		return -ENOMEM;
		err = -ENOMEM;
		goto out_destroy_pgtable;
	}

	/*
	 * Seed every possible CPU's slot with -1 (presumably meaning that no
	 * vCPU has run there yet -- confirm against the readers of
	 * last_vcpu_ran).
	 */
	for_each_possible_cpu(cpu)
		*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;

	mmu->kvm = kvm;
	mmu->pgd = pgd;
	mmu->pgd_phys = pgd_phys;
	mmu->pgt = pgt;
	mmu->pgd_phys = __pa(pgt->pgd);
	mmu->pgd = (void *)pgt->pgd;
	mmu->vmid.vmid_gen = 0;

	return 0;

	/*
	 * Error unwind, in reverse order of setup: tear down the initialised
	 * page-table first, then free the kvm_pgtable descriptor itself.
	 */
out_destroy_pgtable:
	kvm_pgtable_stage2_destroy(pgt);
out_free_pgtable:
	kfree(pgt);
	return err;
}

static void stage2_unmap_memslot(struct kvm *kvm,
@@ -781,20 +783,21 @@ void stage2_unmap_vm(struct kvm *kvm)
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
{
	struct kvm *kvm = mmu->kvm;
	void *pgd = NULL;
	struct kvm_pgtable *pgt = NULL;

	spin_lock(&kvm->mmu_lock);
	if (mmu->pgd) {
		unmap_stage2_range(mmu, 0, kvm_phys_size(kvm));
		pgd = READ_ONCE(mmu->pgd);
	pgt = mmu->pgt;
	if (pgt) {
		mmu->pgd = NULL;
		mmu->pgd_phys = 0;
		mmu->pgt = NULL;
		free_percpu(mmu->last_vcpu_ran);
	}
	spin_unlock(&kvm->mmu_lock);

	/* Free the HW pgd, one page at a time */
	if (pgd) {
		free_pages_exact(pgd, stage2_pgd_size(kvm));
		free_percpu(mmu->last_vcpu_ran);
	/*
	 * The page-table was detached from @mmu while mmu_lock was held; the
	 * teardown and the freeing of the descriptor happen after the lock
	 * has been dropped.
	 */
	if (pgt) {
		kvm_pgtable_stage2_destroy(pgt);
		kfree(pgt);
	}
}