Commit 6d9d2115 authored by Will Deacon's avatar Will Deacon Committed by Marc Zyngier
Browse files

KVM: arm64: Add support for stage-2 map()/unmap() in generic page-table



Add stage-2 map() and unmap() operations to the generic page-table code.

Signed-off-by: default avatarWill Deacon <will@kernel.org>
Signed-off-by: default avatarMarc Zyngier <maz@kernel.org>
Reviewed-by: default avatarGavin Shan <gshan@redhat.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Quentin Perret <qperret@google.com>
Link: https://lore.kernel.org/r/20200911132529.19844-7-will@kernel.org
parent 71233d05
Loading
Loading
Loading
Loading
+46 −0
Original line number Diff line number Diff line
@@ -140,6 +140,52 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm);
 */
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);

/**
 * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
 * @addr:	Intermediate physical address at which to place the mapping.
 * @size:	Size of the mapping.
 * @phys:	Physical address of the memory to map.
 * @prot:	Permissions and attributes for the mapping.
 * @mc:		Cache of pre-allocated GFP_PGTABLE_USER memory from which to
 *		allocate page-table pages.
 *
 * The offset of @addr within a page is ignored, @size is rounded-up to
 * the next page boundary and @phys is rounded-down to the previous page
 * boundary.
 *
 * If device attributes are not explicitly requested in @prot, then the
 * mapping will be normal, cacheable.
 *
 * Note that this function will both coalesce existing table entries and split
 * existing block mappings, relying on page-faults to fault back areas outside
 * of the new mapping lazily.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   struct kvm_mmu_memory_cache *mc);

/**
 * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
 * @addr:	Intermediate physical address from which to remove the mapping.
 * @size:	Size of the mapping.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * TLB invalidation is performed for each page-table entry cleared during the
 * unmapping operation and the reference count for the page-table page
 * containing the cleared entry is decremented, with unreferenced pages being
 * freed. Unmapping a cacheable page will ensure that it is clean to the PoC if
 * FWB is not supported by the CPU.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);

/**
 * kvm_pgtable_walk() - Walk a page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_*_init().
+273 −0
Original line number Diff line number Diff line
@@ -32,10 +32,19 @@
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)

struct kvm_pgtable_walk_data {
	struct kvm_pgtable		*pgt;
	struct kvm_pgtable_walker	*walker;
@@ -417,6 +426,270 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
	pgt->pgd = NULL;
}

struct stage2_map_data {
	u64				phys;
	kvm_pte_t			attr;

	kvm_pte_t			*anchor;

	struct kvm_s2_mmu		*mmu;
	struct kvm_mmu_memory_cache	*memcache;
};

static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
				    struct stage2_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
			    PAGE_S2_MEMATTR(NORMAL);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;

	if (!(prot & KVM_PGTABLE_PROT_X))
		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
	else if (device)
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_R)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
	data->attr = attr;
	return 0;
}

static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				       kvm_pte_t *ptep,
				       struct stage2_map_data *data)
{
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
		goto out;

	/* There's an existing valid leaf entry, so perform break-before-make */
	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
	kvm_set_valid_leaf_pte(ptep, phys, data->attr, level);
out:
	data->phys += granule;
	return true;
}

static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
				     kvm_pte_t *ptep,
				     struct stage2_map_data *data)
{
	if (data->anchor)
		return 0;

	if (!kvm_block_mapping_supported(addr, end, data->phys, level))
		return 0;

	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0);
	data->anchor = ptep;
	return 0;
}

static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				struct stage2_map_data *data)
{
	kvm_pte_t *childp, pte = *ptep;
	struct page *page = virt_to_page(ptep);

	if (data->anchor) {
		if (kvm_pte_valid(pte))
			put_page(page);

		return 0;
	}

	if (stage2_map_walker_try_leaf(addr, end, level, ptep, data))
		goto out_get_page;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	if (!data->memcache)
		return -ENOMEM;

	childp = kvm_mmu_memory_cache_alloc(data->memcache);
	if (!childp)
		return -ENOMEM;

	/*
	 * If we've run into an existing block mapping then replace it with
	 * a table. Accesses beyond 'end' that fall within the new table
	 * will be mapped lazily.
	 */
	if (kvm_pte_valid(pte)) {
		kvm_set_invalid_pte(ptep);
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
		put_page(page);
	}

	kvm_set_table_pte(ptep, childp);

out_get_page:
	get_page(page);
	return 0;
}

static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	int ret = 0;

	if (!data->anchor)
		return 0;

	free_page((unsigned long)kvm_pte_follow(*ptep));
	put_page(virt_to_page(ptep));

	if (data->anchor == ptep) {
		data->anchor = NULL;
		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
	}

	return ret;
}

/*
 * This is a little fiddly, as we use all three of the walk flags. The idea
 * is that the TABLE_PRE callback runs for table entries on the way down,
 * looking for table entries which we could conceivably replace with a
 * block entry for this mapping. If it finds one, then it sets the 'anchor'
 * field in 'struct stage2_map_data' to point at the table entry, before
 * clearing the entry to zero and descending into the now detached table.
 *
 * The behaviour of the LEAF callback then depends on whether or not the
 * anchor has been set. If not, then we're not using a block mapping higher
 * up the table and we perform the mapping at the existing leaves instead.
 * If, on the other hand, the anchor _is_ set, then we drop references to
 * all valid leaves so that the pages beneath the anchor can be freed.
 *
 * Finally, the TABLE_POST callback does nothing if the anchor has not
 * been set, but otherwise frees the page-table pages while walking back up
 * the page-table, installing the block entry when it revisits the anchor
 * pointer and clearing the anchor to NULL.
 */
static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			     enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct stage2_map_data *data = arg;

	switch (flag) {
	case KVM_PGTABLE_WALK_TABLE_PRE:
		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_LEAF:
		return stage2_map_walk_leaf(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_TABLE_POST:
		return stage2_map_walk_table_post(addr, end, level, ptep, data);
	}

	return -EINVAL;
}

int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   struct kvm_mmu_memory_cache *mc)
{
	int ret;
	struct stage2_map_data map_data = {
		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
		.mmu		= pgt->mmu,
		.memcache	= mc,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	ret = stage2_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	return ret;
}

static void stage2_flush_dcache(void *addr, u64 size)
{
	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return;

	__flush_dcache_area(addr, size);
}

static bool stage2_pte_cacheable(kvm_pte_t pte)
{
	u64 memattr = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR, pte);
	return memattr == PAGE_S2_MEMATTR(NORMAL);
}

static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_s2_mmu *mmu = arg;
	kvm_pte_t pte = *ptep, *childp = NULL;
	bool need_flush = false;

	if (!kvm_pte_valid(pte))
		return 0;

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte);

		if (page_count(virt_to_page(childp)) != 1)
			return 0;
	} else if (stage2_pte_cacheable(pte)) {
		need_flush = true;
	}

	/*
	 * This is similar to the map() path in that we unmap the entire
	 * block entry and rely on the remaining portions being faulted
	 * back lazily.
	 */
	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
	put_page(virt_to_page(ptep));

	if (need_flush) {
		stage2_flush_dcache(kvm_pte_follow(pte),
				    kvm_granule_size(level));
	}

	if (childp)
		free_page((unsigned long)childp);

	return 0;
}

int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_unmap_walker,
		.arg	= pgt->mmu,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
{
	size_t pgd_sz;