KVM: arm64: Add support for stage-2 map()/unmap() in generic page-table (6d9d2115) · Commits · 戴 / test

arch/arm64/include/asm/kvm_pgtable.h

+46 −0

Original line number	Diff line number	Diff line
		@@ -140,6 +140,52 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm);
		*/
		void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);

		/**
		* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
		* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
		* @addr: Intermediate physical address at which to place the mapping.
		* @size: Size of the mapping.
		* @phys: Physical address of the memory to map.
		* @prot: Permissions and attributes for the mapping.
		* @mc: Cache of pre-allocated GFP_PGTABLE_USER memory from which to
		* allocate page-table pages.
		*
		* The offset of @addr within a page is ignored, @size is rounded-up to
		* the next page boundary and @phys is rounded-down to the previous page
		* boundary.
		*
		* If device attributes are not explicitly requested in @prot, then the
		* mapping will be normal, cacheable.
		*
		* Requesting a mapping that is both device and executable is rejected
		* with -EINVAL. If a new page-table page is required and @mc is NULL or
		* cannot provide one, -ENOMEM is returned.
		*
		* Note that this function will both coalesce existing table entries and split
		* existing block mappings, relying on page-faults to fault back areas outside
		* of the new mapping lazily.
		*
		* Return: 0 on success, negative error code on failure.
		*/
		int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
		u64 phys, enum kvm_pgtable_prot prot,
		struct kvm_mmu_memory_cache *mc);

		/**
		* kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
		* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
		* @addr: Intermediate physical address from which to remove the mapping.
		* @size: Size of the mapping.
		*
		* The offset of @addr within a page is ignored and @size is rounded-up to
		* the next page boundary.
		*
		* TLB invalidation is performed for each page-table entry cleared during the
		* unmapping operation and the reference count for the page-table page
		* containing the cleared entry is decremented, with unreferenced pages being
		* freed. Unmapping a cacheable page will ensure that it is clean to the PoC if
		* FWB is not supported by the CPU.
		*
		* A table entry is only cleared, and the table page it points to freed,
		* when page_count() of that table page is 1; otherwise the table entry is
		* left in place.
		*
		* Return: 0 on success, negative error code on failure.
		*/
		int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);

		/**
		* kvm_pgtable_walk() - Walk a page-table.
		* @pgt: Page-table structure initialised by kvm_pgtable_*_init().

arch/arm64/kvm/hyp/pgtable.c

+273 −0

Original line number	Diff line number	Diff line
		@@ -32,10 +32,19 @@
		#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3
		#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10)

		/* Stage-2 leaf attribute fields held in the low bits of a descriptor. */
		#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2)
		#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6)
		#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7)
		#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8)
		/* Field value (not a mask): placed into the S2_SH field with FIELD_PREP(). */
		#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3
		#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10)

		/* Mask covering the upper attribute bits of a leaf descriptor. */
		#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 51)

		#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)

		/* Set by the stage-2 map path when KVM_PGTABLE_PROT_X is not requested. */
		#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)

		struct kvm_pgtable_walk_data {
		struct kvm_pgtable *pgt;
		struct kvm_pgtable_walker *walker;
		@@ -417,6 +426,270 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
		pgt->pgd = NULL;
		}

		/* State shared between the stage-2 map walker callbacks for one map() call. */
		struct stage2_map_data {
		/* Next physical address to map; advanced by one granule per installed leaf. */
		u64 phys;
		/* Attribute bits computed by stage2_map_set_prot_attr(), applied to every leaf. */
		kvm_pte_t attr;

		/* Table entry invalidated by the TABLE_PRE callback; NULL when none is pending. */
		kvm_pte_t *anchor;

		/* Handed to __kvm_tlb_flush_vmid_ipa whenever an entry is invalidated. */
		struct kvm_s2_mmu *mmu;
		/* Source of new page-table pages; if NULL, needing a new table yields -ENOMEM. */
		struct kvm_mmu_memory_cache *memcache;
		};

		/*
		 * Translate the KVM_PGTABLE_PROT_* flags in @prot into stage-2 leaf
		 * attribute bits and store the result in @data->attr.
		 *
		 * The memory type is DEVICE_nGnRE when KVM_PGTABLE_PROT_DEVICE is set and
		 * NORMAL otherwise. The shareability field and the access flag are always
		 * set; XN is set unless KVM_PGTABLE_PROT_X is requested.
		 *
		 * Return: 0 on success, -EINVAL if an executable device mapping is
		 * requested (in which case @data->attr is left untouched).
		 */
		static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
						    struct stage2_map_data *data)
		{
			bool device = prot & KVM_PGTABLE_PROT_DEVICE;
			kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
						  PAGE_S2_MEMATTR(NORMAL);
			u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;

			if (!(prot & KVM_PGTABLE_PROT_X))
				attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
			else if (device)
				return -EINVAL;

			if (prot & KVM_PGTABLE_PROT_R)
				attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

			if (prot & KVM_PGTABLE_PROT_W)
				attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

			/* SH is a multi-bit field, so the value has to be shifted into place. */
			attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
			attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
			data->attr = attr;
			return 0;
		}

		/*
		 * Try to install a leaf entry for the current walk position at @ptep.
		 *
		 * Return: false if a leaf mapping is not supported at this level for
		 * [@addr, @end) and the current physical address; true once the entry has
		 * been written, in which case @data->phys has been advanced by the
		 * granule size of @level.
		 */
		static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
						       kvm_pte_t *ptep,
						       struct stage2_map_data *data)
		{
			u64 span = kvm_granule_size(level);
			u64 pa = data->phys;

			if (!kvm_block_mapping_supported(addr, end, pa, level))
				return false;

			if (!kvm_set_valid_leaf_pte(ptep, pa, data->attr, level)) {
				/*
				 * There's an existing valid leaf entry, so perform
				 * break-before-make: invalidate, flush the TLB, then
				 * write the new entry.
				 */
				kvm_set_invalid_pte(ptep);
				kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
				kvm_set_valid_leaf_pte(ptep, pa, data->attr, level);
			}

			data->phys += span;
			return true;
		}

		/*
		 * TABLE_PRE callback: on the way down, detach a table entry that the new
		 * mapping could replace with a block entry and record it as the anchor.
		 *
		 * Nothing is done if an anchor is already set, or if a block mapping is
		 * not supported for this range at @level. Always returns 0.
		 */
		static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
						     kvm_pte_t *ptep,
						     struct stage2_map_data *data)
		{
			if (data->anchor ||
			    !kvm_block_mapping_supported(addr, end, data->phys, level))
				return 0;

			/* Clear the table entry and flush before descending into it. */
			kvm_set_invalid_pte(ptep);
			kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0);
			data->anchor = ptep;

			return 0;
		}

		/*
		 * LEAF callback for the stage-2 map walker.
		 *
		 * With an anchor set, only drops the reference that a valid entry holds
		 * on the page-table page containing it. Otherwise tries to install a
		 * leaf at @ptep; if that is not possible at this level, a new table page
		 * is taken from the memcache and installed in its place (replacing any
		 * existing block mapping) so that the walk can descend further.
		 *
		 * Return: 0 on success, -EINVAL if no leaf could be installed at the last
		 * level, -ENOMEM if there is no memcache or it cannot supply a page.
		 */
		static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
						struct stage2_map_data *data)
		{
			/* Snapshot the entry value; childp is the table page we may install. */
			kvm_pte_t *childp, pte = *ptep;
			struct page *page = virt_to_page(ptep);

			if (data->anchor) {
				if (kvm_pte_valid(pte))
					put_page(page);

				return 0;
			}

			if (stage2_map_walker_try_leaf(addr, end, level, ptep, data))
				goto out_get_page;

			if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
				return -EINVAL;

			if (!data->memcache)
				return -ENOMEM;

			childp = kvm_mmu_memory_cache_alloc(data->memcache);
			if (!childp)
				return -ENOMEM;

			/*
			 * If we've run into an existing block mapping then replace it with
			 * a table. Accesses beyond 'end' that fall within the new table
			 * will be mapped lazily.
			 */
			if (kvm_pte_valid(pte)) {
				kvm_set_invalid_pte(ptep);
				kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
				put_page(page);
			}

			kvm_set_table_pte(ptep, childp);

		out_get_page:
			/* The entry now written at ptep takes a reference on its table page. */
			get_page(page);
			return 0;
		}

		/*
		 * TABLE_POST callback: while an anchor is set, free the table page that
		 * @ptep points to and drop the reference held on the page containing
		 * @ptep. On reaching the anchor itself, clear it and install the new
		 * mapping there via the leaf path.
		 *
		 * Return: 0, or the result of stage2_map_walk_leaf() at the anchor.
		 */
		static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
						      kvm_pte_t *ptep,
						      struct stage2_map_data *data)
		{
			if (!data->anchor)
				return 0;

			free_page((unsigned long)kvm_pte_follow(*ptep));
			put_page(virt_to_page(ptep));

			if (data->anchor != ptep)
				return 0;

			/* Back at the detached entry: the walk below it is complete. */
			data->anchor = NULL;
			return stage2_map_walk_leaf(addr, end, level, ptep, data);
		}

		/*
		 * The map walker registers for all three walk flags, and the callbacks
		 * cooperate through the 'anchor' field of 'struct stage2_map_data':
		 *
		 * - TABLE_PRE runs for table entries on the way down and looks for one
		 *   that this mapping could replace with a block entry. When it finds
		 *   one, it clears the entry, records it as the anchor and lets the walk
		 *   descend into the now detached table.
		 *
		 * - LEAF performs the mapping at the existing leaves when no anchor is
		 *   set. When an anchor _is_ set, it instead drops the references held
		 *   by valid leaves so that the pages beneath the anchor can be freed.
		 *
		 * - TABLE_POST does nothing without an anchor. Otherwise it frees the
		 *   page-table pages on the way back up and, on revisiting the anchor,
		 *   installs the block entry and resets the anchor to NULL.
		 *
		 * Return: the result of the selected callback, or -EINVAL for a flag
		 * value that is none of the three above.
		 */
		static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
					     enum kvm_pgtable_walk_flags flag, void * const arg)
		{
			struct stage2_map_data *map = arg;
			int ret = -EINVAL;

			switch (flag) {
			case KVM_PGTABLE_WALK_TABLE_PRE:
				ret = stage2_map_walk_table_pre(addr, end, level, ptep, map);
				break;
			case KVM_PGTABLE_WALK_LEAF:
				ret = stage2_map_walk_leaf(addr, end, level, ptep, map);
				break;
			case KVM_PGTABLE_WALK_TABLE_POST:
				ret = stage2_map_walk_table_post(addr, end, level, ptep, map);
				break;
			}

			return ret;
		}

		/*
		 * Install a stage-2 mapping of @size bytes at IPA @addr, backed by @phys,
		 * with the permissions and attributes described by @prot.
		 *
		 * @phys is aligned down to a page boundary before the walk. @mc supplies
		 * any page-table pages that have to be allocated; it may be NULL, in
		 * which case needing a new table page fails with -ENOMEM.
		 *
		 * Return: 0 on success; -EINVAL if @prot is rejected by
		 * stage2_map_set_prot_attr(); otherwise the result of the page-table walk.
		 */
		int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
					   u64 phys, enum kvm_pgtable_prot prot,
					   struct kvm_mmu_memory_cache *mc)
		{
			int ret;
			/* attr and anchor start out zero/NULL; attr is filled in below. */
			struct stage2_map_data map_data = {
				.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
				.mmu		= pgt->mmu,
				.memcache	= mc,
			};
			struct kvm_pgtable_walker walker = {
				.cb		= stage2_map_walker,
				.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
						  KVM_PGTABLE_WALK_LEAF |
						  KVM_PGTABLE_WALK_TABLE_POST,
				.arg		= &map_data,
			};

			ret = stage2_map_set_prot_attr(prot, &map_data);
			if (ret)
				return ret;

			ret = kvm_pgtable_walk(pgt, addr, size, &walker);
			/* Barrier is issued whether or not the walk succeeded. */
			dsb(ishst);
			return ret;
		}

		/*
		 * Flush the data cache for [@addr, @addr + @size), unless the CPUs have
		 * the ARM64_HAS_STAGE2_FWB capability, in which case nothing is done.
		 */
		static void stage2_flush_dcache(void *addr, u64 size)
		{
			if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
				__flush_dcache_area(addr, size);
		}

		/*
		 * Report whether the stage-2 entry @pte carries the NORMAL memory type.
		 *
		 * PAGE_S2_MEMATTR() yields the attribute already positioned within the
		 * descriptor (the map path ORs it straight into the PTE), so the entry
		 * must be masked in place rather than shifted down with FIELD_GET()
		 * before the two are compared.
		 */
		static bool stage2_pte_cacheable(kvm_pte_t pte)
		{
			u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;

			return memattr == PAGE_S2_MEMATTR(NORMAL);
		}

		/*
		 * Walker callback for kvm_pgtable_stage2_unmap(), invoked for leaf
		 * entries and for table entries after the walk beneath them.
		 *
		 * Invalid entries are skipped. A table entry is only torn down when
		 * page_count() of the table page it points to is 1. For everything else
		 * the entry is invalidated, the TLB is flushed for @addr, and the
		 * reference on the page containing @ptep is dropped. A cacheable leaf
		 * additionally has its memory flushed from the data cache, and a torn-down
		 * table has its page freed.
		 *
		 * Always returns 0.
		 */
		static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
					       enum kvm_pgtable_walk_flags flag,
					       void * const arg)
		{
			struct kvm_s2_mmu *mmu = arg;
			/* Snapshot the entry: it is still needed after ptep is cleared. */
			kvm_pte_t pte = *ptep, *childp = NULL;
			bool need_flush = false;

			if (!kvm_pte_valid(pte))
				return 0;

			if (kvm_pte_table(pte, level)) {
				childp = kvm_pte_follow(pte);

				if (page_count(virt_to_page(childp)) != 1)
					return 0;
			} else if (stage2_pte_cacheable(pte)) {
				need_flush = true;
			}

			/*
			 * This is similar to the map() path in that we unmap the entire
			 * block entry and rely on the remaining portions being faulted
			 * back lazily.
			 */
			kvm_set_invalid_pte(ptep);
			kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
			put_page(virt_to_page(ptep));

			if (need_flush) {
				stage2_flush_dcache(kvm_pte_follow(pte),
						    kvm_granule_size(level));
			}

			if (childp)
				free_page((unsigned long)childp);

			return 0;
		}

		/*
		 * Remove the stage-2 mappings covering @size bytes from IPA @addr.
		 *
		 * The walk visits leaf entries and revisits table entries on the way
		 * back up, with stage2_unmap_walker() as the callback.
		 *
		 * Return: the result of kvm_pgtable_walk().
		 */
		int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
		{
			struct kvm_pgtable_walker walker = {
				.cb	= stage2_unmap_walker,
				.arg	= pgt->mmu,
				.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
			};

			return kvm_pgtable_walk(pgt, addr, size, &walker);
		}

		int kvm_pgtable_stage2_init(struct kvm_pgtable pgt, struct kvm kvm)
		{
		size_t pgd_sz;

Admin message