Commit 0577d1ab authored by Sean Christopherson, committed by Paolo Bonzini

KVM: Terminate memslot walks via used_slots

Refactor memslot handling to treat the number of used slots as the de
facto size of the memslot array, e.g. return NULL from id_to_memslot()
when an invalid index is provided instead of relying on npages==0 to
detect an invalid memslot.  Rework the sorting and walking of memslots
in advance of dynamically sizing memslots to aid bisection and debug,
e.g. with luck, a bug in the refactoring will bisect here and/or hit a
WARN instead of randomly corrupting memory.
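
The new contract in caller terms, as a rough sketch (condensed from the
kvm_host.h hunk below; the real helper also WARNs if the slot's id does
not match the requested id):

	slot = id_to_memslot(slots, id);	/* may now return NULL */
	if (!slot || !slot->dirty_bitmap)	/* callers must check */
		return -ENOENT;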

Alternatively, a global null/invalid memslot could be returned, i.e. so
callers of id_to_memslot() don't have to explicitly check for a NULL
memslot, but that approach runs the risk of introducing difficult-to-
debug issues, e.g. if the global null slot is modified.  Constifying
the return from id_to_memslot() to combat such issues is possible, but
would require a massive refactoring of arch specific code and would
still be susceptible to casting shenanigans.

Add function comments to update_memslots() and search_memslots() to
explicitly (and loudly) state how memslots are sorted.
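
For illustration only, a minimal sketch (not the kernel's exact code) of
a GFN lookup over an array sorted from highest base_gfn to lowest and
bounded by used_slots, in the style of search_memslots():

	static struct kvm_memory_slot *
	find_slot_sketch(struct kvm_memslots *slots, gfn_t gfn)
	{
		struct kvm_memory_slot *m = slots->memslots;
		int lo = 0, hi = slots->used_slots;

		/* Find the lowest index whose base_gfn is <= gfn. */
		while (lo < hi) {
			int mid = lo + (hi - lo) / 2;

			if (gfn >= m[mid].base_gfn)
				hi = mid;	/* candidate, keep looking left */
			else
				lo = mid + 1;
		}

		if (lo < slots->used_slots && gfn >= m[lo].base_gfn &&
		    gfn < m[lo].base_gfn + m[lo].npages)
			return &m[lo];

		return NULL;	/* gfn falls in a hole between slots */
	}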

Opportunistically stuff @hva with a non-canonical value when deleting a
private memslot on x86 to detect bogus usage of the freed slot.
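
On x86-64 with 48-bit virtual addresses, an address is canonical only if
bits 63:47 all equal bit 47.  0xdead << 48 sets bits 63:48 to 0xdead with
bit 47 clear, so a later dereference of the stale hva faults loudly
instead of silently aliasing address zero.  A stand-alone sketch of the
check (hypothetical helper, not kernel code):

	#include <stdbool.h>
	#include <stdint.h>

	/* Canonical iff sign-extending from bit 47 reproduces the value. */
	static bool is_canonical_48(uint64_t va)
	{
		return (uint64_t)((int64_t)(va << 16) >> 16) == va;
	}

	/* is_canonical_48(0xdeadull << 48) == false */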

No functional change intended.

Tested-by: Christoffer Dall <christoffer.dall@arm.com>
Tested-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 2a49f61d
+1 −1
@@ -4400,7 +4400,7 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
 	slots = kvm_memslots(kvm);
 	memslot = id_to_memslot(slots, log->slot);
 	r = -ENOENT;
-	if (!memslot->dirty_bitmap)
+	if (!memslot || !memslot->dirty_bitmap)
 		goto out;
 
 	/*
+8 −7
@@ -9715,9 +9715,9 @@ void kvm_arch_sync_events(struct kvm *kvm)
 int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 {
 	int i, r;
-	unsigned long hva;
+	unsigned long hva, uninitialized_var(old_npages);
 	struct kvm_memslots *slots = kvm_memslots(kvm);
-	struct kvm_memory_slot *slot, old;
+	struct kvm_memory_slot *slot;
 
 	/* Called with kvm->slots_lock held.  */
 	if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
@@ -9725,7 +9725,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 
 	slot = id_to_memslot(slots, id);
 	if (size) {
-		if (slot->npages)
+		if (slot && slot->npages)
 			return -EEXIST;
 
 		/*
@@ -9737,13 +9737,14 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 		if (IS_ERR((void *)hva))
 			return PTR_ERR((void *)hva);
 	} else {
-		if (!slot->npages)
+		if (!slot || !slot->npages)
 			return 0;
 
-		hva = 0;
+		/* Stuff a non-canonical value to catch use-after-delete. */
+		hva = 0xdeadull << 48;
+		old_npages = slot->npages;
 	}
 
-	old = *slot;
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
 		struct kvm_userspace_memory_region m;
 
@@ -9758,7 +9759,7 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 	}
 
 	if (!size)
-		vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
+		vm_munmap(hva, old_npages * PAGE_SIZE);
 
 	return 0;
 }
+12 −6
@@ -574,8 +574,9 @@ static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
 
 #define kvm_for_each_memslot(memslot, slots)				\
 	for (memslot = &slots->memslots[0];				\
-	      memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
-		memslot++)
+	     memslot < slots->memslots + slots->used_slots; memslot++)	\
+		if (WARN_ON_ONCE(!memslot->npages)) {			\
+		} else
 
 void kvm_vcpu_destroy(struct kvm_vcpu *vcpu);
 
@@ -635,12 +636,15 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
 	return __kvm_memslots(vcpu->kvm, as_id);
 }
 
-static inline struct kvm_memory_slot *
-id_to_memslot(struct kvm_memslots *slots, int id)
+static inline
+struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
 {
 	int index = slots->id_to_index[id];
 	struct kvm_memory_slot *slot;
 
+	if (index < 0)
+		return NULL;
+
 	slot = &slots->memslots[index];
 
 	WARN_ON(slot->id != id);
@@ -1012,6 +1016,8 @@ bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
  * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
  * gfn_to_memslot() itself isn't here as an inline because that would
  * bloat other code too much.
+ *
+ * IMPORTANT: Slots are sorted from highest GFN to lowest GFN!
  */
 static inline struct kvm_memory_slot *
 search_memslots(struct kvm_memslots *slots, gfn_t gfn)
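
The rewritten kvm_for_each_memslot() above uses a common macro idiom: the
caller's loop body binds to the trailing "else", so a slot that trips the
WARN_ON_ONCE() is skipped, and the macro still parses correctly when the
caller's body is a single unbraced statement.  A stand-alone illustration
with a hypothetical macro:

	/* Iterate over positive entries only; same if/else skip trick. */
	#define for_each_positive(p, arr, n)			\
		for ((p) = (arr); (p) < (arr) + (n); (p)++)	\
			if (*(p) <= 0) {			\
			} else

	static int sum_positive(const int *arr, int n)
	{
		const int *p;
		int sum = 0;

		for_each_positive(p, arr, n)
			sum += *p;	/* this is the "else" body */
		return sum;
	}
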
+7 −2
@@ -1534,8 +1534,13 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
 {
 	struct kvm_memslots *slots = kvm_memslots(kvm);
 	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
-	phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
-	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
+	phys_addr_t start, end;
+
+	if (WARN_ON_ONCE(!memslot))
+		return;
+
+	start = memslot->base_gfn << PAGE_SHIFT;
+	end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
 
 	spin_lock(&kvm->mmu_lock);
 	stage2_wp_range(kvm, start, end);
+158 −52
@@ -566,7 +566,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
 		return NULL;
 
 	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		slots->id_to_index[i] = slots->memslots[i].id = i;
+		slots->id_to_index[i] = slots->memslots[i].id = -1;
 
 	return slots;
 }
@@ -870,63 +870,162 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
 }
 
-/*
- * Insert memslot and re-sort memslots based on their GFN,
- * so binary search could be used to lookup GFN.
- * Sorting algorithm takes advantage of having initially
- * sorted array and known changed memslot position.
- */
-static void update_memslots(struct kvm_memslots *slots,
-			    struct kvm_memory_slot *new,
-			    enum kvm_mr_change change)
-{
-	int id = new->id;
-	int i = slots->id_to_index[id];
-	struct kvm_memory_slot *mslots = slots->memslots;
-
-	WARN_ON(mslots[i].id != id);
-	switch (change) {
-	case KVM_MR_CREATE:
-		slots->used_slots++;
-		WARN_ON(mslots[i].npages || !new->npages);
-		break;
-	case KVM_MR_DELETE:
-		slots->used_slots--;
-		WARN_ON(new->npages || !mslots[i].npages);
-		break;
-	default:
-		break;
-	}
-
-	while (i < KVM_MEM_SLOTS_NUM - 1 &&
-	       new->base_gfn <= mslots[i + 1].base_gfn) {
-		if (!mslots[i + 1].npages)
-			break;
-		mslots[i] = mslots[i + 1];
-		slots->id_to_index[mslots[i].id] = i;
-		i++;
-	}
-
-	/*
-	 * The ">=" is needed when creating a slot with base_gfn == 0,
-	 * so that it moves before all those with base_gfn == npages == 0.
-	 *
-	 * On the other hand, if new->npages is zero, the above loop has
-	 * already left i pointing to the beginning of the empty part of
-	 * mslots, and the ">=" would move the hole backwards in this
-	 * case---which is wrong.  So skip the loop when deleting a slot.
-	 */
-	if (new->npages) {
-		while (i > 0 &&
-		       new->base_gfn >= mslots[i - 1].base_gfn) {
-			mslots[i] = mslots[i - 1];
-			slots->id_to_index[mslots[i].id] = i;
-			i--;
-		}
-	} else
-		WARN_ON_ONCE(i != slots->used_slots);
-
-	mslots[i] = *new;
-	slots->id_to_index[mslots[i].id] = i;
-}
+/*
+ * Delete a memslot by decrementing the number of used slots and shifting all
+ * other entries in the array forward one spot.
+ */
+static inline void kvm_memslot_delete(struct kvm_memslots *slots,
+				      struct kvm_memory_slot *memslot)
+{
+	struct kvm_memory_slot *mslots = slots->memslots;
+	int i;
+
+	if (WARN_ON(slots->id_to_index[memslot->id] == -1))
+		return;
+
+	slots->used_slots--;
+
+	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
+		mslots[i] = mslots[i + 1];
+		slots->id_to_index[mslots[i].id] = i;
+	}
+	mslots[i] = *memslot;
+	slots->id_to_index[memslot->id] = -1;
+}
+
+/*
+ * "Insert" a new memslot by incrementing the number of used slots.  Returns
+ * the new slot's initial index into the memslots array.
+ */
+static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
+{
+	return slots->used_slots++;
+}
+
+/*
+ * Move a changed memslot backwards in the array by shifting existing slots
+ * with a higher GFN toward the front of the array.  Note, the changed memslot
+ * itself is not preserved in the array, i.e. not swapped at this time, only
+ * its new index into the array is tracked.  Returns the changed memslot's
+ * current index into the memslots array.
+ */
+static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
+					    struct kvm_memory_slot *memslot)
+{
+	struct kvm_memory_slot *mslots = slots->memslots;
+	int i;
+
+	if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) ||
+	    WARN_ON_ONCE(!slots->used_slots))
+		return -1;
+
+	/*
+	 * Move the target memslot backward in the array by shifting existing
+	 * memslots with a higher GFN (than the target memslot) towards the
+	 * front of the array.
+	 */
+	for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) {
+		if (memslot->base_gfn > mslots[i + 1].base_gfn)
+			break;
+
+		WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn);
+
+		/* Shift the next memslot forward one and update its index. */
+		mslots[i] = mslots[i + 1];
+		slots->id_to_index[mslots[i].id] = i;
+	}
+	return i;
+}
+
+/*
+ * Move a changed memslot forwards in the array by shifting existing slots with
+ * a lower GFN toward the back of the array.  Note, the changed memslot itself
+ * is not preserved in the array, i.e. not swapped at this time, only its new
+ * index into the array is tracked.  Returns the changed memslot's final index
+ * into the memslots array.
+ */
+static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
+					   struct kvm_memory_slot *memslot,
+					   int start)
+{
+	struct kvm_memory_slot *mslots = slots->memslots;
+	int i;
+
+	for (i = start; i > 0; i--) {
+		if (memslot->base_gfn < mslots[i - 1].base_gfn)
+			break;
+
+		WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn);
+
+		/* Shift the next memslot back one and update its index. */
+		mslots[i] = mslots[i - 1];
+		slots->id_to_index[mslots[i].id] = i;
+	}
+	return i;
+}
+
+/*
+ * Re-sort memslots based on their GFN to account for an added, deleted, or
+ * moved memslot.  Sorting memslots by GFN allows using a binary search during
+ * memslot lookup.
+ *
+ * IMPORTANT: Slots are sorted from highest GFN to lowest GFN!  I.e. the entry
+ * at memslots[0] has the highest GFN.
+ *
+ * The sorting algorithm takes advantage of having initially sorted memslots
+ * and knowing the position of the changed memslot.  Sorting is also optimized
+ * by not swapping the updated memslot and instead only shifting other memslots
+ * and tracking the new index for the updated memslot.  Only once its final
+ * index is known is the updated memslot copied into its position in the array.
+ *
+ *  - When deleting a memslot, the deleted memslot simply needs to be moved to
+ *    the end of the array.
+ *
+ *  - When creating a memslot, the algorithm "inserts" the new memslot at the
+ *    end of the array and then moves it forward to its correct location.
+ *
+ *  - When moving a memslot, the algorithm first moves the updated memslot
+ *    backward to handle the scenario where the memslot's GFN was changed to a
+ *    lower value.  update_memslots() then falls through and runs the same flow
+ *    as creating a memslot to move the memslot forward to handle the scenario
+ *    where its GFN was changed to a higher value.
+ *
+ * Note, slots are sorted from highest->lowest instead of lowest->highest for
+ * historical reasons.  Originally, invalid memslots were denoted by having
+ * GFN=0, thus sorting from highest->lowest naturally sorted invalid memslots
+ * to the end of the array.  The current algorithm uses dedicated logic to
+ * delete a memslot and thus does not rely on invalid memslots having GFN=0.
+ *
+ * The other historical motivation for highest->lowest was to improve the
+ * performance of memslot lookup.  KVM originally used a linear search starting
+ * at memslots[0].  On x86, the largest memslot usually has one of the highest,
+ * if not *the* highest, GFN, as the bulk of the guest's RAM is located in a
+ * single memslot above the 4gb boundary.  As the largest memslot is also the
+ * most likely to be referenced, sorting it to the front of the array was
+ * advantageous.  The current binary search starts from the middle of the array
+ * and uses an LRU pointer to improve performance for all memslots and GFNs.
+ */
+static void update_memslots(struct kvm_memslots *slots,
+			    struct kvm_memory_slot *memslot,
+			    enum kvm_mr_change change)
+{
+	int i;
+
+	if (change == KVM_MR_DELETE) {
+		kvm_memslot_delete(slots, memslot);
+	} else {
+		if (change == KVM_MR_CREATE)
+			i = kvm_memslot_insert_back(slots);
+		else
+			i = kvm_memslot_move_backward(slots, memslot);
+		i = kvm_memslot_move_forward(slots, memslot, i);
+
+		/*
+		 * Copy the memslot to its new position in memslots and update
+		 * its index accordingly.
+		 */
+		slots->memslots[i] = *memslot;
+		slots->id_to_index[memslot->id] = i;
+	}
+}
 
 static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem)
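
To make the shift-without-swap flow concrete, a worked example with
hypothetical GFNs (slots sorted high->low):

	/*
	 * index:     0      1      2
	 * base_gfn:  0x900  0x500  0x100
	 *
	 * Move the slot at index 2 to base_gfn 0x700 (KVM_MR_MOVE):
	 *
	 * 1. kvm_memslot_move_backward() starts at i = 2, which is already
	 *    the last used slot, so nothing shifts and it returns i = 2.
	 * 2. kvm_memslot_move_forward(): 0x700 >= mslots[1].base_gfn
	 *    (0x500), so slot 1 shifts back to index 2 and i becomes 1;
	 *    then 0x700 < mslots[0].base_gfn (0x900), so the loop breaks.
	 * 3. update_memslots() copies the changed slot into index 1:
	 *
	 * index:     0      1      2
	 * base_gfn:  0x900  0x700  0x500
	 */
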
@@ -1106,7 +1205,14 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	 * memslot needs to be referenced after calling update_memslots(), e.g.
 	 * to free its resources and for arch specific behavior.
 	 */
-	old = *id_to_memslot(__kvm_memslots(kvm, as_id), id);
+	tmp = id_to_memslot(__kvm_memslots(kvm, as_id), id);
+	if (tmp) {
+		old = *tmp;
+		tmp = NULL;
+	} else {
+		memset(&old, 0, sizeof(old));
+		old.id = id;
+	}
 
 	if (!mem->memory_size)
 		return kvm_delete_memslot(kvm, mem, &old, as_id);
@@ -1224,7 +1330,7 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
 
 	slots = __kvm_memslots(kvm, as_id);
 	*memslot = id_to_memslot(slots, id);
-	if (!(*memslot)->dirty_bitmap)
+	if (!(*memslot) || !(*memslot)->dirty_bitmap)
 		return -ENOENT;
 
 	kvm_arch_sync_dirty_log(kvm, *memslot);
@@ -1282,10 +1388,10 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
 
 	slots = __kvm_memslots(kvm, as_id);
 	memslot = id_to_memslot(slots, id);
+	if (!memslot || !memslot->dirty_bitmap)
+		return -ENOENT;
 
 	dirty_bitmap = memslot->dirty_bitmap;
-	if (!dirty_bitmap)
-		return -ENOENT;
 
 	kvm_arch_sync_dirty_log(kvm, memslot);
 
@@ -1393,10 +1499,10 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
 
 	slots = __kvm_memslots(kvm, as_id);
 	memslot = id_to_memslot(slots, id);
+	if (!memslot || !memslot->dirty_bitmap)
+		return -ENOENT;
 
 	dirty_bitmap = memslot->dirty_bitmap;
-	if (!dirty_bitmap)
-		return -ENOENT;
 
 	n = ALIGN(log->num_pages, BITS_PER_LONG) / 8;