Commit 84b13499 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull kvm fixes from Paolo Bonzini:
 "A bit on the bigger side, mostly due to me being on vacation, then
  busy, then on parental leave, but there's nothing worrisome.

  ARM:
   - Multiple stolen time fixes, with a new capability to match x86
   - Fix for hugetlbfs mappings when PUD and PMD are the same level
   - Fix for hugetlbfs mappings when PTE mappings are enforced (dirty
     logging, for example)
   - Fix tracing output of 64bit values

  x86:
   - nSVM state restore fixes
   - Async page fault fixes
   - Lots of small fixes everywhere"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (25 commits)
  KVM: emulator: more strict rsm checks.
  KVM: nSVM: more strict SMM checks when returning to nested guest
  SVM: nSVM: setup nested msr permission bitmap on nested state load
  SVM: nSVM: correctly restore GIF on vmexit from nesting after migration
  x86/kvm: don't forget to ACK async PF IRQ
  x86/kvm: properly use DEFINE_IDTENTRY_SYSVEC() macro
  KVM: VMX: Don't freeze guest when event delivery causes an APIC-access exit
  KVM: SVM: avoid emulation with stale next_rip
  KVM: x86: always allow writing '0' to MSR_KVM_ASYNC_PF_EN
  KVM: SVM: Periodically schedule when unregistering regions on destroy
  KVM: MIPS: Change the definition of kvm type
  kvm x86/mmu: use KVM_REQ_MMU_SYNC to sync when needed
  KVM: nVMX: Fix the update value of nested load IA32_PERF_GLOBAL_CTRL control
  KVM: fix memory leak in kvm_io_bus_unregister_dev()
  KVM: Check the allocation of pv cpu mask
  KVM: nVMX: Update VMCS02 when L2 PAE PDPTE updates detected
  KVM: arm64: Update page shift if stage 2 block mapping not supported
  KVM: arm64: Fix address truncation in traces
  KVM: arm64: Do not try to map PUDs when they are folded into PMD
  arm64/x86: KVM: Introduce steal-time cap
  ...
parents b952e974 37f66bbe
Loading
Loading
Loading
Loading
+18 −4
Original line number Diff line number Diff line
@@ -6130,7 +6130,7 @@ HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
-----------------------------------

:Architecture: x86
:Architectures: x86

This capability indicates that KVM running on top of Hyper-V hypervisor
enables Direct TLB flush for its guests meaning that TLB flush
@@ -6143,19 +6143,33 @@ in CPUID and only exposes Hyper-V identification. In this case, guest
thinks it's running on Hyper-V and only uses Hyper-V hypercalls.

8.22 KVM_CAP_S390_VCPU_RESETS
-----------------------------

Architectures: s390
:Architectures: s390

This capability indicates that the KVM_S390_NORMAL_RESET and
KVM_S390_CLEAR_RESET ioctls are available.

8.23 KVM_CAP_S390_PROTECTED
---------------------------

Architecture: s390

:Architectures: s390

This capability indicates that the Ultravisor has been initialized and
KVM can therefore start protected VMs.
This capability governs the KVM_S390_PV_COMMAND ioctl and the
KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected
guests when the state change is invalid.

8.24 KVM_CAP_STEAL_TIME
-----------------------

:Architectures: arm64, x86

This capability indicates that KVM supports steal time accounting.
When steal time accounting is supported it may be enabled with
architecture-specific interfaces.  This capability and the architecture-
specific interfaces must be consistent, i.e. if one says the feature
is supported, then the other should as well and vice versa.  For arm64
see Documentation/virt/kvm/devices/vcpu.rst "KVM_ARM_VCPU_PVTIME_CTRL".
For x86 see Documentation/virt/kvm/msr.rst "MSR_KVM_STEAL_TIME".
+1 −1
Original line number Diff line number Diff line
@@ -368,7 +368,6 @@ struct kvm_vcpu_arch {

	/* Guest PV state */
	struct {
		u64 steal;
		u64 last_steal;
		gpa_t base;
	} steal;
@@ -544,6 +543,7 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
void kvm_update_stolen_time(struct kvm_vcpu *vcpu);

bool kvm_arm_pvtime_supported(void);
int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
			    struct kvm_device_attr *attr);
int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
+3 −0
Original line number Diff line number Diff line
@@ -206,6 +206,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
		 */
		r = 1;
		break;
	case KVM_CAP_STEAL_TIME:
		r = kvm_arm_pvtime_supported();
		break;
	default:
		r = kvm_arch_vm_ioctl_check_extension(kvm, ext);
		break;
+7 −1
Original line number Diff line number Diff line
@@ -1877,6 +1877,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	    !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
		force_pte = true;
		vma_pagesize = PAGE_SIZE;
		vma_shift = PAGE_SHIFT;
	}

	/*
@@ -1970,7 +1971,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
		(fault_status == FSC_PERM &&
		 stage2_is_exec(mmu, fault_ipa, vma_pagesize));

	if (vma_pagesize == PUD_SIZE) {
	/*
	 * If PUD_SIZE == PMD_SIZE, there is no real PUD level, and
	 * all we have is a 2-level page table. Trying to map a PUD in
	 * this case would be fatally wrong.
	 */
	if (PUD_SIZE != PMD_SIZE && vma_pagesize == PUD_SIZE) {
		pud_t new_pud = kvm_pfn_pud(pfn, mem_type);

		new_pud = kvm_pud_mkhuge(new_pud);
+13 −16
Original line number Diff line number Diff line
@@ -13,25 +13,22 @@
void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	u64 steal;
	__le64 steal_le;
	u64 offset;
	int idx;
	u64 base = vcpu->arch.steal.base;
	u64 last_steal = vcpu->arch.steal.last_steal;
	u64 offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
	u64 steal = 0;
	int idx;

	if (base == GPA_INVALID)
		return;

	/* Let's do the local bookkeeping */
	steal = vcpu->arch.steal.steal;
	steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
	vcpu->arch.steal.steal = steal;

	steal_le = cpu_to_le64(steal);
	idx = srcu_read_lock(&kvm->srcu);
	offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
	kvm_put_guest(kvm, base + offset, steal_le, u64);
	if (!kvm_get_guest(kvm, base + offset, steal)) {
		steal = le64_to_cpu(steal);
		vcpu->arch.steal.last_steal = READ_ONCE(current->sched_info.run_delay);
		steal += vcpu->arch.steal.last_steal - last_steal;
		kvm_put_guest(kvm, base + offset, cpu_to_le64(steal));
	}
	srcu_read_unlock(&kvm->srcu, idx);
}

@@ -43,6 +40,7 @@ long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
	switch (feature) {
	case ARM_SMCCC_HV_PV_TIME_FEATURES:
	case ARM_SMCCC_HV_PV_TIME_ST:
		if (vcpu->arch.steal.base != GPA_INVALID)
			val = SMCCC_RET_SUCCESS;
		break;
	}
@@ -64,7 +62,6 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
	 * Start counting stolen time from the time the guest requests
	 * the feature enabled.
	 */
	vcpu->arch.steal.steal = 0;
	vcpu->arch.steal.last_steal = current->sched_info.run_delay;

	idx = srcu_read_lock(&kvm->srcu);
@@ -74,7 +71,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
	return base;
}

static bool kvm_arm_pvtime_supported(void)
bool kvm_arm_pvtime_supported(void)
{
	return !!sched_info_on();
}
Loading