Commit c0623f5e authored by Paolo Bonzini

Merge branch 'kvm-fixes' into 'next'

Pick up bugfixes from 5.9, otherwise various tests fail.
parents a4f1d94e e2e1a1c8
+7 −0
@@ -31,7 +31,14 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
 		isb();
 	}
 
+	/*
+	 * __load_guest_stage2() includes an ISB only when the AT
+	 * workaround is applied. Take care of the opposite condition,
+	 * ensuring that we always have an ISB, but not two ISBs back
+	 * to back.
+	 */
 	__load_guest_stage2(mmu);
+	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
 }
 
 static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
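
The comment added above relies on a complementary pair of ALTERNATIVE sites: __load_guest_stage2() emits an ISB only when ARM64_WORKAROUND_SPECULATIVE_AT is active, and the instruction patched in here emits one only when it is not. A minimal stand-alone sketch of that pattern, with hypothetical names and plain C in place of the kernel's ALTERNATIVE machinery:

/*
 * Illustration only: the two alternative sites act as a predicate and its
 * complement, so the combined path always executes exactly one barrier and
 * never two back to back.
 */
#include <stdbool.h>
#include <stdio.h>

static bool speculative_at_workaround;       /* stands in for the CPU capability */

static void isb(void) { puts("ISB"); }       /* stand-in for the real barrier */

static void load_guest_stage2(void)
{
	/* like ALTERNATIVE("nop", "isb", ...): barrier only with the workaround */
	if (speculative_at_workaround)
		isb();
}

static void tlb_switch_to_guest(void)
{
	load_guest_stage2();
	/* like ALTERNATIVE("isb", "nop", ...): barrier only without the workaround */
	if (!speculative_at_workaround)
		isb();
}

int main(void)
{
	tlb_switch_to_guest();               /* prints "ISB" exactly once */
	return 0;
}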
+7 −1
@@ -2274,6 +2274,12 @@ static int iret_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
+static int invd_interception(struct vcpu_svm *svm)
+{
+	/* Treat an INVD instruction as a NOP and just skip it. */
+	return kvm_skip_emulated_instruction(&svm->vcpu);
+}
+
 static int invlpg_interception(struct vcpu_svm *svm)
 {
 	if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
@@ -2891,7 +2897,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_RDPMC]			= rdpmc_interception,
 	[SVM_EXIT_CPUID]			= cpuid_interception,
 	[SVM_EXIT_IRET]                         = iret_interception,
-	[SVM_EXIT_INVD]                         = emulate_on_interception,
+	[SVM_EXIT_INVD]                         = invd_interception,
 	[SVM_EXIT_PAUSE]			= pause_interception,
 	[SVM_EXIT_HLT]				= halt_interception,
 	[SVM_EXIT_INVLPG]			= invlpg_interception,
+22 −15
@@ -128,6 +128,9 @@ static bool __read_mostly enable_preemption_timer = 1;
 module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 #endif
 
+extern bool __read_mostly allow_smaller_maxphyaddr;
+module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
+
 #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
 #define KVM_VM_CR0_ALWAYS_ON				\
@@ -834,6 +837,18 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	 */
 	if (is_guest_mode(vcpu))
 		eb |= get_vmcs12(vcpu)->exception_bitmap;
+        else {
+		/*
+		 * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched
+		 * between guest and host.  In that case we only care about present
+		 * faults.  For vmcs02, however, PFEC_MASK and PFEC_MATCH are set in
+		 * prepare_vmcs02_rare.
+		 */
+		bool selective_pf_trap = enable_ept && (eb & (1u << PF_VECTOR));
+		int mask = selective_pf_trap ? PFERR_PRESENT_MASK : 0;
+		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask);
+		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, mask);
+	}
 
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
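
For background on why PFEC_MASK and PFEC_MATCH are now written together with the exception bitmap: per the Intel SDM, a guest #PF causes a VM exit iff the #PF bit in the exception bitmap agrees with the outcome of the mask/match test on the page-fault error code. A hedged, self-contained sketch of that rule (the helper name and types are illustrative, not part of the patch):

#include <stdbool.h>
#include <stdint.h>

#define PF_VECTOR          14
#define PFERR_PRESENT_MASK (1u << 0)

/* Illustration only: a VM exit happens when the exception-bitmap bit for #PF
 * and the result of (pfec & mask) == match are equal. */
static bool pf_causes_vmexit(uint32_t exception_bitmap, uint32_t pfec,
			     uint32_t pfec_mask, uint32_t pfec_match)
{
	bool bitmap_bit   = exception_bitmap & (1u << PF_VECTOR);
	bool pfec_matches = (pfec & pfec_mask) == pfec_match;

	return bitmap_bit == pfec_matches;
}

With selective_pf_trap, mask == match == PFERR_PRESENT_MASK, so only faults on present pages (the reserved-bit faults used for MAXPHYADDR emulation) cause exits; with mask == match == 0 the test is always true and the bitmap bit alone decides.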
@@ -4363,16 +4378,6 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 		vmx->pt_desc.guest.output_mask = 0x7F;
 		vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
 	}
-
-	/*
-	 * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched
-	 * between guest and host.  In that case we only care about present
-	 * faults.
-	 */
-	if (enable_ept) {
-		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, PFERR_PRESENT_MASK);
-		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, PFERR_PRESENT_MASK);
-	}
 }
 
 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -4814,6 +4819,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 			 * EPT will cause page fault only if we need to
 			 * detect illegal GPAs.
 			 */
+			WARN_ON_ONCE(!allow_smaller_maxphyaddr);
 			kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
 			return 1;
 		} else
@@ -5343,7 +5349,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	 * would also use advanced VM-exit information for EPT violations to
 	 * reconstruct the page fault error code.
 	 */
-	if (unlikely(kvm_vcpu_is_illegal_gpa(vcpu, gpa)))
+	if (unlikely(allow_smaller_maxphyaddr && kvm_vcpu_is_illegal_gpa(vcpu, gpa)))
 		return kvm_emulate_instruction(vcpu, 0);
 
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
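
Both of the VMX hunks above gate the emulation path on allow_smaller_maxphyaddr before consulting kvm_vcpu_is_illegal_gpa(). The predicate itself just asks whether the faulting GPA uses physical-address bits beyond the guest's CPUID-advertised MAXPHYADDR; a hedged sketch with illustrative names:

#include <stdbool.h>
#include <stdint.h>

/* Illustration only: a GPA is "illegal" for the guest when any bit at or
 * above the guest's advertised MAXPHYADDR is set. */
static bool gpa_is_illegal(uint64_t gpa, unsigned int guest_maxphyaddr)
{
	return (gpa >> guest_maxphyaddr) != 0;
}

/* e.g. gpa_is_illegal(1ULL << 40, 39) is true, gpa_is_illegal(1ULL << 38, 39) is false */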
@@ -8008,10 +8014,11 @@ static int __init vmx_init(void)
 	vmx_check_vmcs12_offsets();
 
 	/*
-	 * Intel processors don't have problems with
-	 * GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable
-	 * it for VMX by default
+	 * Shadow paging doesn't have a (further) performance penalty
+	 * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it
+	 * by default
 	 */
-	allow_smaller_maxphyaddr = true;
+	if (!enable_ept)
+		allow_smaller_maxphyaddr = true;
 
 	return 0;
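
Net effect of this hunk plus the new module parameter above: with shadow paging (!enable_ept) the smaller-guest-MAXPHYADDR handling stays enabled by default, while with EPT it now defaults off and must be requested explicitly, for example (illustrative invocation, not part of the commit):

	modprobe kvm_intel allow_smaller_maxphyaddr=1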
+4 −1
@@ -470,7 +470,10 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)

 static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
 {
-	return !enable_ept || cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+	if (!enable_ept)
+		return true;
+
+	return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
 }
 
 static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
+6 −5
@@ -194,7 +194,7 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
 u64 __read_mostly host_efer;
 EXPORT_SYMBOL_GPL(host_efer);
 
-bool __read_mostly allow_smaller_maxphyaddr;
+bool __read_mostly allow_smaller_maxphyaddr = 0;
 EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
 
 static u64 __read_mostly host_xss;
@@ -982,6 +982,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
 	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
 				   X86_CR4_SMEP;
+	unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
 
 	if (kvm_valid_cr4(vcpu, cr4))
 		return 1;
@@ -1009,7 +1010,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (kvm_x86_ops.set_cr4(vcpu, cr4))
 		return 1;
 
-	if (((cr4 ^ old_cr4) & pdptr_bits) ||
+	if (((cr4 ^ old_cr4) & mmu_role_bits) ||
 	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_mmu_reset_context(vcpu);
 
@@ -3400,9 +3401,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 * even when not intercepted. AMD manual doesn't explicitly
 		 * state this but appears to behave the same.
 		 *
-		 * Unconditionally return L1's TSC offset on userspace reads
-		 * so that userspace reads and writes always operate on L1's
-		 * offset, e.g. to ensure deterministic behavior for migration.
+		 * On userspace reads and writes, however, we unconditionally
+		 * return L1's TSC value to ensure backwards-compatible
+		 * behavior for migration.
 		 */
 		u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
 							    vcpu->arch.tsc_offset;
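
The offset picked above is added to the host TSC to form the value reported to the reader. A self-contained, hedged sketch of the distinction (TSC scaling omitted; names and the L2-offset composition are illustrative assumptions, not code from this hunk):

#include <stdbool.h>
#include <stdint.h>

/* Illustration only: while L2 runs, the active offset is L1's offset plus the
 * offset L1 programmed for L2; host-initiated (userspace) reads deliberately
 * ignore the L2 part so migration always observes L1's clock. */
static uint64_t reported_tsc(uint64_t host_tsc, uint64_t l1_offset,
			     uint64_t l2_relative_offset,
			     bool l2_running, bool host_initiated)
{
	uint64_t active_offset = l1_offset + (l2_running ? l2_relative_offset : 0);

	return host_tsc + (host_initiated ? l1_offset : active_offset);
}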