Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm (8c5bd25b) · Commits · 戴 / test

arch/x86/kvm/mmu.c

+4 −4

Original line number	Diff line number	Diff line
		@@ -3393,7 +3393,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
		* here.
		*/
		if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
		level == PT_PAGE_TABLE_LEVEL &&
		!kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
		PageTransCompoundMap(pfn_to_page(pfn)) &&
		!mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
		unsigned long mask;
		@@ -6009,8 +6009,8 @@ restart:
		* the guest, and the guest page table is using 4K page size
		* mapping if the indirect sp has level = 1.
		*/
		if (sp->role.direct &&
		!kvm_is_reserved_pfn(pfn) &&
		if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
		!kvm_is_zone_device_pfn(pfn) &&
		PageTransCompoundMap(pfn_to_page(pfn))) {
		pte_list_remove(rmap_head, sptep);

arch/x86/kvm/vmx/vmx.c

+20 −3

Original line number	Diff line number	Diff line
		@@ -1268,6 +1268,18 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
		if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
		return;

		/*
		* If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
		* PID.NDST: pi_post_block is the one expected to change PID.NDST and the
		* wakeup handler expects the vCPU to be on the blocked_vcpu_list that
		* matches PID.NDST. Otherwise, a vCPU may not be able to be woken up
		* correctly.
		*/
		if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
		pi_clear_sn(pi_desc);
		goto after_clear_sn;
		}

		/* The full case. */
		do {
		old.control = new.control = pi_desc->control;
		@@ -1283,6 +1295,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
		} while (cmpxchg64(&pi_desc->control, old.control,
		new.control) != old.control);

		after_clear_sn:

		/*
		* Clear SN before reading the bitmap. The VT-d firmware
		* writes the bitmap and reads SN atomically (5.2.3 in the
		@@ -1291,7 +1305,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
		*/
		smp_mb__after_atomic();

		if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS))
		if (!pi_is_pir_empty(pi_desc))
		pi_set_on(pi_desc);
		}

		@@ -6137,7 +6151,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
		if (pi_test_on(&vmx->pi_desc)) {
		pi_clear_on(&vmx->pi_desc);
		/*
		* IOMMU can write to PIR.ON, so the barrier matters even on UP.
		* IOMMU can write to PID.ON, so the barrier matters even on UP.
		* But on x86 this is just a compiler barrier anyway.
		*/
		smp_mb__after_atomic();
		@@ -6167,7 +6181,10 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)

		/*
		 * Tell whether the vCPU's posted-interrupt descriptor shows a pending
		 * interrupt.
		 *
		 * Returns true when the descriptor's ON bit is set, or when the SN bit
		 * is set and the PIR bitmap is not empty.
		 *
		 * NOTE(review): the SN && !PIR-empty case presumably covers interrupts
		 * posted into PIR while notifications were suppressed, where ON would
		 * not have been raised - confirm against the VT-d specification.
		 */
		static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
		{
			struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);

			return pi_test_on(pi_desc) ||
				(pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
		}

		static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)

arch/x86/kvm/vmx/vmx.h

+11 −0

Original line number	Diff line number	Diff line
		@@ -355,6 +355,11 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
		return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
		}

		/*
		 * Return true when no bit is set in the first NR_VECTORS bits of the
		 * descriptor's PIR bitmap.
		 */
		static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
		{
			unsigned long *pir_bits = (unsigned long *)pi_desc->pir;

			return bitmap_empty(pir_bits, NR_VECTORS);
		}

		static inline void pi_set_sn(struct pi_desc *pi_desc)
		{
		set_bit(POSTED_INTR_SN,
		@@ -373,6 +378,12 @@ static inline void pi_clear_on(struct pi_desc *pi_desc)
		(unsigned long *)&pi_desc->control);
		}

		/*
		 * Clear the POSTED_INTR_SN bit in the descriptor's control word.
		 */
		static inline void pi_clear_sn(struct pi_desc *pi_desc)
		{
			unsigned long *ctrl_word = (unsigned long *)&pi_desc->control;

			clear_bit(POSTED_INTR_SN, ctrl_word);
		}

		static inline int pi_test_on(struct pi_desc *pi_desc)
		{
		return test_bit(POSTED_INTR_ON,

arch/x86/kvm/x86.c

+26 −30

Original line number	Diff line number	Diff line
		@@ -1133,13 +1133,15 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
		* List of msr numbers which we expose to userspace through KVM_GET_MSRS
		* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
		*
		* This list is modified at module load time to reflect the
		* The three MSR lists (msrs_to_save, emulated_msrs, msr_based_features)
		* extract the supported MSRs from the related const lists.
		* msrs_to_save is selected from the msrs_to_save_all to reflect the
		* capabilities of the host cpu. This capabilities test skips MSRs that are
		* kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
		* kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
		* may depend on host virtualization features rather than host cpu features.
		*/

		static u32 msrs_to_save[] = {
		static const u32 msrs_to_save_all[] = {
		MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
		MSR_STAR,
		#ifdef CONFIG_X86_64
		@@ -1180,9 +1182,10 @@ static u32 msrs_to_save[] = {
		MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
		};

		static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
		static unsigned num_msrs_to_save;

		static u32 emulated_msrs[] = {
		static const u32 emulated_msrs_all[] = {
		MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
		MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
		HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
		@@ -1221,7 +1224,7 @@ static u32 emulated_msrs[] = {
		* by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
		* We always support the "true" VMX control MSRs, even if the host
		* processor does not, so I am putting these registers here rather
		* than in msrs_to_save.
		* than in msrs_to_save_all.
		*/
		MSR_IA32_VMX_BASIC,
		MSR_IA32_VMX_TRUE_PINBASED_CTLS,
		@@ -1240,13 +1243,14 @@ static u32 emulated_msrs[] = {
		MSR_KVM_POLL_CONTROL,
		};

		static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
		static unsigned num_emulated_msrs;

		/*
		* List of msr numbers which are used to expose MSR-based features that
		* can be used by a hypervisor to validate requested CPU features.
		*/
		static u32 msr_based_features[] = {
		static const u32 msr_based_features_all[] = {
		MSR_IA32_VMX_BASIC,
		MSR_IA32_VMX_TRUE_PINBASED_CTLS,
		MSR_IA32_VMX_PINBASED_CTLS,
		@@ -1271,6 +1275,7 @@ static u32 msr_based_features[] = {
		MSR_IA32_ARCH_CAPABILITIES,
		};

		static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
		static unsigned int num_msr_based_features;

		static u64 kvm_get_arch_capabilities(void)
		@@ -5118,22 +5123,22 @@ static void kvm_init_msr_list(void)
		{
		struct x86_pmu_capability x86_pmu;
		u32 dummy[2];
		unsigned i, j;
		unsigned i;

		BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
		"Please update the fixed PMCs in msrs_to_save[]");
		"Please update the fixed PMCs in msrs_to_saved_all[]");

		perf_get_x86_pmu_capability(&x86_pmu);

		for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
		for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
		if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
		continue;

		/*
		* Even MSRs that are valid in the host may not be exposed
		* to the guests in some cases.
		*/
		switch (msrs_to_save[i]) {
		switch (msrs_to_save_all[i]) {
		case MSR_IA32_BNDCFGS:
		if (!kvm_mpx_supported())
		continue;
		@@ -5161,17 +5166,17 @@ static void kvm_init_msr_list(void)
		break;
		case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
		if (!kvm_x86_ops->pt_supported() ||
		msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
		msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
		intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
		continue;
		break;
		case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
		if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
		if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
		min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
		continue;
		break;
		case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
		if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
		if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
		min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
		continue;
		}
		@@ -5179,34 +5184,25 @@ static void kvm_init_msr_list(void)
		break;
		}

		if (j < i)
		msrs_to_save[j] = msrs_to_save[i];
		j++;
		msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
		}
		num_msrs_to_save = j;

		for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
		if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
		for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
		if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
		continue;

		if (j < i)
		emulated_msrs[j] = emulated_msrs[i];
		j++;
		emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
		}
		num_emulated_msrs = j;

		for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
		for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
		struct kvm_msr_entry msr;

		msr.index = msr_based_features[i];
		msr.index = msr_based_features_all[i];
		if (kvm_get_msr_feature(&msr))
		continue;

		if (j < i)
		msr_based_features[j] = msr_based_features[i];
		j++;
		msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
		}
		num_msr_based_features = j;
		}

		static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,

include/linux/kvm_host.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -966,6 +966,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
		void kvm_vcpu_kick(struct kvm_vcpu *vcpu);

		bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
		bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);

		struct kvm_irq_ack_notifier {
		struct hlist_node link;

Admin message