KVM: SVM: Support MMIO for an SEV-ES guest (8f423a80) · Commits · 戴 / test

arch/x86/kvm/svm/sev.c

+124 −0

Original line number	Diff line number	Diff line
		@@ -1262,6 +1262,9 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
		if (vcpu->arch.guest_state_protected)
		sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
		__free_page(virt_to_page(svm->vmsa));

		if (svm->ghcb_sa_free)
		kfree(svm->ghcb_sa);
		}

		static void dump_ghcb(struct vcpu_svm *svm)
		@@ -1436,6 +1439,11 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
		!ghcb_rcx_is_valid(ghcb))
		goto vmgexit_err;
		break;
		case SVM_VMGEXIT_MMIO_READ:
		case SVM_VMGEXIT_MMIO_WRITE:
		if (!ghcb_sw_scratch_is_valid(ghcb))
		goto vmgexit_err;
		break;
		case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		break;
		default:
		@@ -1470,6 +1478,24 @@ static void pre_sev_es_run(struct vcpu_svm *svm)
		if (!svm->ghcb)
		return;

		if (svm->ghcb_sa_free) {
		/*
		* The scratch area lives outside the GHCB, so there is a
		* buffer that, depending on the operation performed, may
		* need to be synced, then freed.
		*/
		if (svm->ghcb_sa_sync) {
		kvm_write_guest(svm->vcpu.kvm,
		ghcb_get_sw_scratch(svm->ghcb),
		svm->ghcb_sa, svm->ghcb_sa_len);
		svm->ghcb_sa_sync = false;
		}

		kfree(svm->ghcb_sa);
		svm->ghcb_sa = NULL;
		svm->ghcb_sa_free = false;
		}

		trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);

		sev_es_sync_to_ghcb(svm);
		@@ -1504,6 +1530,86 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
		vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
		}

		#define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE)
		static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
		{
		struct vmcb_control_area *control = &svm->vmcb->control;
		struct ghcb *ghcb = svm->ghcb;
		u64 ghcb_scratch_beg, ghcb_scratch_end;
		u64 scratch_gpa_beg, scratch_gpa_end;
		void *scratch_va;

		scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
		if (!scratch_gpa_beg) {
		pr_err("vmgexit: scratch gpa not provided\n");
		return false;
		}

		scratch_gpa_end = scratch_gpa_beg + len;
		if (scratch_gpa_end < scratch_gpa_beg) {
		pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
		len, scratch_gpa_beg);
		return false;
		}

		if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
		/* Scratch area begins within GHCB */
		ghcb_scratch_beg = control->ghcb_gpa +
		offsetof(struct ghcb, shared_buffer);
		ghcb_scratch_end = control->ghcb_gpa +
		offsetof(struct ghcb, reserved_1);

		/*
		* If the scratch area begins within the GHCB, it must be
		* completely contained in the GHCB shared buffer area.
		*/
		if (scratch_gpa_beg < ghcb_scratch_beg \|\|
		scratch_gpa_end > ghcb_scratch_end) {
		pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
		scratch_gpa_beg, scratch_gpa_end);
		return false;
		}

		scratch_va = (void *)svm->ghcb;
		scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
		} else {
		/*
		* The guest memory must be read into a kernel buffer, so
		* limit the size
		*/
		if (len > GHCB_SCRATCH_AREA_LIMIT) {
		pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
		len, GHCB_SCRATCH_AREA_LIMIT);
		return false;
		}
		scratch_va = kzalloc(len, GFP_KERNEL);
		if (!scratch_va)
		return false;

		if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
		/* Unable to copy scratch area from guest */
		pr_err("vmgexit: kvm_read_guest for scratch area failed\n");

		kfree(scratch_va);
		return false;
		}

		/*
		* The scratch area is outside the GHCB. The operation will
		* dictate whether the buffer needs to be synced before running
		* the vCPU next time (i.e. a read was requested so the data
		* must be written back to the guest memory).
		*/
		svm->ghcb_sa_sync = sync;
		svm->ghcb_sa_free = true;
		}

		svm->ghcb_sa = scratch_va;
		svm->ghcb_sa_len = len;

		return true;
		}

		static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
		unsigned int pos)
		{
		@@ -1641,6 +1747,24 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)

		ret = -EINVAL;
		switch (exit_code) {
		case SVM_VMGEXIT_MMIO_READ:
		if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
		break;

		ret = kvm_sev_es_mmio_read(&svm->vcpu,
		control->exit_info_1,
		control->exit_info_2,
		svm->ghcb_sa);
		break;
		case SVM_VMGEXIT_MMIO_WRITE:
		if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
		break;

		ret = kvm_sev_es_mmio_write(&svm->vcpu,
		control->exit_info_1,
		control->exit_info_2,
		svm->ghcb_sa);
		break;
		case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		vcpu_unimpl(&svm->vcpu,
		"vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",

arch/x86/kvm/svm/svm.h

+6 −0

Original line number	Diff line number	Diff line
		@@ -174,6 +174,12 @@ struct vcpu_svm {
		struct vmcb_save_area *vmsa;
		struct ghcb *ghcb;
		struct kvm_host_map ghcb_map;

		/* SEV-ES scratch area support */
		void *ghcb_sa;
		u64 ghcb_sa_len;
		bool ghcb_sa_sync;
		bool ghcb_sa_free;
		};

		struct svm_cpu_data {

arch/x86/kvm/x86.c

+123 −0

Original line number	Diff line number	Diff line
		@@ -11292,6 +11292,129 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
		}
		EXPORT_SYMBOL_GPL(kvm_handle_invpcid);

		static int complete_sev_es_emulated_mmio(struct kvm_vcpu *vcpu)
		{
		struct kvm_run *run = vcpu->run;
		struct kvm_mmio_fragment *frag;
		unsigned int len;

		BUG_ON(!vcpu->mmio_needed);

		/* Complete previous fragment */
		frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
		len = min(8u, frag->len);
		if (!vcpu->mmio_is_write)
		memcpy(frag->data, run->mmio.data, len);

		if (frag->len <= 8) {
		/* Switch to the next fragment. */
		frag++;
		vcpu->mmio_cur_fragment++;
		} else {
		/* Go forward to the next mmio piece. */
		frag->data += len;
		frag->gpa += len;
		frag->len -= len;
		}

		if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
		vcpu->mmio_needed = 0;

		// VMG change, at this point, we're always done
		// RIP has already been advanced
		return 1;
		}

		// More MMIO is needed
		run->mmio.phys_addr = frag->gpa;
		run->mmio.len = min(8u, frag->len);
		run->mmio.is_write = vcpu->mmio_is_write;
		if (run->mmio.is_write)
		memcpy(run->mmio.data, frag->data, min(8u, frag->len));
		run->exit_reason = KVM_EXIT_MMIO;

		vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;

		return 0;
		}

		int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
		void *data)
		{
		int handled;
		struct kvm_mmio_fragment *frag;

		if (!data)
		return -EINVAL;

		handled = write_emultor.read_write_mmio(vcpu, gpa, bytes, data);
		if (handled == bytes)
		return 1;

		bytes -= handled;
		gpa += handled;
		data += handled;

		/TODO: Check if need to increment number of frags /
		frag = vcpu->mmio_fragments;
		vcpu->mmio_nr_fragments = 1;
		frag->len = bytes;
		frag->gpa = gpa;
		frag->data = data;

		vcpu->mmio_needed = 1;
		vcpu->mmio_cur_fragment = 0;

		vcpu->run->mmio.phys_addr = gpa;
		vcpu->run->mmio.len = min(8u, frag->len);
		vcpu->run->mmio.is_write = 1;
		memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
		vcpu->run->exit_reason = KVM_EXIT_MMIO;

		vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;

		return 0;
		}
		EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_write);

		int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes,
		void *data)
		{
		int handled;
		struct kvm_mmio_fragment *frag;

		if (!data)
		return -EINVAL;

		handled = read_emultor.read_write_mmio(vcpu, gpa, bytes, data);
		if (handled == bytes)
		return 1;

		bytes -= handled;
		gpa += handled;
		data += handled;

		/TODO: Check if need to increment number of frags /
		frag = vcpu->mmio_fragments;
		vcpu->mmio_nr_fragments = 1;
		frag->len = bytes;
		frag->gpa = gpa;
		frag->data = data;

		vcpu->mmio_needed = 1;
		vcpu->mmio_cur_fragment = 0;

		vcpu->run->mmio.phys_addr = gpa;
		vcpu->run->mmio.len = min(8u, frag->len);
		vcpu->run->mmio.is_write = 0;
		vcpu->run->exit_reason = KVM_EXIT_MMIO;

		vcpu->arch.complete_userspace_io = complete_sev_es_emulated_mmio;

		return 0;
		}
		EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read);

		EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
		EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
		EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);

arch/x86/kvm/x86.h

+5 −0

Original line number	Diff line number	Diff line
		@@ -427,4 +427,9 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
		__reserved_bits; \
		})

		int kvm_sev_es_mmio_write(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes,
		void *dst);
		int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t src, unsigned int bytes,
		void *dst);

		#endif

Admin message