Commit 229456fc authored by Marcelo Tosatti's avatar Marcelo Tosatti Committed by Avi Kivity
Browse files

KVM: convert custom marker based tracing to event traces



This allows use of the powerful ftrace infrastructure.

See Documentation/trace/ for usage information.

[avi, stephen: various build fixes]
[sheng: fix control register breakage]

Signed-off-by: default avatarMarcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: default avatarStephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: default avatarSheng Yang <sheng@linux.intel.com>
Signed-off-by: default avatarAvi Kivity <avi@redhat.com>
parent 219b65dc
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/tracepoint.h>

#include <linux/kvm.h>
#include <linux/kvm_para.h>
@@ -527,6 +528,7 @@ struct kvm_x86_ops {
	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
	int (*get_tdp_level)(void);
	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
	const struct trace_print_flags *exit_reasons_str;
};

extern struct kvm_x86_ops *kvm_x86_ops;
+4 −0
Original line number Diff line number Diff line

EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm

CFLAGS_x86.o := -I.
CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.

kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
				coalesced_mmio.o irq_comm.o eventfd.o)
kvm-$(CONFIG_KVM_TRACE)	+= $(addprefix ../../../virt/kvm/, kvm_trace.o)
+4 −3
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@
#include <asm/atomic.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -515,8 +516,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
	u32 val = 0;

	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);

	if (offset >= LAPIC_MMIO_LENGTH)
		return 0;

@@ -562,6 +561,8 @@ static void apic_mmio_read(struct kvm_io_device *this,
	}
	result = __apic_read(apic, offset & ~0xf);

	trace_kvm_apic_read(offset, result);

	switch (len) {
	case 1:
	case 2:
@@ -657,7 +658,7 @@ static void apic_mmio_write(struct kvm_io_device *this,

	offset &= 0xff0;

	KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler);
	trace_kvm_apic_write(offset, val);

	switch (offset) {
	case APIC_ID:		/* Local APIC ID */
+62 −22
Original line number Diff line number Diff line
@@ -25,10 +25,12 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/ftrace_event.h>

#include <asm/desc.h>

#include <asm/virtext.h>
#include "trace.h"

#define __ex(x) __kvm_handle_fault_on_reboot(x)

@@ -1096,7 +1098,6 @@ static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
		val = 0;
	}

	KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
	return val;
}

@@ -1105,8 +1106,6 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
{
	struct vcpu_svm *svm = to_svm(vcpu);

	KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);

	*exception = 0;

	switch (dr) {
@@ -1154,14 +1153,7 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
	fault_address  = svm->vmcb->control.exit_info_2;
	error_code = svm->vmcb->control.exit_info_1;

	if (!npt_enabled)
		KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code,
			    (u32)fault_address, (u32)(fault_address >> 32),
			    handler);
	else
		KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code,
			    (u32)fault_address, (u32)(fault_address >> 32),
			    handler);
	trace_kvm_page_fault(fault_address, error_code);
	/*
	 * FIXME: Tis shouldn't be necessary here, but there is a flush
	 * missing in the MMU code. Until we find this bug, flush the
@@ -1288,14 +1280,12 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)

static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
	KVMTRACE_0D(NMI, &svm->vcpu, handler);
	return 1;
}

static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
	++svm->vcpu.stat.irq_exits;
	KVMTRACE_0D(INTR, &svm->vcpu, handler);
	return 1;
}

@@ -2077,8 +2067,7 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
	if (svm_get_msr(&svm->vcpu, ecx, &data))
		kvm_inject_gp(&svm->vcpu, 0);
	else {
		KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data,
			    (u32)(data >> 32), handler);
		trace_kvm_msr_read(ecx, data);

		svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
		svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
@@ -2163,8 +2152,7 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
	u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
		| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);

	KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32),
		    handler);
	trace_kvm_msr_write(ecx, data);

	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
	if (svm_set_msr(&svm->vcpu, ecx, data))
@@ -2185,8 +2173,6 @@ static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
static int interrupt_window_interception(struct vcpu_svm *svm,
				   struct kvm_run *kvm_run)
{
	KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);

	svm_clear_vintr(svm);
	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
	/*
@@ -2265,8 +2251,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 exit_code = svm->vmcb->control.exit_code;

	KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
		    (u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
	trace_kvm_exit(exit_code, svm->vmcb->save.rip);

	if (is_nested(svm)) {
		nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
@@ -2354,7 +2339,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
{
	struct vmcb_control_area *control;

	KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler);
	trace_kvm_inj_virq(irq);

	++svm->vcpu.stat.irq_injections;
	control = &svm->vmcb->control;
@@ -2717,6 +2702,59 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
	return 0;
}

static const struct trace_print_flags svm_exit_reasons_str[] = {
	{ SVM_EXIT_READ_CR0,           		"read_cr0" },
	{ SVM_EXIT_READ_CR3,	      		"read_cr3" },
	{ SVM_EXIT_READ_CR4,	      		"read_cr4" },
	{ SVM_EXIT_READ_CR8,  	      		"read_cr8" },
	{ SVM_EXIT_WRITE_CR0,          		"write_cr0" },
	{ SVM_EXIT_WRITE_CR3,	      		"write_cr3" },
	{ SVM_EXIT_WRITE_CR4,          		"write_cr4" },
	{ SVM_EXIT_WRITE_CR8, 	      		"write_cr8" },
	{ SVM_EXIT_READ_DR0, 	      		"read_dr0" },
	{ SVM_EXIT_READ_DR1,	      		"read_dr1" },
	{ SVM_EXIT_READ_DR2,	      		"read_dr2" },
	{ SVM_EXIT_READ_DR3,	      		"read_dr3" },
	{ SVM_EXIT_WRITE_DR0,	      		"write_dr0" },
	{ SVM_EXIT_WRITE_DR1,	      		"write_dr1" },
	{ SVM_EXIT_WRITE_DR2,	      		"write_dr2" },
	{ SVM_EXIT_WRITE_DR3,	      		"write_dr3" },
	{ SVM_EXIT_WRITE_DR5,	      		"write_dr5" },
	{ SVM_EXIT_WRITE_DR7,	      		"write_dr7" },
	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,	"DB excp" },
	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,	"BP excp" },
	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,	"UD excp" },
	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,	"PF excp" },
	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,	"NM excp" },
	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,	"MC excp" },
	{ SVM_EXIT_INTR,			"interrupt" },
	{ SVM_EXIT_NMI,				"nmi" },
	{ SVM_EXIT_SMI,				"smi" },
	{ SVM_EXIT_INIT,			"init" },
	{ SVM_EXIT_VINTR,			"vintr" },
	{ SVM_EXIT_CPUID,			"cpuid" },
	{ SVM_EXIT_INVD,			"invd" },
	{ SVM_EXIT_HLT,				"hlt" },
	{ SVM_EXIT_INVLPG,			"invlpg" },
	{ SVM_EXIT_INVLPGA,			"invlpga" },
	{ SVM_EXIT_IOIO,			"io" },
	{ SVM_EXIT_MSR,				"msr" },
	{ SVM_EXIT_TASK_SWITCH,			"task_switch" },
	{ SVM_EXIT_SHUTDOWN,			"shutdown" },
	{ SVM_EXIT_VMRUN,			"vmrun" },
	{ SVM_EXIT_VMMCALL,			"hypercall" },
	{ SVM_EXIT_VMLOAD,			"vmload" },
	{ SVM_EXIT_VMSAVE,			"vmsave" },
	{ SVM_EXIT_STGI,			"stgi" },
	{ SVM_EXIT_CLGI,			"clgi" },
	{ SVM_EXIT_SKINIT,			"skinit" },
	{ SVM_EXIT_WBINVD,			"wbinvd" },
	{ SVM_EXIT_MONITOR,			"monitor" },
	{ SVM_EXIT_MWAIT,			"mwait" },
	{ SVM_EXIT_NPF,				"npf" },
	{ -1, NULL }
};

static struct kvm_x86_ops svm_x86_ops = {
	.cpu_has_kvm_support = has_svm,
	.disabled_by_bios = is_disabled,
@@ -2778,6 +2816,8 @@ static struct kvm_x86_ops svm_x86_ops = {
	.set_tss_addr = svm_set_tss_addr,
	.get_tdp_level = get_npt_level,
	.get_mt_mask = svm_get_mt_mask,

	.exit_reasons_str = svm_exit_reasons_str,
};

static int __init svm_init(void)

arch/x86/kvm/trace.h

0 → 100644
+260 −0
Original line number Diff line number Diff line
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H

#include <linux/tracepoint.h>

#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
#define TRACE_INCLUDE_PATH arch/x86/kvm
#define TRACE_INCLUDE_FILE trace

/*
 * Tracepoint for guest mode entry.
 */
TRACE_EVENT(kvm_entry,
	TP_PROTO(unsigned int vcpu_id),
	TP_ARGS(vcpu_id),

	TP_STRUCT__entry(
		__field(	unsigned int,	vcpu_id		)
	),

	TP_fast_assign(
		__entry->vcpu_id	= vcpu_id;
	),

	TP_printk("vcpu %u", __entry->vcpu_id)
);

/*
 * Tracepoint for hypercall.
 */
TRACE_EVENT(kvm_hypercall,
	TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
		 unsigned long a2, unsigned long a3),
	TP_ARGS(nr, a0, a1, a2, a3),

	TP_STRUCT__entry(
		__field(	unsigned long, 	nr		)
		__field(	unsigned long,	a0		)
		__field(	unsigned long,	a1		)
		__field(	unsigned long,	a2		)
		__field(	unsigned long,	a3		)
	),

	TP_fast_assign(
		__entry->nr		= nr;
		__entry->a0		= a0;
		__entry->a1		= a1;
		__entry->a2		= a2;
		__entry->a3		= a3;
	),

	TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx",
		 __entry->nr, __entry->a0, __entry->a1,  __entry->a2,
		 __entry->a3)
);

/*
 * Tracepoint for PIO.
 */
TRACE_EVENT(kvm_pio,
	TP_PROTO(unsigned int rw, unsigned int port, unsigned int size,
		 unsigned int count),
	TP_ARGS(rw, port, size, count),

	TP_STRUCT__entry(
		__field(	unsigned int, 	rw		)
		__field(	unsigned int, 	port		)
		__field(	unsigned int, 	size		)
		__field(	unsigned int,	count		)
	),

	TP_fast_assign(
		__entry->rw		= rw;
		__entry->port		= port;
		__entry->size		= size;
		__entry->count		= count;
	),

	TP_printk("pio_%s at 0x%x size %d count %d",
		  __entry->rw ? "write" : "read",
		  __entry->port, __entry->size, __entry->count)
);

/*
 * Tracepoint for cpuid.
 */
TRACE_EVENT(kvm_cpuid,
	TP_PROTO(unsigned int function, unsigned long rax, unsigned long rbx,
		 unsigned long rcx, unsigned long rdx),
	TP_ARGS(function, rax, rbx, rcx, rdx),

	TP_STRUCT__entry(
		__field(	unsigned int,	function	)
		__field(	unsigned long,	rax		)
		__field(	unsigned long,	rbx		)
		__field(	unsigned long,	rcx		)
		__field(	unsigned long,	rdx		)
	),

	TP_fast_assign(
		__entry->function	= function;
		__entry->rax		= rax;
		__entry->rbx		= rbx;
		__entry->rcx		= rcx;
		__entry->rdx		= rdx;
	),

	TP_printk("func %x rax %lx rbx %lx rcx %lx rdx %lx",
		  __entry->function, __entry->rax,
		  __entry->rbx, __entry->rcx, __entry->rdx)
);

/*
 * Tracepoint for apic access.
 */
TRACE_EVENT(kvm_apic,
	TP_PROTO(unsigned int rw, unsigned int reg, unsigned int val),
	TP_ARGS(rw, reg, val),

	TP_STRUCT__entry(
		__field(	unsigned int,	rw		)
		__field(	unsigned int,	reg		)
		__field(	unsigned int,	val		)
	),

	TP_fast_assign(
		__entry->rw		= rw;
		__entry->reg		= reg;
		__entry->val		= val;
	),

	TP_printk("apic_%s 0x%x = 0x%x",
		  __entry->rw ? "write" : "read",
		  __entry->reg, __entry->val)
);

#define trace_kvm_apic_read(reg, val)		trace_kvm_apic(0, reg, val)
#define trace_kvm_apic_write(reg, val)		trace_kvm_apic(1, reg, val)

/*
 * Tracepoint for kvm guest exit:
 */
TRACE_EVENT(kvm_exit,
	TP_PROTO(unsigned int exit_reason, unsigned long guest_rip),
	TP_ARGS(exit_reason, guest_rip),

	TP_STRUCT__entry(
		__field(	unsigned int,	exit_reason	)
		__field(	unsigned long,	guest_rip	)
	),

	TP_fast_assign(
		__entry->exit_reason	= exit_reason;
		__entry->guest_rip	= guest_rip;
	),

	TP_printk("reason %s rip 0x%lx",
		 ftrace_print_symbols_seq(p, __entry->exit_reason,
					  kvm_x86_ops->exit_reasons_str),
		 __entry->guest_rip)
);

/*
 * Tracepoint for kvm interrupt injection:
 */
TRACE_EVENT(kvm_inj_virq,
	TP_PROTO(unsigned int irq),
	TP_ARGS(irq),

	TP_STRUCT__entry(
		__field(	unsigned int,	irq		)
	),

	TP_fast_assign(
		__entry->irq		= irq;
	),

	TP_printk("irq %u", __entry->irq)
);

/*
 * Tracepoint for page fault.
 */
TRACE_EVENT(kvm_page_fault,
	TP_PROTO(unsigned long fault_address, unsigned int error_code),
	TP_ARGS(fault_address, error_code),

	TP_STRUCT__entry(
		__field(	unsigned long,	fault_address	)
		__field(	unsigned int,	error_code	)
	),

	TP_fast_assign(
		__entry->fault_address	= fault_address;
		__entry->error_code	= error_code;
	),

	TP_printk("address %lx error_code %x",
		  __entry->fault_address, __entry->error_code)
);

/*
 * Tracepoint for guest MSR access.
 */
TRACE_EVENT(kvm_msr,
	TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data),
	TP_ARGS(rw, ecx, data),

	TP_STRUCT__entry(
		__field(	unsigned int,	rw		)
		__field(	unsigned int,	ecx		)
		__field(	unsigned long,	data		)
	),

	TP_fast_assign(
		__entry->rw		= rw;
		__entry->ecx		= ecx;
		__entry->data		= data;
	),

	TP_printk("msr_%s %x = 0x%lx",
		  __entry->rw ? "write" : "read",
		  __entry->ecx, __entry->data)
);

#define trace_kvm_msr_read(ecx, data)		trace_kvm_msr(0, ecx, data)
#define trace_kvm_msr_write(ecx, data)		trace_kvm_msr(1, ecx, data)

/*
 * Tracepoint for guest CR access.
 */
TRACE_EVENT(kvm_cr,
	TP_PROTO(unsigned int rw, unsigned int cr, unsigned long val),
	TP_ARGS(rw, cr, val),

	TP_STRUCT__entry(
		__field(	unsigned int,	rw		)
		__field(	unsigned int,	cr		)
		__field(	unsigned long,	val		)
	),

	TP_fast_assign(
		__entry->rw		= rw;
		__entry->cr		= cr;
		__entry->val		= val;
	),

	TP_printk("cr_%s %x = 0x%lx",
		  __entry->rw ? "write" : "read",
		  __entry->cr, __entry->val)
);

#define trace_kvm_cr_read(cr, val)		trace_kvm_cr(0, cr, val)
#define trace_kvm_cr_write(cr, val)		trace_kvm_cr(1, cr, val)

#endif /* _TRACE_KVM_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
Loading