Commit 42595ce9 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-vmware-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 vmware updates from Ingo Molnar:
 "The main change in this tree is the addition of 'steal time clock
  support' for VMware guests"

* 'x86-vmware-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vmware: Use bool type for vmw_sched_clock
  x86/vmware: Enable steal time accounting
  x86/vmware: Add steal time clock support for VMware guests
  x86/vmware: Remove vmware_sched_clock_setup()
  x86/vmware: Make vmware_select_hypercall() __init
parents d9d76778 8fefe9da
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -3201,7 +3201,7 @@
			[X86,PV_OPS] Disable paravirtualized VMware scheduler
			clock and use the default one.

	no-steal-acc	[X86,KVM,ARM64] Disable paravirtualized steal time
	no-steal-acc	[X86,PV_OPS,ARM64] Disable paravirtualized steal time
			accounting. steal time is computed, but won't
			influence scheduler behaviour

+221 −8
Original line number Diff line number Diff line
@@ -25,6 +25,8 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/clocksource.h>
#include <linux/cpu.h>
#include <linux/reboot.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
@@ -47,6 +49,11 @@
#define VMWARE_CMD_GETVCPU_INFO  68
#define VMWARE_CMD_LEGACY_X2APIC  3
#define VMWARE_CMD_VCPU_RESERVED 31
#define VMWARE_CMD_STEALCLOCK    91

#define STEALCLOCK_NOT_AVAILABLE (-1)
#define STEALCLOCK_DISABLED        0
#define STEALCLOCK_ENABLED         1

#define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
	__asm__("inl (%%dx), %%eax" :					\
@@ -86,6 +93,18 @@
	}							\
	} while (0)

struct vmware_steal_time {
	union {
		uint64_t clock;	/* stolen time counter in units of vtsc */
		struct {
			/* only for little-endian */
			uint32_t clock_low;
			uint32_t clock_high;
		};
	};
	uint64_t reserved[7];
};

static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode     __ro_after_init;

@@ -103,15 +122,25 @@ static unsigned long vmware_get_tsc_khz(void)

#ifdef CONFIG_PARAVIRT
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
static int vmw_sched_clock __initdata = 1;
static bool vmw_sched_clock __initdata = true;
static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
static bool has_steal_clock;
static bool steal_acc __initdata = true; /* steal time accounting */

static __init int setup_vmw_sched_clock(char *s)
{
	vmw_sched_clock = 0;
	vmw_sched_clock = false;
	return 0;
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);

static __init int parse_no_stealacc(char *arg)
{
	steal_acc = false;
	return 0;
}
early_param("no-steal-acc", parse_no_stealacc);

static unsigned long long notrace vmware_sched_clock(void)
{
	unsigned long long ns;
@@ -122,7 +151,7 @@ static unsigned long long notrace vmware_sched_clock(void)
	return ns;
}

static void __init vmware_sched_clock_setup(void)
static void __init vmware_cyc2ns_setup(void)
{
	struct cyc2ns_data *d = &vmware_cyc2ns;
	unsigned long long tsc_now = rdtsc();
@@ -132,17 +161,201 @@ static void __init vmware_sched_clock_setup(void)
	d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
					   d->cyc2ns_shift);

	pv_ops.time.sched_clock = vmware_sched_clock;
	pr_info("using sched offset of %llu ns\n", d->cyc2ns_offset);
	pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}

static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
{
	uint32_t result, info;

	asm volatile (VMWARE_HYPERCALL :
		"=a"(result),
		"=c"(info) :
		"a"(VMWARE_HYPERVISOR_MAGIC),
		"b"(0),
		"c"(VMWARE_CMD_STEALCLOCK),
		"d"(0),
		"S"(arg1),
		"D"(arg2) :
		"memory");
	return result;
}

static bool stealclock_enable(phys_addr_t pa)
{
	return vmware_cmd_stealclock(upper_32_bits(pa),
				     lower_32_bits(pa)) == STEALCLOCK_ENABLED;
}

static int __stealclock_disable(void)
{
	return vmware_cmd_stealclock(0, 1);
}

static void stealclock_disable(void)
{
	__stealclock_disable();
}

static bool vmware_is_stealclock_available(void)
{
	return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
}

/**
 * vmware_steal_clock() - read the per-cpu steal clock
 * @cpu:            the cpu number whose steal clock we want to read
 *
 * The function reads the steal clock if we are on a 64-bit system, otherwise
 * reads it in parts, checking that the high part didn't change in the
 * meantime.
 *
 * Return:
 *      The steal clock reading in ns.
 */
static uint64_t vmware_steal_clock(int cpu)
{
	struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
	uint64_t clock;

	if (IS_ENABLED(CONFIG_64BIT))
		clock = READ_ONCE(steal->clock);
	else {
		uint32_t initial_high, low, high;

		do {
			initial_high = READ_ONCE(steal->clock_high);
			/* Do not reorder initial_high and high readings */
			virt_rmb();
			low = READ_ONCE(steal->clock_low);
			/* Keep low reading in between */
			virt_rmb();
			high = READ_ONCE(steal->clock_high);
		} while (initial_high != high);

		clock = ((uint64_t)high << 32) | low;
	}

	return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
			     vmware_cyc2ns.cyc2ns_shift);
}

static void vmware_register_steal_time(void)
{
	int cpu = smp_processor_id();
	struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);

	if (!has_steal_clock)
		return;

	if (!stealclock_enable(slow_virt_to_phys(st))) {
		has_steal_clock = false;
		return;
	}

	pr_info("vmware-stealtime: cpu %d, pa %llx\n",
		cpu, (unsigned long long) slow_virt_to_phys(st));
}

static void vmware_disable_steal_time(void)
{
	if (!has_steal_clock)
		return;

	stealclock_disable();
}

static void vmware_guest_cpu_init(void)
{
	if (has_steal_clock)
		vmware_register_steal_time();
}

static void vmware_pv_guest_cpu_reboot(void *unused)
{
	vmware_disable_steal_time();
}

static int vmware_pv_reboot_notify(struct notifier_block *nb,
				unsigned long code, void *unused)
{
	if (code == SYS_RESTART)
		on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
	return NOTIFY_DONE;
}

static struct notifier_block vmware_pv_reboot_nb = {
	.notifier_call = vmware_pv_reboot_notify,
};

#ifdef CONFIG_SMP
static void __init vmware_smp_prepare_boot_cpu(void)
{
	vmware_guest_cpu_init();
	native_smp_prepare_boot_cpu();
}

static int vmware_cpu_online(unsigned int cpu)
{
	local_irq_disable();
	vmware_guest_cpu_init();
	local_irq_enable();
	return 0;
}

static int vmware_cpu_down_prepare(unsigned int cpu)
{
	local_irq_disable();
	vmware_disable_steal_time();
	local_irq_enable();
	return 0;
}
#endif

static __init int activate_jump_labels(void)
{
	if (has_steal_clock) {
		static_key_slow_inc(&paravirt_steal_enabled);
		if (steal_acc)
			static_key_slow_inc(&paravirt_steal_rq_enabled);
	}

	return 0;
}
arch_initcall(activate_jump_labels);

static void __init vmware_paravirt_ops_setup(void)
{
	pv_info.name = "VMware hypervisor";
	pv_ops.cpu.io_delay = paravirt_nop;

	if (vmware_tsc_khz && vmw_sched_clock)
		vmware_sched_clock_setup();
	if (vmware_tsc_khz == 0)
		return;

	vmware_cyc2ns_setup();

	if (vmw_sched_clock)
		pv_ops.time.sched_clock = vmware_sched_clock;

	if (vmware_is_stealclock_available()) {
		has_steal_clock = true;
		pv_ops.time.steal_clock = vmware_steal_clock;

		/* We use reboot notifier only to disable steal clock */
		register_reboot_notifier(&vmware_pv_reboot_nb);

#ifdef CONFIG_SMP
		smp_ops.smp_prepare_boot_cpu =
			vmware_smp_prepare_boot_cpu;
		if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					      "x86/vmware:online",
					      vmware_cpu_online,
					      vmware_cpu_down_prepare) < 0)
			pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
#else
		vmware_guest_cpu_init();
#endif
	}
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
@@ -213,7 +426,7 @@ static void __init vmware_platform_setup(void)
	vmware_set_capabilities();
}

static u8 vmware_select_hypercall(void)
static u8 __init vmware_select_hypercall(void)
{
	int eax, ebx, ecx, edx;