Commit 92a0610b authored by Linus Torvalds
Pull x86 cpu updates from Borislav Petkov:

 - Add support for hardware-enforced cache coherency on AMD, which
   obviates the need to flush cachelines before changing the PTE
   encryption bit (Krish Sadhukhan); a usage sketch follows the
   shortlog below

 - Add Centaur initialization support for families >= 7 (Tony W Wang-oc)

 - Add a feature flag for the TSX suspend load address tracking
   feature and expose it to KVM (Cathy Zhang)

 - Emulate SLDT and STR so that Windows programs don't crash on UMIP
   machines (Brendan Shanks and Ricardo Neri)

 - Use the new SERIALIZE insn on Intel hardware which supports it
   (Ricardo Neri)

 - Misc cleanups and fixes

* tag 'x86_cpu_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  KVM: SVM: Don't flush cache if hardware enforces cache coherency across encryption domains
  x86/mm/pat: Don't flush cache if hardware enforces cache coherency across encryption domains
  x86/cpu: Add hardware-enforced cache coherency as a CPUID feature
  x86/cpu/centaur: Add Centaur family >=7 CPUs initialization support
  x86/cpu/centaur: Replace two-condition switch-case with an if statement
  x86/kvm: Expose TSX Suspend Load Tracking feature
  x86/cpufeatures: Enumerate TSX suspend load address tracking instructions
  x86/umip: Add emulation/spoofing for SLDT and STR instructions
  x86/cpu: Fix typos and improve the comments in sync_core()
  x86/cpu: Use XGETBV and XSETBV mnemonics in fpu/internal.h
  x86/cpu: Use SERIALIZE in sync_core() when available
parents ca1b6692 e1ebb2b4
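
The SVM and PAT hunks from the first two shortlog entries are not
reproduced on this page. As a rough sketch of the pattern they apply
(the function name below is hypothetical; boot_cpu_has() and
clflush_cache_range() are real kernel helpers), the idea is simply to
skip the pre-PTE-change cache flush when the CPU advertises
hardware-enforced coherency:

/*
 * Hedged illustration, not the verbatim upstream hunks (cf.
 * arch/x86/mm/pat/set_memory.c and arch/x86/kvm/svm/sev.c).
 */
static void enc_bit_flip_sketch(unsigned long vaddr, int npages)
{
	/*
	 * Without hardware-enforced coherency, cachelines tagged with
	 * the old C-bit state must be flushed before the encryption
	 * bit in the PTEs changes; with X86_FEATURE_SME_COHERENT the
	 * hardware keeps caches coherent across encryption domains,
	 * so the flush can be skipped.
	 */
	if (!boot_cpu_has(X86_FEATURE_SME_COHERENT))
		clflush_cache_range((void *)vaddr, npages * PAGE_SIZE);

	/* ... change the PTE encryption bit here ... */
}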
arch/x86/include/asm/cpufeatures.h  +2 −1
@@ -96,7 +96,7 @@
 #define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
-/* free					( 3*32+17) */
+#define X86_FEATURE_SME_COHERENT	( 3*32+17) /* "" AMD hardware-enforced cache coherency */
 #define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -368,6 +368,7 @@
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_SERIALIZE		(18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_TSXLDTRK		(18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
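
The KVM half of the TSXLDTRK work is likewise not shown on this page.
As a hedged sketch (not the verbatim hunk), exposing a CPUID.7.0:EDX
bit to guests comes down to adding it to that leaf's capability mask
in arch/x86/kvm/cpuid.c, where F() is that file's local feature macro:

	/* Sketch: advertise the new CPUID.7.0:EDX bits to guests. */
	kvm_cpu_cap_mask(CPUID_7_EDX,
		/* ... existing CPUID.7.0:EDX features ... */ 0 |
		F(SERIALIZE) | F(TSXLDTRK)
	);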
arch/x86/include/asm/fpu/internal.h  +2 −5
@@ -602,9 +602,7 @@ static inline u64 xgetbv(u32 index)
 {
 	u32 eax, edx;
 
-	asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
-		     : "=a" (eax), "=d" (edx)
-		     : "c" (index));
+	asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
 	return eax + ((u64)edx << 32);
 }
 
@@ -613,8 +611,7 @@ static inline void xsetbv(u32 index, u64 value)
 	u32 eax = value;
 	u32 edx = value >> 32;
 
-	asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
-		     : : "a" (eax), "d" (edx), "c" (index));
+	asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
 }
 
 #endif /* _ASM_X86_FPU_INTERNAL_H */
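
The mnemonics are safe to use directly because any binutils recent
enough to build the kernel already assembles XGETBV/XSETBV, and the
generated opcodes are identical to the old .byte sequences. For
illustration, reading XCR0 goes through index 0, which this header
names XCR_XFEATURE_ENABLED_MASK (the wrapper below is hypothetical):

static inline u64 read_xcr0_sketch(void)
{
	/* CR4.OSXSAVE must be set before XCR0 may be accessed. */
	return xgetbv(XCR_XFEATURE_ENABLED_MASK);
}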
arch/x86/include/asm/special_insns.h  +6 −0
@@ -234,6 +234,12 @@ static inline void clwb(volatile void *__p)
 
 #define nop() asm volatile ("nop")
 
+static inline void serialize(void)
+{
+	/* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */
+	asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_SPECIAL_INSNS_H */
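
SERIALIZE is enumerated in CPUID.(EAX=7,ECX=0):EDX[14], matching the
18*32+14 bit position added to cpufeatures.h above; inside the kernel
the idiomatic test is boot_cpu_has(X86_FEATURE_SERIALIZE). For
completeness, a user-space detection sketch using GCC's <cpuid.h>
(assumption: GCC or clang with __get_cpuid_count available):

#include <cpuid.h>
#include <stdbool.h>

/* Check CPUID.(EAX=7,ECX=0):EDX bit 14 before using SERIALIZE. */
static bool cpu_has_serialize(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return false;

	return edx & (1u << 14);
}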
arch/x86/include/asm/sync_core.h  +22 −12
@@ -5,6 +5,7 @@
 #include <linux/preempt.h>
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
+#include <asm/special_insns.h>
 
 #ifdef CONFIG_X86_32
 static inline void iret_to_self(void)
@@ -46,22 +47,34 @@ static inline void iret_to_self(void)
  *
  *  b) Text was modified on a different CPU, may subsequently be
  *     executed on this CPU, and you want to make sure the new version
- *     gets executed.  This generally means you're calling this in a IPI.
+ *     gets executed.  This generally means you're calling this in an IPI.
  *
  * If you're calling this for a different reason, you're probably doing
  * it wrong.
+ *
+ * Like all of Linux's memory ordering operations, this is a
+ * compiler barrier as well.
  */
 static inline void sync_core(void)
 {
 	/*
-	 * There are quite a few ways to do this.  IRET-to-self is nice
-	 * because it works on every CPU, at any CPL (so it's compatible
-	 * with paravirtualization), and it never exits to a hypervisor.
-	 * The only down sides are that it's a bit slow (it seems to be
-	 * a bit more than 2x slower than the fastest options) and that
-	 * it unmasks NMIs.  The "push %cs" is needed because, in
-	 * paravirtual environments, __KERNEL_CS may not be a valid CS
-	 * value when we do IRET directly.
+	 * The SERIALIZE instruction is the most straightforward way to
+	 * do this, but it is not universally available.
+	 */
+	if (static_cpu_has(X86_FEATURE_SERIALIZE)) {
+		serialize();
+		return;
+	}
+
+	/*
+	 * For all other processors, there are quite a few ways to do this.
+	 * IRET-to-self is nice because it works on every CPU, at any CPL
+	 * (so it's compatible with paravirtualization), and it never exits
+	 * to a hypervisor.  The only downsides are that it's a bit slow
+	 * (it seems to be a bit more than 2x slower than the fastest
+	 * options) and that it unmasks NMIs.  The "push %cs" is needed,
+	 * because in paravirtual environments __KERNEL_CS may not be a
+	 * valid CS value when we do IRET directly.
 	 *
 	 * In case NMI unmasking or performance ever becomes a problem,
 	 * the next best option appears to be MOV-to-CR2 and an
@@ -71,9 +84,6 @@ static inline void sync_core(void)
 	 * CPUID is the conventional way, but it's nasty: it doesn't
 	 * exist on some 486-like CPUs, and it usually exits to a
 	 * hypervisor.
-	 *
-	 * Like all of Linux's memory ordering operations, this is a
-	 * compiler barrier as well.
 	 */
 	iret_to_self();
 }
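
As context for case (b) in the comment above, the usual cross-CPU
pattern looks roughly like the following sketch (modeled on the
text-poke path in arch/x86/kernel/alternatives.c; the outer function
name is hypothetical):

static void do_sync_core(void *info)
{
	sync_core();	/* serialize instruction fetch on this CPU */
}

static void sync_all_cores_sketch(void)
{
	/* IPI every online CPU and wait for each to serialize. */
	on_each_cpu(do_sync_core, NULL, 1);
}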
arch/x86/kernel/cpu/centaur.c  +12 −15
@@ -65,6 +65,9 @@ static void init_c3(struct cpuinfo_x86 *c)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 	}
+
+	if (c->x86 >= 7)
+		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 }
 
 enum {
@@ -90,18 +93,15 @@
 
 static void early_init_centaur(struct cpuinfo_x86 *c)
 {
-	switch (c->x86) {
 #ifdef CONFIG_X86_32
-	case 5:
-		/* Emulate MTRRs using Centaur's MCR. */
+	/* Emulate MTRRs using Centaur's MCR. */
+	if (c->x86 == 5)
 		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
-		break;
 #endif
-	case 6:
-		if (c->x86_model >= 0xf)
-			set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-		break;
-	}
+	if ((c->x86 == 6 && c->x86_model >= 0xf) ||
+	    (c->x86 >= 7))
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 #endif
@@ -145,9 +145,8 @@ static void init_centaur(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
 	}
 
-	switch (c->x86) {
 #ifdef CONFIG_X86_32
-	case 5:
+	if (c->x86 == 5) {
 		switch (c->x86_model) {
 		case 4:
 			name = "C6";
@@ -207,12 +206,10 @@ static void init_centaur(struct cpuinfo_x86 *c)
 			c->x86_cache_size = (cc>>24)+(dd>>24);
 		}
 		sprintf(c->x86_model_id, "WinChip %s", name);
-		break;
 	}
 #endif
-	case 6:
+	if (c->x86 == 6 || c->x86 >= 7)
 		init_c3(c);
-		break;
-	}
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 #endif
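
Read together, the two early_init_centaur() hunks leave the function
looking roughly like this (reconstructed from the hunks above; treat
it as a sketch rather than a verbatim copy of the file):

static void early_init_centaur(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
	/* Emulate MTRRs using Centaur's MCR. */
	if (c->x86 == 5)
		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
#endif
	if ((c->x86 == 6 && c->x86_model >= 0xf) ||
	    (c->x86 >= 7))
		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);

#ifdef CONFIG_X86_64
	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#endif
}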