Commit 72674d48 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86-urgent-2020-07-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:
 "A series of fixes for x86:

   - Reset MXCSR in kernel_fpu_begin() to prevent using a stale user
     space value.

   - Prevent writing MSR_TEST_CTRL on CPUs which are not explicitly
     whitelisted for split lock detection. Some CPUs which do not
     support it crash even when the MSR is written to 0 which is the
     default value.

   - Fix the XEN PV fallout of the entry code rework

   - Fix the 32bit fallout of the entry code rework

   - Add more selftests to ensure that these entry problems don't come
     back.

   - Disable 16-bit segments on XEN PV. They are not supported because
     XEN PV does not implement ESPFIX64"

* tag 'x86-urgent-2020-07-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/ldt: Disable 16-bit segments on Xen PV
  x86/entry/32: Fix #MC and #DB wiring on x86_32
  x86/entry/xen: Route #DB correctly on Xen PV
  x86/entry, selftests: Further improve user entry sanity checks
  x86/entry/compat: Clear RAX high bits on Xen PV SYSENTER
  selftests/x86: Consolidate and fix get/set_eflags() helpers
  selftests/x86/syscall_nt: Clear weird flags after each test
  selftests/x86/syscall_nt: Add more flag combinations
  x86/entry/64/compat: Fix Xen PV SYSENTER frame setup
  x86/entry: Move SYSENTER's regs->sp and regs->flags fixups into C
  x86/entry: Assert that syscalls are on the right stack
  x86/split_lock: Don't write MSR_TEST_CTRL on CPUs that aren't whitelisted
  x86/fpu: Reset MXCSR to default in kernel_fpu_begin()
parents f23dbe18 cc801833
Loading
Loading
Loading
Loading
+46 −3
Original line number Diff line number Diff line
@@ -45,6 +45,32 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>

/* Check that the stack and regs on entry from user mode are sane. */
static void check_user_regs(struct pt_regs *regs)
{
	if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
		/*
		 * Make sure that the entry code gave us a sensible EFLAGS
		 * register.  Native because we want to check the actual CPU
		 * state, not the interrupt state as imagined by Xen.
		 */
		unsigned long flags = native_save_fl();
		WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
				      X86_EFLAGS_NT));

		/* We think we came from user mode. Make sure pt_regs agrees. */
		WARN_ON_ONCE(!user_mode(regs));

		/*
		 * All entries from user mode (except #DF) should be on the
		 * normal thread stack and should have user pt_regs in the
		 * correct location.
		 */
		WARN_ON_ONCE(!on_thread_stack());
		WARN_ON_ONCE(regs != task_pt_regs(current));
	}
}

#ifdef CONFIG_CONTEXT_TRACKING
/**
 * enter_from_user_mode - Establish state when coming from user mode
@@ -127,9 +153,6 @@ static long syscall_trace_enter(struct pt_regs *regs)
	unsigned long ret = 0;
	u32 work;

	if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
		BUG_ON(regs != task_pt_regs(current));

	work = READ_ONCE(ti->flags);

	if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
@@ -346,6 +369,8 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
	struct thread_info *ti;

	check_user_regs(regs);

	enter_from_user_mode();
	instrumentation_begin();

@@ -409,6 +434,8 @@ static void do_syscall_32_irqs_on(struct pt_regs *regs)
/* Handles int $0x80 */
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
	check_user_regs(regs);

	enter_from_user_mode();
	instrumentation_begin();

@@ -460,6 +487,8 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
					vdso_image_32.sym_int80_landing_pad;
	bool success;

	check_user_regs(regs);

	/*
	 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
	 * so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
@@ -510,6 +539,18 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
		(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
#endif
}

/*
 * C entry point for SYSENTER.  Patches up the two pt_regs fields that the
 * entry code could not fill in correctly and then hands the frame to
 * do_fast_syscall_32().
 *
 * Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL.
 */
__visible noinstr long do_SYSENTER_32(struct pt_regs *regs)
{
	/* SYSENTER clobbers EFLAGS.IF.  Assume it was set in usermode. */
	regs->flags |= X86_EFLAGS_IF;

	/* SYSENTER loses RSP, but the vDSO saved it in RBP. */
	regs->sp = regs->bp;

	return do_fast_syscall_32(regs);
}
#endif

SYSCALL_DEFINE0(ni_syscall)
@@ -553,6 +594,7 @@ SYSCALL_DEFINE0(ni_syscall)
bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
{
	if (user_mode(regs)) {
		check_user_regs(regs);
		enter_from_user_mode();
		return false;
	}
@@ -686,6 +728,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
 */
void noinstr idtentry_enter_user(struct pt_regs *regs)
{
	/*
	 * Sanity-check the entry state first; check_user_regs() only does
	 * anything when CONFIG_DEBUG_ENTRY is enabled.
	 */
	check_user_regs(regs);
	enter_from_user_mode();
}

+2 −3
Original line number Diff line number Diff line
@@ -933,9 +933,8 @@ SYM_FUNC_START(entry_SYSENTER_32)

.Lsysenter_past_esp:
	pushl	$__USER_DS		/* pt_regs->ss */
	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
	pushl	$0			/* pt_regs->sp (placeholder) */
	pushfl				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
	pushl	$__USER_CS		/* pt_regs->cs */
	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
	pushl	%eax			/* pt_regs->orig_ax */
@@ -965,7 +964,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
.Lsysenter_flags_fixed:

	movl	%esp, %eax
	call	do_fast_syscall_32
	call	do_SYSENTER_32
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
+16 −15
Original line number Diff line number Diff line
@@ -57,29 +57,30 @@ SYM_CODE_START(entry_SYSENTER_compat)

	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp

	/*
	 * User tracing code (ptrace or signal handlers) might assume that
	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
	 * syscall.  Just in case the high bits are nonzero, zero-extend
	 * the syscall number.  (This could almost certainly be deleted
	 * with no ill effects.)
	 */
	movl	%eax, %eax

	/* Construct struct pt_regs on stack */
	pushq	$__USER32_DS		/* pt_regs->ss */
	pushq	%rbp			/* pt_regs->sp (stashed in bp) */
	pushq	$0			/* pt_regs->sp = 0 (placeholder) */

	/*
	 * Push flags.  This is nasty.  First, interrupts are currently
	 * off, but we need pt_regs->flags to have IF set.  Second, even
	 * if TF was set when SYSENTER started, it's clear by now.  We fix
	 * that later using TIF_SINGLESTEP.
	 * off, but we need pt_regs->flags to have IF set.  Second, if TF
	 * was set in usermode, it's still set, and we're singlestepping
	 * through this code.  do_SYSENTER_32() will fix up IF.
	 */
	pushfq				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%rsp)	/* Fix saved flags */
	pushq	$__USER32_CS		/* pt_regs->cs */
	pushq	$0			/* pt_regs->ip = 0 (placeholder) */
SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)

	/*
	 * User tracing code (ptrace or signal handlers) might assume that
	 * the saved RAX contains a 32-bit number when we're invoking a 32-bit
	 * syscall.  Just in case the high bits are nonzero, zero-extend
	 * the syscall number.  (This could almost certainly be deleted
	 * with no ill effects.)
	 */
	movl	%eax, %eax

	pushq	%rax			/* pt_regs->orig_ax */
	pushq	%rdi			/* pt_regs->di */
	pushq	%rsi			/* pt_regs->si */
@@ -135,7 +136,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
.Lsysenter_flags_fixed:

	movq	%rsp, %rdi
	call	do_fast_syscall_32
	call	do_SYSENTER_32
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
		    "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
+5 −0
Original line number Diff line number Diff line
@@ -623,6 +623,11 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
 * MXCSR and XCR definitions:
 */

/*
 * ldmxcsr - execute the LDMXCSR instruction with the given value
 * @mxcsr:	32-bit value to load
 *
 * The value is handed to the instruction as a memory operand ("m"
 * constraint), so the compiler materializes the by-value argument in
 * memory for the asm to read.
 */
static inline void ldmxcsr(u32 mxcsr)
{
	asm volatile("ldmxcsr %0" :: "m" (mxcsr));
}

extern unsigned int mxcsr_feature_mask;

#define XCR_XFEATURE_ENABLED_MASK	0x00000000
+19 −28
Original line number Diff line number Diff line
@@ -353,10 +353,6 @@ static __always_inline void __##func(struct pt_regs *regs)

#else	/* CONFIG_X86_64 */

/* Maps to a regular IDTENTRY on 32bit for now */
# define DECLARE_IDTENTRY_IST		DECLARE_IDTENTRY
# define DEFINE_IDTENTRY_IST		DEFINE_IDTENTRY

/**
 * DECLARE_IDTENTRY_DF - Declare functions for double fault 32bit variant
 * @vector:	Vector number (ignored for C)
@@ -387,28 +383,18 @@ __visible noinstr void func(struct pt_regs *regs, \
#endif	/* !CONFIG_X86_64 */

/* C-Code mapping */
#define DECLARE_IDTENTRY_NMI		DECLARE_IDTENTRY_RAW
#define DEFINE_IDTENTRY_NMI		DEFINE_IDTENTRY_RAW

#ifdef CONFIG_X86_64
#define DECLARE_IDTENTRY_MCE		DECLARE_IDTENTRY_IST
#define DEFINE_IDTENTRY_MCE		DEFINE_IDTENTRY_IST
#define DEFINE_IDTENTRY_MCE_USER	DEFINE_IDTENTRY_NOIST

#define DECLARE_IDTENTRY_NMI		DECLARE_IDTENTRY_RAW
#define DEFINE_IDTENTRY_NMI		DEFINE_IDTENTRY_RAW

#define DECLARE_IDTENTRY_DEBUG		DECLARE_IDTENTRY_IST
#define DEFINE_IDTENTRY_DEBUG		DEFINE_IDTENTRY_IST
#define DEFINE_IDTENTRY_DEBUG_USER	DEFINE_IDTENTRY_NOIST

/**
 * DECLARE_IDTENTRY_XEN - Declare functions for XEN redirect IDT entry points
 * @vector:	Vector number (ignored for C)
 * @func:	Function name of the entry point
 *
 * Used for xennmi and xendebug redirections. No DEFINE as this is all ASM
 * indirection magic.
 */
#define DECLARE_IDTENTRY_XEN(vector, func)				\
	asmlinkage void xen_asm_exc_xen##func(void);			\
	asmlinkage void asm_exc_xen##func(void)
#endif

#else /* !__ASSEMBLY__ */

@@ -455,9 +441,6 @@ __visible noinstr void func(struct pt_regs *regs, \
# define DECLARE_IDTENTRY_MCE(vector, func)				\
	DECLARE_IDTENTRY(vector, func)

# define DECLARE_IDTENTRY_DEBUG(vector, func)				\
	DECLARE_IDTENTRY(vector, func)

/* No ASM emitted for DF as this goes through a C shim */
# define DECLARE_IDTENTRY_DF(vector, func)

@@ -469,10 +452,6 @@ __visible noinstr void func(struct pt_regs *regs, \
/* No ASM code emitted for NMI */
#define DECLARE_IDTENTRY_NMI(vector, func)

/* XEN NMI and DB wrapper */
#define DECLARE_IDTENTRY_XEN(vector, func)				\
	idtentry vector asm_exc_xen##func exc_##func has_error_code=0

/*
 * ASM code to emit the common vector entry stubs where each stub is
 * packed into 8 bytes.
@@ -565,16 +544,28 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF,	exc_page_fault);

#ifdef CONFIG_X86_MCE
#ifdef CONFIG_X86_64
DECLARE_IDTENTRY_MCE(X86_TRAP_MC,	exc_machine_check);
#else
DECLARE_IDTENTRY_RAW(X86_TRAP_MC,	exc_machine_check);
#endif
#endif

/* NMI */
DECLARE_IDTENTRY_NMI(X86_TRAP_NMI,	exc_nmi);
DECLARE_IDTENTRY_XEN(X86_TRAP_NMI,	nmi);
#ifdef CONFIG_XEN_PV
DECLARE_IDTENTRY_RAW(X86_TRAP_NMI,	xenpv_exc_nmi);
#endif

/* #DB */
#ifdef CONFIG_X86_64
DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB,	exc_debug);
DECLARE_IDTENTRY_XEN(X86_TRAP_DB,	debug);
#else
DECLARE_IDTENTRY_RAW(X86_TRAP_DB,	exc_debug);
#endif
#ifdef CONFIG_XEN_PV
DECLARE_IDTENTRY_RAW(X86_TRAP_DB,	xenpv_exc_debug);
#endif

/* #DF */
DECLARE_IDTENTRY_DF(X86_TRAP_DF,	exc_double_fault);
Loading