Commit c14cab26 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86-urgent-2020-05-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:
 "A set of fixes for x86:

   - Ensure that direct mapping alias is always flushed when changing
     page attributes. The optimization for small ranges failed to do so
     when the virtual address was in the vmalloc or module space.

   - Unbreak the trace event registration for syscalls without arguments
     caused by the refactoring of the SYSCALL_DEFINE0() macro.

   - Move the printk in the TSC deadline timer code to a place where it
     is guaranteed to only be called once during boot and cannot be
     rearmed by clearing warn_once after boot. If it's invoked post boot
     then lockdep rightfully complains about a potential deadlock as the
     calling context is different.

   - A series of fixes for objtool and the ORC unwinder addressing
     variety of small issues:

       - Stack offset tracking for indirect CFAs in objtool ignored
         subsequent pushs and pops

       - Repair the unwind hints in the register clearing entry ASM code

       - Make the unwinding in the low level exit to usermode code stop
         after switching to the trampoline stack. The unwind hint is no
         longer valid and the ORC unwinder emits a warning as it can't
         find the registers anymore.

       - Fix unwind hints in switch_to_asm() and rewind_stack_do_exit()
         which caused objtool to generate bogus ORC data.

       - Prevent unwinder warnings when dumping the stack of a
         non-current task as there is no way to be sure about the
         validity because the dumped stack can be a moving target.

       - Make the ORC unwinder behave the same way as the frame pointer
         unwinder when dumping an inactive tasks stack and do not skip
         the first frame.

       - Prevent ORC unwinding before ORC data has been initialized

       - Immediately terminate unwinding when a unknown ORC entry type
         is found.

       - Prevent premature stop of the unwinder caused by IRET frames.

       - Fix another infinite loop in objtool caused by a negative
         offset which was not catched.

       - Address a few build warnings in the ORC unwinder and add
         missing static/ro_after_init annotations"

* tag 'x86-urgent-2020-05-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/unwind/orc: Move ORC sorting variables under !CONFIG_MODULES
  x86/apic: Move TSC deadline timer debug printk
  ftrace/x86: Fix trace event registration for syscalls without arguments
  x86/mm/cpa: Flush direct map alias during cpa
  objtool: Fix infinite loop in for_offset_range()
  x86/unwind/orc: Fix premature unwind stoppage due to IRET frames
  x86/unwind/orc: Fix error path for bad ORC entry type
  x86/unwind/orc: Prevent unwinding before ORC initialization
  x86/unwind/orc: Don't skip the first frame for inactive tasks
  x86/unwind: Prevent false warnings for non-current tasks
  x86/unwind/orc: Convert global variables to static
  x86/entry/64: Fix unwind hints in rewind_stack_do_exit()
  x86/entry/64: Fix unwind hints in __switch_to_asm()
  x86/entry/64: Fix unwind hints in kernel exit path
  x86/entry/64: Fix unwind hints in register clearing code
  objtool: Fix stack offset tracking for indirect CFAs
parents 8b000832 fb9cbbc8
Loading
Loading
Loading
Loading
+21 −19
Original line number Diff line number Diff line
@@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS	21*8

.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
	/*
	 * Push registers and sanitize registers of values that a
	 * speculation attack might otherwise want to exploit. The
	 * lower registers are likely clobbered well before they
	 * could be put to use in a speculative execution gadget.
	 * Interleave XOR with PUSH for better uop scheduling:
	 */
	.if \save_ret
	pushq	%rsi		/* pt_regs->si */
	movq	8(%rsp), %rsi	/* temporarily store the return address in %rsi */
@@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with
	pushq   %rsi		/* pt_regs->si */
	.endif
	pushq	\rdx		/* pt_regs->dx */
	xorl	%edx, %edx	/* nospec   dx */
	pushq   %rcx		/* pt_regs->cx */
	xorl	%ecx, %ecx	/* nospec   cx */
	pushq   \rax		/* pt_regs->ax */
	pushq   %r8		/* pt_regs->r8 */
	xorl	%r8d, %r8d	/* nospec   r8 */
	pushq   %r9		/* pt_regs->r9 */
	xorl	%r9d, %r9d	/* nospec   r9 */
	pushq   %r10		/* pt_regs->r10 */
	xorl	%r10d, %r10d	/* nospec   r10 */
	pushq   %r11		/* pt_regs->r11 */
	xorl	%r11d, %r11d	/* nospec   r11*/
	pushq	%rbx		/* pt_regs->rbx */
	xorl    %ebx, %ebx	/* nospec   rbx*/
	pushq	%rbp		/* pt_regs->rbp */
	xorl    %ebp, %ebp	/* nospec   rbp*/
	pushq	%r12		/* pt_regs->r12 */
	xorl	%r12d, %r12d	/* nospec   r12*/
	pushq	%r13		/* pt_regs->r13 */
	xorl	%r13d, %r13d	/* nospec   r13*/
	pushq	%r14		/* pt_regs->r14 */
	xorl	%r14d, %r14d	/* nospec   r14*/
	pushq	%r15		/* pt_regs->r15 */
	xorl	%r15d, %r15d	/* nospec   r15*/
	UNWIND_HINT_REGS

	.if \save_ret
	pushq	%rsi		/* return address on top of stack */
	.endif

	/*
	 * Sanitize registers of values that a speculation attack might
	 * otherwise want to exploit. The lower registers are likely clobbered
	 * well before they could be put to use in a speculative execution
	 * gadget.
	 */
	xorl	%edx,  %edx	/* nospec dx  */
	xorl	%ecx,  %ecx	/* nospec cx  */
	xorl	%r8d,  %r8d	/* nospec r8  */
	xorl	%r9d,  %r9d	/* nospec r9  */
	xorl	%r10d, %r10d	/* nospec r10 */
	xorl	%r11d, %r11d	/* nospec r11 */
	xorl	%ebx,  %ebx	/* nospec rbx */
	xorl	%ebp,  %ebp	/* nospec rbp */
	xorl	%r12d, %r12d	/* nospec r12 */
	xorl	%r13d, %r13d	/* nospec r13 */
	xorl	%r14d, %r14d	/* nospec r14 */
	xorl	%r15d, %r15d	/* nospec r15 */

.endm

.macro POP_REGS pop_rdi=1 skip_r11rcx=0
+7 −7
Original line number Diff line number Diff line
@@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
	 */
syscall_return_via_sysret:
	/* rcx and r11 are already restored (see code above) */
	UNWIND_HINT_EMPTY
	POP_REGS pop_rdi=0 skip_r11rcx=1

	/*
@@ -258,6 +257,7 @@ syscall_return_via_sysret:
	 */
	movq	%rsp, %rdi
	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
	UNWIND_HINT_EMPTY

	pushq	RSP-RDI(%rdi)	/* RSP */
	pushq	(%rdi)		/* RDI */
@@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64)
 * %rdi: prev task
 * %rsi: next task
 */
SYM_CODE_START(__switch_to_asm)
	UNWIND_HINT_FUNC
SYM_FUNC_START(__switch_to_asm)
	/*
	 * Save callee-saved registers
	 * This must match the order in inactive_task_frame
@@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm)
	popq	%rbp

	jmp	__switch_to
SYM_CODE_END(__switch_to_asm)
SYM_FUNC_END(__switch_to_asm)

/*
 * A newly forked process directly context switches into this address.
@@ -512,7 +511,7 @@ SYM_CODE_END(spurious_entries_start)
 * +----------------------------------------------------+
 */
SYM_CODE_START(interrupt_entry)
	UNWIND_HINT_FUNC
	UNWIND_HINT_IRET_REGS offset=16
	ASM_CLAC
	cld

@@ -544,9 +543,9 @@ SYM_CODE_START(interrupt_entry)
	pushq	5*8(%rdi)		/* regs->eflags */
	pushq	4*8(%rdi)		/* regs->cs */
	pushq	3*8(%rdi)		/* regs->ip */
	UNWIND_HINT_IRET_REGS
	pushq	2*8(%rdi)		/* regs->orig_ax */
	pushq	8(%rdi)			/* return address */
	UNWIND_HINT_FUNC

	movq	(%rdi), %rdi
	jmp	2f
@@ -637,6 +636,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
	 */
	movq	%rsp, %rdi
	movq	PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
	UNWIND_HINT_EMPTY

	/* Copy the IRET frame to the trampoline stack. */
	pushq	6*8(%rdi)	/* SS */
@@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit)

	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rax
	leaq	-PTREGS_SIZE(%rax), %rsp
	UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
	UNWIND_HINT_REGS

	call	do_exit
SYM_CODE_END(rewind_stack_do_exit)
+3 −2
Original line number Diff line number Diff line
@@ -61,11 +61,12 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
{
	/*
	 * Compare the symbol name with the system call name. Skip the
	 * "__x64_sys", "__ia32_sys" or simple "sys" prefix.
	 * "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix.
	 */
	return !strcmp(sym + 3, name + 3) ||
		(!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
		(!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
		(!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) ||
		(!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3));
}

#ifndef COMPILE_OFFSETS
+1 −1
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ struct unwind_state {
#if defined(CONFIG_UNWINDER_ORC)
	bool signal, full_regs;
	unsigned long sp, bp, ip;
	struct pt_regs *regs;
	struct pt_regs *regs, *prev_regs;
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
	bool got_irq;
	unsigned long *bp, *orig_sp, ip;
+14 −13
Original line number Diff line number Diff line
@@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
		 * According to Intel, MFENCE can do the serialization here.
		 */
		asm volatile("mfence" : : : "memory");

		printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
		return;
	}

@@ -546,7 +544,7 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);

static u32 hsx_deadline_rev(void)
static __init u32 hsx_deadline_rev(void)
{
	switch (boot_cpu_data.x86_stepping) {
	case 0x02: return 0x3a; /* EP */
@@ -556,7 +554,7 @@ static u32 hsx_deadline_rev(void)
	return ~0U;
}

static u32 bdx_deadline_rev(void)
static __init u32 bdx_deadline_rev(void)
{
	switch (boot_cpu_data.x86_stepping) {
	case 0x02: return 0x00000011;
@@ -568,7 +566,7 @@ static u32 bdx_deadline_rev(void)
	return ~0U;
}

static u32 skx_deadline_rev(void)
static __init u32 skx_deadline_rev(void)
{
	switch (boot_cpu_data.x86_stepping) {
	case 0x03: return 0x01000136;
@@ -581,7 +579,7 @@ static u32 skx_deadline_rev(void)
	return ~0U;
}

static const struct x86_cpu_id deadline_match[] = {
static const struct x86_cpu_id deadline_match[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X,		&hsx_deadline_rev),
	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X,	0x0b000020),
	X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D,	&bdx_deadline_rev),
@@ -603,18 +601,19 @@ static const struct x86_cpu_id deadline_match[] = {
	{},
};

static void apic_check_deadline_errata(void)
static __init bool apic_validate_deadline_timer(void)
{
	const struct x86_cpu_id *m;
	u32 rev;

	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
	    boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return;
	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return false;
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return true;

	m = x86_match_cpu(deadline_match);
	if (!m)
		return;
		return true;

	/*
	 * Function pointers will have the MSB set due to address layout,
@@ -626,11 +625,12 @@ static void apic_check_deadline_errata(void)
		rev = (u32)m->driver_data;

	if (boot_cpu_data.microcode >= rev)
		return;
		return true;

	setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
	pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
	       "please update microcode to version: 0x%x (or later)\n", rev);
	return false;
}

/*
@@ -2092,7 +2092,8 @@ void __init init_apic_mappings(void)
{
	unsigned int new_apicid;

	apic_check_deadline_errata();
	if (apic_validate_deadline_timer())
		pr_debug("TSC deadline timer available\n");

	if (x2apic_mode) {
		boot_cpu_physical_apicid = read_apic_id();
Loading