Commit 8f147727 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 irq updates from Ingo Molnar:
 "Here are the main changes in this tree:

   - Introduce x86-64 IRQ/exception/debug stack guard pages to detect
     stack overflows immediately and deterministically.

   - Clean up over a decade worth of cruft accumulated.

  The outcome of this should be more clear-cut faults/crashes when any
  of the low level x86 CPU stacks overflow, instead of silent memory
  corruption and sporadic failures much later on"

* 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  x86/irq: Fix outdated comments
  x86/irq/64: Remove stack overflow debug code
  x86/irq/64: Remap the IRQ stack with guard pages
  x86/irq/64: Split the IRQ stack into its own pages
  x86/irq/64: Init hardirq_stack_ptr during CPU hotplug
  x86/irq/32: Handle irq stack allocation failure proper
  x86/irq/32: Invoke irq_ctx_init() from init_IRQ()
  x86/irq/64: Rename irq_stack_ptr to hardirq_stack_ptr
  x86/irq/32: Rename hard/softirq_stack to hard/softirq_stack_ptr
  x86/irq/32: Make irq stack a character array
  x86/irq/32: Define IRQ_STACK_SIZE
  x86/dumpstack/64: Speedup in_exception_stack()
  x86/exceptions: Split debug IST stack
  x86/exceptions: Enable IST guard pages
  x86/exceptions: Disconnect IST index and stack order
  x86/cpu: Remove orig_ist array
  x86/cpu: Prepare TSS.IST setup for guard pages
  x86/dumpstack/64: Use cpu_entry_area instead of orig_ist
  x86/irq/64: Use cpu entry area instead of orig_ist
  x86/traps: Use cpu_entry_area instead of orig_ist
  ...
parents 53f8b081 2c464543
Loading
Loading
Loading
Loading
+9 −4
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@ If that assumption is ever broken then the stacks will become corrupt.

The currently assigned IST stacks are :-

* DOUBLEFAULT_STACK.  EXCEPTION_STKSZ (PAGE_SIZE).
* ESTACK_DF.  EXCEPTION_STKSZ (PAGE_SIZE).

  Used for interrupt 8 - Double Fault Exception (#DF).

@@ -68,7 +68,7 @@ The currently assigned IST stacks are :-
  Using a separate stack allows the kernel to recover from it well enough
  in many cases to still output an oops.

* NMI_STACK.  EXCEPTION_STKSZ (PAGE_SIZE).
* ESTACK_NMI.  EXCEPTION_STKSZ (PAGE_SIZE).

  Used for non-maskable interrupts (NMI).

@@ -76,7 +76,7 @@ The currently assigned IST stacks are :-
  middle of switching stacks.  Using IST for NMI events avoids making
  assumptions about the previous state of the kernel stack.

* DEBUG_STACK.  DEBUG_STKSZ
* ESTACK_DB.  EXCEPTION_STKSZ (PAGE_SIZE).

  Used for hardware debug interrupts (interrupt 1) and for software
  debug interrupts (INT3).
@@ -86,7 +86,12 @@ The currently assigned IST stacks are :-
  avoids making assumptions about the previous state of the kernel
  stack.

* MCE_STACK.  EXCEPTION_STKSZ (PAGE_SIZE).
  To handle nested #DB correctly there exist two instances of DB stacks. On
  #DB entry the IST stackpointer for #DB is switched to the second instance
  so a nested #DB starts from a clean stack. The nested #DB switches
  the IST stackpointer to a guard hole to catch triple nesting.

* ESTACK_MCE.  EXCEPTION_STKSZ (PAGE_SIZE).

  Used for interrupt 18 - Machine Check Exception (#MC).

+1 −1
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@ config X86_32
	select ARCH_WANT_IPC_PARSE_VERSION
	select CLKSRC_I8253
	select CLONE_BACKWARDS
	select HAVE_DEBUG_STACKOVERFLOW
	select MODULES_USE_ELF_REL
	select OLD_SIGACTION

@@ -138,7 +139,6 @@ config X86
	select HAVE_COPY_THREAD_TLS
	select HAVE_C_RECORDMCOUNT
	select HAVE_DEBUG_KMEMLEAK
	select HAVE_DEBUG_STACKOVERFLOW
	select HAVE_DMA_CONTIGUOUS
	select HAVE_DYNAMIC_FTRACE
	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+8 −8
Original line number Diff line number Diff line
@@ -298,7 +298,7 @@ ENTRY(__switch_to_asm)

#ifdef CONFIG_STACKPROTECTOR
	movq	TASK_stack_canary(%rsi), %rbx
	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
	movq	%rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
#endif

#ifdef CONFIG_RETPOLINE
@@ -430,8 +430,8 @@ END(irq_entries_start)
	 * it before we actually move ourselves to the IRQ stack.
	 */

	movq	\old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
	movq	PER_CPU_VAR(irq_stack_ptr), %rsp
	movq	\old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8)
	movq	PER_CPU_VAR(hardirq_stack_ptr), %rsp

#ifdef CONFIG_DEBUG_ENTRY
	/*
@@ -840,7 +840,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
/*
 * Exception entry points.
 */
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)

/**
 * idtentry - Generate an IDT entry stub
@@ -878,7 +878,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
 * @paranoid == 2 is special: the stub will never switch stacks.  This is for
 * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
 */
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0
ENTRY(\sym)
	UNWIND_HINT_IRET_REGS offset=\has_error_code*8

@@ -924,13 +924,13 @@ ENTRY(\sym)
	.endif

	.if \shift_ist != -1
	subq	$EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
	subq	$\ist_offset, CPU_TSS_IST(\shift_ist)
	.endif

	call	\do_sym

	.if \shift_ist != -1
	addq	$EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
	addq	$\ist_offset, CPU_TSS_IST(\shift_ist)
	.endif

	/* these procedures expect "no swapgs" flag in ebx */
@@ -1128,7 +1128,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
	hv_stimer0_callback_vector hv_stimer0_vector_handler
#endif /* CONFIG_HYPERV */

idtentry debug			do_debug		has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
idtentry debug			do_debug		has_error_code=0	paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
idtentry int3			do_int3			has_error_code=0
idtentry stack_segment		do_stack_segment	has_error_code=1

+64 −5
Original line number Diff line number Diff line
@@ -7,6 +7,64 @@
#include <asm/processor.h>
#include <asm/intel_ds.h>

#ifdef CONFIG_X86_64

/* Macro to enforce the same ordering and stack sizes */
#define ESTACKS_MEMBERS(guardsize, db2_holesize)\
	char	DF_stack_guard[guardsize];	\
	char	DF_stack[EXCEPTION_STKSZ];	\
	char	NMI_stack_guard[guardsize];	\
	char	NMI_stack[EXCEPTION_STKSZ];	\
	char	DB2_stack_guard[guardsize];	\
	char	DB2_stack[db2_holesize];	\
	char	DB1_stack_guard[guardsize];	\
	char	DB1_stack[EXCEPTION_STKSZ];	\
	char	DB_stack_guard[guardsize];	\
	char	DB_stack[EXCEPTION_STKSZ];	\
	char	MCE_stack_guard[guardsize];	\
	char	MCE_stack[EXCEPTION_STKSZ];	\
	char	IST_top_guard[guardsize];	\

/* The exception stacks' physical storage. No guard pages required */
struct exception_stacks {
	ESTACKS_MEMBERS(0, 0)
};

/* The effective cpu entry area mapping with guard pages. */
struct cea_exception_stacks {
	ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
};

/*
 * The exception stack ordering in [cea_]exception_stacks
 */
enum exception_stack_ordering {
	ESTACK_DF,
	ESTACK_NMI,
	ESTACK_DB2,
	ESTACK_DB1,
	ESTACK_DB,
	ESTACK_MCE,
	N_EXCEPTION_STACKS
};

#define CEA_ESTACK_SIZE(st)					\
	sizeof(((struct cea_exception_stacks *)0)->st## _stack)

#define CEA_ESTACK_BOT(ceastp, st)				\
	((unsigned long)&(ceastp)->st## _stack)

#define CEA_ESTACK_TOP(ceastp, st)				\
	(CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))

#define CEA_ESTACK_OFFS(st)					\
	offsetof(struct cea_exception_stacks, st## _stack)

#define CEA_ESTACK_PAGES					\
	(sizeof(struct cea_exception_stacks) / PAGE_SIZE)

#endif

/*
 * cpu_entry_area is a percpu region that contains things needed by the CPU
 * and early entry/exit code.  Real types aren't used for all fields here
@@ -32,12 +90,9 @@ struct cpu_entry_area {

#ifdef CONFIG_X86_64
	/*
	 * Exception stacks used for IST entries.
	 *
	 * In the future, this should have a separate slot for each stack
	 * with guard pages between them.
	 * Exception stacks used for IST entries with guard pages.
	 */
	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
	struct cea_exception_stacks estacks;
#endif
#ifdef CONFIG_CPU_SUP_INTEL
	/*
@@ -57,6 +112,7 @@ struct cpu_entry_area {
#define CPU_ENTRY_AREA_TOT_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)

DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);

extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
@@ -76,4 +132,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu)
	return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
}

#define __this_cpu_ist_top_va(name)					\
	CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)

#endif
+0 −2
Original line number Diff line number Diff line
@@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void)
{
	__this_cpu_dec(debug_stack_usage);
}
int is_debug_stack(unsigned long addr);
void debug_stack_set_zero(void);
void debug_stack_reset(void);
#else /* !X86_64 */
static inline int is_debug_stack(unsigned long addr) { return 0; }
static inline void debug_stack_set_zero(void) { }
static inline void debug_stack_reset(void) { }
static inline void debug_stack_usage_inc(void) { }
Loading