Commit dc4e0021, authored by Andy Lutomirski, committed by Ingo Molnar
Browse files

x86/doublefault/32: Move #DF stack and TSS to cpu_entry_area



There are three problems with the current layout of the doublefault
stack and TSS.  First, the TSS is only cacheline-aligned, which is
not enough -- if the hardware portion of the TSS (struct x86_hw_tss)
crosses a page boundary, horrible things happen [0].  Second, the
stack and TSS are global, so simultaneous double faults on different
CPUs will cause massive corruption.  Third, the whole mechanism
won't work if user CR3 is loaded, resulting in a triple fault [1].

Let the doublefault stack and TSS share a page (which prevents the
TSS from spanning a page boundary), make it percpu, and move it into
cpu_entry_area.  Teach the stack dump code about the doublefault
stack.

[0] Real hardware will read past the end of the page onto the next
    *physical* page if a task switch happens.  Virtual machines may
    have any number of bugs, and I would consider it reasonable for
    a VM to summarily kill the guest if it tries to task-switch to
    a page-spanning TSS.

[1] Real hardware triple faults.  At least some VMs seem to hang.
    I'm not sure what's going on.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent e99b6f46
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -65,6 +65,13 @@ enum exception_stack_ordering {

#endif

#ifdef CONFIG_X86_32
/*
 * Stack and hardware TSS used for double fault (#DF) handling on 32-bit.
 *
 * The stack array is sized as PAGE_SIZE minus sizeof(struct x86_hw_tss),
 * and the whole object is PAGE_SIZE-aligned, so the stack and the TSS are
 * meant to share a single page.  Keeping the TSS inside that page is what
 * prevents the hardware portion of the TSS from crossing a page boundary.
 */
struct doublefault_stack {
	unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)];
	struct x86_hw_tss tss;
} __aligned(PAGE_SIZE);
#endif

/*
 * cpu_entry_area is a percpu region that contains things needed by the CPU
 * and early entry/exit code.  Real types aren't used for all fields here
@@ -86,6 +93,11 @@ struct cpu_entry_area {
#endif
	struct entry_stack_page entry_stack_page;

#ifdef CONFIG_X86_32
	char guard_doublefault_stack[PAGE_SIZE];
	struct doublefault_stack doublefault_stack;
#endif

	/*
	 * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
	 * we need task switches to work, and task switches write to the TSS.
+13 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_DOUBLEFAULT_H
#define _ASM_X86_DOUBLEFAULT_H

/*
 * doublefault_init_cpu_tss() has an out-of-line implementation only on
 * 32-bit kernels built with CONFIG_DOUBLEFAULT.  Every other configuration
 * gets an empty inline stub, so callers do not need their own #ifdefs.
 */
#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
void doublefault_init_cpu_tss(void);
#else
static inline void doublefault_init_cpu_tss(void) { }
#endif

#endif /* _ASM_X86_DOUBLEFAULT_H */
+4 −3
Original line number Diff line number Diff line
@@ -41,10 +41,11 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
#endif

/*
 * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
 * to avoid include recursion hell
 * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE.
 * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c
 * to avoid include recursion hell.
 */
#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 41)
#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 43)

/* The +1 is for the readonly IDT page: */
#define CPU_ENTRY_AREA_BASE	\
+0 −1
Original line number Diff line number Diff line
@@ -166,7 +166,6 @@ enum cpuid_regs_idx {
extern struct cpuinfo_x86	boot_cpu_data;
extern struct cpuinfo_x86	new_cpu_data;

extern struct x86_hw_tss	doublefault_tss;
extern __u32			cpu_caps_cleared[NCAPINTS + NBUGINTS];
extern __u32			cpu_caps_set[NCAPINTS + NBUGINTS];

+2 −10
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@
#include <asm/stackprotector.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/doublefault.h>
#include <asm/archrandom.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
@@ -1814,8 +1815,6 @@ static inline void tss_setup_ist(struct tss_struct *tss)
	tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
}

/* CONFIG_X86_64 branch: nothing to install for the doublefault TSS, so this is a no-op. */
static inline void gdt_setup_doublefault_tss(int cpu) { }

#else /* CONFIG_X86_64 */

/* Empty stub used in the !CONFIG_X86_64 branch; the cpu argument is ignored. */
static inline void setup_getcpu(int cpu) { }
@@ -1827,13 +1826,6 @@ static inline void ucode_cpu_init(int cpu)

/* Empty stub used in the !CONFIG_X86_64 branch; no IST entries are written here. */
static inline void tss_setup_ist(struct tss_struct *tss) { }

/*
 * Install the doublefault TSS descriptor for the given cpu: the
 * GDT_ENTRY_DOUBLEFAULT_TSS slot is pointed at the single, global
 * doublefault_tss.  With CONFIG_DOUBLEFAULT unset the body is empty and
 * the function does nothing.
 */
static inline void gdt_setup_doublefault_tss(int cpu)
{
#ifdef CONFIG_DOUBLEFAULT
	/* Set up the doublefault TSS pointer in the GDT */
	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
#endif
}
#endif /* !CONFIG_X86_64 */

static inline void tss_setup_io_bitmap(struct tss_struct *tss)
@@ -1923,7 +1915,7 @@ void cpu_init(void)
	clear_all_debug_regs();
	dbg_restore_debug_regs();

	gdt_setup_doublefault_tss(cpu);
	doublefault_init_cpu_tss();

	fpu__init_cpu();

Loading