Commit d22fff81 authored by Linus Torvalds
Browse files

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:

 - Extend the memmap= boot parameter syntax to allow the redeclaration
   and dropping of existing ranges, and to support all e820 range types
   (Jan H. Schönherr)

 - Improve the W+X boot time security checks to remove false positive
   warnings on Xen (Jan Beulich)

 - Support booting as Xen PVH guest (Juergen Gross)

 - Improved 5-level paging (LA57) support, in particular it's possible
   now to have a single kernel image for both 4-level and 5-level
   hardware (Kirill A. Shutemov)

 - AMD hardware RAM encryption support (SME/SEV) fixes (Tom Lendacky)

 - Preparatory commits for hardware-encrypted RAM support on Intel CPUs.
   (Kirill A. Shutemov)

 - Improved Intel-MID support (Andy Shevchenko)

 - Show EFI page tables in page_tables debug files (Andy Lutomirski)

 - ... plus misc fixes and smaller cleanups

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (56 commits)
  x86/cpu/tme: Fix spelling: "configuation" -> "configuration"
  x86/boot: Fix SEV boot failure from change to __PHYSICAL_MASK_SHIFT
  x86/mm: Update comment in detect_tme() regarding x86_phys_bits
  x86/mm/32: Remove unused node_memmap_size_bytes() & CONFIG_NEED_NODE_MEMMAP_SIZE logic
  x86/mm: Remove pointless checks in vmalloc_fault
  x86/platform/intel-mid: Add special handling for ACPI HW reduced platforms
  ACPI, x86/boot: Introduce the ->reduced_hw_early_init() ACPI callback
  ACPI, x86/boot: Split out acpi_generic_reduce_hw_init() and export
  x86/pconfig: Provide defines and helper to run MKTME_KEY_PROG leaf
  x86/pconfig: Detect PCONFIG targets
  x86/tme: Detect if TME and MKTME is activated by BIOS
  x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G
  x86/boot/compressed/64: Use page table in trampoline memory
  x86/boot/compressed/64: Use stack from trampoline memory
  x86/boot/compressed/64: Make sure we have a 32-bit code segment
  x86/mm: Do not use paravirtualized calls in native_set_p4d()
  kdump, vmcoreinfo: Export pgtable_l5_enabled value
  x86/boot/compressed/64: Prepare new top-level page table for trampoline
  x86/boot/compressed/64: Set up trampoline memory
  x86/boot/compressed/64: Save and restore trampoline memory
  ...
parents 986b37c0 eaeb8e76
Loading
Loading
Loading
Loading
+9 −0
Original line number Original line Diff line number Diff line
@@ -2248,6 +2248,15 @@
			The memory region may be marked as e820 type 12 (0xc)
			The memory region may be marked as e820 type 12 (0xc)
			and is NVDIMM or ADR memory.
			and is NVDIMM or ADR memory.


	memmap=<size>%<offset>-<oldtype>+<newtype>
			[KNL,ACPI] Convert memory within the specified region
			from <oldtype> to <newtype>. If "-<oldtype>" is left
			out, the whole region will be marked as <newtype>,
			even if previously unavailable. If "+<newtype>" is left
			out, matching memory will be removed. Types are
			specified as e820 types, e.g., 1 = RAM, 2 = reserved,
			3 = ACPI, 12 = PRAM.

	memory_corruption_check=0/1 [X86]
	memory_corruption_check=0/1 [X86]
			Some BIOSes seem to corrupt the first 64k of
			Some BIOSes seem to corrupt the first 64k of
			memory when doing things like suspend/resume.
			memory when doing things like suspend/resume.
+3 −6
Original line number Original line Diff line number Diff line
@@ -20,12 +20,9 @@ Documentation/x86/x86_64/mm.txt


CONFIG_X86_5LEVEL=y enables the feature.
CONFIG_X86_5LEVEL=y enables the feature.


So far, a kernel compiled with the option enabled will be able to boot
Kernel with CONFIG_X86_5LEVEL=y still able to boot on 4-level hardware.
only on machines that supports the feature -- see for 'la57' flag in
In this case additional page table level -- p4d -- will be folded at
/proc/cpuinfo.
runtime.

The plan is to implement boot-time switching between 4- and 5-level paging
in the future.


== User-space and large virtual address space ==
== User-space and large virtual address space ==


+11 −6
Original line number Original line Diff line number Diff line
@@ -1461,6 +1461,8 @@ config X86_PAE


config X86_5LEVEL
config X86_5LEVEL
	bool "Enable 5-level page tables support"
	bool "Enable 5-level page tables support"
	select DYNAMIC_MEMORY_LAYOUT
	select SPARSEMEM_VMEMMAP
	depends on X86_64
	depends on X86_64
	---help---
	---help---
	  5-level paging enables access to larger address space:
	  5-level paging enables access to larger address space:
@@ -1469,8 +1471,8 @@ config X86_5LEVEL


	  It will be supported by future Intel CPUs.
	  It will be supported by future Intel CPUs.


	  Note: a kernel with this option enabled can only be booted
	  A kernel with the option enabled can be booted on machines that
	  on machines that support the feature.
	  support 4- or 5-level paging.


	  See Documentation/x86/x86_64/5level-paging.txt for more
	  See Documentation/x86/x86_64/5level-paging.txt for more
	  information.
	  information.
@@ -1595,10 +1597,6 @@ config ARCH_HAVE_MEMORY_PRESENT
	def_bool y
	def_bool y
	depends on X86_32 && DISCONTIGMEM
	depends on X86_32 && DISCONTIGMEM


config NEED_NODE_MEMMAP_SIZE
	def_bool y
	depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)

config ARCH_FLATMEM_ENABLE
config ARCH_FLATMEM_ENABLE
	def_bool y
	def_bool y
	depends on X86_32 && !NUMA
	depends on X86_32 && !NUMA
@@ -2174,10 +2172,17 @@ config PHYSICAL_ALIGN


	  Don't change this unless you know what you are doing.
	  Don't change this unless you know what you are doing.


config DYNAMIC_MEMORY_LAYOUT
	bool
	---help---
	  This option makes base addresses of vmalloc and vmemmap as well as
	  __PAGE_OFFSET movable during boot.

config RANDOMIZE_MEMORY
config RANDOMIZE_MEMORY
	bool "Randomize the kernel memory sections"
	bool "Randomize the kernel memory sections"
	depends on X86_64
	depends on X86_64
	depends on RANDOMIZE_BASE
	depends on RANDOMIZE_BASE
	select DYNAMIC_MEMORY_LAYOUT
	default RANDOMIZE_BASE
	default RANDOMIZE_BASE
	---help---
	---help---
	   Randomizes the base virtual address of kernel memory sections
	   Randomizes the base virtual address of kernel memory sections
+1 −1
Original line number Original line Diff line number Diff line
@@ -78,7 +78,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
ifdef CONFIG_X86_64
	vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
	vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o
	vmlinux-objs-y += $(obj)/mem_encrypt.o
	vmlinux-objs-y += $(obj)/mem_encrypt.o
	vmlinux-objs-y += $(obj)/pgtable_64.o
	vmlinux-objs-y += $(obj)/pgtable_64.o
endif
endif
+114 −54
Original line number Original line Diff line number Diff line
@@ -33,6 +33,7 @@
#include <asm/processor-flags.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
#include <asm/bootparam.h>
#include "pgtable.h"


/*
/*
 * Locally defined symbols should be marked hidden:
 * Locally defined symbols should be marked hidden:
@@ -304,55 +305,77 @@ ENTRY(startup_64)
	/* Set up the stack */
	/* Set up the stack */
	leaq	boot_stack_end(%rbx), %rsp
	leaq	boot_stack_end(%rbx), %rsp


#ifdef CONFIG_X86_5LEVEL
	/*
	/*
	 * Check if we need to enable 5-level paging.
	 * At this point we are in long mode with 4-level paging enabled,
	 * RSI holds real mode data and need to be preserved across
	 * but we might want to enable 5-level paging or vice versa.
	 * a function call.
	 *
	 * The problem is that we cannot do it directly. Setting or clearing
	 * CR4.LA57 in long mode would trigger #GP. So we need to switch off
	 * long mode and paging first.
	 *
	 * We also need a trampoline in lower memory to switch over from
	 * 4- to 5-level paging for cases when the bootloader puts the kernel
	 * above 4G, but didn't enable 5-level paging for us.
	 *
	 * The same trampoline can be used to switch from 5- to 4-level paging
	 * mode, like when starting 4-level paging kernel via kexec() when
	 * original kernel worked in 5-level paging mode.
	 *
	 * For the trampoline, we need the top page table to reside in lower
	 * memory as we don't have a way to load 64-bit values into CR3 in
	 * 32-bit mode.
	 *
	 * We go though the trampoline even if we don't have to: if we're
	 * already in a desired paging mode. This way the trampoline code gets
	 * tested on every boot.
	 */
	 */
	pushq	%rsi
	call	l5_paging_required
	popq	%rsi


	/* If l5_paging_required() returned zero, we're done here. */
	/* Make sure we have GDT with 32-bit code segment */
	cmpq	$0, %rax
	leaq	gdt(%rip), %rax
	je	lvl5
	movq	%rax, gdt64+2(%rip)
	lgdt	gdt64(%rip)


	/*
	/*
	 * At this point we are in long mode with 4-level paging enabled,
	 * paging_prepare() sets up the trampoline and checks if we need to
	 * but we want to enable 5-level paging.
	 * enable 5-level paging.
	 *
	 * The problem is that we cannot do it directly. Setting LA57 in
	 * long mode would trigger #GP. So we need to switch off long mode
	 * first.
	 *
	 *
	 * NOTE: This is not going to work if bootloader put us above 4G
	 * Address of the trampoline is returned in RAX.
	 * limit.
	 * Non zero RDX on return means we need to enable 5-level paging.
	 *
	 *
	 * The first step is go into compatibility mode.
	 * RSI holds real mode data and needs to be preserved across
	 * this function call.
	 */
	 */
	pushq	%rsi
	call	paging_prepare
	popq	%rsi


	/* Clear additional page table */
	/* Save the trampoline address in RCX */
	leaq	lvl5_pgtable(%rbx), %rdi
	movq	%rax, %rcx
	xorq	%rax, %rax
	movq	$(PAGE_SIZE/8), %rcx
	rep	stosq


	/*
	/*
	 * Setup current CR3 as the first and only entry in a new top level
	 * Load the address of trampoline_return() into RDI.
	 * page table.
	 * It will be used by the trampoline to return to the main code.
	 */
	 */
	movq	%cr3, %rdi
	leaq	trampoline_return(%rip), %rdi
	leaq	0x7 (%rdi), %rax
	movq	%rax, lvl5_pgtable(%rbx)


	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
	pushq	$__KERNEL32_CS
	pushq	$__KERNEL32_CS
	leaq	compatible_mode(%rip), %rax
	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
	pushq	%rax
	pushq	%rax
	lretq
	lretq
lvl5:
trampoline_return:
#endif
	/* Restore the stack, the 32-bit trampoline uses its own stack */
	leaq	boot_stack_end(%rbx), %rsp

	/*
	 * cleanup_trampoline() would restore trampoline memory.
	 *
	 * RSI holds real mode data and needs to be preserved across
	 * this function call.
	 */
	pushq	%rsi
	call	cleanup_trampoline
	popq	%rsi


	/* Zero EFLAGS */
	/* Zero EFLAGS */
	pushq	$0
	pushq	$0
@@ -490,46 +513,82 @@ relocated:
	jmp	*%rax
	jmp	*%rax


	.code32
	.code32
#ifdef CONFIG_X86_5LEVEL
/*
compatible_mode:
 * This is the 32-bit trampoline that will be copied over to low memory.
 *
 * RDI contains the return address (might be above 4G).
 * ECX contains the base address of the trampoline memory.
 * Non zero RDX on return means we need to enable 5-level paging.
 */
ENTRY(trampoline_32bit_src)
	/* Set up data and stack segments */
	/* Set up data and stack segments */
	movl	$__KERNEL_DS, %eax
	movl	$__KERNEL_DS, %eax
	movl	%eax, %ds
	movl	%eax, %ds
	movl	%eax, %ss
	movl	%eax, %ss


	/* Set up new stack */
	leal	TRAMPOLINE_32BIT_STACK_END(%ecx), %esp

	/* Disable paging */
	/* Disable paging */
	movl	%cr0, %eax
	movl	%cr0, %eax
	btrl	$X86_CR0_PG_BIT, %eax
	btrl	$X86_CR0_PG_BIT, %eax
	movl	%eax, %cr0
	movl	%eax, %cr0


	/* Point CR3 to 5-level paging */
	/* Check what paging mode we want to be in after the trampoline */
	leal	lvl5_pgtable(%ebx), %eax
	cmpl	$0, %edx
	movl	%eax, %cr3
	jz	1f


	/* Enable PAE and LA57 mode */
	/* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
	movl	%cr4, %eax
	testl	$X86_CR4_LA57, %eax
	jnz	3f
	jmp	2f
1:
	/* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
	movl	%cr4, %eax
	movl	%cr4, %eax
	orl	$(X86_CR4_PAE | X86_CR4_LA57), %eax
	testl	$X86_CR4_LA57, %eax
	jz	3f
2:
	/* Point CR3 to the trampoline's new top level page table */
	leal	TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
	movl	%eax, %cr3
3:
	/* Enable PAE and LA57 (if required) paging modes */
	movl	$X86_CR4_PAE, %eax
	cmpl	$0, %edx
	jz	1f
	orl	$X86_CR4_LA57, %eax
1:
	movl	%eax, %cr4
	movl	%eax, %cr4


	/* Calculate address we are running at */
	/* Calculate address of paging_enabled() once we are executing in the trampoline */
	call	1f
	leal	paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
1:	popl	%edi
	subl	$1b, %edi


	/* Prepare stack for far return to Long Mode */
	/* Prepare the stack for far return to Long Mode */
	pushl	$__KERNEL_CS
	pushl	$__KERNEL_CS
	leal	lvl5(%edi), %eax
	pushl	%eax
	push	%eax


	/* Enable paging back */
	/* Enable paging again */
	movl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movl	%eax, %cr0
	movl	%eax, %cr0


	lret
	lret
#endif


	.code64
paging_enabled:
	/* Return from the trampoline */
	jmp	*%rdi

	/*
         * The trampoline code has a size limit.
         * Make sure we fail to compile if the trampoline code grows
         * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
	 */
	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE

	.code32
no_longmode:
no_longmode:
	/* This isn't an x86-64 CPU so hang */
	/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
1:
	hlt
	hlt
	jmp     1b
	jmp     1b
@@ -537,6 +596,11 @@ no_longmode:
#include "../../kernel/verify_cpu.S"
#include "../../kernel/verify_cpu.S"


	.data
	.data
gdt64:
	.word	gdt_end - gdt
	.long	0
	.word	0
	.quad   0
gdt:
gdt:
	.word	gdt_end - gdt
	.word	gdt_end - gdt
	.long	gdt
	.long	gdt
@@ -585,7 +649,3 @@ boot_stack_end:
	.balign 4096
	.balign 4096
pgtable:
pgtable:
	.fill BOOT_PGT_SIZE, 1, 0
	.fill BOOT_PGT_SIZE, 1, 0
#ifdef CONFIG_X86_5LEVEL
lvl5_pgtable:
	.fill PAGE_SIZE, 1, 0
#endif
Loading