Commit 186525bd authored by Ingo Molnar

mm, x86/mm: Untangle address space layout definitions from basic pgtable type definitions



- Untangle the somewhat incestuous way in which VMALLOC_START is used all across the
  kernel, yet is, on x86, defined deep inside one of the lowest-level page table headers.
  It doesn't help that vmalloc.h only includes a single asm header:

     #include <asm/page.h>           /* pgprot_t */

  So there was no existing cross-arch way to decouple address layout
  definitions from page.h details. I used this:

   #ifndef VMALLOC_START
   # include <asm/vmalloc.h>
   #endif

  This way every architecture that wants to simplify page.h can do so (see the
  sketch after this list).

- Also, on x86 we had a couple of LDT-related inline functions that used
  the late-stage address space layout positions - but these could be
  uninlined without real trouble - the end result is cleaner this way as
  well.
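
  For illustration, a minimal sketch of the arch-side header the
  #ifndef VMALLOC_START mechanism expects. The x86 <asm/vmalloc.h> hunk is not
  part of this excerpt, so the include below is an assumption about how the
  layout could be pulled in; the point is only that the header ends up
  providing the vmalloc layout without dragging in low-level pgtable types:

   /* Hypothetical arch/x86/include/asm/vmalloc.h under this scheme. */
   #ifndef _ASM_X86_VMALLOC_H
   #define _ASM_X86_VMALLOC_H

   /*
    * Assumption: on 32-bit x86 the layout comes in via
    * <asm/pgtable_areas.h> -> <asm/pgtable_32_areas.h> (both added by
    * this commit); the real header may include more or other headers.
    */
   #include <asm/pgtable_areas.h>

   #endif /* _ASM_X86_VMALLOC_H */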

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 1f059dfd
arch/x86/include/asm/cpu_entry_area.h  +1 −9
@@ -6,6 +6,7 @@
#include <linux/percpu-defs.h>
#include <asm/processor.h>
#include <asm/intel_ds.h>
#include <asm/pgtable_areas.h>

#ifdef CONFIG_X86_64

@@ -134,15 +135,6 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);

/* Single page reserved for the readonly IDT mapping: */
#define	CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)

#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)

#define CPU_ENTRY_AREA_MAP_SIZE			\
	(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)

extern struct cpu_entry_area *get_cpu_entry_area(int cpu);

static inline struct entry_stack *cpu_entry_stack(int cpu)
arch/x86/include/asm/mmu_context.h  +6 −80
@@ -69,14 +69,6 @@ struct ldt_struct {
	int			slot;
};

/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)

static inline void *ldt_slot_va(int slot)
{
	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
}

/*
 * Used for LDT copy/destruction.
 */
@@ -99,87 +91,21 @@ static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif

static inline void load_mm_ldt(struct mm_struct *mm)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	struct ldt_struct *ldt;

	/* READ_ONCE synchronizes with smp_store_release */
	ldt = READ_ONCE(mm->context.ldt);

	/*
	 * Any change to mm->context.ldt is followed by an IPI to all
	 * CPUs with the mm active.  The LDT will not be freed until
	 * after the IPI is handled by all such CPUs.  This means that,
	 * if the ldt_struct changes before we return, the values we see
	 * will be safe, and the new values will be loaded before we run
	 * any user code.
	 *
	 * NB: don't try to convert this to use RCU without extreme care.
	 * We would still need IRQs off, because we don't want to change
	 * the local LDT after an IPI loaded a newer value than the one
	 * that we can see.
	 */

	if (unlikely(ldt)) {
		if (static_cpu_has(X86_FEATURE_PTI)) {
			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
				/*
				 * Whoops -- either the new LDT isn't mapped
				 * (if slot == -1) or is mapped into a bogus
				 * slot (if slot > 1).
				 */
				clear_LDT();
				return;
			}

			/*
			 * If page table isolation is enabled, ldt->entries
			 * will not be mapped in the userspace pagetables.
			 * Tell the CPU to access the LDT through the alias
			 * at ldt_slot_va(ldt->slot).
			 */
			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
		} else {
			set_ldt(ldt->entries, ldt->nr_entries);
		}
	} else {
		clear_LDT();
	}
extern void load_mm_ldt(struct mm_struct *mm);
extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
#else
static inline void load_mm_ldt(struct mm_struct *mm)
{
	clear_LDT();
#endif
}

static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	/*
	 * Load the LDT if either the old or new mm had an LDT.
	 *
	 * An mm will never go from having an LDT to not having an LDT.  Two
	 * mms never share an LDT, so we don't gain anything by checking to
	 * see whether the LDT changed.  There's also no guarantee that
	 * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL,
	 * then prev->context.ldt will also be non-NULL.
	 *
	 * If we really cared, we could optimize the case where prev == next
	 * and we're exiting lazy mode.  Most of the time, if this happens,
	 * we don't actually need to reload LDTR, but modify_ldt() is mostly
	 * used by legacy code and emulators where we don't need this level of
	 * performance.
	 *
	 * This uses | instead of || because it generates better code.
	 */
	if (unlikely((unsigned long)prev->context.ldt |
		     (unsigned long)next->context.ldt))
		load_mm_ldt(next);
#endif

	DEBUG_LOCKS_WARN_ON(preemptible());
}
#endif

void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);

/*
 * Init a new mm.  Used on mm copies, like at fork()
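
The extern declarations above replace the inline bodies; the matching out-of-line
definitions are not part of this excerpt. A minimal sketch, assuming they land in
arch/x86/kernel/ldt.c (built only when CONFIG_MODIFY_LDT_SYSCALL=y, matching the
#ifdef above) and reuse the bodies removed from mmu_context.h; includes are
abbreviated for illustration:

   /* Sketch only - assumed destination: arch/x86/kernel/ldt.c. */
   #include <linux/mm_types.h>
   #include <asm/mmu_context.h>
   #include <asm/desc.h>

   /* This is a multiple of PAGE_SIZE. */
   #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)

   static void *ldt_slot_va(int slot)
   {
           return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
   }

   void load_mm_ldt(struct mm_struct *mm)
   {
           struct ldt_struct *ldt;

           /* READ_ONCE synchronizes with smp_store_release */
           ldt = READ_ONCE(mm->context.ldt);

           if (unlikely(ldt)) {
                   if (static_cpu_has(X86_FEATURE_PTI)) {
                           if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
                                   /* LDT not mapped, or mapped into a bogus slot */
                                   clear_LDT();
                                   return;
                           }
                           /* With PTI, use the alias mapped into the user pagetables. */
                           set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
                   } else {
                           set_ldt(ldt->entries, ldt->nr_entries);
                   }
           } else {
                   clear_LDT();
           }
   }

   void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
   {
           /* Load the LDT if either the old or new mm had an LDT. */
           if (unlikely((unsigned long)prev->context.ldt |
                        (unsigned long)next->context.ldt))
                   load_mm_ldt(next);

           DEBUG_LOCKS_WARN_ON(preemptible());
   }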
arch/x86/include/asm/pgtable_32_areas.h  +53 −0 (new file)
#ifndef _ASM_X86_PGTABLE_32_AREAS_H
#define _ASM_X86_PGTABLE_32_AREAS_H

#include <asm/cpu_entry_area.h>

/*
 * Just any arbitrary offset to the start of the vmalloc VM area: the
 * current 8MB value just means that there will be a 8MB "hole" after the
 * physical memory until the kernel virtual memory starts.  That means that
 * any out-of-bounds memory accesses will hopefully be caught.
 * The vmalloc() routines leaves a hole of 4kB between each vmalloced
 * area for the same reason. ;)
 */
#define VMALLOC_OFFSET	(8 * 1024 * 1024)

#ifndef __ASSEMBLY__
extern bool __vmalloc_start_set; /* set once high_memory is set */
#endif

#define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
#ifdef CONFIG_X86_PAE
#define LAST_PKMAP 512
#else
#define LAST_PKMAP 1024
#endif

#define CPU_ENTRY_AREA_PAGES		(NR_CPUS * DIV_ROUND_UP(sizeof(struct cpu_entry_area), PAGE_SIZE))

/* The +1 is for the readonly IDT page: */
#define CPU_ENTRY_AREA_BASE	\
	((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)

#define LDT_BASE_ADDR		\
	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)

#define LDT_END_ADDR		(LDT_BASE_ADDR + PMD_SIZE)

#define PKMAP_BASE		\
	((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)

#ifdef CONFIG_HIGHMEM
# define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
#else
# define VMALLOC_END	(LDT_BASE_ADDR - 2 * PAGE_SIZE)
#endif

#define MODULES_VADDR	VMALLOC_START
#define MODULES_END	VMALLOC_END
#define MODULES_LEN	(MODULES_VADDR - MODULES_END)

#define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)

#endif /* _ASM_X86_PGTABLE_32_AREAS_H */
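
The constants above carve the areas out top-down below the fixmap, so from low to
high addresses the ordering is: vmalloc area, then (with HIGHMEM) the PKMAP window,
then the LDT mapping, then the cpu_entry_area, directly under FIXADDR_TOT_START.
A minimal sketch of runtime sanity checks expressing that ordering (not part of
this commit; the function name is hypothetical, and these cannot be BUILD_BUG_ON()s
because FIXADDR_TOT_START depends on __FIXADDR_TOP, which is a variable on 32-bit):

   /* Hypothetical sanity checks over the constants defined above. */
   #include <linux/bug.h>
   #include <linux/init.h>

   static void __init check_pgtable_32_areas(void)
   {
           BUG_ON(VMALLOC_END >= LDT_BASE_ADDR);
           BUG_ON(PKMAP_BASE >= LDT_BASE_ADDR);
           BUG_ON(LDT_END_ADDR > CPU_ENTRY_AREA_BASE);
           BUG_ON(CPU_ENTRY_AREA_BASE >= FIXADDR_TOT_START);
   }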
arch/x86/include/asm/pgtable_32_types.h  +3 −54
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PGTABLE_32_DEFS_H
#define _ASM_X86_PGTABLE_32_DEFS_H
#ifndef _ASM_X86_PGTABLE_32_TYPES_H
#define _ASM_X86_PGTABLE_32_TYPES_H

/*
 * The Linux x86 paging architecture is 'compile-time dual-mode', it
@@ -20,55 +20,4 @@
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE - 1))

/* Just any arbitrary offset to the start of the vmalloc VM area: the
 * current 8MB value just means that there will be a 8MB "hole" after the
 * physical memory until the kernel virtual memory starts.  That means that
 * any out-of-bounds memory accesses will hopefully be caught.
 * The vmalloc() routines leaves a hole of 4kB between each vmalloced
 * area for the same reason. ;)
 */
#define VMALLOC_OFFSET	(8 * 1024 * 1024)

#ifndef __ASSEMBLY__
extern bool __vmalloc_start_set; /* set once high_memory is set */
#endif

#define VMALLOC_START	((unsigned long)high_memory + VMALLOC_OFFSET)
#ifdef CONFIG_X86_PAE
#define LAST_PKMAP 512
#else
#define LAST_PKMAP 1024
#endif

/*
 * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE.
 * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c
 * to avoid include recursion hell.
 */
#define CPU_ENTRY_AREA_PAGES	(NR_CPUS * 43)

/* The +1 is for the readonly IDT page: */
#define CPU_ENTRY_AREA_BASE	\
	((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)

#define LDT_BASE_ADDR		\
	((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)

#define LDT_END_ADDR		(LDT_BASE_ADDR + PMD_SIZE)

#define PKMAP_BASE		\
	((LDT_BASE_ADDR - PAGE_SIZE) & PMD_MASK)

#ifdef CONFIG_HIGHMEM
# define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
#else
# define VMALLOC_END	(LDT_BASE_ADDR - 2 * PAGE_SIZE)
#endif

#define MODULES_VADDR	VMALLOC_START
#define MODULES_END	VMALLOC_END
#define MODULES_LEN	(MODULES_VADDR - MODULES_END)

#define MAXMEM	(VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)

#endif /* _ASM_X86_PGTABLE_32_DEFS_H */
#endif /* _ASM_X86_PGTABLE_32_TYPES_H */
arch/x86/include/asm/pgtable_areas.h  +16 −0 (new file)
#ifndef _ASM_X86_PGTABLE_AREAS_H
#define _ASM_X86_PGTABLE_AREAS_H

#ifdef CONFIG_X86_32
# include <asm/pgtable_32_areas.h>
#endif

/* Single page reserved for the readonly IDT mapping: */
#define CPU_ENTRY_AREA_RO_IDT		CPU_ENTRY_AREA_BASE
#define CPU_ENTRY_AREA_PER_CPU		(CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)

#define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)

#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)

#endif /* _ASM_X86_PGTABLE_AREAS_H */