Commit 8b5abde1 authored by Linus Torvalds
Browse files

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:
 "A laundry list of changes: KASAN improvements/fixes for ptdump, a
  self-test fix, PAT cleanup and wbinvd() avoidance, removal of stale
  code and documentation updates"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm/ptdump: Add address marker for KASAN shadow region
  x86/mm/ptdump: Optimize check for W+X mappings for CONFIG_KASAN=y
  x86/mm/pat: Use rb_entry()
  x86/mpx: Re-add MPX to selftests Makefile
  x86/mm: Remove CONFIG_DEBUG_NX_TEST
  x86/mm/cpa: Avoid wbinvd() for PREEMPT
  x86/mm: Improve documentation for low-level device I/O functions
parents a25a1d6c 025205f8
Loading
Loading
Loading
Loading
+0 −8
Original line number Diff line number Diff line
@@ -120,14 +120,6 @@ config DEBUG_SET_MODULE_RONX
	  against certain classes of kernel exploits.
	  If in doubt, say "N".

config DEBUG_NX_TEST
	tristate "Testcase for the NX non-executable stack feature"
	depends on DEBUG_KERNEL && m
	---help---
	  This option enables a testcase for the CPU NX capability
	  and the software setup of this feature.
	  If in doubt, say "N"

config DOUBLEFAULT
	default y
	bool "Enable doublefault exception handler" if EXPERT
+35 −11
Original line number Diff line number Diff line
@@ -164,6 +164,17 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
#define virt_to_bus virt_to_phys
#define bus_to_virt phys_to_virt

/*
 * The default ioremap() behavior is non-cached; if you need something
 * else, you probably want one of the following.
 */
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
#define ioremap_uc ioremap_uc

extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val);

/**
 * ioremap     -   map bus memory into CPU space
 * @offset:    bus address of the memory
@@ -178,17 +189,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address)
 * If the area you are trying to map is a PCI BAR you should have a
 * look at pci_iomap().
 */
extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
#define ioremap_uc ioremap_uc

extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
				unsigned long prot_val);

/*
 * The default ioremap() behavior is non-cached:
 */
static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
{
	return ioremap_nocache(offset, size);
@@ -207,18 +207,42 @@ extern void set_iounmap_nonlazy(void);
 */
#define xlate_dev_kmem_ptr(p)	p

/**
 * memset_io - fill a range of I/O memory with a constant byte
 * @addr:	start of the I/O-memory range to fill
 * @val:	byte value to store
 * @count:	number of bytes to fill
 *
 * Strips the I/O-memory annotation from @addr and hands the range to
 * memset().
 */
static inline void
memset_io(volatile void __iomem *addr, unsigned char val, size_t count)
{
	void *dst = (void __force *)addr;

	memset(dst, val, count);
}

/**
 * memcpy_fromio - copy a block of data out of I/O memory
 * @dst:	destination buffer in RAM
 * @src:	source address in I/O memory
 * @count:	number of bytes to copy
 *
 * Strips the I/O-memory annotation from @src and hands the copy to
 * memcpy().
 */
static inline void
memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count)
{
	const void *from = (const void __force *)src;

	memcpy(dst, from, count);
}

/**
 * memcpy_toio		Copy a block of data into I/O memory
 * @dst:		The (I/O memory) destination for the copy
 * @src:		The (RAM) source for the data
 * @count:		The number of bytes to copy
 *
 * Copy a block of data to I/O memory.
 */
static inline void
memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
{
+0 −1
Original line number Diff line number Diff line
@@ -101,7 +101,6 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o

obj-$(CONFIG_AMD_NB)		+= amd_nb.o
obj-$(CONFIG_DEBUG_RODATA_TEST)	+= test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o

obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvmclock.o

arch/x86/kernel/test_nx.c

deleted 100644 → 0
+0 −173
Original line number Diff line number Diff line
/*
 * test_nx.c: functional test for NX functionality
 *
 * (C) Copyright 2008 Intel Corporation
 * Author: Arjan van de Ven <arjan@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/module.h>
#include <linux/sort.h>
#include <linux/slab.h>

#include <linux/uaccess.h>
#include <asm/asm.h>

extern int rodata_test_data;

/*
 * This file checks 4 things:
 * 1) Check if the stack is not executable
 * 2) Check if kmalloc memory is not executable
 * 3) Check if the .rodata section is not executable
 * 4) Check if the .data section of a module is not executable
 *
 * To do this, the test code tries to execute memory in stack/kmalloc/etc,
 * and then checks if the expected trap happens.
 *
 * Sadly, this implies having a dynamic exception handling table entry.
 * ... which can be done (and will make Rusty cry)... but it can only
 * be done in a stand-alone module with only 1 entry total.
 * (otherwise we'd have to sort and that's just too messy)
 */



/*
 * We want to set up an exception handling point on our stack,
 * which means a variable value. This function is rather dirty:
 * it overwrites the faulting-instruction address (insn) of the
 * module's one and only exception table entry with @new.
 *
 * @marker: the address the entry is expected to hold right now. It is
 *          not consulted: with exactly one entry there is nothing to
 *          search for. It is kept so that call sites document what is
 *          being replaced.
 * @new:    the address to store in the entry's insn field.
 *
 * If the module does not have exactly one exception table entry, the
 * table is left untouched and an error is logged.
 */
static void fudze_exception_table(void *marker, void *new)
{
	struct module *mod = THIS_MODULE;
	struct exception_table_entry *extable;

	/*
	 * An empty table has no entry 0 to patch; bail out instead of
	 * writing through whatever mod->extable happens to hold.
	 */
	if (mod->num_exentries == 0) {
		printk(KERN_ERR "test_nx: no exception table entry to patch!\n");
		printk(KERN_ERR "test_nx: test results are not reliable.\n");
		return;
	}

	/*
	 * Note: This module has only 1 exception table entry,
	 * so searching and sorting is not needed. If that changes,
	 * this would be the place to search and re-sort the exception
	 * table.
	 */
	if (mod->num_exentries > 1) {
		printk(KERN_ERR "test_nx: too many exception table entries!\n");
		printk(KERN_ERR "test_nx: test results are not reliable.\n");
		return;
	}
	extable = (struct exception_table_entry *)mod->extable;
	extable[0].insn = (unsigned long)new;
}


/*
 * exception tables get their symbols translated so we need
 * to use a fake function to put in there, which we can then
 * replace at runtime.
 */
void foo_label(void);

/*
 * Try to execute the byte(s) at @address and report whether that worked.
 *
 * returns 0 for not-executable, negative for executable
 * (-ENODEV, specifically).
 *
 * Callers place 0xC3 ("ret") at @address, so if the memory is executable
 * the call simply returns and "result" keeps its initial value of 1.
 *
 * Note: we cannot allow this function to be inlined, because
 * that would give us more than 1 exception table entry.
 * This in turn would break the assumptions above.
 */
static noinline int test_address(void *address)
{
	unsigned long result;

	/* Set up an exception table entry for our address */
	fudze_exception_table(&foo_label, address);
	/* Assume "executable" until the fixup path proves otherwise. */
	result = 1;
	/*
	 * 0: indirect call into the memory under test.
	 * 2: fixup code, registered for label 0 via _ASM_EXTABLE(); the
	 *    entry's insn was just re-pointed at @address by the call above.
	 *    It clears "result" and executes "ret".
	 *    NOTE(review): presumably the fault is raised at @address after
	 *    the call has pushed its return address, so this "ret" resumes
	 *    at label 1 - confirm against the x86 fault/fixup path.
	 * The "0" (result) input ties the initial value 1 to the output
	 * register, so it survives when the fixup does not run.
	 */
	asm volatile(
		"foo_label:\n"
		"0:	call *%[fake_code]\n"
		"1:\n"
		".section .fixup,\"ax\"\n"
		"2:	mov %[zero], %[rslt]\n"
		"	ret\n"
		".previous\n"
		_ASM_EXTABLE(0b,2b)
		: [rslt] "=r" (result)
		: [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
	);
	/* change the exception table back for the next round */
	fudze_exception_table(address, &foo_label);

	/* Non-zero means the fixup never ran, i.e. the call executed. */
	if (result)
		return -ENODEV;
	return 0;
}

static unsigned char test_data = 0xC3; /* 0xC3 is the opcode for "ret" */

/*
 * Module init: run the NX self-tests.
 *
 * Probes a stack buffer, a kmalloc() buffer and rodata_test_data with
 * test_address(). The stack and heap buffers are filled with 0xC3 here;
 * rodata_test_data is only checked to already hold that value.
 *
 * Returns 0 when every executed test found its memory non-executable,
 * -ENODEV when any test failed, or -ENOMEM when the heap buffer could
 * not be allocated (the .rodata test is then not run).
 */
static int test_NX(void)
{
	int ret = 0;
	/*
	 * 0xC3 is the opcode for "ret". These are raw opcode bytes, so use
	 * unsigned char (as test_data does): 0xC3 does not fit a signed char.
	 */
	unsigned char stackcode[] = {0xC3, 0x90, 0 };
	unsigned char *heap;

	test_data = 0xC3;

	printk(KERN_INFO "Testing NX protection\n");

	/* Test 1: check if the stack is not executable */
	if (test_address(&stackcode)) {
		printk(KERN_ERR "test_nx: stack was executable\n");
		ret = -ENODEV;
	}


	/* Test 2: Check if the heap is executable */
	heap = kmalloc(64, GFP_KERNEL);
	if (!heap)
		return -ENOMEM;
	heap[0] = 0xC3; /* opcode for "ret" */

	if (test_address(heap)) {
		printk(KERN_ERR "test_nx: heap was executable\n");
		ret = -ENODEV;
	}
	kfree(heap);

	/*
	 * NOTE: an earlier version of this comment said that both of the
	 * following tests fail and are therefore not run. Only Test 4 is
	 * actually compiled out (see the #if 0 below); Test 3 does run.
	 */

	/* Test 3: Check if the .rodata section is executable */
	if (rodata_test_data != 0xC3) {
		printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
		ret = -ENODEV;
	} else if (test_address(&rodata_test_data)) {
		printk(KERN_ERR "test_nx: .rodata section is executable\n");
		ret = -ENODEV;
	}

#if 0
	/* Test 4: Check if the .data section of a module is executable */
	if (test_address(&test_data)) {
		printk(KERN_ERR "test_nx: .data section is executable\n");
		ret = -ENODEV;
	}

#endif
	return ret;
}

/*
 * Module exit hook. Intentionally empty: test_NX() frees the one buffer
 * it allocates before returning, so nothing is left to tear down here.
 */
static void test_exit(void)
{
}

module_init(test_NX);
module_exit(test_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Testcase for the NX infrastructure");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
+24 −1
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/seq_file.h>

#include <asm/kasan.h>
#include <asm/pgtable.h>

/*
@@ -51,6 +52,10 @@ enum address_markers_idx {
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
	KASAN_SHADOW_START_NR,
	KASAN_SHADOW_END_NR,
#endif
# ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
# endif
@@ -76,6 +81,10 @@ static struct addr_marker address_markers[] = {
	{ 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
	{ 0/* VMALLOC_START */, "vmalloc() Area" },
	{ 0/* VMEMMAP_START */, "Vmemmap" },
#ifdef CONFIG_KASAN
	{ KASAN_SHADOW_START,	"KASAN shadow" },
	{ KASAN_SHADOW_END,	"KASAN shadow end" },
#endif
# ifdef CONFIG_X86_ESPFIX64
	{ ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
# endif
@@ -327,18 +336,31 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,

#if PTRS_PER_PUD > 1

/*
 * Shortcut for CONFIG_DEBUG_WX=y combined with CONFIG_KASAN=y.
 * KASAN populates its page tables with identical values, so when the
 * W+X check is the only thing being done there is nothing to gain from
 * descending into an entry that equals the one just visited. Skipping
 * such repeats saves dozens of seconds during boot.
 *
 * Returns true only when @checkwx is set, a previous entry exists, and
 * @pud holds the same raw value as @prev_pud.
 */
static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
{
	if (!checkwx || !prev_pud)
		return false;

	return pud_val(*prev_pud) == pud_val(*pud);
}

static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
							unsigned long P)
{
	int i;
	pud_t *start;
	pgprotval_t prot;
	pud_t *prev_pud = NULL;

	start = (pud_t *) pgd_page_vaddr(addr);

	for (i = 0; i < PTRS_PER_PUD; i++) {
		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
		if (!pud_none(*start)) {
		if (!pud_none(*start) &&
		    !pud_already_checked(prev_pud, start, st->check_wx)) {
			if (pud_large(*start) || !pud_present(*start)) {
				prot = pud_flags(*start);
				note_page(m, st, __pgprot(prot), 2);
@@ -349,6 +371,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
		} else
			note_page(m, st, __pgprot(0), 2);

		prev_pud = start;
		start++;
	}
}
Loading