Commit 0d37dde7 authored by Linus Torvalds
Browse files

Merge branch 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 vsyscall updates from Thomas Gleixner:
 "Further hardening of the legacy vsyscall by providing support for
  execute only mode and switching the default to it.

  This prevents a certain class of attacks which rely on the vsyscall
  page being accessible at a fixed address in the canonical kernel
  address space"

* 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  selftests/x86: Add a test for process_vm_readv() on the vsyscall page
  x86/vsyscall: Add __ro_after_init to global variables
  x86/vsyscall: Change the default vsyscall mode to xonly
  selftests/x86/vsyscall: Verify that vsyscall=none blocks execution
  x86/vsyscall: Document odd SIGSEGV error code for vsyscalls
  x86/vsyscall: Show something useful on a read fault
  x86/vsyscall: Add a new vsyscall=xonly mode
  Documentation/admin: Remove the vsyscall=native documentation
parents 0902d501 7f0a5e07
Loading
Loading
Loading
Loading
+5 −6
Original line number Diff line number Diff line
@@ -5100,13 +5100,12 @@
			targets for exploits that can control RIP.

			emulate     Vsyscalls turn into traps and are
			            emulated reasonably safely.
			            emulated reasonably safely.  The vsyscall
				    page is readable.

			native      Vsyscalls are native syscall instructions.
			            This is a little bit faster than trapping
			            and makes a few dynamic recompilers work
			            better than they would in emulation mode.
			            It also makes exploits much easier to write.
			xonly       [default] Vsyscalls turn into traps and are
			            emulated reasonably safely.  The vsyscall
				    page is not readable.

			none        Vsyscalls don't work at all.  This makes
			            them quite hard to use for exploits but
+25 −10
Original line number Diff line number Diff line
@@ -2288,7 +2288,7 @@ config COMPAT_VDSO
choice
	prompt "vsyscall table for legacy applications"
	depends on X86_64
	default LEGACY_VSYSCALL_EMULATE
	default LEGACY_VSYSCALL_XONLY
	help
	  Legacy user code that does not know how to find the vDSO expects
	  to be able to issue three syscalls by calling fixed addresses in
@@ -2296,23 +2296,38 @@ choice
	  it can be used to assist security vulnerability exploitation.

	  This setting can be changed at boot time via the kernel command
	  line parameter vsyscall=[emulate|none].
	  line parameter vsyscall=[emulate|xonly|none].

	  On a system with recent enough glibc (2.14 or newer) and no
	  static binaries, you can say None without a performance penalty
	  to improve security.

	  If unsure, select "Emulate".
	  If unsure, select "Emulate execution only".

	config LEGACY_VSYSCALL_EMULATE
		bool "Emulate"
		bool "Full emulation"
		help
		  The kernel traps and emulates calls into the fixed
		  vsyscall address mapping. This makes the mapping
		  non-executable, but it still contains known contents,
		  which could be used in certain rare security vulnerability
		  exploits. This configuration is recommended when userspace
		  still uses the vsyscall area.
		  The kernel traps and emulates calls into the fixed vsyscall
		  address mapping. This makes the mapping non-executable, but
		  it still contains readable known contents, which could be
		  used in certain rare security vulnerability exploits. This
		  configuration is recommended when using legacy userspace
		  that still uses vsyscalls along with legacy binary
		  instrumentation tools that require code to be readable.

		  An example of this type of legacy userspace is running
		  Pin on an old binary that still uses vsyscalls.

	config LEGACY_VSYSCALL_XONLY
		bool "Emulate execution only"
		help
		  The kernel traps and emulates calls into the fixed vsyscall
		  address mapping and does not allow reads.  This
		  configuration is recommended when userspace might use the
		  legacy vsyscall area but support for legacy binary
		  instrumentation of legacy code is not needed.  It mitigates
		  certain uses of the vsyscall area as an ASLR-bypassing
		  buffer.

	config LEGACY_VSYSCALL_NONE
		bool "None"
+33 −4
Original line number Diff line number Diff line
@@ -42,9 +42,11 @@
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"

static enum { EMULATE, NONE } vsyscall_mode =
static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init =
#ifdef CONFIG_LEGACY_VSYSCALL_NONE
	NONE;
#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY)
	XONLY;
#else
	EMULATE;
#endif
@@ -54,6 +56,8 @@ static int __init vsyscall_setup(char *str)
	if (str) {
		if (!strcmp("emulate", str))
			vsyscall_mode = EMULATE;
		else if (!strcmp("xonly", str))
			vsyscall_mode = XONLY;
		else if (!strcmp("none", str))
			vsyscall_mode = NONE;
		else
@@ -113,7 +117,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
	}
}

bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
bool emulate_vsyscall(unsigned long error_code,
		      struct pt_regs *regs, unsigned long address)
{
	struct task_struct *tsk;
	unsigned long caller;
@@ -122,6 +127,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
	long ret;
	unsigned long orig_dx;

	/* Write faults or kernel-privilege faults never get fixed up. */
	if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER)
		return false;

	if (!(error_code & X86_PF_INSTR)) {
		/* Failed vsyscall read */
		if (vsyscall_mode == EMULATE)
			return false;

		/*
		 * User code tried and failed to read the vsyscall page.
		 */
		warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround");
		return false;
	}

	/*
	 * No point in checking CS -- the only way to get here is a user mode
	 * trap to a high address, which means that we're in 64-bit user code.
@@ -284,7 +305,7 @@ static const char *gate_vma_name(struct vm_area_struct *vma)
/*
 * VMA operations table that supplies only a .name hook (gate_vma_name).
 * NOTE(review): presumably installed on gate_vma below; that initializer
 * is cut off in this view -- confirm.
 */
static const struct vm_operations_struct gate_vma_ops = {
	.name = gate_vma_name,
};
static struct vm_area_struct gate_vma = {
static struct vm_area_struct gate_vma __ro_after_init = {
	.vm_start	= VSYSCALL_ADDR,
	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
	.vm_page_prot	= PAGE_READONLY_EXEC,
@@ -357,12 +378,20 @@ void __init map_vsyscall(void)
	extern char __vsyscall_page;
	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);

	if (vsyscall_mode != NONE) {
	/*
	 * For full emulation, the page needs to exist for real.  In
	 * execute-only mode, there is no PTE at all backing the vsyscall
	 * page.
	 */
	if (vsyscall_mode == EMULATE) {
		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
			     PAGE_KERNEL_VVAR);
		set_vsyscall_pgtable_user_bits(swapper_pg_dir);
	}

	if (vsyscall_mode == XONLY)
		gate_vma.vm_flags = VM_EXEC;

	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
		     (unsigned long)VSYSCALL_ADDR);
}
+4 −2
Original line number Diff line number Diff line
@@ -13,10 +13,12 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
 * Called on any page fault in the vsyscall page (not only instruction
 * fetches).
 * Returns true if handled.
 */
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
extern bool emulate_vsyscall(unsigned long error_code,
			     struct pt_regs *regs, unsigned long address);
#else
static inline void map_vsyscall(void) {}
static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
static inline bool emulate_vsyscall(unsigned long error_code,
				    struct pt_regs *regs, unsigned long address)
{
	return false;
}
+12 −6
Original line number Diff line number Diff line
@@ -710,6 +710,10 @@ static void set_signal_archinfo(unsigned long address,
	 * To avoid leaking information about the kernel page
	 * table layout, pretend that user-mode accesses to
	 * kernel addresses are always protection faults.
	 *
	 * NB: This means that failed vsyscalls with vsyscall=none
	 * will have the PROT bit.  This doesn't leak any
	 * information and does not appear to cause any problems.
	 */
	if (address >= TASK_SIZE_MAX)
		error_code |= X86_PF_PROT;
@@ -1369,16 +1373,18 @@ void do_user_addr_fault(struct pt_regs *regs,

#ifdef CONFIG_X86_64
	/*
	 * Instruction fetch faults in the vsyscall page might need
	 * emulation.  The vsyscall page is at a high address
	 * (>PAGE_OFFSET), but is considered to be part of the user
	 * address space.
	 * Faults in the vsyscall page might need emulation.  The
	 * vsyscall page is at a high address (>PAGE_OFFSET), but is
	 * considered to be part of the user address space.
	 *
	 * The vsyscall page does not have a "real" VMA, so do this
	 * emulation before we go searching for VMAs.
	 *
	 * PKRU never rejects instruction fetches, so we don't need
	 * to consider the PF_PK bit.
	 */
	if ((hw_error_code & X86_PF_INSTR) && is_vsyscall_vaddr(address)) {
		if (emulate_vsyscall(regs, address))
	if (is_vsyscall_vaddr(address)) {
		if (emulate_vsyscall(hw_error_code, regs, address))
			return;
	}
#endif
Loading