Commit 8f159f1d authored by Thomas Gleixner
Browse files

x86/entry/common: Protect against instrumentation



Mark the various syscall entries with noinstr to protect them against
instrumentation, and add instrumentation_begin()/instrumentation_end()
annotations to mark the parts of the functions from which it is safe to call
out into instrumentable code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexandre Chartre <alexandre.chartre@oracle.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200505134340.520277507@linutronix.de


parent 1723be30
Loading
Loading
Loading
Loading
+89 −44
Original line number Diff line number Diff line
@@ -41,15 +41,26 @@

#ifdef CONFIG_CONTEXT_TRACKING
/* Called on entry from user mode with IRQs off. */
__visible inline noinstr void enter_from_user_mode(void)
__visible noinstr void enter_from_user_mode(void)
{
	CT_WARN_ON(ct_state() != CONTEXT_USER);
	enum ctx_state state = ct_state();

	user_exit_irqoff();

	instrumentation_begin();
	CT_WARN_ON(state != CONTEXT_USER);
	instrumentation_end();
}
#else
/* Empty stub used when CONFIG_CONTEXT_TRACKING is not defined. */
static inline void enter_from_user_mode(void) {}
#endif

/*
 * Last step of the exit-to-user paths in this file: calls
 * user_enter_irqoff() followed by mds_user_clear_cpu_buffers().
 *
 * The function is noinstr and neither call is wrapped in an
 * instrumentation_begin()/instrumentation_end() section; the callers below
 * invoke it only after their own instrumentation_end().
 *
 * NOTE(review): the _irqoff suffix suggests this must run with IRQs
 * disabled -- confirm against user_enter_irqoff().
 */
static noinstr void exit_to_user_mode(void)
{
	user_enter_irqoff();
	mds_user_clear_cpu_buffers();
}

static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
{
#ifdef CONFIG_X86_64
@@ -179,8 +190,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
	}
}

/* Called with IRQs disabled. */
__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
static void __prepare_exit_to_usermode(struct pt_regs *regs)
{
	struct thread_info *ti = current_thread_info();
	u32 cached_flags;
@@ -219,10 +229,14 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
	 */
	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
#endif
}

	user_enter_irqoff();

	mds_user_clear_cpu_buffers();
/*
 * noinstr entry point wrapping __prepare_exit_to_usermode().
 *
 * @regs: register state passed through to __prepare_exit_to_usermode().
 *
 * The exit work runs inside an instrumentation_begin()/instrumentation_end()
 * section; exit_to_user_mode() is called afterwards, outside that section.
 */
__visible noinstr void prepare_exit_to_usermode(struct pt_regs *regs)
{
	instrumentation_begin();
	__prepare_exit_to_usermode(regs);
	instrumentation_end();
	/* Must stay after instrumentation_end(): not marked as instrumentable. */
	exit_to_user_mode();
}

#define SYSCALL_EXIT_WORK_FLAGS				\
@@ -251,11 +265,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
		tracehook_report_syscall_exit(regs, step);
}

/*
 * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
 * state such that we can immediately switch to user mode.
 */
__visible inline void syscall_return_slowpath(struct pt_regs *regs)
static void __syscall_return_slowpath(struct pt_regs *regs)
{
	struct thread_info *ti = current_thread_info();
	u32 cached_flags = READ_ONCE(ti->flags);
@@ -276,15 +286,29 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
		syscall_slow_exit_work(regs, cached_flags);

	local_irq_disable();
	prepare_exit_to_usermode(regs);
	__prepare_exit_to_usermode(regs);
}

/*
 * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
 * state such that we can immediately switch to user mode.
 *
 * This is the noinstr wrapper: the actual work is done by
 * __syscall_return_slowpath() inside an
 * instrumentation_begin()/instrumentation_end() section, and
 * exit_to_user_mode() is called afterwards, outside that section.
 *
 * @regs: register state passed through to __syscall_return_slowpath().
 */
__visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
{
	instrumentation_begin();
	__syscall_return_slowpath(regs);
	instrumentation_end();
	/* Must stay after instrumentation_end(): not marked as instrumentable. */
	exit_to_user_mode();
}

#ifdef CONFIG_X86_64
__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
	struct thread_info *ti;

	enter_from_user_mode();
	instrumentation_begin();

	local_irq_enable();
	ti = current_thread_info();
	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
@@ -301,8 +325,10 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
		regs->ax = x32_sys_call_table[nr](regs);
#endif
	}
	__syscall_return_slowpath(regs);

	syscall_return_slowpath(regs);
	instrumentation_end();
	exit_to_user_mode();
}
#endif

@@ -313,7 +339,7 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 * extremely hot in workloads that use it, and it's usually called from
 * do_fast_syscall_32, so forcibly inline it to improve performance.
 */
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
static void do_syscall_32_irqs_on(struct pt_regs *regs)
{
	struct thread_info *ti = current_thread_info();
	unsigned int nr = (unsigned int)regs->orig_ax;
@@ -337,27 +363,62 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
		regs->ax = ia32_sys_call_table[nr](regs);
	}

	syscall_return_slowpath(regs);
	__syscall_return_slowpath(regs);
}

/* Handles int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs)
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
	enter_from_user_mode();
	instrumentation_begin();

	local_irq_enable();
	do_syscall_32_irqs_on(regs);

	instrumentation_end();
	exit_to_user_mode();
}

/*
 * Instrumentable part of do_fast_syscall_32().
 *
 * Loads the 32-bit value at the user address held in regs->sp into the low
 * 32 bits of regs->bp, then runs the syscall via do_syscall_32_irqs_on().
 *
 * @regs: user register state; regs->bp is updated on success, regs->ax is
 *        set to -EFAULT if the user read fails.
 *
 * Returns true if the syscall path was run, false if fetching EBP from user
 * memory failed. In the failure case IRQs have been disabled and
 * __prepare_exit_to_usermode() has already been called.
 */
static bool __do_fast_syscall_32(struct pt_regs *regs)
{
	int res;

	/* Fetch EBP from where the vDSO stashed it. */
	if (IS_ENABLED(CONFIG_X86_64)) {
		/*
		 * Micro-optimization: the pointer we're following is
		 * explicitly 32 bits, so it can't be out of range.
		 */
		res = __get_user(*(u32 *)&regs->bp,
			 (u32 __user __force *)(unsigned long)(u32)regs->sp);
	} else {
		res = get_user(*(u32 *)&regs->bp,
		       (u32 __user __force *)(unsigned long)(u32)regs->sp);
	}

	if (res) {
		/* User code screwed up. */
		regs->ax = -EFAULT;
		/*
		 * do_syscall_32_irqs_on() is skipped on this path, so disable
		 * IRQs and run the exit preparation directly here.
		 */
		local_irq_disable();
		__prepare_exit_to_usermode(regs);
		return false;
	}

	/* Now this is just like a normal syscall. */
	do_syscall_32_irqs_on(regs);
	return true;
}

/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
__visible long do_fast_syscall_32(struct pt_regs *regs)
__visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
{
	/*
	 * Called using the internal vDSO SYSENTER/SYSCALL32 calling
	 * convention.  Adjust regs so it looks like we entered using int80.
	 */

	unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
					vdso_image_32.sym_int80_landing_pad;
	bool success;

	/*
	 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
@@ -367,33 +428,17 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
	regs->ip = landing_pad;

	enter_from_user_mode();
	instrumentation_begin();

	local_irq_enable();
	success = __do_fast_syscall_32(regs);

	/* Fetch EBP from where the vDSO stashed it. */
	if (
#ifdef CONFIG_X86_64
		/*
		 * Micro-optimization: the pointer we're following is explicitly
		 * 32 bits, so it can't be out of range.
		 */
		__get_user(*(u32 *)&regs->bp,
			    (u32 __user __force *)(unsigned long)(u32)regs->sp)
#else
		get_user(*(u32 *)&regs->bp,
			 (u32 __user __force *)(unsigned long)(u32)regs->sp)
#endif
		) {

		/* User code screwed up. */
		local_irq_disable();
		regs->ax = -EFAULT;
		prepare_exit_to_usermode(regs);
		return 0;	/* Keep it simple: use IRET. */
	}
	instrumentation_end();
	exit_to_user_mode();

	/* Now this is just like a normal syscall. */
	do_syscall_32_irqs_on(regs);
	/* If it failed, keep it simple: use IRET. */
	if (!success)
		return 0;

#ifdef CONFIG_X86_64
	/*