Commit 616d2483, authored by Brian Gerst, committed by Ingo Molnar
Browse files

sched/x86: Pass kernel thread parameters in 'struct fork_frame'



Instead of setting up a fake pt_regs context, put the kernel thread
function pointer and arg into the unused callee-restored registers
of 'struct fork_frame'.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1471106302-10159-6-git-send-email-brgerst@gmail.com


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 0100301b
Loading
Loading
Loading
Loading
+15 −16
Original line number Diff line number Diff line
@@ -240,35 +240,34 @@ END(__switch_to_asm)
 * A newly forked process directly context switches into this address.
 *
 * eax: prev task we switched from
 * ebx: kernel thread func (NULL for user thread)
 * edi: kernel thread arg
 */
ENTRY(ret_from_fork)
	pushl	%eax
	call	schedule_tail
	popl	%eax

	testl	%ebx, %ebx
	jnz	1f		/* kernel threads are uncommon */

2:
	/* When we fork, we trace the syscall return in the child, too. */
	movl    %esp, %eax
	call    syscall_return_slowpath
	jmp     restore_all
END(ret_from_fork)

ENTRY(ret_from_kernel_thread)
	pushl	%eax
	call	schedule_tail
	popl	%eax
	movl	PT_EBP(%esp), %eax
	call	*PT_EBX(%esp)
	movl	$0, PT_EAX(%esp)

	/* kernel thread */
1:	movl	%edi, %eax
	call	*%ebx
	/*
	 * Kernel threads return to userspace as if returning from a syscall.
	 * We should check whether anything actually uses this path and, if so,
	 * consider switching it over to ret_from_fork.
	 * A kernel thread is allowed to return here after successfully
	 * calling do_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movl    %esp, %eax
	call    syscall_return_slowpath
	jmp     restore_all
ENDPROC(ret_from_kernel_thread)
	movl	$0, PT_EAX(%esp)
	jmp	2b
END(ret_from_fork)

/*
 * Return to user mode is not as complex as all this looks,
+17 −20
Original line number Diff line number Diff line
@@ -407,37 +407,34 @@ END(__switch_to_asm)
 * A newly forked process directly context switches into this address.
 *
 * rax: prev task we switched from
 * rbx: kernel thread func (NULL for user thread)
 * r12: kernel thread arg
 */
ENTRY(ret_from_fork)
	movq	%rax, %rdi
	call	schedule_tail			/* rdi: 'prev' task parameter */

	testb	$3, CS(%rsp)			/* from kernel_thread? */
	jnz	1f

	/*
	 * We came from kernel_thread.  This code path is quite twisted, and
	 * someone should clean it up.
	 *
	 * copy_thread_tls stashes the function pointer in RBX and the
	 * parameter to be passed in RBP.  The called function is permitted
	 * to call do_execve and thereby jump to user mode.
	 */
	movq	RBP(%rsp), %rdi
	call	*RBX(%rsp)
	movl	$0, RAX(%rsp)

	/*
	 * Fall through as though we're exiting a syscall.  This makes a
	 * twisted sort of sense if we just called do_execve.
	 */
	testq	%rbx, %rbx			/* from kernel_thread? */
	jnz	1f				/* kernel threads are uncommon */

1:
2:
	movq	%rsp, %rdi
	call	syscall_return_slowpath	/* returns with IRQs disabled */
	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
	SWAPGS
	jmp	restore_regs_and_iret

1:
	/* kernel thread */
	movq	%r12, %rdi
	call	*%rbx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling do_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movq	$0, RAX(%rsp)
	jmp	2b
END(ret_from_fork)

/*
+2 −0
Original line number Diff line number Diff line
@@ -34,6 +34,8 @@ static inline void prepare_switch_to(struct task_struct *prev,
#endif
}

/*
 * Entry point implemented in the entry assembly (ENTRY(ret_from_fork)).
 * A newly forked task context-switches directly into this address:
 * copy_thread_tls() stores it in the new task's frame->ret_addr.
 * It is never called from C; the prototype exists so that C code can
 * take the symbol's address.
 */
asmlinkage void ret_from_fork(void);

/* data that is pointed to by thread.sp */
struct inactive_task_frame {
#ifdef CONFIG_X86_64
+4 −14
Original line number Diff line number Diff line
@@ -55,9 +55,6 @@
#include <asm/switch_to.h>
#include <asm/vm86.h>

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");

/*
 * Return saved PC of a blocked thread.
 */
@@ -139,6 +136,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
	int err;

	frame->bp = 0;
	frame->ret_addr = (unsigned long) ret_from_fork;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.sp0 = (unsigned long) (childregs+1);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -146,25 +144,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		frame->ret_addr = (unsigned long) ret_from_kernel_thread;
		task_user_gs(p) = __KERNEL_STACK_CANARY;
		childregs->ds = __USER_DS;
		childregs->es = __USER_DS;
		childregs->fs = __KERNEL_PERCPU;
		childregs->bx = sp;	/* function */
		childregs->bp = arg;
		childregs->orig_ax = -1;
		childregs->cs = __KERNEL_CS | get_kernel_rpl();
		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
		frame->bx = sp;		/* function */
		frame->di = arg;
		p->thread.io_bitmap_ptr = NULL;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();
	childregs->ax = 0;
	if (sp)
		childregs->sp = sp;

	frame->ret_addr = (unsigned long) ret_from_fork;
	task_user_gs(p) = get_user_gs(current_pt_regs());

	p->thread.io_bitmap_ptr = NULL;
+3 −9
Original line number Diff line number Diff line
@@ -50,8 +50,6 @@
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>

asmlinkage extern void ret_from_fork(void);

__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Prints also some state that isn't saved in the pt_regs */
@@ -165,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		childregs->sp = (unsigned long)childregs;
		childregs->ss = __KERNEL_DS;
		childregs->bx = sp; /* function */
		childregs->bp = arg;
		childregs->orig_ax = -1;
		childregs->cs = __KERNEL_CS | get_kernel_rpl();
		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
		frame->bx = sp;		/* function */
		frame->r12 = arg;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();

	childregs->ax = 0;