Commit 3950e975 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull execve updates from Eric Biederman:
 "During the development of v5.7 I ran into bugs and quality of
  implementation issues related to exec that could not be easily fixed
  because of the way exec is implemented. So I have been diggin into
  exec and cleaning up what I can.

  This cycle I have been looking at different ideas and different
  implementations to see what is possible to improve exec, and cleaning
  the way exec interfaces with in kernel users. Only cleaning up the
  interfaces of exec with rest of the kernel has managed to stabalize
  and make it through review in time for v5.9-rc1 resulting in 2 sets of
  changes this cycle.

   - Implement kernel_execve

   - Make the user mode driver code a better citizen

  With kernel_execve the code size got a little larger as the copying of
  parameters from userspace and copying of parameters from userspace is
  now separate. The good news is kernel threads no longer need to play
  games with set_fs to use exec. Which when combined with the rest of
  Christophs set_fs changes should security bugs with set_fs much more
  difficult"

* 'exec-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (23 commits)
  exec: Implement kernel_execve
  exec: Factor bprm_stack_limits out of prepare_arg_pages
  exec: Factor bprm_execve out of do_execve_common
  exec: Move bprm_mm_init into alloc_bprm
  exec: Move initialization of bprm->filename into alloc_bprm
  exec: Factor out alloc_bprm
  exec: Remove unnecessary spaces from binfmts.h
  umd: Stop using split_argv
  umd: Remove exit_umh
  bpfilter: Take advantage of the facilities of struct pid
  exit: Factor thread_group_exited out of pidfd_poll
  umd: Track user space drivers with struct pid
  bpfilter: Move bpfilter_umh back into init data
  exec: Remove do_execve_file
  umh: Stop calling do_execve_file
  umd: Transform fork_usermode_blob into fork_usermode_driver
  umd: Rename umd_info.cmdline umd_info.driver_name
  umd: For clarity rename umh_info umd_info
  umh: Separate the user mode driver and the user mode helper support
  umh: Remove call_usermodehelper_setup_file.
  ...
parents fd76a74d 7fce69df
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -854,7 +854,7 @@ SYM_CODE_START(ret_from_fork)
	CALL_NOSPEC ebx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling do_execve().  Exit to userspace to complete the execve()
	 * calling kernel_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movl	$0, PT_EAX(%esp)
+1 −1
Original line number Diff line number Diff line
@@ -293,7 +293,7 @@ SYM_CODE_START(ret_from_fork)
	CALL_NOSPEC rbx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling do_execve().  Exit to userspace to complete the execve()
	 * calling kernel_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movq	$0, RAX(%rsp)
+1 −1
Original line number Diff line number Diff line
@@ -275,7 +275,7 @@ bool unwind_next_frame(struct unwind_state *state)
		 * This user_mode() check is slightly broader than a PF_KTHREAD
		 * check because it also catches the awkward situation where a
		 * newly forked kthread transitions into a user task by calling
		 * do_execve(), which eventually clears PF_KTHREAD.
		 * kernel_execve(), which eventually clears PF_KTHREAD.
		 */
		if (!user_mode(regs))
			goto the_end;
+197 −110
Original line number Diff line number Diff line
@@ -448,18 +448,26 @@ static int count(struct user_arg_ptr argv, int max)
	return i;
}

static int prepare_arg_pages(struct linux_binprm *bprm,
			struct user_arg_ptr argv, struct user_arg_ptr envp)
static int count_strings_kernel(const char *const *argv)
{
	unsigned long limit, ptr_size;
	int i;

	if (!argv)
		return 0;

	bprm->argc = count(argv, MAX_ARG_STRINGS);
	if (bprm->argc < 0)
		return bprm->argc;
	for (i = 0; argv[i]; ++i) {
		if (i >= MAX_ARG_STRINGS)
			return -E2BIG;
		if (fatal_signal_pending(current))
			return -ERESTARTNOHAND;
		cond_resched();
	}
	return i;
}

	bprm->envc = count(envp, MAX_ARG_STRINGS);
	if (bprm->envc < 0)
		return bprm->envc;
static int bprm_stack_limits(struct linux_binprm *bprm)
{
	unsigned long limit, ptr_size;

	/*
	 * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
@@ -633,6 +641,20 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
}
EXPORT_SYMBOL(copy_string_kernel);

static int copy_strings_kernel(int argc, const char *const *argv,
			       struct linux_binprm *bprm)
{
	while (argc-- > 0) {
		int ret = copy_string_kernel(argv[argc], bprm);
		if (ret < 0)
			return ret;
		if (fatal_signal_pending(current))
			return -ERESTARTNOHAND;
		cond_resched();
	}
	return 0;
}

#ifdef CONFIG_MMU

/*
@@ -1543,6 +1565,10 @@ static int prepare_bprm_creds(struct linux_binprm *bprm)

static void free_bprm(struct linux_binprm *bprm)
{
	if (bprm->mm) {
		acct_arg_size(bprm, 0);
		mmput(bprm->mm);
	}
	free_arg_pages(bprm);
	if (bprm->cred) {
		mutex_unlock(&current->signal->cred_guard_mutex);
@@ -1557,9 +1583,43 @@ static void free_bprm(struct linux_binprm *bprm)
	/* If a binfmt changed the interp, free it. */
	if (bprm->interp != bprm->filename)
		kfree(bprm->interp);
	kfree(bprm->fdpath);
	kfree(bprm);
}

static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
{
	struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
	int retval = -ENOMEM;
	if (!bprm)
		goto out;

	if (fd == AT_FDCWD || filename->name[0] == '/') {
		bprm->filename = filename->name;
	} else {
		if (filename->name[0] == '\0')
			bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
		else
			bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
						  fd, filename->name);
		if (!bprm->fdpath)
			goto out_free;

		bprm->filename = bprm->fdpath;
	}
	bprm->interp = bprm->filename;

	retval = bprm_mm_init(bprm);
	if (retval)
		goto out_free;
	return bprm;

out_free:
	free_bprm(bprm);
out:
	return ERR_PTR(retval);
}

int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
{
	/* If a binfmt changed the interp, free it first. */
@@ -1818,52 +1878,24 @@ static int exec_binprm(struct linux_binprm *bprm)
/*
 * sys_execve() executes a new program.
 */
static int __do_execve_file(int fd, struct filename *filename,
			    struct user_arg_ptr argv,
			    struct user_arg_ptr envp,
			    int flags, struct file *file)
static int bprm_execve(struct linux_binprm *bprm,
		       int fd, struct filename *filename, int flags)
{
	char *pathbuf = NULL;
	struct linux_binprm *bprm;
	struct file *file;
	struct files_struct *displaced;
	int retval;

	if (IS_ERR(filename))
		return PTR_ERR(filename);

	/*
	 * We move the actual failure in case of RLIMIT_NPROC excess from
	 * set*uid() to execve() because too many poorly written programs
	 * don't check setuid() return code.  Here we additionally recheck
	 * whether NPROC limit is still exceeded.
	 */
	if ((current->flags & PF_NPROC_EXCEEDED) &&
	    atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
		retval = -EAGAIN;
		goto out_ret;
	}

	/* We're below the limit (still or again), so we don't want to make
	 * further execve() calls fail. */
	current->flags &= ~PF_NPROC_EXCEEDED;

	retval = unshare_files(&displaced);
	if (retval)
		goto out_ret;

	retval = -ENOMEM;
	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
	if (!bprm)
		goto out_files;
		return retval;

	retval = prepare_bprm_creds(bprm);
	if (retval)
		goto out_free;
		goto out_files;

	check_unsafe_exec(bprm);
	current->in_execve = 1;

	if (!file)
	file = do_open_execat(fd, filename, flags);
	retval = PTR_ERR(file);
	if (IS_ERR(file))
@@ -1872,57 +1904,20 @@ static int __do_execve_file(int fd, struct filename *filename,
	sched_exec();

	bprm->file = file;
	if (!filename) {
		bprm->filename = "none";
	} else if (fd == AT_FDCWD || filename->name[0] == '/') {
		bprm->filename = filename->name;
	} else {
		if (filename->name[0] == '\0')
			pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
		else
			pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
					    fd, filename->name);
		if (!pathbuf) {
			retval = -ENOMEM;
			goto out_unmark;
		}
	/*
	 * Record that a name derived from an O_CLOEXEC fd will be
	 * inaccessible after exec. Relies on having exclusive access to
	 * current->files (due to unshare_files above).
	 */
		if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
	if (bprm->fdpath &&
	    close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
		bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
		bprm->filename = pathbuf;
	}
	bprm->interp = bprm->filename;

	retval = bprm_mm_init(bprm);
	if (retval)
		goto out_unmark;

	retval = prepare_arg_pages(bprm, argv, envp);
	if (retval < 0)
		goto out;

	/* Set the unchanging part of bprm->cred */
	retval = security_bprm_creds_for_exec(bprm);
	if (retval)
		goto out;

	retval = copy_string_kernel(bprm->filename, bprm);
	if (retval < 0)
		goto out;

	bprm->exec = bprm->p;
	retval = copy_strings(bprm->envc, envp, bprm);
	if (retval < 0)
		goto out;

	retval = copy_strings(bprm->argc, argv, bprm);
	if (retval < 0)
		goto out;

	retval = exec_binprm(bprm);
	if (retval < 0)
		goto out;
@@ -1933,10 +1928,6 @@ static int __do_execve_file(int fd, struct filename *filename,
	rseq_execve(current);
	acct_update_integrals(current);
	task_numa_free(current, false);
	free_bprm(bprm);
	kfree(pathbuf);
	if (filename)
		putname(filename);
	if (displaced)
		put_files_struct(displaced);
	return retval;
@@ -1950,25 +1941,15 @@ out:
	 */
	if (bprm->point_of_no_return && !fatal_signal_pending(current))
		force_sigsegv(SIGSEGV);
	if (bprm->mm) {
		acct_arg_size(bprm, 0);
		mmput(bprm->mm);
	}

out_unmark:
	current->fs->in_exec = 0;
	current->in_execve = 0;

out_free:
	free_bprm(bprm);
	kfree(pathbuf);

out_files:
	if (displaced)
		reset_files_struct(displaced);
out_ret:
	if (filename)
		putname(filename);

	return retval;
}

@@ -1977,18 +1958,124 @@ static int do_execveat_common(int fd, struct filename *filename,
			      struct user_arg_ptr envp,
			      int flags)
{
	return __do_execve_file(fd, filename, argv, envp, flags, NULL);
	struct linux_binprm *bprm;
	int retval;

	if (IS_ERR(filename))
		return PTR_ERR(filename);

	/*
	 * We move the actual failure in case of RLIMIT_NPROC excess from
	 * set*uid() to execve() because too many poorly written programs
	 * don't check setuid() return code.  Here we additionally recheck
	 * whether NPROC limit is still exceeded.
	 */
	if ((current->flags & PF_NPROC_EXCEEDED) &&
	    atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
		retval = -EAGAIN;
		goto out_ret;
	}

	/* We're below the limit (still or again), so we don't want to make
	 * further execve() calls fail. */
	current->flags &= ~PF_NPROC_EXCEEDED;

	bprm = alloc_bprm(fd, filename);
	if (IS_ERR(bprm)) {
		retval = PTR_ERR(bprm);
		goto out_ret;
	}

int do_execve_file(struct file *file, void *__argv, void *__envp)
	retval = count(argv, MAX_ARG_STRINGS);
	if (retval < 0)
		goto out_free;
	bprm->argc = retval;

	retval = count(envp, MAX_ARG_STRINGS);
	if (retval < 0)
		goto out_free;
	bprm->envc = retval;

	retval = bprm_stack_limits(bprm);
	if (retval < 0)
		goto out_free;

	retval = copy_string_kernel(bprm->filename, bprm);
	if (retval < 0)
		goto out_free;
	bprm->exec = bprm->p;

	retval = copy_strings(bprm->envc, envp, bprm);
	if (retval < 0)
		goto out_free;

	retval = copy_strings(bprm->argc, argv, bprm);
	if (retval < 0)
		goto out_free;

	retval = bprm_execve(bprm, fd, filename, flags);
out_free:
	free_bprm(bprm);

out_ret:
	putname(filename);
	return retval;
}

int kernel_execve(const char *kernel_filename,
		  const char *const *argv, const char *const *envp)
{
	struct user_arg_ptr argv = { .ptr.native = __argv };
	struct user_arg_ptr envp = { .ptr.native = __envp };
	struct filename *filename;
	struct linux_binprm *bprm;
	int fd = AT_FDCWD;
	int retval;

	filename = getname_kernel(kernel_filename);
	if (IS_ERR(filename))
		return PTR_ERR(filename);

	bprm = alloc_bprm(fd, filename);
	if (IS_ERR(bprm)) {
		retval = PTR_ERR(bprm);
		goto out_ret;
	}

	retval = count_strings_kernel(argv);
	if (retval < 0)
		goto out_free;
	bprm->argc = retval;

	retval = count_strings_kernel(envp);
	if (retval < 0)
		goto out_free;
	bprm->envc = retval;

	retval = bprm_stack_limits(bprm);
	if (retval < 0)
		goto out_free;

	retval = copy_string_kernel(bprm->filename, bprm);
	if (retval < 0)
		goto out_free;
	bprm->exec = bprm->p;

	return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
	retval = copy_strings_kernel(bprm->envc, envp, bprm);
	if (retval < 0)
		goto out_free;

	retval = copy_strings_kernel(bprm->argc, argv, bprm);
	if (retval < 0)
		goto out_free;

	retval = bprm_execve(bprm, fd, filename, 0);
out_free:
	free_bprm(bprm);
out_ret:
	putname(filename);
	return retval;
}

int do_execve(struct filename *filename,
static int do_execve(struct filename *filename,
	const char __user *const __user *__argv,
	const char __user *const __user *__envp)
{
@@ -1997,7 +2084,7 @@ int do_execve(struct filename *filename,
	return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
}

int do_execveat(int fd, struct filename *filename,
static int do_execveat(int fd, struct filename *filename,
		const char __user *const __user *__argv,
		const char __user *const __user *__envp,
		int flags)
+8 −13
Original line number Diff line number Diff line
@@ -56,6 +56,7 @@ struct linux_binprm {
	const char *interp;	/* Name of the binary really executed. Most
				   of the time same as filename, but could be
				   different for binfmt_{misc,script} */
	const char *fdpath;	/* generated filename for execveat */
	unsigned interp_flags;
	int execfd;		/* File descriptor of the executable */
	unsigned long loader, exec;
@@ -134,13 +135,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm);
extern void set_binfmt(struct linux_binfmt *new);
extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t);

extern int do_execve(struct filename *,
		     const char __user * const __user *,
		     const char __user * const __user *);
extern int do_execveat(int, struct filename *,
		       const char __user * const __user *,
		       const char __user * const __user *,
		       int);
int do_execve_file(struct file *file, void *__argv, void *__envp);
int kernel_execve(const char *filename,
		  const char *const *argv, const char *const *envp);

#endif /* _LINUX_BINFMTS_H */
Loading