Commit 4b871ce2 authored by Eric W. Biederman
Browse files

Merged 'Infrastructure to allow fixing exec deadlocks' from Bernd Edlinger

This is an infrastructure change that makes way for fixing the exec
deadlocks. Each patch was already posted previously, so this is just a
cleanup of the original mailing list thread(s), which have gotten out of
control by now.

Everything started here:
https://lore.kernel.org/lkml/AM6PR03MB5170B06F3A2B75EFB98D071AE4E60@AM6PR03MB5170.eurprd03.prod.outlook.com/



I added reviewed-by tags from the mailing list threads, except when
withdrawn.

It took a lot longer than expected to collect everything from the
mailing list threads, since several commit messages contained typos
that were fixed without a new patch version being posted.

- Correct the point of no return.
- Add two new mutexes to replace cred_guard_mutex.
- Fix each use of cred_guard_mutex.
- Update documentation.
- Add a test case.

-- EWB: Removed the last 2 patches; they need more work.

Bernd Edlinger (9):
      exec: Fix a deadlock in strace
      selftests/ptrace: add test cases for dead-locks
      mm: docs: Fix a comment in process_vm_rw_core
      kernel: doc: remove outdated comment cred.c
      kernel/kcmp.c: Use new infrastructure to fix deadlocks in execve
      proc: Use new infrastructure to fix deadlocks in execve
      proc: io_accounting: Use new infrastructure to fix deadlocks in execve
      perf: Use new infrastructure to fix deadlocks in execve
      pidfd: Use new infrastructure to fix deadlocks in execve

Eric W. Biederman (5):
      exec: Only compute current once in flush_old_exec
      exec: Factor unshare_sighand out of de_thread and call it separately
      exec: Move cleanup of posix timers on exec out of de_thread
      exec: Move exec_mmap right after de_thread in flush_old_exec
      exec: Add exec_update_mutex to replace cred_guard_mutex

 fs/exec.c                                 | 78 +++++++++++++++++++---------
 fs/proc/base.c                            | 10 ++--
 include/linux/binfmts.h                   |  8 ++-
 include/linux/sched/signal.h              |  9 +++-
 init/init_task.c                          |  1 +
 kernel/cred.c                             |  2 -
 kernel/events/core.c                      | 12 ++---
 kernel/fork.c                             |  5 +-
 kernel/kcmp.c                             |  8 +--
 kernel/pid.c                              |  4 +-
 mm/process_vm_access.c                    |  2 +-
 tools/testing/selftests/ptrace/Makefile   |  4 +-
 tools/testing/selftests/ptrace/vmaccess.c | 86 +++++++++++++++++++++++++++++++
 13 files changed, 179 insertions(+), 50 deletions(-)

Signed-off-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
parents a0d4a141 501f9328
Loading
Loading
Loading
Loading
+54 −24
Original line number Diff line number Diff line
@@ -1010,16 +1010,26 @@ ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
}
EXPORT_SYMBOL(read_code);

/*
 * Maps the mm_struct mm into the current task struct.
 * On success, this function returns with the mutex
 * exec_update_mutex locked.
 */
static int exec_mmap(struct mm_struct *mm)
{
	struct task_struct *tsk;
	struct mm_struct *old_mm, *active_mm;
	int ret;

	/* Notify parent that we're no longer interested in the old VM */
	tsk = current;
	old_mm = current->mm;
	exec_mm_release(tsk, old_mm);

	ret = mutex_lock_killable(&tsk->signal->exec_update_mutex);
	if (ret)
		return ret;

	if (old_mm) {
		sync_mm_rss(old_mm);
		/*
@@ -1031,9 +1041,11 @@ static int exec_mmap(struct mm_struct *mm)
		down_read(&old_mm->mmap_sem);
		if (unlikely(old_mm->core_state)) {
			up_read(&old_mm->mmap_sem);
			mutex_unlock(&tsk->signal->exec_update_mutex);
			return -EINTR;
		}
	}

	task_lock(tsk);
	active_mm = tsk->active_mm;
	membarrier_exec_mmap(mm);
@@ -1189,10 +1201,22 @@ no_thread_group:
	/* we have changed execution domain */
	tsk->exit_signal = SIGCHLD;

#ifdef CONFIG_POSIX_TIMERS
	exit_itimers(sig);
	flush_itimer_signals();
#endif
	BUG_ON(!thread_group_leader(tsk));
	return 0;

killed:
	/* protects against exit_notify() and __exit_signal() */
	read_lock(&tasklist_lock);
	sig->group_exit_task = NULL;
	sig->notify_count = 0;
	read_unlock(&tasklist_lock);
	return -EAGAIN;
}


static int unshare_sighand(struct task_struct *me)
{
	struct sighand_struct *oldsighand = me->sighand;

	if (refcount_read(&oldsighand->count) != 1) {
		struct sighand_struct *newsighand;
@@ -1210,23 +1234,13 @@ no_thread_group:

		write_lock_irq(&tasklist_lock);
		spin_lock(&oldsighand->siglock);
		rcu_assign_pointer(tsk->sighand, newsighand);
		rcu_assign_pointer(me->sighand, newsighand);
		spin_unlock(&oldsighand->siglock);
		write_unlock_irq(&tasklist_lock);

		__cleanup_sighand(oldsighand);
	}

	BUG_ON(!thread_group_leader(tsk));
	return 0;

killed:
	/* protects against exit_notify() and __exit_signal() */
	read_lock(&tasklist_lock);
	sig->group_exit_task = NULL;
	sig->notify_count = 0;
	read_unlock(&tasklist_lock);
	return -EAGAIN;
}

char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
@@ -1260,13 +1274,13 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
 */
int flush_old_exec(struct linux_binprm * bprm)
{
	struct task_struct *me = current;
	int retval;

	/*
	 * Make sure we have a private signal table and that
	 * we are unassociated from the previous thread group.
	 * Make this the only thread in the thread group.
	 */
	retval = de_thread(current);
	retval = de_thread(me);
	if (retval)
		goto out;

@@ -1286,18 +1300,31 @@ int flush_old_exec(struct linux_binprm * bprm)
		goto out;

	/*
	 * After clearing bprm->mm (to mark that current is using the
	 * prepared mm now), we have nothing left of the original
	 * After setting bprm->called_exec_mmap (to mark that current is
	 * using the prepared mm now), we have nothing left of the original
	 * process. If anything from here on returns an error, the check
	 * in search_binary_handler() will SEGV current.
	 */
	bprm->called_exec_mmap = 1;
	bprm->mm = NULL;

#ifdef CONFIG_POSIX_TIMERS
	exit_itimers(me->signal);
	flush_itimer_signals();
#endif

	/*
	 * Make the signal table private.
	 */
	retval = unshare_sighand(me);
	if (retval)
		goto out;

	set_fs(USER_DS);
	current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
	me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
					PF_NOFREEZE | PF_NO_SETAFFINITY);
	flush_thread();
	current->personality &= ~bprm->per_clear;
	me->personality &= ~bprm->per_clear;

	/*
	 * We have to apply CLOEXEC before we change whether the process is
@@ -1305,7 +1332,7 @@ int flush_old_exec(struct linux_binprm * bprm)
	 * trying to access the should-be-closed file descriptors of a process
	 * undergoing exec(2).
	 */
	do_close_on_exec(current->files);
	do_close_on_exec(me->files);
	return 0;

out:
@@ -1424,6 +1451,8 @@ static void free_bprm(struct linux_binprm *bprm)
{
	free_arg_pages(bprm);
	if (bprm->cred) {
		if (bprm->called_exec_mmap)
			mutex_unlock(&current->signal->exec_update_mutex);
		mutex_unlock(&current->signal->cred_guard_mutex);
		abort_creds(bprm->cred);
	}
@@ -1473,6 +1502,7 @@ void install_exec_creds(struct linux_binprm *bprm)
	 * credentials; any time after this it may be unlocked.
	 */
	security_bprm_committed_creds(bprm);
	mutex_unlock(&current->signal->exec_update_mutex);
	mutex_unlock(&current->signal->cred_guard_mutex);
}
EXPORT_SYMBOL(install_exec_creds);
@@ -1664,7 +1694,7 @@ int search_binary_handler(struct linux_binprm *bprm)

		read_lock(&binfmt_lock);
		put_binfmt(fmt);
		if (retval < 0 && !bprm->mm) {
		if (retval < 0 && bprm->called_exec_mmap) {
			/* we got to flush_old_exec() and failed after it */
			read_unlock(&binfmt_lock);
			force_sigsegv(SIGSEGV);
+5 −5
Original line number Diff line number Diff line
@@ -405,11 +405,11 @@ print0:

static int lock_trace(struct task_struct *task)
{
	int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
	int err = mutex_lock_killable(&task->signal->exec_update_mutex);
	if (err)
		return err;
	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
		mutex_unlock(&task->signal->cred_guard_mutex);
		mutex_unlock(&task->signal->exec_update_mutex);
		return -EPERM;
	}
	return 0;
@@ -417,7 +417,7 @@ static int lock_trace(struct task_struct *task)

static void unlock_trace(struct task_struct *task)
{
	mutex_unlock(&task->signal->cred_guard_mutex);
	mutex_unlock(&task->signal->exec_update_mutex);
}

#ifdef CONFIG_STACKTRACE
@@ -2883,7 +2883,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
	unsigned long flags;
	int result;

	result = mutex_lock_killable(&task->signal->cred_guard_mutex);
	result = mutex_lock_killable(&task->signal->exec_update_mutex);
	if (result)
		return result;

@@ -2919,7 +2919,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
	result = 0;

out_unlock:
	mutex_unlock(&task->signal->cred_guard_mutex);
	mutex_unlock(&task->signal->exec_update_mutex);
	return result;
}

+7 −1
Original line number Diff line number Diff line
@@ -44,7 +44,13 @@ struct linux_binprm {
		 * exec has happened. Used to sanitize execution environment
		 * and to set AT_SECURE auxv for glibc.
		 */
		secureexec:1;
		secureexec:1,
		/*
		 * Set by flush_old_exec, when exec_mmap has been called.
		 * This is past the point of no return, when the
		 * exec_update_mutex has been taken.
		 */
		called_exec_mmap:1;
#ifdef __alpha__
	unsigned int taso:1;
#endif
+8 −1
Original line number Diff line number Diff line
@@ -224,7 +224,14 @@ struct signal_struct {

	struct mutex cred_guard_mutex;	/* guard against foreign influences on
					 * credential calculations
					 * (notably. ptrace) */
					 * (notably. ptrace)
					 * Deprecated; do not use in new code.
					 * Use exec_update_mutex instead.
					 */
	struct mutex exec_update_mutex;	/* Held while task_struct is being
					 * updated during exec, and may have
					 * inconsistent permissions.
					 */
} __randomize_layout;

/*
+1 −0
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@ static struct signal_struct init_signals = {
	.multiprocess	= HLIST_HEAD_INIT,
	.rlim		= INIT_RLIMITS,
	.cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex),
	.exec_update_mutex = __MUTEX_INITIALIZER(init_signals.exec_update_mutex),
#ifdef CONFIG_POSIX_TIMERS
	.posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
	.cputimer	= {
Loading