Commit d987ca1c authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull exec/proc updates from Eric Biederman:
 "This contains two significant pieces of work: the work to sort out
  proc_flush_task, and the work to solve a deadlock between strace and
  exec.

  Fixing proc_flush_task so that it no longer requires a persistent
  mount makes improvements to proc possible. The removal of the
  persistent mount solves an old regression that caused the hidepid
  mount option to only work on remount not on mount. The regression was
  found and reported by the Android folks. This further allows Alexey
  Gladkov's work making proc mount options specific to an individual
  mount of proc to move forward.

  The work on exec starts solving a long standing issue with exec that
  it takes mutexes of blocking userspace applications, which makes exec
  extremely deadlock prone. For the moment this adds a second mutex with
  a narrower scope that handles all of the easy cases. Which makes the
  tricky cases easy to spot. With a little luck the code to solve those
  deadlocks will be ready by next merge window"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (25 commits)
  signal: Extend exec_id to 64bits
  pidfd: Use new infrastructure to fix deadlocks in execve
  perf: Use new infrastructure to fix deadlocks in execve
  proc: io_accounting: Use new infrastructure to fix deadlocks in execve
  proc: Use new infrastructure to fix deadlocks in execve
  kernel/kcmp.c: Use new infrastructure to fix deadlocks in execve
  kernel: doc: remove outdated comment cred.c
  mm: docs: Fix a comment in process_vm_rw_core
  selftests/ptrace: add test cases for dead-locks
  exec: Fix a deadlock in strace
  exec: Add exec_update_mutex to replace cred_guard_mutex
  exec: Move exec_mmap right after de_thread in flush_old_exec
  exec: Move cleanup of posix timers on exec out of de_thread
  exec: Factor unshare_sighand out of de_thread and call it separately
  exec: Only compute current once in flush_old_exec
  pid: Improve the comment about waiting in zap_pid_ns_processes
  proc: Remove the now unnecessary internal mount of proc
  uml: Create a private mount of proc for mconsole
  uml: Don't consult current to find the proc_mnt in mconsole_proc
  proc: Use a list of inodes to flush from proc
  ...
parents 919dce24 d1e7fd64
Loading
Loading
Loading
Loading
+27 −1
Original line number Diff line number Diff line
@@ -36,6 +36,8 @@
#include "mconsole_kern.h"
#include <os.h>

static struct vfsmount *proc_mnt = NULL;

static int do_unlink_socket(struct notifier_block *notifier,
			    unsigned long what, void *data)
{
@@ -123,7 +125,7 @@ void mconsole_log(struct mc_request *req)

void mconsole_proc(struct mc_request *req)
{
	struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
	struct vfsmount *mnt = proc_mnt;
	char *buf;
	int len;
	struct file *file;
@@ -134,6 +136,10 @@ void mconsole_proc(struct mc_request *req)
	ptr += strlen("proc");
	ptr = skip_spaces(ptr);

	if (!mnt) {
		mconsole_reply(req, "Proc not available", 1, 0);
		goto out;
	}
	file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY, 0);
	if (IS_ERR(file)) {
		mconsole_reply(req, "Failed to open file", 1, 0);
@@ -683,6 +689,24 @@ void mconsole_stack(struct mc_request *req)
	with_console(req, stack_proc, to);
}

/*
 * Create a kernel-internal mount of procfs for mconsole's private use.
 * Stores the result in the file-local proc_mnt; returns 0 on success,
 * -ENODEV if procfs is unavailable, or the kern_mount() error.
 */
static int __init mount_proc(void)
{
	struct file_system_type *fstype;
	struct vfsmount *new_mnt;

	fstype = get_fs_type("proc");
	if (!fstype)
		return -ENODEV;

	/* kern_mount() takes its own reference; drop ours either way. */
	new_mnt = kern_mount(fstype);
	put_filesystem(fstype);
	if (IS_ERR(new_mnt))
		return PTR_ERR(new_mnt);

	proc_mnt = new_mnt;
	return 0;
}

/*
 * Changed by mconsole_setup, which is __setup, and called before SMP is
 * active.
@@ -696,6 +720,8 @@ static int __init mconsole_init(void)
	int err;
	char file[UNIX_PATH_MAX];

	mount_proc();

	if (umid_file_name("mconsole", file, sizeof(file)))
		return -1;
	snprintf(mconsole_socket_name, sizeof(file), "%s", file);
+55 −25
Original line number Diff line number Diff line
@@ -1036,16 +1036,26 @@ ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
}
EXPORT_SYMBOL(read_code);

/*
 * Maps the mm_struct mm into the current task struct.
 * On success, this function returns with the mutex
 * exec_update_mutex locked.
 */
static int exec_mmap(struct mm_struct *mm)
{
	struct task_struct *tsk;
	struct mm_struct *old_mm, *active_mm;
	int ret;

	/* Notify parent that we're no longer interested in the old VM */
	tsk = current;
	old_mm = current->mm;
	exec_mm_release(tsk, old_mm);

	ret = mutex_lock_killable(&tsk->signal->exec_update_mutex);
	if (ret)
		return ret;

	if (old_mm) {
		sync_mm_rss(old_mm);
		/*
@@ -1057,9 +1067,11 @@ static int exec_mmap(struct mm_struct *mm)
		down_read(&old_mm->mmap_sem);
		if (unlikely(old_mm->core_state)) {
			up_read(&old_mm->mmap_sem);
			mutex_unlock(&tsk->signal->exec_update_mutex);
			return -EINTR;
		}
	}

	task_lock(tsk);
	active_mm = tsk->active_mm;
	membarrier_exec_mmap(mm);
@@ -1215,10 +1227,22 @@ no_thread_group:
	/* we have changed execution domain */
	tsk->exit_signal = SIGCHLD;

#ifdef CONFIG_POSIX_TIMERS
	exit_itimers(sig);
	flush_itimer_signals();
#endif
	BUG_ON(!thread_group_leader(tsk));
	return 0;

killed:
	/* protects against exit_notify() and __exit_signal() */
	read_lock(&tasklist_lock);
	sig->group_exit_task = NULL;
	sig->notify_count = 0;
	read_unlock(&tasklist_lock);
	return -EAGAIN;
}


static int unshare_sighand(struct task_struct *me)
{
	struct sighand_struct *oldsighand = me->sighand;

	if (refcount_read(&oldsighand->count) != 1) {
		struct sighand_struct *newsighand;
@@ -1236,23 +1260,13 @@ no_thread_group:

		write_lock_irq(&tasklist_lock);
		spin_lock(&oldsighand->siglock);
		rcu_assign_pointer(tsk->sighand, newsighand);
		rcu_assign_pointer(me->sighand, newsighand);
		spin_unlock(&oldsighand->siglock);
		write_unlock_irq(&tasklist_lock);

		__cleanup_sighand(oldsighand);
	}

	BUG_ON(!thread_group_leader(tsk));
	return 0;

killed:
	/* protects against exit_notify() and __exit_signal() */
	read_lock(&tasklist_lock);
	sig->group_exit_task = NULL;
	sig->notify_count = 0;
	read_unlock(&tasklist_lock);
	return -EAGAIN;
}

char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
@@ -1286,13 +1300,13 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
 */
int flush_old_exec(struct linux_binprm * bprm)
{
	struct task_struct *me = current;
	int retval;

	/*
	 * Make sure we have a private signal table and that
	 * we are unassociated from the previous thread group.
	 * Make this the only thread in the thread group.
	 */
	retval = de_thread(current);
	retval = de_thread(me);
	if (retval)
		goto out;

@@ -1312,18 +1326,31 @@ int flush_old_exec(struct linux_binprm * bprm)
		goto out;

	/*
	 * After clearing bprm->mm (to mark that current is using the
	 * prepared mm now), we have nothing left of the original
	 * After setting bprm->called_exec_mmap (to mark that current is
	 * using the prepared mm now), we have nothing left of the original
	 * process. If anything from here on returns an error, the check
	 * in search_binary_handler() will SEGV current.
	 */
	bprm->called_exec_mmap = 1;
	bprm->mm = NULL;

#ifdef CONFIG_POSIX_TIMERS
	exit_itimers(me->signal);
	flush_itimer_signals();
#endif

	/*
	 * Make the signal table private.
	 */
	retval = unshare_sighand(me);
	if (retval)
		goto out;

	set_fs(USER_DS);
	current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
	me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
					PF_NOFREEZE | PF_NO_SETAFFINITY);
	flush_thread();
	current->personality &= ~bprm->per_clear;
	me->personality &= ~bprm->per_clear;

	/*
	 * We have to apply CLOEXEC before we change whether the process is
@@ -1331,7 +1358,7 @@ int flush_old_exec(struct linux_binprm * bprm)
	 * trying to access the should-be-closed file descriptors of a process
	 * undergoing exec(2).
	 */
	do_close_on_exec(current->files);
	do_close_on_exec(me->files);
	return 0;

out:
@@ -1412,7 +1439,7 @@ void setup_new_exec(struct linux_binprm * bprm)

	/* An exec changes our domain. We are no longer part of the thread
	   group */
	current->self_exec_id++;
	WRITE_ONCE(current->self_exec_id, current->self_exec_id + 1);
	flush_signal_handlers(current, 0);
}
EXPORT_SYMBOL(setup_new_exec);
@@ -1450,6 +1477,8 @@ static void free_bprm(struct linux_binprm *bprm)
{
	free_arg_pages(bprm);
	if (bprm->cred) {
		if (bprm->called_exec_mmap)
			mutex_unlock(&current->signal->exec_update_mutex);
		mutex_unlock(&current->signal->cred_guard_mutex);
		abort_creds(bprm->cred);
	}
@@ -1499,6 +1528,7 @@ void install_exec_creds(struct linux_binprm *bprm)
	 * credentials; any time after this it may be unlocked.
	 */
	security_bprm_committed_creds(bprm);
	mutex_unlock(&current->signal->exec_update_mutex);
	mutex_unlock(&current->signal->cred_guard_mutex);
}
EXPORT_SYMBOL(install_exec_creds);
@@ -1690,7 +1720,7 @@ int search_binary_handler(struct linux_binprm *bprm)

		read_lock(&binfmt_lock);
		put_binfmt(fmt);
		if (retval < 0 && !bprm->mm) {
		if (retval < 0 && bprm->called_exec_mmap) {
			/* we got to flush_old_exec() and failed after it */
			read_unlock(&binfmt_lock);
			force_sigsegv(SIGSEGV);
+41 −80
Original line number Diff line number Diff line
@@ -405,11 +405,11 @@ print0:

static int lock_trace(struct task_struct *task)
{
	int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
	int err = mutex_lock_killable(&task->signal->exec_update_mutex);
	if (err)
		return err;
	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
		mutex_unlock(&task->signal->cred_guard_mutex);
		mutex_unlock(&task->signal->exec_update_mutex);
		return -EPERM;
	}
	return 0;
@@ -417,7 +417,7 @@ static int lock_trace(struct task_struct *task)

static void unlock_trace(struct task_struct *task)
{
	mutex_unlock(&task->signal->cred_guard_mutex);
	mutex_unlock(&task->signal->exec_update_mutex);
}

#ifdef CONFIG_STACKTRACE
@@ -1834,11 +1834,25 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
	*rgid = gid;
}

/*
 * proc_pid_evict_inode - detach a proc inode from its struct pid.
 * @ei: the proc inode being evicted
 *
 * Directory inodes were linked onto the pid's inode list (under
 * wait_pidfd.lock) by proc_pid_make_inode(); unhook them here.
 * Finally drop the pid reference the inode held.
 */
void proc_pid_evict_inode(struct proc_inode *ei)
{
	if (S_ISDIR(ei->vfs_inode.i_mode)) {
		spin_lock(&ei->pid->wait_pidfd.lock);
		hlist_del_init_rcu(&ei->sibling_inodes);
		spin_unlock(&ei->pid->wait_pidfd.lock);
	}

	put_pid(ei->pid);
}

struct inode *proc_pid_make_inode(struct super_block * sb,
				  struct task_struct *task, umode_t mode)
{
	struct inode * inode;
	struct proc_inode *ei;
	struct pid *pid;

	/* We need a new inode */

@@ -1856,10 +1870,18 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
	/*
	 * grab the reference to task.
	 */
	ei->pid = get_task_pid(task, PIDTYPE_PID);
	if (!ei->pid)
	pid = get_task_pid(task, PIDTYPE_PID);
	if (!pid)
		goto out_unlock;

	/* Let the pid remember us for quick removal */
	ei->pid = pid;
	if (S_ISDIR(mode)) {
		spin_lock(&pid->wait_pidfd.lock);
		hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
		spin_unlock(&pid->wait_pidfd.lock);
	}

	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
	security_task_to_inode(task, inode);

@@ -2861,7 +2883,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
	unsigned long flags;
	int result;

	result = mutex_lock_killable(&task->signal->cred_guard_mutex);
	result = mutex_lock_killable(&task->signal->exec_update_mutex);
	if (result)
		return result;

@@ -2897,7 +2919,7 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
	result = 0;

out_unlock:
	mutex_unlock(&task->signal->cred_guard_mutex);
	mutex_unlock(&task->signal->exec_update_mutex);
	return result;
}

@@ -3230,90 +3252,29 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
	.permission	= proc_pid_permission,
};

/*
 * proc_flush_task_mnt - drop cached /proc dentries for one pid on one mount.
 * @mnt:  the proc mount to scrub
 * @pid:  pid number (in @mnt's namespace) whose entries should go
 * @tgid: pid number of the thread-group leader in the same namespace
 *
 * Invalidates /proc/@pid and, when @pid is not the group leader,
 * /proc/@tgid/task/@pid.  Best-effort: a lookup miss simply means
 * there is nothing cached to flush.
 */
static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
{
	struct dentry *dentry, *leader, *dir;
	char buf[10 + 1];	/* 10 digits covers any 32-bit pid, plus NUL */
	struct qstr name;

	/* Invalidate /proc/<pid> */
	name.name = buf;
	name.len = snprintf(buf, sizeof(buf), "%u", pid);
	/* no ->d_hash() rejects on procfs */
	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
	if (dentry) {
		d_invalidate(dentry);
		dput(dentry);
	}

	/* A group leader has no /proc/<tgid>/task/<pid> alias of its own. */
	if (pid == tgid)
		return;

	/* Walk /proc/<tgid>/task/<pid>, bailing out at the first miss. */
	name.name = buf;
	name.len = snprintf(buf, sizeof(buf), "%u", tgid);
	leader = d_hash_and_lookup(mnt->mnt_root, &name);
	if (!leader)
		goto out;

	name.name = "task";
	name.len = strlen(name.name);
	dir = d_hash_and_lookup(leader, &name);
	if (!dir)
		goto out_put_leader;

	name.name = buf;
	name.len = snprintf(buf, sizeof(buf), "%u", pid);
	dentry = d_hash_and_lookup(dir, &name);
	if (dentry) {
		d_invalidate(dentry);
		dput(dentry);
	}

	/* Release the references taken by the successful lookups above. */
	dput(dir);
out_put_leader:
	dput(leader);
out:
	return;
}

/**
 * proc_flush_task -  Remove dcache entries for @task from the /proc dcache.
 * @task: task that should be flushed.
 * proc_flush_pid -  Remove dcache entries for @pid from the /proc dcache.
 * @pid: pid that should be flushed.
 *
 * When flushing dentries from proc, one needs to flush them from global
 * proc (proc_mnt) and from all the namespaces' procs this task was seen
 * in. This call is supposed to do all of this job.
 *
 * Looks in the dcache for
 * /proc/@pid
 * /proc/@tgid/task/@pid
 * if either directory is present, flushes it and all of its children
 * from the dcache.
 * This function walks a list of inodes (that belong to any proc
 * filesystem) that are attached to the pid and flushes them from
 * the dentry cache.
 *
 * It is safe and reasonable to cache /proc entries for a task until
 * that task exits.  After that they just clog up the dcache with
 * useless entries, possibly causing useful dcache entries to be
 * flushed instead.  This routine is provided to flush those useless
 * dcache entries at process exit time.
 * flushed instead.  This routine is provided to flush those useless
 * dcache entries when a process is reaped.
 *
 * NOTE: This routine is just an optimization so it does not guarantee
 *       that no dcache entries will exist at process exit time it
 *       just makes it very unlikely that any will persist.
 *       that no dcache entries will exist after a process is reaped
 *       it just makes it very unlikely that any will persist.
 */

void proc_flush_task(struct task_struct *task)
void proc_flush_pid(struct pid *pid)
{
	int i;
	struct pid *pid, *tgid;
	struct upid *upid;

	pid = task_pid(task);
	tgid = task_tgid(task);

	for (i = 0; i <= pid->level; i++) {
		upid = &pid->numbers[i];
		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
					tgid->numbers[i].nr);
	}
	proc_invalidate_siblings_dcache(&pid->inodes, &pid->wait_pidfd.lock);
	put_pid(pid);
}

static struct dentry *proc_pid_instantiate(struct dentry * dentry,
+68 −5
Original line number Diff line number Diff line
@@ -33,21 +33,27 @@ static void proc_evict_inode(struct inode *inode)
{
	struct proc_dir_entry *de;
	struct ctl_table_header *head;
	struct proc_inode *ei = PROC_I(inode);

	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);

	/* Stop tracking associated processes */
	put_pid(PROC_I(inode)->pid);
	if (ei->pid) {
		proc_pid_evict_inode(ei);
		ei->pid = NULL;
	}

	/* Let go of any associated proc directory entry */
	de = PDE(inode);
	if (de)
	de = ei->pde;
	if (de) {
		pde_put(de);
		ei->pde = NULL;
	}

	head = PROC_I(inode)->sysctl;
	head = ei->sysctl;
	if (head) {
		RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL);
		RCU_INIT_POINTER(ei->sysctl, NULL);
		proc_sys_evict_inode(inode, head);
	}
}
@@ -68,6 +74,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
	ei->pde = NULL;
	ei->sysctl = NULL;
	ei->sysctl_entry = NULL;
	INIT_HLIST_NODE(&ei->sibling_inodes);
	ei->ns_ops = NULL;
	return &ei->vfs_inode;
}
@@ -102,6 +109,62 @@ void __init proc_init_kmemcache(void)
	BUILD_BUG_ON(sizeof(struct proc_dir_entry) >= SIZEOF_PDE);
}

/*
 * proc_invalidate_siblings_dcache - flush dentries of every inode on a list.
 * @inodes: hlist of proc inodes linked through proc_inode.sibling_inodes
 * @lock:   spinlock protecting modifications to @inodes
 *
 * Repeatedly takes the first inode off the list (under @lock) and
 * invalidates its dentries.  The list is traversed under RCU; before
 * dropping the RCU read lock for the dcache work, both the inode's
 * superblock (atomic_inc_not_zero on s_active) and the inode itself
 * (igrab) are pinned.  The superblock reference is carried across
 * iterations so consecutive inodes on the same sb avoid a redundant
 * pin/deactivate cycle.
 */
void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock)
{
	struct inode *inode;
	struct proc_inode *ei;
	struct hlist_node *node;
	struct super_block *old_sb = NULL;

	rcu_read_lock();
	for (;;) {
		struct super_block *sb;
		node = hlist_first_rcu(inodes);
		if (!node)
			break;
		ei = hlist_entry(node, struct proc_inode, sibling_inodes);
		/* Unhash under @lock; re-initialized so eviction sees it gone. */
		spin_lock(lock);
		hlist_del_init_rcu(&ei->sibling_inodes);
		spin_unlock(lock);

		inode = &ei->vfs_inode;
		sb = inode->i_sb;
		/* Skip inodes whose superblock is already being torn down. */
		if ((sb != old_sb) && !atomic_inc_not_zero(&sb->s_active))
			continue;
		inode = igrab(inode);
		rcu_read_unlock();
		/* Swap the cached sb reference only after leaving RCU. */
		if (sb != old_sb) {
			if (old_sb)
				deactivate_super(old_sb);
			old_sb = sb;
		}
		/* igrab() fails if the inode is already being freed. */
		if (unlikely(!inode)) {
			rcu_read_lock();
			continue;
		}

		if (S_ISDIR(inode->i_mode)) {
			/* A directory has at most one alias. */
			struct dentry *dir = d_find_any_alias(inode);
			if (dir) {
				d_invalidate(dir);
				dput(dir);
			}
		} else {
			/* Non-directories may have several aliases; drop each. */
			struct dentry *dentry;
			while ((dentry = d_find_alias(inode))) {
				d_invalidate(dentry);
				dput(dentry);
			}
		}
		iput(inode);

		rcu_read_lock();
	}
	rcu_read_unlock();
	if (old_sb)
		deactivate_super(old_sb);
}

static int proc_show_options(struct seq_file *seq, struct dentry *root)
{
	struct super_block *sb = root->d_sb;
+3 −1
Original line number Diff line number Diff line
@@ -91,7 +91,7 @@ struct proc_inode {
	struct proc_dir_entry *pde;
	struct ctl_table_header *sysctl;
	struct ctl_table *sysctl_entry;
	struct hlist_node sysctl_inodes;
	struct hlist_node sibling_inodes;
	const struct proc_ns_operations *ns_ops;
	struct inode vfs_inode;
} __randomize_layout;
@@ -158,6 +158,7 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
extern const struct dentry_operations pid_dentry_operations;
extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int);
extern int proc_setattr(struct dentry *, struct iattr *);
extern void proc_pid_evict_inode(struct proc_inode *);
extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
extern void pid_update_inode(struct task_struct *, struct inode *);
extern int pid_delete_dentry(const struct dentry *);
@@ -210,6 +211,7 @@ extern const struct inode_operations proc_pid_link_inode_operations;
extern const struct super_operations proc_sops;

void proc_init_kmemcache(void);
void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock);
void set_proc_pid_nlink(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
extern void proc_entry_rundown(struct proc_dir_entry *);
Loading