Commit 896f8d23 authored by Linus Torvalds

Merge tag 'for-5.6/io_uring-vfs-2020-01-29' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:

 - Support for various new opcodes (fallocate, openat, close, statx,
   fadvise, madvise, openat2, non-vectored read/write, send/recv, and
   epoll_ctl)

 - Faster ring quiesce for fileset updates

 - Optimizations for overflow condition checking

 - Support for max-sized clamping

 - Support for probing what opcodes are supported (see the probe sketch
   after this list)

 - Support for io-wq backend sharing between "sibling" rings

 - Support for registering personalities

 - Lots of little fixes and improvements
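
As a minimal sketch of the probe interface, assuming a 5.6+ kernel and its
<linux/io_uring.h> (raw syscalls, error handling trimmed), userspace can ask
which opcodes the running kernel supports via IORING_REGISTER_PROBE:

/* hedged sketch, not part of this pull: query per-opcode support */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

int main(void)
{
	struct io_uring_params p;
	struct io_uring_probe *probe;
	int i, fd;

	memset(&p, 0, sizeof(p));
	fd = syscall(__NR_io_uring_setup, 1, &p);
	if (fd < 0) {
		perror("io_uring_setup");
		return 1;
	}

	/* room for up to 256 opcode descriptors; the kernel rejects a
	 * probe buffer that is not zeroed, hence calloc() */
	probe = calloc(1, sizeof(*probe) + 256 * sizeof(struct io_uring_probe_op));
	if (syscall(__NR_io_uring_register, fd, IORING_REGISTER_PROBE,
		    probe, 256) < 0) {
		perror("IORING_REGISTER_PROBE");
		return 1;
	}

	for (i = 0; i < probe->ops_len; i++)
		printf("op %3d: %ssupported\n", probe->ops[i].op,
		       (probe->ops[i].flags & IO_URING_OP_SUPPORTED) ? "" : "not ");

	free(probe);
	close(fd);
	return 0;
}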

* tag 'for-5.6/io_uring-vfs-2020-01-29' of git://git.kernel.dk/linux-block: (64 commits)
  io_uring: add support for epoll_ctl(2)
  eventpoll: support non-blocking do_epoll_ctl() calls
  eventpoll: abstract out epoll_ctl() handler
  io_uring: fix linked command file table usage
  io_uring: support using a registered personality for commands
  io_uring: allow registering credentials
  io_uring: add io-wq workqueue sharing
  io-wq: allow grabbing existing io-wq
  io_uring/io-wq: don't use static creds/mm assignments
  io-wq: make the io_wq ref counted
  io_uring: fix refcounting with batched allocations at OOM
  io_uring: add comment for drain_next
  io_uring: don't attempt to copy iovec for READ/WRITE
  io_uring: honor IOSQE_ASYNC for linked reqs
  io_uring: prep req when do IOSQE_ASYNC
  io_uring: use labeled array init in io_op_defs
  io_uring: optimise sqe-to-req flags translation
  io_uring: remove REQ_F_IO_DRAINED
  io_uring: file switch work needs to get flushed on exit
  io_uring: hide uring_fd in ctx
  ...
parents 33c84e89 3e4827b0
drivers/android/binder.c (+4 −2)
@@ -2249,11 +2249,13 @@ static void binder_deferred_fd_close(int fd)
 		return;
 	init_task_work(&twcb->twork, binder_do_fd_close);
 	__close_fd_get_file(fd, &twcb->file);
-	if (twcb->file)
+	if (twcb->file) {
+		filp_close(twcb->file, current->files);
 		task_work_add(current, &twcb->twork, true);
-	else
+	} else {
 		kfree(twcb);
+	}
 }
 
 static void binder_transaction_buffer_release(struct binder_proc *proc,
 					      struct binder_buffer *buffer,
fs/eventpoll.c (+56 −31)
@@ -354,12 +354,6 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
 	return container_of(p, struct ep_pqueue, pt)->epi;
 }
 
-/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
-static inline int ep_op_has_event(int op)
-{
-	return op != EPOLL_CTL_DEL;
-}
-
 /* Initialize the poll safe wake up structure */
 static void ep_nested_calls_init(struct nested_calls *ncalls)
 {
@@ -2074,27 +2068,28 @@ SYSCALL_DEFINE1(epoll_create, int, size)
 	return do_epoll_create(0);
 }
 
-/*
- * The following function implements the controller interface for
- * the eventpoll file that enables the insertion/removal/change of
- * file descriptors inside the interest set.
- */
-SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
-		struct epoll_event __user *, event)
+static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
+				   bool nonblock)
+{
+	if (!nonblock) {
+		mutex_lock_nested(mutex, depth);
+		return 0;
+	}
+	if (mutex_trylock(mutex))
+		return 0;
+	return -EAGAIN;
+}
+
+int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
+		 bool nonblock)
 {
 	int error;
 	int full_check = 0;
 	struct fd f, tf;
 	struct eventpoll *ep;
 	struct epitem *epi;
-	struct epoll_event epds;
 	struct eventpoll *tep = NULL;
 
-	error = -EFAULT;
-	if (ep_op_has_event(op) &&
-	    copy_from_user(&epds, event, sizeof(struct epoll_event)))
-		goto error_return;
-
 	error = -EBADF;
 	f = fdget(epfd);
 	if (!f.file)
@@ -2112,7 +2107,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,

 	/* Check if EPOLLWAKEUP is allowed */
 	if (ep_op_has_event(op))
-		ep_take_care_of_epollwakeup(&epds);
+		ep_take_care_of_epollwakeup(epds);
 
 	/*
 	 * We have to check that the file structure underneath the file descriptor
@@ -2128,11 +2123,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	 * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
 	 * Also, we do not currently supported nested exclusive wakeups.
 	 */
-	if (ep_op_has_event(op) && (epds.events & EPOLLEXCLUSIVE)) {
+	if (ep_op_has_event(op) && (epds->events & EPOLLEXCLUSIVE)) {
 		if (op == EPOLL_CTL_MOD)
 			goto error_tgt_fput;
 		if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
-				(epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
+				(epds->events & ~EPOLLEXCLUSIVE_OK_BITS)))
 			goto error_tgt_fput;
 	}

@@ -2157,13 +2152,17 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	 * deep wakeup paths from forming in parallel through multiple
 	 * EPOLL_CTL_ADD operations.
 	 */
-	mutex_lock_nested(&ep->mtx, 0);
+	error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
+	if (error)
+		goto error_tgt_fput;
 	if (op == EPOLL_CTL_ADD) {
 		if (!list_empty(&f.file->f_ep_links) ||
 						is_file_epoll(tf.file)) {
-			full_check = 1;
 			mutex_unlock(&ep->mtx);
-			mutex_lock(&epmutex);
+			error = epoll_mutex_lock(&epmutex, 0, nonblock);
+			if (error)
+				goto error_tgt_fput;
+			full_check = 1;
 			if (is_file_epoll(tf.file)) {
 				error = -ELOOP;
 				if (ep_loop_check(ep, tf.file) != 0) {
@@ -2173,10 +2172,19 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 			} else
 				list_add(&tf.file->f_tfile_llink,
 							&tfile_check_list);
-			mutex_lock_nested(&ep->mtx, 0);
+			error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
+			if (error) {
+out_del:
+				list_del(&tf.file->f_tfile_llink);
+				goto error_tgt_fput;
+			}
 			if (is_file_epoll(tf.file)) {
 				tep = tf.file->private_data;
-				mutex_lock_nested(&tep->mtx, 1);
+				error = epoll_mutex_lock(&tep->mtx, 1, nonblock);
+				if (error) {
+					mutex_unlock(&ep->mtx);
+					goto out_del;
+				}
 			}
 		}
 	}
@@ -2192,8 +2200,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	switch (op) {
 	case EPOLL_CTL_ADD:
 		if (!epi) {
-			epds.events |= EPOLLERR | EPOLLHUP;
-			error = ep_insert(ep, &epds, tf.file, fd, full_check);
+			epds->events |= EPOLLERR | EPOLLHUP;
+			error = ep_insert(ep, epds, tf.file, fd, full_check);
 		} else
 			error = -EEXIST;
 		if (full_check)
@@ -2208,8 +2216,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	case EPOLL_CTL_MOD:
 		if (epi) {
 			if (!(epi->event.events & EPOLLEXCLUSIVE)) {
-				epds.events |= EPOLLERR | EPOLLHUP;
-				error = ep_modify(ep, epi, &epds);
+				epds->events |= EPOLLERR | EPOLLHUP;
+				error = ep_modify(ep, epi, epds);
 			}
 		} else
 			error = -ENOENT;
@@ -2231,6 +2239,23 @@ error_return:
 	return error;
 }
 
+/*
+ * The following function implements the controller interface for
+ * the eventpoll file that enables the insertion/removal/change of
+ * file descriptors inside the interest set.
+ */
+SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
+		struct epoll_event __user *, event)
+{
+	struct epoll_event epds;
+
+	if (ep_op_has_event(op) &&
+	    copy_from_user(&epds, event, sizeof(struct epoll_event)))
+		return -EFAULT;
+
+	return do_epoll_ctl(epfd, op, fd, &epds, false);
+}
+
 /*
  * Implement the event wait interface for the eventpoll file. It is the kernel
  * part of the user space epoll_wait(2).
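
The nonblock path above is what io_uring's new IORING_OP_EPOLL_CTL rides on:
the first attempt uses trylock semantics, and -EAGAIN punts the request to
async context for a blocking retry. A hedged userspace sketch, assuming a
liburing recent enough to provide io_uring_prep_epoll_ctl() (error handling
trimmed):

/* hedged sketch: EPOLL_CTL_ADD through io_uring instead of epoll_ctl(2) */
#include <errno.h>
#include <sys/epoll.h>
#include <liburing.h>

int epoll_add_via_uring(struct io_uring *ring, int epfd, int sockfd)
{
	struct epoll_event ev = { .events = EPOLLIN, .data.fd = sockfd };
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	if (!sqe)
		return -EBUSY;
	io_uring_prep_epoll_ctl(sqe, epfd, sockfd, EPOLL_CTL_ADD, &ev);
	io_uring_submit(ring);

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0)
		return ret;
	ret = cqe->res;		/* 0 on success, -errno on failure */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}
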
fs/file.c (+4 −2)
@@ -642,7 +642,9 @@ out_unlock:
 EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
 
 /*
- * variant of __close_fd that gets a ref on the file for later fput
+ * variant of __close_fd that gets a ref on the file for later fput.
+ * The caller must ensure that filp_close() called on the file, and then
+ * an fput().
  */
 int __close_fd_get_file(unsigned int fd, struct file **res)
 {
@@ -662,7 +664,7 @@ int __close_fd_get_file(unsigned int fd, struct file **res)
 	spin_unlock(&files->file_lock);
 	get_file(file);
 	*res = file;
-	return filp_close(file, files);
+	return 0;
 
 out_unlock:
 	spin_unlock(&files->file_lock);
fs/internal.h (+8 −0)
@@ -124,6 +124,8 @@ extern struct file *do_filp_open(int dfd, struct filename *pathname,
 		const struct open_flags *op);
 extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
 		const char *, const struct open_flags *);
+extern struct open_how build_open_how(int flags, umode_t mode);
+extern int build_open_flags(const struct open_how *how, struct open_flags *op);
 
 long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
 long do_faccessat(int dfd, const char __user *filename, int mode);
@@ -182,3 +184,9 @@ extern const struct dentry_operations ns_dentry_operations;

 /* direct-io.c: */
 int sb_init_dio_done_wq(struct super_block *sb);
+
+/*
+ * fs/stat.c:
+ */
+unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags, int flags);
+int cp_statx(const struct kstat *stat, struct statx __user *buffer);
fs/io-wq.c (+76 −27)
@@ -56,7 +56,8 @@ struct io_worker {

 	struct rcu_head rcu;
 	struct mm_struct *mm;
-	const struct cred *creds;
+	const struct cred *cur_creds;
+	const struct cred *saved_creds;
 	struct files_struct *restore_files;
 };

@@ -109,10 +110,10 @@ struct io_wq {

 	struct task_struct *manager;
 	struct user_struct *user;
-	const struct cred *creds;
-	struct mm_struct *mm;
 	refcount_t refs;
 	struct completion done;
+
+	refcount_t use_refs;
 };
 
 static bool io_worker_get(struct io_worker *worker)
@@ -135,9 +136,9 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
 {
 	bool dropped_lock = false;
 
-	if (worker->creds) {
-		revert_creds(worker->creds);
-		worker->creds = NULL;
+	if (worker->saved_creds) {
+		revert_creds(worker->saved_creds);
+		worker->cur_creds = worker->saved_creds = NULL;
 	}
 
 	if (current->files != worker->restore_files) {
@@ -396,6 +397,43 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe, unsigned *hash)
 	return NULL;
 }
 
+static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
+{
+	if (worker->mm) {
+		unuse_mm(worker->mm);
+		mmput(worker->mm);
+		worker->mm = NULL;
+	}
+	if (!work->mm) {
+		set_fs(KERNEL_DS);
+		return;
+	}
+	if (mmget_not_zero(work->mm)) {
+		use_mm(work->mm);
+		if (!worker->mm)
+			set_fs(USER_DS);
+		worker->mm = work->mm;
+		/* hang on to this mm */
+		work->mm = NULL;
+		return;
+	}
+
+	/* failed grabbing mm, ensure work gets cancelled */
+	work->flags |= IO_WQ_WORK_CANCEL;
+}
+
+static void io_wq_switch_creds(struct io_worker *worker,
+			       struct io_wq_work *work)
+{
+	const struct cred *old_creds = override_creds(work->creds);
+
+	worker->cur_creds = work->creds;
+	if (worker->saved_creds)
+		put_cred(old_creds); /* creds set by previous switch */
+	else
+		worker->saved_creds = old_creds;
+}
+
 static void io_worker_handle_work(struct io_worker *worker)
 	__releases(wqe->lock)
 {
@@ -438,24 +476,19 @@ next:
 		if (work->flags & IO_WQ_WORK_CB)
 			work->func(&work);
 
-		if ((work->flags & IO_WQ_WORK_NEEDS_FILES) &&
-		    current->files != work->files) {
+		if (work->files && current->files != work->files) {
 			task_lock(current);
 			current->files = work->files;
 			task_unlock(current);
 		}
-		if ((work->flags & IO_WQ_WORK_NEEDS_USER) && !worker->mm &&
-		    wq->mm) {
-			if (mmget_not_zero(wq->mm)) {
-				use_mm(wq->mm);
-				set_fs(USER_DS);
-				worker->mm = wq->mm;
-			} else {
-				work->flags |= IO_WQ_WORK_CANCEL;
-			}
-		}
-		if (!worker->creds)
-			worker->creds = override_creds(wq->creds);
+		if (work->mm != worker->mm)
+			io_wq_switch_mm(worker, work);
+		if (worker->cur_creds != work->creds)
+			io_wq_switch_creds(worker, work);
+		/*
+		 * OK to set IO_WQ_WORK_CANCEL even for uncancellable work,
+		 * the worker function will do the right thing.
+		 */
 		if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
 			work->flags |= IO_WQ_WORK_CANCEL;
 		if (worker->mm)
@@ -720,6 +753,7 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 {
 	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+	int work_flags;
 	unsigned long flags;
 
 	/*
@@ -734,12 +768,14 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 		return;
 	}
 
+	work_flags = work->flags;
 	spin_lock_irqsave(&wqe->lock, flags);
 	wq_list_add_tail(&work->list, &wqe->work_list);
 	wqe->flags &= ~IO_WQE_FLAG_STALLED;
 	spin_unlock_irqrestore(&wqe->lock, flags);
 
-	if (!atomic_read(&acct->nr_running))
+	if ((work_flags & IO_WQ_WORK_CONCURRENT) ||
+	    !atomic_read(&acct->nr_running))
 		io_wqe_wake_worker(wqe, acct);
 }

@@ -828,6 +864,7 @@ static bool io_work_cancel(struct io_worker *worker, void *cancel_data)
 	 */
 	spin_lock_irqsave(&worker->lock, flags);
 	if (worker->cur_work &&
+	    !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) &&
 	    data->cancel(worker->cur_work, data->caller_data)) {
 		send_sig(SIGINT, worker->task, 1);
 		ret = true;
@@ -902,7 +939,8 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 		return false;
 
 	spin_lock_irqsave(&worker->lock, flags);
-	if (worker->cur_work == work) {
+	if (worker->cur_work == work &&
+	    !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL)) {
 		send_sig(SIGINT, worker->task, 1);
 		ret = true;
 	}
@@ -1026,7 +1064,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)

 	/* caller must already hold a reference to this */
 	wq->user = data->user;
-	wq->creds = data->creds;
 
 	for_each_node(node) {
 		struct io_wqe *wqe;
@@ -1053,9 +1090,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)

 	init_completion(&wq->done);
 
-	/* caller must have already done mmgrab() on this mm */
-	wq->mm = data->mm;
-
 	wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
 	if (!IS_ERR(wq->manager)) {
 		wake_up_process(wq->manager);
@@ -1064,6 +1098,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 			ret = -ENOMEM;
 			goto err;
 		}
+		refcount_set(&wq->use_refs, 1);
 		reinit_completion(&wq->done);
 		return wq;
 	}
@@ -1078,13 +1113,21 @@ err:
 	return ERR_PTR(ret);
 }
 
+bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
+{
+	if (data->get_work != wq->get_work || data->put_work != wq->put_work)
+		return false;
+
+	return refcount_inc_not_zero(&wq->use_refs);
+}
+
 static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 {
 	wake_up_process(worker->task);
 	return false;
 }
 
-void io_wq_destroy(struct io_wq *wq)
+static void __io_wq_destroy(struct io_wq *wq)
 {
 	int node;
 
@@ -1104,3 +1147,9 @@ void io_wq_destroy(struct io_wq *wq)
 	kfree(wq->wqes);
 	kfree(wq);
 }
+
+void io_wq_destroy(struct io_wq *wq)
+{
+	if (refcount_dec_and_test(&wq->use_refs))
+		__io_wq_destroy(wq);
+}
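
The use_refs counting above is the kernel half of io-wq sharing; the userspace
half is IORING_SETUP_ATTACH_WQ, where a second ring names an existing ring's
fd in params.wq_fd and, if io_wq_get() finds the backends compatible, shares
its worker pool instead of spawning another one. A hedged sketch, assuming
5.6+ headers (raw syscalls, minimal error handling):

/* hedged sketch: two "sibling" rings sharing one io-wq backend */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

int main(void)
{
	struct io_uring_params p1, p2;
	int fd1, fd2;

	memset(&p1, 0, sizeof(p1));
	fd1 = syscall(__NR_io_uring_setup, 8, &p1);
	if (fd1 < 0) {
		perror("first ring");
		return 1;
	}

	/* attach to the first ring's io-wq instead of creating a new pool */
	memset(&p2, 0, sizeof(p2));
	p2.flags = IORING_SETUP_ATTACH_WQ;
	p2.wq_fd = fd1;
	fd2 = syscall(__NR_io_uring_setup, 8, &p2);
	if (fd2 < 0) {
		perror("sibling ring");
		return 1;
	}

	printf("rings %d and %d share one io-wq\n", fd1, fd2);
	close(fd2);
	close(fd1);
	return 0;
}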