Commit c93493b7 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'io_uring-5.8-2020-07-01' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "One fix in here, for a regression in 5.7 where a task is waiting in
  the kernel for a condition, but that condition won't become true until
  task_work is run. And the task_work can't be run exactly because the
  task is waiting in the kernel, so we'll never make any progress.

  One example of that is registering an eventfd and queueing io_uring
  work, and then the task goes and waits in eventfd read with the
  expectation that it'll get woken (and read an event) when the io_uring
  request completes. The io_uring request is finished through task_work,
  which won't get run while the task is looping in eventfd read"

* tag 'io_uring-5.8-2020-07-01' of git://git.kernel.dk/linux-block:
  io_uring: use signal based task_work running
  task_work: teach task_work_add() to do signal_wake_up()
parents cd77006e ce593a6c
Loading
Loading
Loading
Loading
+24 −8
Original line number Diff line number Diff line
@@ -4072,6 +4072,21 @@ struct io_poll_table {
	int error;
};

/*
 * Queue @cb as task_work on the task that owns @req, then make sure that
 * task wakes up to process it.
 *
 * @req:    request whose owning task (req->task) should run the callback
 * @cb:     callback_head to queue via task_work_add()
 * @notify: notification mode forwarded to task_work_add() (TWA_SIGNAL /
 *          TWA_RESUME); forced to 0 (queue only, no notification) for
 *          SQPOLL rings — presumably because the SQPOLL kernel thread
 *          does not take the signal delivery path; NOTE(review): confirm
 *
 * Returns the task_work_add() result: 0 on success, negative errno
 * otherwise. On success the task is explicitly woken, since it may be
 * sleeping in the kernel waiting on the condition this work satisfies.
 */
static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb,
				int notify)
{
	struct task_struct *tsk = req->task;
	int ret;

	/* SQPOLL: downgrade to plain queueing; rely on wake_up_process() below */
	if (req->ctx->flags & IORING_SETUP_SQPOLL)
		notify = 0;

	ret = task_work_add(tsk, cb, notify);
	if (!ret)
		/* kick the task so the queued work actually runs */
		wake_up_process(tsk);
	return ret;
}

static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
			   __poll_t mask, task_work_func_t func)
{
@@ -4095,13 +4110,13 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
	 * of executing it. We can't safely execute it anyway, as we may not
	 * have the needed state for it anyway.
	 */
	ret = task_work_add(tsk, &req->task_work, true);
	ret = io_req_task_work_add(req, &req->task_work, TWA_SIGNAL);
	if (unlikely(ret)) {
		WRITE_ONCE(poll->canceled, true);
		tsk = io_wq_get_task(req->ctx->io_wq);
		task_work_add(tsk, &req->task_work, true);
	}
		task_work_add(tsk, &req->task_work, 0);
		wake_up_process(tsk);
	}
	return 1;
}

@@ -6182,19 +6197,20 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
	do {
		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
						TASK_INTERRUPTIBLE);
		/* make sure we run task_work before checking for signals */
		if (current->task_works)
			task_work_run();
		if (io_should_wake(&iowq, false))
			break;
		schedule();
		if (signal_pending(current)) {
			ret = -EINTR;
			ret = -ERESTARTSYS;
			break;
		}
		if (io_should_wake(&iowq, false))
			break;
		schedule();
	} while (1);
	finish_wait(&ctx->wait, &iowq.wq);

	restore_saved_sigmask_unless(ret == -EINTR);
	restore_saved_sigmask_unless(ret == -ERESTARTSYS);

	return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
}
+3 −1
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ struct task_struct;
#define JOBCTL_TRAPPING_BIT	21	/* switching to TRACED */
#define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
#define JOBCTL_TRAP_FREEZE_BIT	23	/* trap for cgroup freezer */
#define JOBCTL_TASK_WORK_BIT	24	/* set by TWA_SIGNAL */

#define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
#define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
@@ -28,9 +29,10 @@ struct task_struct;
#define JOBCTL_TRAPPING		(1UL << JOBCTL_TRAPPING_BIT)
#define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
#define JOBCTL_TRAP_FREEZE	(1UL << JOBCTL_TRAP_FREEZE_BIT)
#define JOBCTL_TASK_WORK	(1UL << JOBCTL_TASK_WORK_BIT)

#define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
#define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
#define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK)

extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask);
extern void task_clear_jobctl_trapping(struct task_struct *task);
+4 −1
Original line number Diff line number Diff line
@@ -13,7 +13,10 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
	twork->func = func;
}

int task_work_add(struct task_struct *task, struct callback_head *twork, bool);
#define TWA_RESUME	1
#define TWA_SIGNAL	2
int task_work_add(struct task_struct *task, struct callback_head *twork, int);

struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
void task_work_run(void);

+7 −3
Original line number Diff line number Diff line
@@ -2529,9 +2529,6 @@ bool get_signal(struct ksignal *ksig)
	struct signal_struct *signal = current->signal;
	int signr;

	if (unlikely(current->task_works))
		task_work_run();

	if (unlikely(uprobe_deny_signal()))
		return false;

@@ -2544,6 +2541,13 @@ bool get_signal(struct ksignal *ksig)

relock:
	spin_lock_irq(&sighand->siglock);
	current->jobctl &= ~JOBCTL_TASK_WORK;
	if (unlikely(current->task_works)) {
		spin_unlock_irq(&sighand->siglock);
		task_work_run();
		goto relock;
	}

	/*
	 * Every stopped thread goes here after wakeup. Check to see if
	 * we should notify the parent, prepare_signal(SIGCONT) encodes
+14 −2
Original line number Diff line number Diff line
@@ -25,9 +25,10 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
 * 0 if succeeds or -ESRCH.
 */
int
task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
task_work_add(struct task_struct *task, struct callback_head *work, int notify)
{
	struct callback_head *head;
	unsigned long flags;

	do {
		head = READ_ONCE(task->task_works);
@@ -36,8 +37,19 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
		work->next = head;
	} while (cmpxchg(&task->task_works, head, work) != head);

	if (notify)
	switch (notify) {
	case TWA_RESUME:
		set_notify_resume(task);
		break;
	case TWA_SIGNAL:
		if (lock_task_sighand(task, &flags)) {
			task->jobctl |= JOBCTL_TASK_WORK;
			signal_wake_up(task, 0);
			unlock_task_sighand(task, &flags);
		}
		break;
	}

	return 0;
}