Commit 216578e5, authored by Pavel Begunkov, committed by Jens Axboe
Browse files

io_uring: fix REQ_F_COMP_LOCKED by killing it



REQ_F_COMP_LOCKED is used and implemented in a buggy way. The problem is
that the flag is set before io_put_req() but not cleared after, and if
that wasn't the final reference, the request will be freed with the flag
set from some other context, which may not hold a spinlock. That means
possible races with removing linked timeouts and unsynchronised
completion (e.g. access to CQ).

Instead of fixing REQ_F_COMP_LOCKED, kill the flag and use
task_work_add() to move such requests to a fresh context to free from
it, as was done with __io_free_req_finish().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 4edf20f9
Loading
Loading
Loading
Loading
+69 −80
Original line number Diff line number Diff line
@@ -574,7 +574,6 @@ enum {
	REQ_F_NOWAIT_BIT,
	REQ_F_LINK_TIMEOUT_BIT,
	REQ_F_ISREG_BIT,
	REQ_F_COMP_LOCKED_BIT,
	REQ_F_NEED_CLEANUP_BIT,
	REQ_F_POLLED_BIT,
	REQ_F_BUFFER_SELECTED_BIT,
@@ -613,8 +612,6 @@ enum {
	REQ_F_LINK_TIMEOUT	= BIT(REQ_F_LINK_TIMEOUT_BIT),
	/* regular file */
	REQ_F_ISREG		= BIT(REQ_F_ISREG_BIT),
	/* completion under lock */
	REQ_F_COMP_LOCKED	= BIT(REQ_F_COMP_LOCKED_BIT),
	/* needs cleanup */
	REQ_F_NEED_CLEANUP	= BIT(REQ_F_NEED_CLEANUP_BIT),
	/* already went through poll handler */
@@ -963,8 +960,8 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
			     struct io_comp_state *cs);
static void io_cqring_fill_event(struct io_kiocb *req, long res);
static void io_put_req(struct io_kiocb *req);
static void io_put_req_deferred(struct io_kiocb *req, int nr);
static void io_double_put_req(struct io_kiocb *req);
static void __io_double_put_req(struct io_kiocb *req);
static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
static void __io_queue_linked_timeout(struct io_kiocb *req);
static void io_queue_linked_timeout(struct io_kiocb *req);
@@ -1316,9 +1313,8 @@ static void io_kill_timeout(struct io_kiocb *req)
		atomic_set(&req->ctx->cq_timeouts,
			atomic_read(&req->ctx->cq_timeouts) + 1);
		list_del_init(&req->timeout.list);
		req->flags |= REQ_F_COMP_LOCKED;
		io_cqring_fill_event(req, 0);
		io_put_req(req);
		io_put_req_deferred(req, 1);
	}
}

@@ -1369,8 +1365,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
		if (link) {
			__io_queue_linked_timeout(link);
			/* drop submission reference */
			link->flags |= REQ_F_COMP_LOCKED;
			io_put_req(link);
			io_put_req_deferred(link, 1);
		}
		kfree(de);
	} while (!list_empty(&ctx->defer_list));
@@ -1597,13 +1592,19 @@ static void io_submit_flush_completions(struct io_comp_state *cs)
		req = list_first_entry(&cs->list, struct io_kiocb, compl.list);
		list_del(&req->compl.list);
		__io_cqring_fill_event(req, req->result, req->compl.cflags);
		if (!(req->flags & REQ_F_LINK_HEAD)) {
			req->flags |= REQ_F_COMP_LOCKED;
			io_put_req(req);
		} else {

		/*
		 * io_free_req() doesn't care about completion_lock unless one
		 * of these flags is set. REQ_F_WORK_INITIALIZED is in the list
		 * because of a potential deadlock with req->work.fs->lock
		 */
		if (req->flags & (REQ_F_FAIL_LINK|REQ_F_LINK_TIMEOUT
				 |REQ_F_WORK_INITIALIZED)) {
			spin_unlock_irq(&ctx->completion_lock);
			io_put_req(req);
			spin_lock_irq(&ctx->completion_lock);
		} else {
			io_put_req(req);
		}
	}
	io_commit_cqring(ctx);
@@ -1702,10 +1703,14 @@ static void io_dismantle_req(struct io_kiocb *req)
	io_req_clean_work(req);
}

static void __io_free_req_finish(struct io_kiocb *req)
static void __io_free_req(struct io_kiocb *req)
{
	struct io_uring_task *tctx = req->task->io_uring;
	struct io_ring_ctx *ctx = req->ctx;
	struct io_uring_task *tctx;
	struct io_ring_ctx *ctx;

	io_dismantle_req(req);
	tctx = req->task->io_uring;
	ctx = req->ctx;

	atomic_long_inc(&tctx->req_complete);
	if (tctx->in_idle)
@@ -1719,33 +1724,6 @@ static void __io_free_req_finish(struct io_kiocb *req)
	percpu_ref_put(&ctx->refs);
}

static void io_req_task_file_table_put(struct callback_head *cb)
{
	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);

	io_dismantle_req(req);
	__io_free_req_finish(req);
}

static void __io_free_req(struct io_kiocb *req)
{
	if (!(req->flags & REQ_F_COMP_LOCKED)) {
		io_dismantle_req(req);
		__io_free_req_finish(req);
	} else {
		int ret;

		init_task_work(&req->task_work, io_req_task_file_table_put);
		ret = task_work_add(req->task, &req->task_work, TWA_RESUME);
		if (unlikely(ret)) {
			struct task_struct *tsk;

			tsk = io_wq_get_task(req->ctx->io_wq);
			task_work_add(tsk, &req->task_work, 0);
		}
	}
}

static bool io_link_cancel_timeout(struct io_kiocb *req)
{
	struct io_timeout_data *io = req->async_data;
@@ -1754,11 +1732,10 @@ static bool io_link_cancel_timeout(struct io_kiocb *req)

	ret = hrtimer_try_to_cancel(&io->timer);
	if (ret != -1) {
		req->flags |= REQ_F_COMP_LOCKED;
		io_cqring_fill_event(req, -ECANCELED);
		io_commit_cqring(ctx);
		req->flags &= ~REQ_F_LINK_HEAD;
		io_put_req(req);
		io_put_req_deferred(req, 1);
		return true;
	}

@@ -1785,17 +1762,12 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req)
static void io_kill_linked_timeout(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;
	bool wake_ev;

	if (!(req->flags & REQ_F_COMP_LOCKED)) {
	unsigned long flags;
	bool wake_ev;

	spin_lock_irqsave(&ctx->completion_lock, flags);
	wake_ev = __io_kill_linked_timeout(req);
	spin_unlock_irqrestore(&ctx->completion_lock, flags);
	} else {
		wake_ev = __io_kill_linked_timeout(req);
	}

	if (wake_ev)
		io_cqring_ev_posted(ctx);
@@ -1835,27 +1807,29 @@ static void __io_fail_links(struct io_kiocb *req)
		trace_io_uring_fail_link(req, link);

		io_cqring_fill_event(link, -ECANCELED);
		link->flags |= REQ_F_COMP_LOCKED;
		__io_double_put_req(link);

		/*
		 * It's ok to free under spinlock as they're not linked anymore,
		 * but avoid REQ_F_WORK_INITIALIZED because it may deadlock on
		 * work.fs->lock.
		 */
		if (link->flags & REQ_F_WORK_INITIALIZED)
			io_put_req_deferred(link, 2);
		else
			io_double_put_req(link);
	}

	io_commit_cqring(ctx);
	io_cqring_ev_posted(ctx);
}

static void io_fail_links(struct io_kiocb *req)
{
	struct io_ring_ctx *ctx = req->ctx;

	if (!(req->flags & REQ_F_COMP_LOCKED)) {
	unsigned long flags;

	spin_lock_irqsave(&ctx->completion_lock, flags);
	__io_fail_links(req);
	spin_unlock_irqrestore(&ctx->completion_lock, flags);
	} else {
		__io_fail_links(req);
	}

	io_cqring_ev_posted(ctx);
}
@@ -2069,6 +2043,34 @@ static void io_put_req(struct io_kiocb *req)
		io_free_req(req);
}

/*
 * task_work callback used for deferred request freeing.
 *
 * @cb: the callback_head embedded in an io_kiocb as its task_work member.
 *
 * Recovers the owning request from @cb and hands it to io_free_req().
 */
static void io_put_req_deferred_cb(struct callback_head *cb)
{
	struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);

	io_free_req(req);
}

/*
 * Free @req from task_work instead of freeing it in the caller's context.
 *
 * Arms req->task_work with io_put_req_deferred_cb() and queues it through
 * io_req_task_work_add(). If that returns non-zero, the work is instead
 * added to the task obtained from io_wq_get_task() for the ring's io_wq,
 * and that task is woken explicitly.
 */
static void io_free_req_deferred(struct io_kiocb *req)
{
	int ret;

	init_task_work(&req->task_work, io_put_req_deferred_cb);
	ret = io_req_task_work_add(req, true);
	if (unlikely(ret)) {
		struct task_struct *tsk;

		/* fallback: queue the work on the io_wq's task instead */
		tsk = io_wq_get_task(req->ctx->io_wq);
		/*
		 * NOTE(review): notify mode 0 presumably means "no
		 * notification", which would be why the task is woken by
		 * hand below -- confirm against task_work_add().
		 */
		task_work_add(tsk, &req->task_work, 0);
		wake_up_process(tsk);
	}
}

/*
 * Drop @refs references from @req; if that was the last reference, free the
 * request via io_free_req_deferred() rather than freeing it inline.
 *
 * @req:  request whose reference count (req->refs) is decremented.
 * @refs: number of references to drop.
 */
static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
{
	if (refcount_sub_and_test(refs, &req->refs))
		io_free_req_deferred(req);
}

static struct io_wq_work *io_steal_work(struct io_kiocb *req)
{
	struct io_kiocb *nxt;
@@ -2085,17 +2087,6 @@ static struct io_wq_work *io_steal_work(struct io_kiocb *req)
	return nxt ? &nxt->work : NULL;
}

/*
 * Must only be used if we don't need to care about links, usually from
 * within the completion handling itself.
 */
static void __io_double_put_req(struct io_kiocb *req)
{
	/* drop both submit and complete references */
	if (refcount_sub_and_test(2, &req->refs))
		__io_free_req(req);
}

static void io_double_put_req(struct io_kiocb *req)
{
	/* drop both submit and complete references */
@@ -5127,9 +5118,8 @@ static bool io_poll_remove_one(struct io_kiocb *req)
	if (do_complete) {
		io_cqring_fill_event(req, -ECANCELED);
		io_commit_cqring(req->ctx);
		req->flags |= REQ_F_COMP_LOCKED;
		req_set_fail_links(req);
		io_put_req(req);
		io_put_req_deferred(req, 1);
	}

	return do_complete;
@@ -5311,9 +5301,8 @@ static int __io_timeout_cancel(struct io_kiocb *req)
	list_del_init(&req->timeout.list);

	req_set_fail_links(req);
	req->flags |= REQ_F_COMP_LOCKED;
	io_cqring_fill_event(req, -ECANCELED);
	io_put_req(req);
	io_put_req_deferred(req, 1);
	return 0;
}