Commit 491381ce authored by Jens Axboe's avatar Jens Axboe
Browse files

io_uring: fix up O_NONBLOCK handling for sockets



We've got two issues with the non-regular file handling for non-blocking
IO:

1) We don't want to re-do a short read in full for a non-regular file,
   as we can't just read the data again.
2) For non-regular files that don't support non-blocking IO attempts,
   we need to punt to async context even if the file is opened as
   non-blocking. Otherwise the caller always gets -EAGAIN.

Add two new request flags to handle these cases. One is just a cache
of the inode S_ISREG() status, the other tells io_uring that we always
need to punt this request to async context, even if REQ_F_NOWAIT is set.

Cc: stable@vger.kernel.org
Reported-by: default avatarHrvoje Zeba <zeba.hrvoje@gmail.com>
Tested-by: default avatarHrvoje Zeba <zeba.hrvoje@gmail.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 6333ff6e
Loading
Loading
Loading
Loading
+39 −18
Original line number Diff line number Diff line
@@ -322,6 +322,8 @@ struct io_kiocb {
#define REQ_F_FAIL_LINK		256	/* fail rest of links */
#define REQ_F_SHADOW_DRAIN	512	/* link-drain shadow req */
#define REQ_F_TIMEOUT		1024	/* timeout request */
#define REQ_F_ISREG		2048	/* regular file */
#define REQ_F_MUST_PUNT		4096	/* must be punted even for NONBLOCK */
	u64			user_data;
	u32			result;
	u32			sequence;
@@ -914,26 +916,26 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
	return ret;
}

static void kiocb_end_write(struct kiocb *kiocb)
static void kiocb_end_write(struct io_kiocb *req)
{
	if (kiocb->ki_flags & IOCB_WRITE) {
		struct inode *inode = file_inode(kiocb->ki_filp);

	/*
	 * Tell lockdep we inherited freeze protection from submission
	 * thread.
	 */
		if (S_ISREG(inode->i_mode))
	if (req->flags & REQ_F_ISREG) {
		struct inode *inode = file_inode(req->file);

		__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
		file_end_write(kiocb->ki_filp);
	}
	file_end_write(req->file);
}

static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);

	kiocb_end_write(kiocb);
	if (kiocb->ki_flags & IOCB_WRITE)
		kiocb_end_write(req);

	if ((req->flags & REQ_F_LINK) && res != req->result)
		req->flags |= REQ_F_FAIL_LINK;
@@ -945,7 +947,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
{
	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);

	kiocb_end_write(kiocb);
	if (kiocb->ki_flags & IOCB_WRITE)
		kiocb_end_write(req);

	if ((req->flags & REQ_F_LINK) && res != req->result)
		req->flags |= REQ_F_FAIL_LINK;
@@ -1059,8 +1062,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
	if (!req->file)
		return -EBADF;

	if (force_nonblock && !io_file_supports_async(req->file))
		force_nonblock = false;
	if (S_ISREG(file_inode(req->file)->i_mode))
		req->flags |= REQ_F_ISREG;

	/*
	 * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
	 * we know to async punt it even if it was opened O_NONBLOCK
	 */
	if (force_nonblock && !io_file_supports_async(req->file)) {
		req->flags |= REQ_F_MUST_PUNT;
		return -EAGAIN;
	}

	kiocb->ki_pos = READ_ONCE(sqe->off);
	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
@@ -1081,7 +1093,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
		return ret;

	/* don't allow async punt if RWF_NOWAIT was requested */
	if (kiocb->ki_flags & IOCB_NOWAIT)
	if ((kiocb->ki_flags & IOCB_NOWAIT) ||
	    (req->file->f_flags & O_NONBLOCK))
		req->flags |= REQ_F_NOWAIT;

	if (force_nonblock)
@@ -1382,7 +1395,9 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
		 * need async punt anyway, so it's more efficient to do it
		 * here.
		 */
		if (force_nonblock && ret2 > 0 && ret2 < read_size)
		if (force_nonblock && !(req->flags & REQ_F_NOWAIT) &&
		    (req->flags & REQ_F_ISREG) &&
		    ret2 > 0 && ret2 < read_size)
			ret2 = -EAGAIN;
		/* Catch -EAGAIN return for forced non-blocking submission */
		if (!force_nonblock || ret2 != -EAGAIN) {
@@ -1447,7 +1462,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
		 * released so that it doesn't complain about the held lock when
		 * we return to userspace.
		 */
		if (S_ISREG(file_inode(file)->i_mode)) {
		if (req->flags & REQ_F_ISREG) {
			__sb_start_write(file_inode(file)->i_sb,
						SB_FREEZE_WRITE, true);
			__sb_writers_release(file_inode(file)->i_sb,
@@ -2282,7 +2297,13 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
	int ret;

	ret = __io_submit_sqe(ctx, req, s, force_nonblock);
	if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {

	/*
	 * We async punt it if the file wasn't marked NOWAIT, or if the file
	 * doesn't support non-blocking read/write attempts
	 */
	if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
	    (req->flags & REQ_F_MUST_PUNT))) {
		struct io_uring_sqe *sqe_copy;

		sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);