Commit 7d67af2c authored by Pavel Begunkov, committed by Jens Axboe
Browse files

io_uring: add splice(2) support



Add support for splice(2).

- output file is specified as sqe->fd, so it's handled by generic code
- hash_reg_file handled by generic code as well
- len is 32bit, but should be fine
- fd_in is a registered file when SPLICE_F_FD_IN_FIXED is set; that bit is
a splice flag (i.e. it is passed in sqe->splice_flags).

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 8da11c19
Loading
Loading
Loading
Loading
+109 −0
Original line number Diff line number Diff line
@@ -76,6 +76,7 @@
#include <linux/fadvise.h>
#include <linux/eventpoll.h>
#include <linux/fs_struct.h>
#include <linux/splice.h>

#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -428,6 +429,15 @@ struct io_epoll {
	struct epoll_event		event;
};

/*
 * Per-request state for IORING_OP_SPLICE, embedded in struct io_kiocb as
 * req->splice.  Filled in by io_splice_prep() and consumed by io_splice().
 */
struct io_splice {
	/*
	 * Output file.  io_splice_prep() never assigns it.
	 * NOTE(review): presumably it overlays the generic req->file slot that
	 * the common sqe->fd handling fills, so it must stay first - confirm.
	 */
	struct file			*file_out;
	/* input file, looked up in io_splice_prep() from sqe->splice_fd_in */
	struct file			*file_in;
	/* output offset from sqe->off; -1 makes io_splice() pass NULL */
	loff_t				off_out;
	/* input offset from sqe->splice_off_in; -1 makes io_splice() pass NULL */
	loff_t				off_in;
	/* length from sqe->len (a __u32 in the SQE) */
	u64				len;
	/* sqe->splice_flags, still including SPLICE_F_FD_IN_FIXED */
	unsigned int			flags;
};

/*
 * Holds a socket address.
 * NOTE(review): presumably the connect address kept around while a connect
 * request is handled asynchronously - confirm against its users.
 */
struct io_async_connect {
	struct sockaddr_storage		address;
};
@@ -544,6 +554,7 @@ struct io_kiocb {
		struct io_fadvise	fadvise;
		struct io_madvise	madvise;
		struct io_epoll		epoll;
		struct io_splice	splice;
	};

	struct io_async_ctx		*io;
@@ -744,6 +755,11 @@ static const struct io_op_def io_op_defs[] = {
		.unbound_nonreg_file	= 1,
		.file_table		= 1,
	},
	[IORING_OP_SPLICE] = {
		.needs_file		= 1,
		.hash_reg_file		= 1,
		.unbound_nonreg_file	= 1,
	}
};

static void io_wq_submit_work(struct io_wq_work **workptr);
@@ -758,6 +774,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
static int io_grab_files(struct io_kiocb *req);
static void io_ring_file_ref_flush(struct fixed_file_data *data);
static void io_cleanup_req(struct io_kiocb *req);
/*
 * Declared ahead of its use in io_splice_prep(), which calls it to look up
 * the splice input file (passing NULL for the submit state).
 */
static int io_file_get(struct io_submit_state *state,
		       struct io_kiocb *req,
		       int fd, struct file **out_file,
		       bool fixed);

static struct kmem_cache *req_cachep;

@@ -2404,6 +2424,77 @@ out_free:
	return ret;
}

/*
 * Prepare an IORING_OP_SPLICE request from its SQE.
 *
 * Copies both offsets, the length and the flags out of the SQE, rejects any
 * flag bit outside SPLICE_F_ALL | SPLICE_F_FD_IN_FIXED, and obtains the input
 * file through io_file_get().  On success REQ_F_NEED_CLEANUP is set so the
 * input file gets released later and so a second call is a no-op.
 *
 * Returns 0 on success (or if the request was already prepared), -EINVAL for
 * unknown flags, or the error reported by io_file_get().
 */
static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	const unsigned int allowed = SPLICE_F_ALL | SPLICE_F_FD_IN_FIXED;
	struct io_splice *splice = &req->splice;
	int err;

	/* the flag is only set at the end of a successful prep: nothing to redo */
	if (req->flags & REQ_F_NEED_CLEANUP)
		return 0;

	splice->file_in = NULL;
	splice->off_in = READ_ONCE(sqe->splice_off_in);
	splice->off_out = READ_ONCE(sqe->off);
	splice->len = READ_ONCE(sqe->len);
	splice->flags = READ_ONCE(sqe->splice_flags);

	if (unlikely((splice->flags & ~allowed) != 0))
		return -EINVAL;

	err = io_file_get(NULL, req, READ_ONCE(sqe->splice_fd_in),
			  &splice->file_in,
			  (splice->flags & SPLICE_F_FD_IN_FIXED));
	if (err != 0)
		return err;

	/* from here on the input file has to be put on every exit path */
	req->flags |= REQ_F_NEED_CLEANUP;

	if (S_ISREG(file_inode(splice->file_in)->i_mode))
		return 0;

	/* input is not a regular file: mark the async work as unbound */
	req->work.flags |= IO_WQ_WORK_UNBOUND;
	return 0;
}

/*
 * Decide whether a splice endpoint must be punted to async context instead
 * of being attempted inline with SPLICE_F_NONBLOCK.
 *
 * Returns false for a pipe (get_pipe_info() finds pipe state for the file),
 * true when io_file_supports_async() rejects the file, and otherwise true
 * unless the file was opened with O_NONBLOCK.
 */
static bool io_splice_punt(struct file *file)
{
	if (get_pipe_info(file))
		return false;
	if (!io_file_supports_async(file))
		return true;
	/*
	 * O_NONBLOCK is an open flag and lives in ->f_flags.  ->f_mode carries
	 * FMODE_* bits, so masking it with O_NONBLOCK tests an unrelated bit.
	 */
	return !(file->f_flags & O_NONBLOCK);
}

/*
 * Issue a prepared IORING_OP_SPLICE request.
 *
 * With force_nonblock set, the request is bounced with -EAGAIN if either
 * endpoint needs punting (see io_splice_punt()) or if do_splice() itself
 * reports -EAGAIN; otherwise SPLICE_F_NONBLOCK is added to the flags.  An
 * offset of -1 is translated to a NULL offset pointer for do_splice().
 *
 * Once the splice has run, the input file is put, the completion event is
 * posted with do_splice()'s result, and linked requests are failed when the
 * result differs from the requested length.
 *
 * Returns -EAGAIN when the request has to be retried, 0 otherwise.
 */
static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt,
		     bool force_nonblock)
{
	struct io_splice *splice = &req->splice;
	struct file *src = splice->file_in;
	struct file *dst = splice->file_out;
	/* SPLICE_F_FD_IN_FIXED is io_uring-only: strip it before do_splice() */
	unsigned int splice_flags = splice->flags & ~SPLICE_F_FD_IN_FIXED;
	loff_t *src_pos = NULL;
	loff_t *dst_pos = NULL;
	long done;

	if (force_nonblock) {
		if (io_splice_punt(src))
			return -EAGAIN;
		if (io_splice_punt(dst))
			return -EAGAIN;
		splice_flags |= SPLICE_F_NONBLOCK;
	}

	if (splice->off_in != -1)
		src_pos = &splice->off_in;
	if (splice->off_out != -1)
		dst_pos = &splice->off_out;

	done = do_splice(src, src_pos, dst, dst_pos, splice->len,
			 splice_flags);
	if (done == -EAGAIN && force_nonblock)
		return -EAGAIN;

	/* the request is finished: drop the input file taken in prep */
	io_put_file(req, src, (splice->flags & SPLICE_F_FD_IN_FIXED));
	req->flags &= ~REQ_F_NEED_CLEANUP;

	io_cqring_add_event(req, done);
	/* errors and short transfers both break the link chain */
	if (done != splice->len)
		req_set_fail_links(req);
	io_put_req_find_next(req, nxt);
	return 0;
}

/*
 * IORING_OP_NOP just posts a completion event, nothing else.
 */
@@ -4230,6 +4321,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
	case IORING_OP_EPOLL_CTL:
		ret = io_epoll_ctl_prep(req, sqe);
		break;
	case IORING_OP_SPLICE:
		ret = io_splice_prep(req, sqe);
		break;
	default:
		printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
				req->opcode);
@@ -4292,6 +4386,10 @@ static void io_cleanup_req(struct io_kiocb *req)
	case IORING_OP_STATX:
		putname(req->open.filename);
		break;
	case IORING_OP_SPLICE:
		io_put_file(req, req->splice.file_in,
			    (req->splice.flags & SPLICE_F_FD_IN_FIXED));
		break;
	}

	req->flags &= ~REQ_F_NEED_CLEANUP;
@@ -4495,6 +4593,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
		}
		ret = io_epoll_ctl(req, nxt, force_nonblock);
		break;
	case IORING_OP_SPLICE:
		if (sqe) {
			ret = io_splice_prep(req, sqe);
			if (ret < 0)
				break;
		}
		ret = io_splice(req, nxt, force_nonblock);
		break;
	default:
		ret = -EINVAL;
		break;
@@ -7230,6 +7336,7 @@ static int __init io_uring_init(void)
	BUILD_BUG_SQE_ELEM(8,  __u64,  off);
	BUILD_BUG_SQE_ELEM(8,  __u64,  addr2);
	BUILD_BUG_SQE_ELEM(16, __u64,  addr);
	BUILD_BUG_SQE_ELEM(16, __u64,  splice_off_in);
	BUILD_BUG_SQE_ELEM(24, __u32,  len);
	BUILD_BUG_SQE_ELEM(28,     __kernel_rwf_t, rw_flags);
	BUILD_BUG_SQE_ELEM(28, /* compat */   int, rw_flags);
@@ -7244,9 +7351,11 @@ static int __init io_uring_init(void)
	BUILD_BUG_SQE_ELEM(28, __u32,  open_flags);
	BUILD_BUG_SQE_ELEM(28, __u32,  statx_flags);
	BUILD_BUG_SQE_ELEM(28, __u32,  fadvise_advice);
	BUILD_BUG_SQE_ELEM(28, __u32,  splice_flags);
	BUILD_BUG_SQE_ELEM(32, __u64,  user_data);
	BUILD_BUG_SQE_ELEM(40, __u16,  buf_index);
	BUILD_BUG_SQE_ELEM(42, __u16,  personality);
	BUILD_BUG_SQE_ELEM(44, __s32,  splice_fd_in);

	BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+13 −1
Original line number Diff line number Diff line
@@ -23,7 +23,10 @@ struct io_uring_sqe {
		__u64	off;	/* offset into file */
		__u64	addr2;
	};
	union {
		__u64	addr;	/* pointer to buffer or iovecs */
		__u64	splice_off_in;
	};
	__u32	len;		/* buffer size or number of iovecs */
	union {
		__kernel_rwf_t	rw_flags;
@@ -37,6 +40,7 @@ struct io_uring_sqe {
		__u32		open_flags;
		__u32		statx_flags;
		__u32		fadvise_advice;
		__u32		splice_flags;
	};
	__u64	user_data;	/* data to be passed back at completion time */
	union {
@@ -45,6 +49,7 @@ struct io_uring_sqe {
			__u16	buf_index;
			/* personality to use, if used */
			__u16	personality;
			__s32	splice_fd_in;
		};
		__u64	__pad2[3];
	};
@@ -113,6 +118,7 @@ enum {
	IORING_OP_RECV,
	IORING_OP_OPENAT2,
	IORING_OP_EPOLL_CTL,
	IORING_OP_SPLICE,

	/* this goes last, obviously */
	IORING_OP_LAST,
@@ -128,6 +134,12 @@ enum {
 */
#define IORING_TIMEOUT_ABS	(1U << 0)

/*
 * sqe->splice_flags
 * Extends the splice(2) flags with io_uring-specific bits.
 *
 * SPLICE_F_FD_IN_FIXED: sqe->splice_fd_in refers to a registered (fixed)
 * file rather than a regular file descriptor.
 */
#define SPLICE_F_FD_IN_FIXED	(1U << 31) /* the last bit of __u32 */

/*
 * IO completion data structure (Completion Queue Entry)
 */