Commit 004d564f authored by Jens Axboe's avatar Jens Axboe
Browse files

tools/io_uring: sync with liburing



Various fixes and changes have been applied to liburing since we
copied some select bits to the kernel testing/examples part, sync
up with liburing to get those changes.

Most notable is the change that splits CQE reading into separate
peek and seen steps, instead of a single function. Also fixes
an unsigned wrap issue in io_uring_submit(), a leak of 'fd' in setup
if we fail, and various other little issues.

Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 486f0692
Loading
Loading
Loading
Loading
+15 −6
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>

@@ -85,11 +86,16 @@ static int queue_read(struct io_uring *ring, off_t size, off_t offset)
	struct io_uring_sqe *sqe;
	struct io_data *data;

	data = malloc(size + sizeof(*data));
	if (!data)
		return 1;

	sqe = io_uring_get_sqe(ring);
	if (!sqe)
	if (!sqe) {
		free(data);
		return 1;
	}

	data = malloc(size + sizeof(*data));
	data->read = 1;
	data->offset = data->first_offset = offset;

@@ -166,22 +172,23 @@ static int copy_file(struct io_uring *ring, off_t insize)
			struct io_data *data;

			if (!got_comp) {
				ret = io_uring_wait_completion(ring, &cqe);
				ret = io_uring_wait_cqe(ring, &cqe);
				got_comp = 1;
			} else
				ret = io_uring_get_completion(ring, &cqe);
				ret = io_uring_peek_cqe(ring, &cqe);
			if (ret < 0) {
				fprintf(stderr, "io_uring_get_completion: %s\n",
				fprintf(stderr, "io_uring_peek_cqe: %s\n",
							strerror(-ret));
				return 1;
			}
			if (!cqe)
				break;

			data = (struct io_data *) (uintptr_t) cqe->user_data;
			data = io_uring_cqe_get_data(cqe);
			if (cqe->res < 0) {
				if (cqe->res == -EAGAIN) {
					queue_prepped(ring, data);
					io_uring_cqe_seen(ring, cqe);
					continue;
				}
				fprintf(stderr, "cqe failed: %s\n",
@@ -193,6 +200,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
				data->iov.iov_len -= cqe->res;
				data->offset += cqe->res;
				queue_prepped(ring, data);
				io_uring_cqe_seen(ring, cqe);
				continue;
			}

@@ -209,6 +217,7 @@ static int copy_file(struct io_uring *ring, off_t insize)
				free(data);
				writes--;
			}
			io_uring_cqe_seen(ring, cqe);
		}
	}

+52 −12
Original line number Diff line number Diff line
#ifndef LIB_URING_H
#define LIB_URING_H

#ifdef __cplusplus
extern "C" {
#endif

#include <sys/uio.h>
#include <signal.h>
#include <string.h>
#include "../../include/uapi/linux/io_uring.h"
#include <inttypes.h>
#include "barrier.h"

/*
 * Library interface to io_uring
@@ -46,7 +52,7 @@ struct io_uring {
 * System calls
 */
extern int io_uring_setup(unsigned entries, struct io_uring_params *p);
extern int io_uring_enter(unsigned fd, unsigned to_submit,
extern int io_uring_enter(int fd, unsigned to_submit,
	unsigned min_complete, unsigned flags, sigset_t *sig);
extern int io_uring_register(int fd, unsigned int opcode, void *arg,
	unsigned int nr_args);
@@ -59,13 +65,32 @@ extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
	struct io_uring *ring);
extern void io_uring_queue_exit(struct io_uring *ring);
extern int io_uring_get_completion(struct io_uring *ring,
extern int io_uring_peek_cqe(struct io_uring *ring,
	struct io_uring_cqe **cqe_ptr);
extern int io_uring_wait_completion(struct io_uring *ring,
extern int io_uring_wait_cqe(struct io_uring *ring,
	struct io_uring_cqe **cqe_ptr);
extern int io_uring_submit(struct io_uring *ring);
extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);

/*
 * Mark one CQE as consumed. Must be called after io_uring_{peek,wait}_cqe()
 * once the application has finished processing the entry. A NULL cqe is
 * ignored.
 */
static inline void io_uring_cqe_seen(struct io_uring *ring,
				     struct io_uring_cqe *cqe)
{
	if (!cqe)
		return;

	/* Advance the CQ head to hand the slot back to the kernel. */
	(*ring->cq.khead)++;
	/*
	 * Publish the new head to the kernel; the kernel pairs this with
	 * a matching read barrier.
	 */
	write_barrier();
}

/*
 * Command prep helpers
 */
@@ -74,8 +99,14 @@ static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
	sqe->user_data = (unsigned long) data;
}

/*
 * Return the application pointer previously stored in the matching sqe's
 * user_data field (see io_uring_sqe_set_data()).
 */
static inline void *io_uring_cqe_get_data(struct io_uring_cqe *cqe)
{
	return (void *) (uintptr_t) cqe->user_data;
}

static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
				    void *addr, unsigned len, off_t offset)
				    const void *addr, unsigned len,
				    off_t offset)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = op;
@@ -86,8 +117,8 @@ static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
}

static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
				       struct iovec *iovecs, unsigned nr_vecs,
				       off_t offset)
				       const struct iovec *iovecs,
				       unsigned nr_vecs, off_t offset)
{
	io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
}
@@ -100,14 +131,14 @@ static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
}

static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
				        struct iovec *iovecs, unsigned nr_vecs,
					off_t offset)
					const struct iovec *iovecs,
					unsigned nr_vecs, off_t offset)
{
	io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
}

static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
					     void *buf, unsigned nbytes,
					     const void *buf, unsigned nbytes,
					     off_t offset)
{
	io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
@@ -131,13 +162,22 @@ static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
}

static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
				       int datasync)
				       unsigned fsync_flags)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_FSYNC;
	sqe->fd = fd;
	if (datasync)
		sqe->fsync_flags = IORING_FSYNC_DATASYNC;
	sqe->fsync_flags = fsync_flags;
}

/*
 * Prepare the sqe as a no-op request (IORING_OP_NOP): all fields cleared,
 * only the opcode set.
 */
static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
{
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_NOP;
}

#ifdef __cplusplus
}
#endif

#endif
+14 −22
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@
#include "liburing.h"
#include "barrier.h"

static int __io_uring_get_completion(struct io_uring *ring,
static int __io_uring_get_cqe(struct io_uring *ring,
			      struct io_uring_cqe **cqe_ptr, int wait)
{
	struct io_uring_cq *cq = &ring->cq;
@@ -39,34 +39,25 @@ static int __io_uring_get_completion(struct io_uring *ring,
			return -errno;
	} while (1);

	if (*cqe_ptr) {
		*cq->khead = head + 1;
		/*
		 * Ensure that the kernel sees our new head, the kernel has
		 * the matching read barrier.
		 */
		write_barrier();
	}

	return 0;
}

/*
 * Return an IO completion, if one is readily available
 * Return an IO completion, if one is readily available. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
int io_uring_get_completion(struct io_uring *ring,
			    struct io_uring_cqe **cqe_ptr)
int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	return __io_uring_get_completion(ring, cqe_ptr, 0);
	return __io_uring_get_cqe(ring, cqe_ptr, 0);
}

/*
 * Return an IO completion, waiting for it if necessary
 * Return an IO completion, waiting for it if necessary. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
int io_uring_wait_completion(struct io_uring *ring,
			     struct io_uring_cqe **cqe_ptr)
int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	return __io_uring_get_completion(ring, cqe_ptr, 1);
	return __io_uring_get_cqe(ring, cqe_ptr, 1);
}

/*
@@ -78,7 +69,7 @@ int io_uring_submit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned mask = *sq->kring_mask;
	unsigned ktail, ktail_next, submitted;
	unsigned ktail, ktail_next, submitted, to_submit;
	int ret;

	/*
@@ -100,7 +91,8 @@ int io_uring_submit(struct io_uring *ring)
	 */
	submitted = 0;
	ktail = ktail_next = *sq->ktail;
	while (sq->sqe_head < sq->sqe_tail) {
	to_submit = sq->sqe_tail - sq->sqe_head;
	while (to_submit--) {
		ktail_next++;
		read_barrier();

@@ -136,7 +128,7 @@ submit:
	if (ret < 0)
		return -errno;

	return 0;
	return ret;
}

/*
+7 −3
Original line number Diff line number Diff line
@@ -27,7 +27,7 @@ static int io_uring_mmap(int fd, struct io_uring_params *p,
	sq->kdropped = ptr + p->sq_off.dropped;
	sq->array = ptr + p->sq_off.array;

	size = p->sq_entries * sizeof(struct io_uring_sqe),
	size = p->sq_entries * sizeof(struct io_uring_sqe);
	sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_POPULATE, fd,
				IORING_OFF_SQES);
@@ -79,7 +79,7 @@ int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring
int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
{
	struct io_uring_params p;
	int fd;
	int fd, ret;

	memset(&p, 0, sizeof(p));
	p.flags = flags;
@@ -88,7 +88,11 @@ int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
	if (fd < 0)
		return fd;

	return io_uring_queue_mmap(fd, &p, ring);
	ret = io_uring_queue_mmap(fd, &p, ring);
	if (ret)
		close(fd);

	return ret;
}

void io_uring_queue_exit(struct io_uring *ring)
+30 −18
Original line number Diff line number Diff line
@@ -7,34 +7,46 @@
#include <signal.h>
#include "liburing.h"

#if defined(__x86_64) || defined(__i386__)
#ifndef __NR_sys_io_uring_setup
#define __NR_sys_io_uring_setup		425
#ifdef __alpha__
/*
 * alpha is the only exception, all other architectures
 * have common numbers for new system calls.
 */
# ifndef __NR_io_uring_setup
#  define __NR_io_uring_setup		535
# endif
# ifndef __NR_io_uring_enter
#  define __NR_io_uring_enter		536
# endif
# ifndef __NR_io_uring_register
#  define __NR_io_uring_register	537
# endif
#else /* !__alpha__ */
# ifndef __NR_io_uring_setup
#  define __NR_io_uring_setup		425
# endif
#ifndef __NR_sys_io_uring_enter
#define __NR_sys_io_uring_enter		426
# ifndef __NR_io_uring_enter
#  define __NR_io_uring_enter		426
# endif
#ifndef __NR_sys_io_uring_register
#define __NR_sys_io_uring_register	427
# ifndef __NR_io_uring_register
#  define __NR_io_uring_register	427
# endif
#else
#error "Arch not supported yet"
#endif

int io_uring_register(int fd, unsigned int opcode, void *arg,
		      unsigned int nr_args)
{
	return syscall(__NR_sys_io_uring_register, fd, opcode, arg, nr_args);
	return syscall(__NR_io_uring_register, fd, opcode, arg, nr_args);
}

int io_uring_setup(unsigned entries, struct io_uring_params *p)
int io_uring_setup(unsigned int entries, struct io_uring_params *p)
{
	return syscall(__NR_sys_io_uring_setup, entries, p);
	return syscall(__NR_io_uring_setup, entries, p);
}

int io_uring_enter(unsigned fd, unsigned to_submit, unsigned min_complete,
		   unsigned flags, sigset_t *sig)
int io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete,
		   unsigned int flags, sigset_t *sig)
{
	return syscall(__NR_sys_io_uring_enter, fd, to_submit, min_complete,
	return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
			flags, sig, _NSIG / 8);
}