Commit cdc8fcb4 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-5.9/io_uring-20200802' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:
 "Lots of cleanups in here, hardening the code and/or making it easier
  to read and fixing bugs, but a core feature/change too adding support
  for real async buffered reads. With the latter in place, we just need
  buffered write async support and we're done relying on kthreads for
  the fast path. In detail:

   - Cleanup how memory accounting is done on ring setup/free (Bijan)

   - sq array offset calculation fixup (Dmitry)

   - Consistently handle blocking off O_DIRECT submission path (me)

   - Support proper async buffered reads, instead of relying on kthread
     offload for that. This uses the page waitqueue to drive retries
     from task_work, like we handle poll based retry. (me)

   - IO completion optimizations (me)

   - Fix race with accounting and ring fd install (me)

   - Support EPOLLEXCLUSIVE (Jiufei)

   - Get rid of the io_kiocb unionizing, made possible by shrinking
     other bits (Pavel)

   - Completion side cleanups (Pavel)

   - Cleanup REQ_F_ flags handling, and kill off many of them (Pavel)

   - Request environment grabbing cleanups (Pavel)

   - File and socket read/write cleanups (Pavel)

   - Improve kiocb_set_rw_flags() (Pavel)

   - Tons of fixes and cleanups (Pavel)

   - IORING_SQ_NEED_WAKEUP clear fix (Xiaoguang)"

* tag 'for-5.9/io_uring-20200802' of git://git.kernel.dk/linux-block: (127 commits)
  io_uring: flip if handling after io_setup_async_rw
  fs: optimise kiocb_set_rw_flags()
  io_uring: don't touch 'ctx' after installing file descriptor
  io_uring: get rid of atomic FAA for cq_timeouts
  io_uring: consolidate *_check_overflow accounting
  io_uring: fix stalled deferred requests
  io_uring: fix racy overflow count reporting
  io_uring: deduplicate __io_complete_rw()
  io_uring: de-unionise io_kiocb
  io-wq: update hash bits
  io_uring: fix missing io_queue_linked_timeout()
  io_uring: mark ->work uninitialised after cleanup
  io_uring: deduplicate io_grab_files() calls
  io_uring: don't do opcode prep twice
  io_uring: clear IORING_SQ_NEED_WAKEUP after executing task works
  io_uring: batch put_task_struct()
  tasks: add put_task_struct_many()
  io_uring: return locked and pinned page accounting
  io_uring: don't miscount pinned memory
  io_uring: don't open-code recv kbuf managment
  ...
parents 382625d0 fa15bafb
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -960,9 +960,14 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
{
	struct request_queue *q = bio->bi_disk->queue;
	blk_status_t status = BLK_STS_IOERR;
	struct blk_plug *plug;

	might_sleep();

	plug = blk_mq_plug(q, bio);
	if (plug && plug->nowait)
		bio->bi_opf |= REQ_NOWAIT;

	/*
	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
	 * if queue is not a request based queue.
@@ -1802,6 +1807,7 @@ void blk_start_plug(struct blk_plug *plug)
	INIT_LIST_HEAD(&plug->cb_list);
	plug->rq_count = 0;
	plug->multiple_queues = false;
	plug->nowait = false;

	/*
	 * Store ordering should not be needed here, since a potential
+1 −1
Original line number Diff line number Diff line
@@ -1734,7 +1734,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
	 */
	filp->f_flags |= O_LARGEFILE;

	filp->f_mode |= FMODE_NOWAIT;
	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
+1 −1
Original line number Diff line number Diff line
@@ -3537,7 +3537,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)

static int btrfs_file_open(struct inode *inode, struct file *filp)
{
	filp->f_mode |= FMODE_NOWAIT;
	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
	return generic_file_open(inode, filp);
}

+5 −9
Original line number Diff line number Diff line
@@ -462,6 +462,7 @@ static void io_impersonate_work(struct io_worker *worker,
		io_wq_switch_mm(worker, work);
	if (worker->cur_creds != work->creds)
		io_wq_switch_creds(worker, work);
	current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->fsize;
}

static void io_assign_current_work(struct io_worker *worker,
@@ -489,7 +490,6 @@ static void io_worker_handle_work(struct io_worker *worker)

	do {
		struct io_wq_work *work;
		unsigned int hash;
get_next:
		/*
		 * If we got some work, mark us as busy. If we didn't, but
@@ -512,6 +512,7 @@ get_next:
		/* handle a whole dependent link */
		do {
			struct io_wq_work *old_work, *next_hashed, *linked;
			unsigned int hash = io_get_work_hash(work);

			next_hashed = wq_next_work(work);
			io_impersonate_work(worker, work);
@@ -522,10 +523,8 @@ get_next:
			if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
				work->flags |= IO_WQ_WORK_CANCEL;

			hash = io_get_work_hash(work);
			linked = old_work = work;
			wq->do_work(&linked);
			linked = (old_work == linked) ? NULL : linked;
			old_work = work;
			linked = wq->do_work(work);

			work = next_hashed;
			if (!work && linked && !io_wq_is_hashed(linked)) {
@@ -542,8 +541,6 @@ get_next:
				spin_lock_irq(&wqe->lock);
				wqe->hash_map &= ~BIT_ULL(hash);
				wqe->flags &= ~IO_WQE_FLAG_STALLED;
				/* dependent work is not hashed */
				hash = -1U;
				/* skip unnecessary unlock-lock wqe->lock */
				if (!work)
					goto get_next;
@@ -781,8 +778,7 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
		struct io_wq_work *old_work = work;

		work->flags |= IO_WQ_WORK_CANCEL;
		wq->do_work(&work);
		work = (work == old_work) ? NULL : work;
		work = wq->do_work(work);
		wq->free_work(old_work);
	} while (work);
}
+6 −5
Original line number Diff line number Diff line
@@ -5,10 +5,10 @@ struct io_wq;

enum {
	IO_WQ_WORK_CANCEL	= 1,
	IO_WQ_WORK_HASHED	= 4,
	IO_WQ_WORK_UNBOUND	= 32,
	IO_WQ_WORK_NO_CANCEL	= 256,
	IO_WQ_WORK_CONCURRENT	= 512,
	IO_WQ_WORK_HASHED	= 2,
	IO_WQ_WORK_UNBOUND	= 4,
	IO_WQ_WORK_NO_CANCEL	= 8,
	IO_WQ_WORK_CONCURRENT	= 16,

	IO_WQ_HASH_SHIFT	= 24,	/* upper 8 bits are used for hash key */
};
@@ -89,6 +89,7 @@ struct io_wq_work {
	struct mm_struct *mm;
	const struct cred *creds;
	struct fs_struct *fs;
	unsigned long fsize;
	unsigned flags;
};

@@ -101,7 +102,7 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
}

typedef void (free_work_fn)(struct io_wq_work *);
typedef void (io_wq_work_fn)(struct io_wq_work **);
typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *);

struct io_wq_data {
	struct user_struct *user;
Loading