Commit c4c2416a authored by Gang He, committed by Linus Torvalds
Browse files

ocfs2: nowait aio support

Return EAGAIN for a nowait direct I/O request if either of the following holds:

 - The related locks cannot be acquired immediately.

 - Blocks are not allocated at the write location, so the write would
   trigger block allocation and block I/O operations.

[ghe@suse.com: v4]
  Link: http://lkml.kernel.org/r/1516007283-29932-4-git-send-email-ghe@suse.com
[ghe@suse.com: v2]
  Link: http://lkml.kernel.org/r/1511944612-9629-4-git-send-email-ghe@suse.com
Link: http://lkml.kernel.org/r/1511775987-841-4-git-send-email-ghe@suse.com


Signed-off-by: Gang He <ghe@suse.com>
Reviewed-by: Alex Chen <alex.chen@huawei.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Changwei Ge <ge.changwei@h3c.com>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent ac604d3c
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1958,7 +1958,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)

	trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);

	error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level);
	error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level, 1);
	if (lock_level && error >= 0) {
		/* We release EX lock which used to update atime
		 * and get PR lock again to reduce contention
+15 −5
Original line number Diff line number Diff line
@@ -2546,12 +2546,17 @@ int ocfs2_inode_lock_with_page(struct inode *inode,

int ocfs2_inode_lock_atime(struct inode *inode,
			  struct vfsmount *vfsmnt,
			  int *level)
			  int *level, int wait)
{
	int ret;

	if (wait)
		ret = ocfs2_inode_lock(inode, NULL, 0);
	else
		ret = ocfs2_try_inode_lock(inode, NULL, 0);

	if (ret < 0) {
		if (ret != -EAGAIN)
			mlog_errno(ret);
		return ret;
	}
@@ -2564,8 +2569,13 @@ int ocfs2_inode_lock_atime(struct inode *inode,
		struct buffer_head *bh = NULL;

		ocfs2_inode_unlock(inode, 0);
		if (wait)
			ret = ocfs2_inode_lock(inode, &bh, 1);
		else
			ret = ocfs2_try_inode_lock(inode, &bh, 1);

		if (ret < 0) {
			if (ret != -EAGAIN)
				mlog_errno(ret);
			return ret;
		}
+1 −1
Original line number Diff line number Diff line
@@ -146,7 +146,7 @@ int ocfs2_try_open_lock(struct inode *inode, int write);
void ocfs2_open_unlock(struct inode *inode);
int ocfs2_inode_lock_atime(struct inode *inode,
			  struct vfsmount *vfsmnt,
			  int *level);
			  int *level, int wait);
int ocfs2_inode_lock_full_nested(struct inode *inode,
			 struct buffer_head **ret_bh,
			 int ex,
+80 −21
Original line number Diff line number Diff line
@@ -140,6 +140,8 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
		spin_unlock(&oi->ip_lock);
	}

	file->f_mode |= FMODE_NOWAIT;

leave:
	return status;
}
@@ -2132,12 +2134,12 @@ out:
}

static int ocfs2_prepare_inode_for_write(struct file *file,
					 loff_t pos,
					 size_t count)
					 loff_t pos, size_t count, int wait)
{
	int ret = 0, meta_level = 0;
	int ret = 0, meta_level = 0, overwrite_io = 0;
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = d_inode(dentry);
	struct buffer_head *di_bh = NULL;
	loff_t end;

	/*
@@ -2145,13 +2147,40 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
	 * if we need to make modifications here.
	 */
	for(;;) {
		if (wait)
			ret = ocfs2_inode_lock(inode, NULL, meta_level);
		else
			ret = ocfs2_try_inode_lock(inode,
				overwrite_io ? NULL : &di_bh, meta_level);
		if (ret < 0) {
			meta_level = -1;
			if (ret != -EAGAIN)
				mlog_errno(ret);
			goto out;
		}

		/*
		 * Check if IO will overwrite allocated blocks in case
		 * IOCB_NOWAIT flag is set.
		 */
		if (!wait && !overwrite_io) {
			overwrite_io = 1;
			if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
				ret = -EAGAIN;
				goto out_unlock;
			}

			ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
			brelse(di_bh);
			di_bh = NULL;
			up_read(&OCFS2_I(inode)->ip_alloc_sem);
			if (ret < 0) {
				if (ret != -EAGAIN)
					mlog_errno(ret);
				goto out_unlock;
			}
		}

		/* Clear suid / sgid if necessary. We do this here
		 * instead of later in the write path because
		 * remove_suid() calls ->setattr without any hint that
@@ -2199,7 +2228,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,

out_unlock:
	trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
					    pos, count);
					    pos, count, wait);

	brelse(di_bh);

	if (meta_level >= 0)
		ocfs2_inode_unlock(inode, meta_level);
@@ -2211,7 +2242,7 @@ out:
static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
				    struct iov_iter *from)
{
	int direct_io, rw_level;
	int rw_level;
	ssize_t written = 0;
	ssize_t ret;
	size_t count = iov_iter_count(from);
@@ -2223,6 +2254,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
	void *saved_ki_complete = NULL;
	int append_write = ((iocb->ki_pos + count) >=
			i_size_read(inode) ? 1 : 0);
	int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
	int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;

	trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
		(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2230,11 +2263,16 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
		file->f_path.dentry->d_name.name,
		(unsigned int)from->nr_segs);	/* GRRRRR */

	if (!direct_io && nowait)
		return -EOPNOTSUPP;

	if (count == 0)
		return 0;

	direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;

	if (nowait) {
		if (!inode_trylock(inode))
			return -EAGAIN;
	} else
		inode_lock(inode);

	/*
@@ -2244,8 +2282,12 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
	 */
	rw_level = (!direct_io || full_coherency || append_write);

	if (nowait)
		ret = ocfs2_try_rw_lock(inode, rw_level);
	else
		ret = ocfs2_rw_lock(inode, rw_level);
	if (ret < 0) {
		if (ret != -EAGAIN)
			mlog_errno(ret);
		goto out_mutex;
	}
@@ -2260,8 +2302,12 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
		 * other nodes to drop their caches.  Buffered I/O
		 * already does this in write_begin().
		 */
		if (nowait)
			ret = ocfs2_try_inode_lock(inode, NULL, 1);
		else
			ret = ocfs2_inode_lock(inode, NULL, 1);
		if (ret < 0) {
			if (ret != -EAGAIN)
				mlog_errno(ret);
			goto out;
		}
@@ -2277,8 +2323,9 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
	}
	count = ret;

	ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count);
	ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, !nowait);
	if (ret < 0) {
		if (ret != -EAGAIN)
			mlog_errno(ret);
		goto out;
	}
@@ -2355,6 +2402,8 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
	int ret = 0, rw_level = -1, lock_level = 0;
	struct file *filp = iocb->ki_filp;
	struct inode *inode = file_inode(filp);
	int direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
	int nowait = iocb->ki_flags & IOCB_NOWAIT ? 1 : 0;

	trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry,
			(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2369,13 +2418,21 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
		goto bail;
	}

	if (!direct_io && nowait)
		return -EOPNOTSUPP;

	/*
	 * buffered reads protect themselves in ->readpage().  O_DIRECT reads
	 * need locks to protect pending reads from racing with truncate.
	 */
	if (iocb->ki_flags & IOCB_DIRECT) {
	if (direct_io) {
		if (nowait)
			ret = ocfs2_try_rw_lock(inode, 0);
		else
			ret = ocfs2_rw_lock(inode, 0);

		if (ret < 0) {
			if (ret != -EAGAIN)
				mlog_errno(ret);
			goto bail;
		}
@@ -2393,8 +2450,10 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
	 * like i_size. This allows the checks down below
	 * generic_file_aio_read() a chance of actually working.
	 */
	ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level);
	ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level,
				     !nowait);
	if (ret < 0) {
		if (ret != -EAGAIN)
			mlog_errno(ret);
		goto bail;
	}
+1 −1
Original line number Diff line number Diff line
@@ -184,7 +184,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
	int ret = 0, lock_level = 0;

	ret = ocfs2_inode_lock_atime(file_inode(file),
				    file->f_path.mnt, &lock_level);
				    file->f_path.mnt, &lock_level, 1);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
Loading