Commit 77605e41 authored by Jinshan Xiong, committed by Greg Kroah-Hartman
Browse files

staging/lustre/clio: add pages into writeback cache in batches



In ll_write_end(), instead of adding the page into the writeback
cache directly, the page is held in a page list. After enough
pages have been collected, they are all issued with cio_commit_async().

Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-on: http://review.whamcloud.com/7893
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3321


Reviewed-by: Bobi Jam <bobijam@gmail.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 3c361c1c
Loading
Loading
Loading
Loading
+26 −51
Original line number Diff line number Diff line
@@ -1019,26 +1019,6 @@ struct cl_page_operations {
		 */
		int  (*cpo_make_ready)(const struct lu_env *env,
				       const struct cl_page_slice *slice);
		/**
		 * Announce that this page is to be written out
		 * opportunistically, that is, page is dirty, it is not
		 * necessary to start write-out transfer right now, but
		 * eventually page has to be written out.
		 *
		 * Main caller of this is the write path (see
		 * vvp_io_commit_write()), using this method to build a
		 * "transfer cache" from which large transfers are then
		 * constructed by the req-formation engine.
		 *
		 * \todo XXX it would make sense to add page-age tracking
		 * semantics here, and to oblige the req-formation engine to
		 * send the page out not later than it is too old.
		 *
		 * \see cl_page_cache_add()
		 */
		int  (*cpo_cache_add)(const struct lu_env *env,
				      const struct cl_page_slice *slice,
				      struct cl_io *io);
	} io[CRT_NR];
	/**
	 * Tell transfer engine that only [to, from] part of a page should be
@@ -2023,6 +2003,8 @@ struct cl_io_slice {
	struct list_head		     cis_linkage;
};

/**
 * Callback type receiving an environment, an io and a single page. It is the
 * type of the \a cb argument of cl_io_operations::cio_commit_async() and of
 * cl_io_commit_async().
 *
 * NOTE(review): when, and for which pages, the callback is invoked is decided
 * by the cio_commit_async() implementations, which are not visible here --
 * confirm against them.
 */
typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
			      struct cl_page *);
/**
 * Per-layer io operations.
 * \see vvp_io_ops, lov_io_ops, lovsub_io_ops, osc_io_ops
@@ -2106,7 +2088,7 @@ struct cl_io_operations {
		void (*cio_fini)(const struct lu_env *env,
				 const struct cl_io_slice *slice);
	} op[CIT_OP_NR];
	struct {

		/**
		 * Submit pages from \a queue->c2_qin for IO, and move
		 * successfully submitted pages into \a queue->c2_qout. Return
@@ -2119,7 +2101,15 @@ struct cl_io_operations {
				   const struct cl_io_slice *slice,
				   enum cl_req_type crt,
				   struct cl_2queue *queue);
	} req_op[CRT_NR];
	/**
	 * Queue async page for write.
	 * The difference between cio_submit and cio_commit_async is that
	 * cio_submit is for urgent requests.
	 */
	int  (*cio_commit_async)(const struct lu_env *env,
				 const struct cl_io_slice *slice,
				 struct cl_page_list *queue, int from, int to,
				 cl_commit_cbt cb);
	/**
	 * Read missing page.
	 *
@@ -2131,31 +2121,6 @@ struct cl_io_operations {
	int (*cio_read_page)(const struct lu_env *env,
			     const struct cl_io_slice *slice,
			     const struct cl_page_slice *page);
	/**
	 * Prepare write of a \a page. Called bottom-to-top by a top-level
	 * cl_io_operations::op[CIT_WRITE]::cio_start() to prepare page for
	 * get data from user-level buffer.
	 *
	 * \pre io->ci_type == CIT_WRITE
	 *
	 * \see vvp_io_prepare_write(), lov_io_prepare_write(),
	 * osc_io_prepare_write().
	 */
	int (*cio_prepare_write)(const struct lu_env *env,
				 const struct cl_io_slice *slice,
				 const struct cl_page_slice *page,
				 unsigned from, unsigned to);
	/**
	 *
	 * \pre io->ci_type == CIT_WRITE
	 *
	 * \see vvp_io_commit_write(), lov_io_commit_write(),
	 * osc_io_commit_write().
	 */
	int (*cio_commit_write)(const struct lu_env *env,
				const struct cl_io_slice *slice,
				const struct cl_page_slice *page,
				unsigned from, unsigned to);
	/**
	 * Optional debugging helper. Print given io slice.
	 */
@@ -3044,15 +3009,14 @@ int cl_io_lock_alloc_add(const struct lu_env *env, struct cl_io *io,
			 struct cl_lock_descr *descr);
int cl_io_read_page(const struct lu_env *env, struct cl_io *io,
		    struct cl_page *page);
int cl_io_prepare_write(const struct lu_env *env, struct cl_io *io,
			struct cl_page *page, unsigned from, unsigned to);
int cl_io_commit_write(const struct lu_env *env, struct cl_io *io,
		       struct cl_page *page, unsigned from, unsigned to);
int cl_io_submit_rw(const struct lu_env *env, struct cl_io *io,
		    enum cl_req_type iot, struct cl_2queue *queue);
int cl_io_submit_sync(const struct lu_env *env, struct cl_io *io,
		      enum cl_req_type iot, struct cl_2queue *queue,
		      long timeout);
int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
		       struct cl_page_list *queue, int from, int to,
		       cl_commit_cbt cb);
int cl_io_is_going(const struct lu_env *env);

/**
@@ -3108,6 +3072,12 @@ static inline struct cl_page *cl_page_list_last(struct cl_page_list *plist)
	return list_entry(plist->pl_pages.prev, struct cl_page, cp_batch);
}

/**
 * Returns the first page of \a plist, i.e. the cl_page whose cp_batch link
 * directly follows the list head plist->pl_pages.
 *
 * \pre plist->pl_nr > 0 (asserted)
 */
static inline struct cl_page *cl_page_list_first(struct cl_page_list *plist)
{
	struct list_head *first;

	LASSERT(plist->pl_nr > 0);
	first = plist->pl_pages.next;
	return list_entry(first, struct cl_page, cp_batch);
}

/**
 * Iterate over pages in a page list.
 */
@@ -3124,9 +3094,14 @@ void cl_page_list_init(struct cl_page_list *plist);
void cl_page_list_add(struct cl_page_list *plist, struct cl_page *page);
void cl_page_list_move(struct cl_page_list *dst, struct cl_page_list *src,
		       struct cl_page *page);
void cl_page_list_move_head(struct cl_page_list *dst, struct cl_page_list *src,
			    struct cl_page *page);
void cl_page_list_splice(struct cl_page_list *list, struct cl_page_list *head);
void cl_page_list_del(const struct lu_env *env, struct cl_page_list *plist,
		      struct cl_page *page);
void cl_page_list_disown(const struct lu_env *env,
			 struct cl_io *io, struct cl_page_list *plist);
void cl_page_list_fini(const struct lu_env *env, struct cl_page_list *plist);

void cl_2queue_init(struct cl_2queue *queue);
void cl_2queue_disown(const struct lu_env *env,
+6 −0
Original line number Diff line number Diff line
@@ -91,6 +91,12 @@ struct ccc_io {
		struct {
			enum ccc_setattr_lock_type cui_local_lock;
		} setattr;
		struct {
			struct cl_page_list cui_queue;
			unsigned long cui_written;
			int cui_from;
			int cui_to;
		} write;
	} u;
	/**
	 * True iff io is processing glimpse right now.
+7 −4
Original line number Diff line number Diff line
@@ -1120,6 +1120,9 @@ ll_file_io_generic(const struct lu_env *env, struct vvp_io_args *args,
	struct cl_io	 *io;
	ssize_t	       result;

	CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zd\n",
	       file->f_path.dentry->d_name.name, iot, *ppos, count);

restart:
	io = ccc_env_thread_io(env);
	ll_io_init(io, file, iot == CIT_WRITE);
@@ -1144,9 +1147,8 @@ restart:
					goto out;
				}
				write_mutex_locked = 1;
			} else if (iot == CIT_READ) {
				down_read(&lli->lli_trunc_sem);
			}
			down_read(&lli->lli_trunc_sem);
			break;
		case IO_SPLICE:
			vio->u.splice.cui_pipe = args->u.splice.via_pipe;
@@ -1157,10 +1159,10 @@ restart:
			LBUG();
		}
		result = cl_io_loop(env, io);
		if (args->via_io_subtype == IO_NORMAL)
			up_read(&lli->lli_trunc_sem);
		if (write_mutex_locked)
			mutex_unlock(&lli->lli_write_mutex);
		else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
			up_read(&lli->lli_trunc_sem);
	} else {
		/* cl_io_rw_init() handled IO */
		result = io->ci_result;
@@ -1197,6 +1199,7 @@ out:
			fd->fd_write_failed = true;
		}
	}
	CDEBUG(D_VFSTRACE, "iot: %d, result: %zd\n", iot, result);

	return result;
}
+6 −2
Original line number Diff line number Diff line
@@ -697,8 +697,6 @@ int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);

/* llite/rw.c */
int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
int ll_writepage(struct page *page, struct writeback_control *wbc);
int ll_writepages(struct address_space *, struct writeback_control *wbc);
int ll_readpage(struct file *file, struct page *page);
@@ -706,6 +704,9 @@ void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
int ll_readahead(const struct lu_env *env, struct cl_io *io,
		 struct ll_readahead_state *ras, struct address_space *mapping,
		 struct cl_page_list *queue, int flags);
int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io);
struct ll_cl_context *ll_cl_init(struct file *file, struct page *vmpage);
void ll_cl_fini(struct ll_cl_context *lcc);

extern const struct address_space_operations ll_aops;

@@ -1476,4 +1477,7 @@ int ll_layout_restore(struct inode *inode);
int ll_xattr_init(void);
void ll_xattr_fini(void);

int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
		    struct cl_page *page, enum cl_req_type crt);

#endif /* LLITE_INTERNAL_H */
+38 −148
Original line number Diff line number Diff line
@@ -63,7 +63,7 @@
 * Finalizes cl-data before exiting typical address_space operation. Dual to
 * ll_cl_init().
 */
static void ll_cl_fini(struct ll_cl_context *lcc)
void ll_cl_fini(struct ll_cl_context *lcc)
{
	struct lu_env  *env  = lcc->lcc_env;
	struct cl_io   *io   = lcc->lcc_io;
@@ -84,8 +84,7 @@ static void ll_cl_fini(struct ll_cl_context *lcc)
 * Initializes common cl-data at the typical address_space operation entry
 * point.
 */
static struct ll_cl_context *ll_cl_init(struct file *file,
					struct page *vmpage, int create)
struct ll_cl_context *ll_cl_init(struct file *file, struct page *vmpage)
{
	struct ll_cl_context *lcc;
	struct lu_env    *env;
@@ -96,7 +95,7 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
	int refcheck;
	int result = 0;

	clob = ll_i2info(vmpage->mapping->host)->lli_clob;
	clob = ll_i2info(file_inode(file))->lli_clob;
	LASSERT(clob);

	env = cl_env_get(&refcheck);
@@ -111,62 +110,18 @@ static struct ll_cl_context *ll_cl_init(struct file *file,

	cio = ccc_env_io(env);
	io = cio->cui_cl.cis_io;
	if (!io && create) {
		struct inode *inode = vmpage->mapping->host;
		loff_t pos;

		if (inode_trylock(inode)) {
			inode_unlock((inode));
	lcc->lcc_io = io;
	if (!io) {
		struct inode *inode = file_inode(file);

			/* this is too bad. Someone is trying to write the
			 * page w/o holding inode mutex. This means we can
			 * add dirty pages into cache during truncate
			 */
			CERROR("Proc %s is dirtying page w/o inode lock, this will break truncate\n",
			       current->comm);
		CERROR("%s: " DFID " no active IO, please file a ticket.\n",
		       ll_get_fsname(inode->i_sb, NULL, 0),
		       PFID(ll_inode2fid(inode)));
		dump_stack();
			LBUG();
			return ERR_PTR(-EIO);
		}

		/*
		 * Loop-back driver calls ->prepare_write().
		 * methods directly, bypassing file system ->write() operation,
		 * so cl_io has to be created here.
		 */
		io = ccc_env_thread_io(env);
		ll_io_init(io, file, 1);

		/* No lock at all for this kind of IO - we can't do it because
		 * we have held page lock, it would cause deadlock.
		 * XXX: This causes poor performance to loop device - One page
		 *      per RPC.
		 *      In order to get better performance, users should use
		 *      lloop driver instead.
		 */
		io->ci_lockreq = CILR_NEVER;

		pos = vmpage->index << PAGE_CACHE_SHIFT;

		/* Create a temp IO to serve write. */
		result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_CACHE_SIZE);
		if (result == 0) {
			cio->cui_fd = LUSTRE_FPRIVATE(file);
			cio->cui_iter = NULL;
			result = cl_io_iter_init(env, io);
			if (result == 0) {
				result = cl_io_lock(env, io);
				if (result == 0)
					result = cl_io_start(env, io);
			}
		} else
			result = io->ci_result;
	}

	lcc->lcc_io = io;
	if (!io)
		result = -EIO;
	if (result == 0) {
	}
	if (result == 0 && vmpage) {
		struct cl_page   *page;

		LASSERT(io->ci_state == CIS_IO_GOING);
@@ -185,99 +140,9 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
		lcc = ERR_PTR(result);
	}

	CDEBUG(D_VFSTRACE, "%lu@"DFID" -> %d %p %p\n",
	       vmpage->index, PFID(lu_object_fid(&clob->co_lu)), result,
	       env, io);
	return lcc;
}

/**
 * Returns the ll_cl_context stored as vti_io_ctx in the vvp info of the
 * environment handed back by cl_env_get().
 *
 * Asserts that cl_env_get() did not return an error pointer, that the
 * context's lcc_env is that same environment and that its lcc_cookie equals
 * \c current. The environment reference taken here is dropped again with
 * cl_env_put() before returning.
 */
static struct ll_cl_context *ll_cl_get(void)
{
	struct ll_cl_context *lcc;
	struct lu_env *env;
	int refcheck;

	env = cl_env_get(&refcheck);
	LASSERT(!IS_ERR(env));
	lcc = &vvp_env_info(env)->vti_io_ctx;
	/* The context must have been set up for this env and this task. */
	LASSERT(env == lcc->lcc_env);
	LASSERT(current == lcc->lcc_cookie);
	cl_env_put(env, &refcheck);

	/* The env was obtained in ll_cl_init(), so it is still usable. */
	return lcc;
}

/**
 * ->prepare_write() address space operation called by generic_file_write()
 * for every page during write.
 *
 * Obtains the cl context with ll_cl_init(), assumes the page for the io and
 * hands \a from / \a to on to cl_io_prepare_write().
 *
 * On success an extra page reference (tagged "prepare_write") is taken and
 * the context is left initialized; ll_commit_write() releases both. On
 * failure the page is unassumed and the context is finalized here.
 *
 * \retval 0 on success
 * \retval PTR_ERR() of the ll_cl_init() result if no context was obtained
 * \retval the non-zero result of cl_io_prepare_write() otherwise
 */
int ll_prepare_write(struct file *file, struct page *vmpage, unsigned from,
		     unsigned to)
{
	struct ll_cl_context *lcc;
	int result;

	lcc = ll_cl_init(file, vmpage, 1);
	if (!IS_ERR(lcc)) {
		struct lu_env  *env = lcc->lcc_env;
		struct cl_io   *io  = lcc->lcc_io;
		struct cl_page *page = lcc->lcc_page;

		cl_page_assume(env, io, page);

		result = cl_io_prepare_write(env, io, page, from, to);
		if (result == 0) {
			/*
			 * Add a reference, so that page is not evicted from
			 * the cache until ->commit_write() is called.
			 */
			cl_page_get(page);
			lu_ref_add(&page->cp_reference, "prepare_write",
				   current);
		} else {
			/* Undo the assume and tear the context down. */
			cl_page_unassume(env, io, page);
			ll_cl_fini(lcc);
		}
		/* returning 0 in prepare assumes commit must be called
		 * afterwards
		 */
	} else {
		result = PTR_ERR(lcc);
	}
	return result;
}

/**
 * Counterpart of ll_prepare_write(): commits the written part of the page
 * and releases what ll_prepare_write() left held.
 *
 * The context (env, io, page) is fetched with ll_cl_get() rather than from
 * \a file / \a vmpage. cl_io_commit_write() is called only when
 * \a from != \a to. Afterwards the page is unassumed if the io still owns
 * it, the "prepare_write" page reference is dropped and the context is
 * finalized with ll_cl_fini().
 *
 * \pre the page is owned by the io and \a from <= \a to (both asserted)
 *
 * \retval 0 if \a from == \a to
 * \retval the result of cl_io_commit_write() otherwise
 */
int ll_commit_write(struct file *file, struct page *vmpage, unsigned from,
		    unsigned to)
{
	struct ll_cl_context *lcc;
	struct lu_env    *env;
	struct cl_io     *io;
	struct cl_page   *page;
	int result = 0;

	lcc  = ll_cl_get();
	env  = lcc->lcc_env;
	page = lcc->lcc_page;
	io   = lcc->lcc_io;

	LASSERT(cl_page_is_owned(page, io));
	LASSERT(from <= to);
	if (from != to) /* handle short write case. */
		result = cl_io_commit_write(env, io, page, from, to);
	/* Ownership is re-checked after the commit before unassuming. */
	if (cl_page_is_owned(page, io))
		cl_page_unassume(env, io, page);

	/*
	 * Release reference acquired by ll_prepare_write().
	 */
	lu_ref_del(&page->cp_reference, "prepare_write", current);
	cl_page_put(env, page);
	ll_cl_fini(lcc);
	return result;
}

static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);

/**
@@ -1251,7 +1116,7 @@ int ll_readpage(struct file *file, struct page *vmpage)
	struct ll_cl_context *lcc;
	int result;

	lcc = ll_cl_init(file, vmpage, 0);
	lcc = ll_cl_init(file, vmpage);
	if (!IS_ERR(lcc)) {
		struct lu_env  *env  = lcc->lcc_env;
		struct cl_io   *io   = lcc->lcc_io;
@@ -1273,3 +1138,28 @@ int ll_readpage(struct file *file, struct page *vmpage)
	}
	return result;
}

/**
 * Submits the single page \a page for synchronous IO of type \a crt.
 *
 * The io's own 2-queue (io->ci_queue) is initialized with the page, passed
 * to cl_io_submit_sync() with a timeout argument of 0 and finalized again
 * before returning.
 *
 * \pre io->ci_type is CIT_READ or CIT_WRITE (asserted)
 *
 * \retval the value returned by cl_io_submit_sync()
 */
int ll_page_sync_io(const struct lu_env *env, struct cl_io *io,
		    struct cl_page *page, enum cl_req_type crt)
{
	struct cl_2queue *q2 = &io->ci_queue;
	int rc;

	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);

	cl_2queue_init_page(q2, page);
	rc = cl_io_submit_sync(env, io, crt, q2, 0);
	LASSERT(cl_page_is_owned(page, io));

	/*
	 * Only reads disown the pages on the input queue here: in CRT_WRITE
	 * case page is left locked even in case of error.
	 */
	if (crt == CRT_READ)
		cl_page_list_disown(env, io, &q2->c2_qin);

	cl_2queue_fini(env, q2);
	return rc;
}
Loading