Commit 6b2fb799 authored by Maxim Patlasov, committed by Miklos Szeredi
Browse files

fuse: optimize writepages search



Re-work fi->writepages, replacing the list with an rb-tree.  This improves
performance because kernel fuse searches fi->writepages for each
writeback page, and the typical number of entries is about 800 (for 100MB of
fuse writeback).

Before patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 41.3473 s, 260 MB/s

 2  1      0 57445400  40416 6323676    0    0    33 374743 8633 19210  1  8 88  3  0

  29.86%  [kernel]               [k] _raw_spin_lock
  26.62%  [fuse]                 [k] fuse_page_is_writeback

After patch:

10240+0 records in
10240+0 records out
10737418240 bytes (11 GB) copied, 21.4954 s, 500 MB/s

 2  9      0 53676040  31744 10265984    0    0    64 854790 10956 48387  1  6 88  6  0

  23.55%  [kernel]             [k] copy_user_enhanced_fast_string
   9.87%  [kernel]             [k] __memcpy
   3.10%  [kernel]             [k] _raw_spin_lock

Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
parent 5ddd9ced
Loading
Loading
Loading
Loading
+49 −13
Original line number Diff line number Diff line
@@ -357,7 +357,7 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)

struct fuse_writepage_args {
	struct fuse_io_args ia;
	struct list_head writepages_entry;
	struct rb_node writepages_entry;
	struct list_head queue_entry;
	struct fuse_writepage_args *next;
	struct inode *inode;
@@ -366,18 +366,24 @@ struct fuse_writepage_args {
/*
 * Look up a writepage request tracked in fi->writepages whose page range
 * intersects [idx_from, idx_to] (both ends inclusive).
 *
 * Returns the matching request, or NULL when no tracked request overlaps
 * the range.
 *
 * NOTE(review): this span is a rendered diff hunk.  The removed list walk
 * (list_for_each_entry and its single overlap test) appears interleaved
 * with the added rb-tree descent, so the text below is not compilable
 * as shown.
 */
static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
					    pgoff_t idx_from, pgoff_t idx_to)
{
	struct fuse_writepage_args *wpa;
	struct rb_node *n;

	/* Start the descent at the root of the per-inode tree. */
	n = fi->writepages.rb_node;

	list_for_each_entry(wpa, &fi->writepages, writepages_entry) {
	while (n) {
		struct fuse_writepage_args *wpa;
		pgoff_t curr_index;

		wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
		WARN_ON(get_fuse_inode(wpa->inode) != fi);
		/* Index of the first page covered by this request. */
		curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
		if (idx_from < curr_index + wpa->ia.ap.num_pages &&
		    curr_index <= idx_to) {
		/* Queried range starts at or after this request's end: go right. */
		if (idx_from >= curr_index + wpa->ia.ap.num_pages)
			n = n->rb_right;
		/* Queried range ends before this request starts: go left. */
		else if (idx_to < curr_index)
			n = n->rb_left;
		/* Neither side is disjoint, so the ranges overlap. */
		else
			return wpa;
	}
	}
	return NULL;
}

@@ -1624,7 +1630,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc,
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	int i;

	list_del(&wpa->writepages_entry);
	rb_erase(&wpa->writepages_entry, &fi->writepages);
	for (i = 0; i < ap->num_pages; i++) {
		dec_wb_stat(&bdi->wb, WB_WRITEBACK);
		dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
@@ -1712,6 +1718,36 @@ __acquires(fi->lock)
	}
}

/*
 * Link @wpa into the rb-tree @root.
 *
 * Nodes are ordered by the page-index range each request covers: the range
 * starts at (write offset >> PAGE_SHIFT) and spans ia.ap.num_pages pages.
 * A request that lies entirely after a node goes to its right, one that
 * lies entirely before it goes to its left.
 *
 * Warns if @wpa covers no pages, or if a node already in the tree belongs
 * to a different inode.  If @wpa's range overlaps an existing node, a
 * warning is emitted and the function returns with @wpa NOT linked into
 * the tree.
 */
static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;
	pgoff_t first = wpa->ia.write.in.offset >> PAGE_SHIFT;
	pgoff_t last = first + wpa->ia.ap.num_pages - 1;

	WARN_ON(!wpa->ia.ap.num_pages);
	while (*link) {
		struct fuse_writepage_args *node;
		pgoff_t node_first;
		pgoff_t node_end;

		parent = *link;
		node = rb_entry(parent, struct fuse_writepage_args,
				writepages_entry);
		WARN_ON(node->inode != wpa->inode);

		/* [node_first, node_end) is the page range held by this node. */
		node_first = node->ia.write.in.offset >> PAGE_SHIFT;
		node_end = node_first + node->ia.ap.num_pages;

		if (first >= node_end) {
			link = &parent->rb_right;
		} else if (last < node_first) {
			link = &parent->rb_left;
		} else {
			/* Overlapping ranges: complain and leave @wpa unlinked. */
			WARN_ON(true);
			return;
		}
	}

	rb_link_node(&wpa->writepages_entry, parent, link);
	rb_insert_color(&wpa->writepages_entry, root);
}

static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
			       int error)
{
@@ -1730,7 +1766,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
		wpa->next = next->next;
		next->next = NULL;
		next->ia.ff = fuse_file_get(wpa->ia.ff);
		list_add(&next->writepages_entry, &fi->writepages);
		tree_insert(&fi->writepages, next);

		/*
		 * Skip fuse_flush_writepages() to make it easy to crop requests
@@ -1865,7 +1901,7 @@ static int fuse_writepage_locked(struct page *page)
	inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);

	spin_lock(&fi->lock);
	list_add(&wpa->writepages_entry, &fi->writepages);
	tree_insert(&fi->writepages, wpa);
	list_add_tail(&wpa->queue_entry, &fi->queued_writes);
	fuse_flush_writepages(inode);
	spin_unlock(&fi->lock);
@@ -1977,10 +2013,10 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
	WARN_ON(new_ap->num_pages != 0);

	spin_lock(&fi->lock);
	list_del(&new_wpa->writepages_entry);
	rb_erase(&new_wpa->writepages_entry, &fi->writepages);
	old_wpa = fuse_find_writeback(fi, page->index, page->index);
	if (!old_wpa) {
		list_add(&new_wpa->writepages_entry, &fi->writepages);
		tree_insert(&fi->writepages, new_wpa);
		spin_unlock(&fi->lock);
		return false;
	}
@@ -2095,7 +2131,7 @@ static int fuse_writepages_fill(struct page *page,
		wpa->inode = inode;

		spin_lock(&fi->lock);
		list_add(&wpa->writepages_entry, &fi->writepages);
		tree_insert(&fi->writepages, wpa);
		spin_unlock(&fi->lock);

		data->wpa = wpa;
@@ -3405,5 +3441,5 @@ void fuse_init_file_inode(struct inode *inode)
	INIT_LIST_HEAD(&fi->queued_writes);
	fi->writectr = 0;
	init_waitqueue_head(&fi->page_waitq);
	INIT_LIST_HEAD(&fi->writepages);
	fi->writepages = RB_ROOT;
}
+1 −1
Original line number Diff line number Diff line
@@ -111,7 +111,7 @@ struct fuse_inode {
			wait_queue_head_t page_waitq;

			/* RB-tree of writepage requests (pending or sent) */
			struct list_head writepages;
			struct rb_root writepages;
		};

		/* readdir cache (directory only) */