Commit 22208ded authored by Aneesh Kumar K.V, committed by Theodore Ts'o
Browse files

ext4: Fix file fragmentation during large file write.



The range_cyclic writeback mode uses the address_space writeback_index
as the start index for writeback.  With delayed allocation we were
updating writeback_index wrongly resulting in highly fragmented file.
This patch reduces the number of extents from about 4000 to 27 for a
3GB file.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent 17bc6c30
Loading
Loading
Loading
Loading
+57 −34
Original line number Diff line number Diff line
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
	int ret = 0, err, nr_pages, i;
	unsigned long index, end;
	struct pagevec pvec;
	long pages_skipped;

	BUG_ON(mpd->next_page <= mpd->first_page);
	pagevec_init(&pvec, 0);
@@ -1655,7 +1656,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
	end = mpd->next_page - 1;

	while (index <= end) {
		/* XXX: optimize tail */
		/*
		 * We can use PAGECACHE_TAG_DIRTY lookup here because
		 * even though we have cleared the dirty flag on the page
@@ -1673,8 +1673,13 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			pages_skipped = mpd->wbc->pages_skipped;
			err = mapping->a_ops->writepage(page, mpd->wbc);
			if (!err)
			if (!err && (pages_skipped == mpd->wbc->pages_skipped))
				/*
				 * have successfully written the page
				 * without skipping the same
				 */
				mpd->pages_written++;
			/*
			 * In error case, we have to continue because
@@ -2110,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
			       struct writeback_control *wbc,
			       struct mpage_da_data *mpd)
{
	long to_write;
	int ret;

	if (!mpd->get_block)
@@ -2125,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
	mpd->pages_written = 0;
	mpd->retval = 0;

	to_write = wbc->nr_to_write;

	ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);

	/*
	 * Handle last extent of pages
	 */
	if (!mpd->io_done && mpd->next_page != mpd->first_page) {
		if (mpage_da_map_blocks(mpd) == 0)
			mpage_da_submit_io(mpd);
	}

	wbc->nr_to_write = to_write - mpd->pages_written;
		mpd->io_done = 1;
		ret = MPAGE_DA_EXTENT_TAIL;
	}
	wbc->nr_to_write -= mpd->pages_written;
	return ret;
}

@@ -2366,11 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
static int ext4_da_writepages(struct address_space *mapping,
			      struct writeback_control *wbc)
{
	pgoff_t	index;
	int range_whole = 0;
	handle_t *handle = NULL;
	struct mpage_da_data mpd;
	struct inode *inode = mapping->host;
	int no_nrwrite_index_update;
	long pages_written = 0, pages_skipped;
	int needed_blocks, ret = 0, nr_to_writebump = 0;
	long to_write, pages_skipped = 0;
	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);

	/*
@@ -2390,16 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
		nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
		wbc->nr_to_write = sbi->s_mb_stream_request;
	}
	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
		range_whole = 1;


	pages_skipped = wbc->pages_skipped;
	if (wbc->range_cyclic)
		index = mapping->writeback_index;
	else
		index = wbc->range_start >> PAGE_CACHE_SHIFT;

	mpd.wbc = wbc;
	mpd.inode = mapping->host;

restart_loop:
	to_write = wbc->nr_to_write;
	while (!ret && to_write > 0) {
	/*
	 * we don't want write_cache_pages to update
	 * nr_to_write and writeback_index
	 */
	no_nrwrite_index_update = wbc->no_nrwrite_index_update;
	wbc->no_nrwrite_index_update = 1;
	pages_skipped = wbc->pages_skipped;

	while (!ret && wbc->nr_to_write > 0) {

		/*
		 * we  insert one extent at a time. So we need
@@ -2420,46 +2436,53 @@ restart_loop:
			dump_stack();
			goto out_writepages;
		}
		to_write -= wbc->nr_to_write;

		mpd.get_block = ext4_da_get_block_write;
		ret = mpage_da_writepages(mapping, wbc, &mpd);

		ext4_journal_stop(handle);

		if (mpd.retval == -ENOSPC)
		if (mpd.retval == -ENOSPC) {
			/* commit the transaction which would
			 * free blocks released in the transaction
			 * and try again
			 */
			jbd2_journal_force_commit_nested(sbi->s_journal);

		/* reset the retry count */
		if (ret == MPAGE_DA_EXTENT_TAIL) {
			wbc->pages_skipped = pages_skipped;
			ret = 0;
		} else if (ret == MPAGE_DA_EXTENT_TAIL) {
			/*
			 * got one extent now try with
			 * rest of the pages
			 */
			to_write += wbc->nr_to_write;
			pages_written += mpd.pages_written;
			wbc->pages_skipped = pages_skipped;
			ret = 0;
		} else if (wbc->nr_to_write) {
		} else if (wbc->nr_to_write)
			/*
			 * There is no more writeout needed
			 * or we requested for a noblocking writeout
			 * and we found the device congested
			 */
			to_write += wbc->nr_to_write;
			break;
	}
		wbc->nr_to_write = to_write;
	}
	if (pages_skipped != wbc->pages_skipped)
		printk(KERN_EMERG "This should not happen leaving %s "
				"with nr_to_write = %ld ret = %d\n",
				__func__, wbc->nr_to_write, ret);

	if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) {
		/* We skipped pages in this loop */
		wbc->nr_to_write = to_write +
				wbc->pages_skipped - pages_skipped;
		wbc->pages_skipped = pages_skipped;
		goto restart_loop;
	}
	/* Update index */
	index += pages_written;
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		/*
		 * set the writeback_index so that range_cyclic
		 * mode will write it back later
		 */
		mapping->writeback_index = index;

out_writepages:
	wbc->nr_to_write = to_write - nr_to_writebump;
	if (!no_nrwrite_index_update)
		wbc->no_nrwrite_index_update = 0;
	wbc->nr_to_write -= nr_to_writebump;
	return ret;
}