Commit 7f3c74fb authored by Chris Mason
Browse files

Btrfs: Keep extent mappings in ram until pending ordered extents are done



It was possible for stale mappings from disk to be used instead of the
new pending ordered extent.  This adds a flag to the extent map struct
to keep it pinned until the pending ordered extent is actually on disk.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 211f90e6
Loading
Loading
Loading
Loading
+15 −12
Original line number Diff line number Diff line
@@ -2000,7 +2000,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t pg_offset = 0;
	size_t blocksize;
	loff_t i_size = i_size_read(inode);
	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
@@ -2008,9 +2008,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
	u64 delalloc_end;

	WARN_ON(!PageLocked(page));
	page_offset = i_size & (PAGE_CACHE_SIZE - 1);
	pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
	if (page->index > end_index ||
	   (page->index == end_index && !page_offset)) {
	   (page->index == end_index && !pg_offset)) {
		page->mapping->a_ops->invalidatepage(page, 0);
		unlock_page(page);
		return 0;
@@ -2020,12 +2020,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		char *userpage;

		userpage = kmap_atomic(page, KM_USER0);
		memset(userpage + page_offset, 0,
		       PAGE_CACHE_SIZE - page_offset);
		memset(userpage + pg_offset, 0,
		       PAGE_CACHE_SIZE - pg_offset);
		kunmap_atomic(userpage, KM_USER0);
		flush_dcache_page(page);
	}
	page_offset = 0;
	pg_offset = 0;

	set_page_extent_mapped(page);

@@ -2088,7 +2088,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
			unlock_start = page_end + 1;
			break;
		}
		em = epd->get_extent(inode, page, page_offset, cur,
		em = epd->get_extent(inode, page, pg_offset, cur,
				     end - cur + 1, 1);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
@@ -2113,12 +2113,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,

			unlock_extent(tree, unlock_start, cur + iosize -1,
				      GFP_NOFS);

			if (tree->ops && tree->ops->writepage_end_io_hook)
				tree->ops->writepage_end_io_hook(page, cur,
							 cur + iosize - 1,
							 NULL, 1);
			cur = cur + iosize;
			page_offset += iosize;
			pg_offset += iosize;
			unlock_start = cur;
			continue;
		}
@@ -2127,7 +2128,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
				   EXTENT_DIRTY, 0)) {
			cur = cur + iosize;
			page_offset += iosize;
			pg_offset += iosize;
			continue;
		}
		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
@@ -2141,6 +2142,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
			SetPageError(page);
		} else {
			unsigned long max_nr = end_index + 1;

			set_range_writeback(tree, cur, cur + iosize - 1);
			if (!PageWriteback(page)) {
				printk("warning page %lu not writeback, "
@@ -2150,14 +2152,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
			}

			ret = submit_extent_page(WRITE, tree, page, sector,
						 iosize, page_offset, bdev,
						 iosize, pg_offset, bdev,
						 &epd->bio, max_nr,
						 end_bio_extent_writepage, 0);
			if (ret)
				SetPageError(page);
		}
		cur = cur + iosize;
		page_offset += iosize;
		pg_offset += iosize;
		nr++;
	}
done:
@@ -2579,7 +2581,8 @@ int try_release_extent_mapping(struct extent_map_tree *map,
				spin_unlock(&map->lock);
				break;
			}
			if (em->start != start) {
			if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
			    em->start != start) {
				spin_unlock(&map->lock);
				free_extent_map(em);
				break;
+4 −0
Original line number Diff line number Diff line
@@ -173,6 +173,9 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)

static int mergable_maps(struct extent_map *prev, struct extent_map *next)
{
	if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
		return 0;

	if (extent_map_end(prev) == next->start &&
	    prev->flags == next->flags &&
	    prev->bdev == next->bdev &&
@@ -320,6 +323,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
	int ret = 0;

	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
	BUG_ON(spin_trylock(&tree->lock));
	rb_erase(&em->rb_node, &tree->map);
	em->in_tree = 0;
+3 −0
Original line number Diff line number Diff line
@@ -8,6 +8,9 @@
#define EXTENT_MAP_INLINE (u64)-2
#define EXTENT_MAP_DELALLOC (u64)-1

/* bits for the flags field */
#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */

struct extent_map {
	struct rb_node rb_node;

+1 −4
Original line number Diff line number Diff line
@@ -192,7 +192,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
				 (char *)&sector_sum->sum);
		sector_sum->offset = page_offset(bvec->bv_page) +
			bvec->bv_offset;

		sector_sum++;
		bio_index++;
		total_bytes += bvec->bv_len;
@@ -201,9 +200,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
	}
	btrfs_add_ordered_sum(inode, ordered, sums);
	btrfs_put_ordered_extent(ordered);
	if (total_bytes != bio->bi_size) {
printk("warning, total bytes %lu bio size %u\n", total_bytes, bio->bi_size);
	}
	return 0;
}

@@ -372,6 +368,7 @@ next_sector:
		write_extent_buffer(leaf, &sector_sum->sum,
				    (unsigned long)item, BTRFS_CRC32_SIZE);
	}

	total_bytes += root->sectorsize;
	sector_sum++;
	if (total_bytes < sums->len) {
+10 −4
Original line number Diff line number Diff line
@@ -358,9 +358,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *tmp;
	u64 len = end - start + 1;
	u64 next_start;
	int ret;
	int testend = 1;

@@ -381,8 +379,16 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
			spin_unlock(&em_tree->lock);
			break;
		}
		tmp = rb_entry(&em->rb_node, struct extent_map, rb_node);
		next_start = tmp->start;
		if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			start = em->start + em->len;
			free_extent_map(em);
			spin_unlock(&em_tree->lock);
			if (start < end) {
				len = end - start + 1;
				continue;
			}
			break;
		}
		remove_extent_mapping(em_tree, em);

		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
Loading