Commit 6b529fb0 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:

 - two regression fixes in clone/dedupe ioctls, the generic check
   callback needs to lock extents properly and wait for io to avoid
   problems with writeback and relocation

 - fix deadlock when using free space tree due to block group creation

 - a recently added check refuses a valid fileystem with seeding device,
   make that work again with a quickfix, proper solution needs more
   intrusive changes

* tag 'for-5.0-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: Use real device structure to verify dev extent
  Btrfs: fix deadlock when using free space tree due to block group creation
  Btrfs: fix race between reflink/dedupe and relocation
  Btrfs: fix race between cloning range ending at eof and writeback
parents 72d657dd 1b3922a8
Loading
Loading
Loading
Loading
+9 −7
Original line number Diff line number Diff line
@@ -1016,19 +1016,21 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
		parent_start = parent->start;

	/*
	 * If we are COWing a node/leaf from the extent, chunk or device trees,
	 * make sure that we do not finish block group creation of pending block
	 * groups. We do this to avoid a deadlock.
	 * If we are COWing a node/leaf from the extent, chunk, device or free
	 * space trees, make sure that we do not finish block group creation of
	 * pending block groups. We do this to avoid a deadlock.
	 * COWing can result in allocation of a new chunk, and flushing pending
	 * block groups (btrfs_create_pending_block_groups()) can be triggered
	 * when finishing allocation of a new chunk. Creation of a pending block
	 * group modifies the extent, chunk and device trees, therefore we could
	 * deadlock with ourselves since we are holding a lock on an extent
	 * buffer that btrfs_create_pending_block_groups() may try to COW later.
	 * group modifies the extent, chunk, device and free space trees,
	 * therefore we could deadlock with ourselves since we are holding a
	 * lock on an extent buffer that btrfs_create_pending_block_groups() may
	 * try to COW later.
	 */
	if (root == fs_info->extent_root ||
	    root == fs_info->chunk_root ||
	    root == fs_info->dev_root)
	    root == fs_info->dev_root ||
	    root == fs_info->free_space_root)
		trans->can_flush_pending_bgs = false;

	cow = btrfs_alloc_tree_block(trans, root, parent_start,
+43 −6
Original line number Diff line number Diff line
@@ -3221,6 +3221,26 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
	inode_lock_nested(inode2, I_MUTEX_CHILD);
}

static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
				       struct inode *inode2, u64 loff2, u64 len)
{
	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
}

static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
				     struct inode *inode2, u64 loff2, u64 len)
{
	if (inode1 < inode2) {
		swap(inode1, inode2);
		swap(loff1, loff2);
	} else if (inode1 == inode2 && loff2 < loff1) {
		swap(loff1, loff2);
	}
	lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
	lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
}

static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
				   struct inode *dst, u64 dst_loff)
{
@@ -3242,11 +3262,12 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
		return -EINVAL;

	/*
	 * Lock destination range to serialize with concurrent readpages().
	 * Lock destination range to serialize with concurrent readpages() and
	 * source range to serialize with relocation.
	 */
	lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);
	btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
	ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
	unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, dst_loff + len - 1);
	btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);

	return ret;
}
@@ -3905,17 +3926,33 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
		len = ALIGN(src->i_size, bs) - off;

	if (destoff > inode->i_size) {
		const u64 wb_start = ALIGN_DOWN(inode->i_size, bs);

		ret = btrfs_cont_expand(inode, inode->i_size, destoff);
		if (ret)
			return ret;
		/*
		 * We may have truncated the last block if the inode's size is
		 * not sector size aligned, so we need to wait for writeback to
		 * complete before proceeding further, otherwise we can race
		 * with cloning and attempt to increment a reference to an
		 * extent that no longer exists (writeback completed right after
		 * we found the previous extent covering eof and before we
		 * attempted to increment its reference count).
		 */
		ret = btrfs_wait_ordered_range(inode, wb_start,
					       destoff - wb_start);
		if (ret)
			return ret;
	}

	/*
	 * Lock destination range to serialize with concurrent readpages().
	 * Lock destination range to serialize with concurrent readpages() and
	 * source range to serialize with relocation.
	 */
	lock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1);
	btrfs_double_extent_lock(src, off, inode, destoff, len);
	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
	unlock_extent(&BTRFS_I(inode)->io_tree, destoff, destoff + len - 1);
	btrfs_double_extent_unlock(src, off, inode, destoff, len);
	/*
	 * Truncate page cache pages so that future reads will see the cloned
	 * data immediately and not the previous data.
+12 −0
Original line number Diff line number Diff line
@@ -7825,6 +7825,18 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
		ret = -EUCLEAN;
		goto out;
	}

	/* It's possible this device is a dummy for seed device */
	if (dev->disk_total_bytes == 0) {
		dev = find_device(fs_info->fs_devices->seed, devid, NULL);
		if (!dev) {
			btrfs_err(fs_info, "failed to find seed devid %llu",
				  devid);
			ret = -EUCLEAN;
			goto out;
		}
	}

	if (physical_offset + physical_len > dev->disk_total_bytes) {
		btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",