Commit effaf901 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:
 "A few more fixes that have been in the works during the last two weeks.
  All have a user visible effect and are stable material:

   - scrub: properly update progress after calling cancel ioctl, calling
     'resume' would start from the beginning otherwise

   - fix subvolume reference removal, after moving out of the original
     path the reference is not recognized and will lead to transaction
     abort

   - fix reloc root lifetime checks, could lead to crashes when there's
     subvolume cleaning running in parallel

   - fix memory leak when quotas get disabled in the middle of extent
     accounting

   - fix transaction abort in case of balance being started on degraded
     mount on eg. RAID1"

* tag 'for-5.5-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: check rw_devices, not num_devices for balance
  Btrfs: always copy scrub arguments back to user space
  btrfs: relocation: fix reloc_root lifespan and access
  btrfs: fix memory leak in qgroup accounting
  btrfs: do not delete mismatched root refs
  btrfs: fix invalid removal of root ref
  btrfs: rework arguments of btrfs_unlink_subvol
parents ab7541c3 b35cf1f0
Loading
Loading
Loading
Loading
+39 −34
Original line number Diff line number Diff line
@@ -4238,18 +4238,30 @@ out:
}

static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
			       struct inode *dir, u64 objectid,
			       const char *name, int name_len)
			       struct inode *dir, struct dentry *dentry)
{
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	const char *name = dentry->d_name.name;
	int name_len = dentry->d_name.len;
	u64 index;
	int ret;
	u64 objectid;
	u64 dir_ino = btrfs_ino(BTRFS_I(dir));

	if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
		objectid = inode->root->root_key.objectid;
	} else if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
		objectid = inode->location.objectid;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
@@ -4271,13 +4283,16 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
	}
	btrfs_release_path(path);

	ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid,
				 dir_ino, &index, name, name_len);
	if (ret < 0) {
		if (ret != -ENOENT) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	/*
	 * This is a placeholder inode for a subvolume we didn't have a
	 * reference to at the time of the snapshot creation.  In the meantime
	 * we could have renamed the real subvol link into our snapshot, so
	 * depending on btrfs_del_root_ref to return -ENOENT here is incorrect.
	 * Instead simply lookup the dir_index_item for this entry so we can
	 * remove it.  Otherwise we know we have a ref to the root and we can
	 * call btrfs_del_root_ref, and it _shouldn't_ fail.
	 */
	if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
		di = btrfs_search_dir_index_item(root, path, dir_ino,
						 name, name_len);
		if (IS_ERR_OR_NULL(di)) {
@@ -4292,8 +4307,16 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		index = key.offset;
	}
		btrfs_release_path(path);
	} else {
		ret = btrfs_del_root_ref(trans, objectid,
					 root->root_key.objectid, dir_ino,
					 &index, name, name_len);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}

	ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
	if (ret) {
@@ -4487,8 +4510,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)

	btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));

	ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid,
				  dentry->d_name.name, dentry->d_name.len);
	ret = btrfs_unlink_subvol(trans, dir, dentry);
	if (ret) {
		err = ret;
		btrfs_abort_transaction(trans, ret);
@@ -4583,10 +4605,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
		return PTR_ERR(trans);

	if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
		err = btrfs_unlink_subvol(trans, dir,
					  BTRFS_I(inode)->location.objectid,
					  dentry->d_name.name,
					  dentry->d_name.len);
		err = btrfs_unlink_subvol(trans, dir, dentry);
		goto out;
	}

@@ -9536,7 +9555,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
	u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
	u64 old_idx = 0;
	u64 new_idx = 0;
	u64 root_objectid;
	int ret;
	bool root_log_pinned = false;
	bool dest_log_pinned = false;
@@ -9642,10 +9660,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,

	/* src is a subvolume */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
					  old_dentry->d_name.name,
					  old_dentry->d_name.len);
		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
	} else { /* src is an inode */
		ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
					   BTRFS_I(old_dentry->d_inode),
@@ -9661,10 +9676,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,

	/* dest is a subvolume */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
					  new_dentry->d_name.name,
					  new_dentry->d_name.len);
		ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
	} else { /* dest is an inode */
		ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
					   BTRFS_I(new_dentry->d_inode),
@@ -9862,7 +9874,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
	struct inode *new_inode = d_inode(new_dentry);
	struct inode *old_inode = d_inode(old_dentry);
	u64 index = 0;
	u64 root_objectid;
	int ret;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
	bool log_pinned = false;
@@ -9970,10 +9981,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
				BTRFS_I(old_inode), 1);

	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
					old_dentry->d_name.name,
					old_dentry->d_name.len);
		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
	} else {
		ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
					BTRFS_I(d_inode(old_dentry)),
@@ -9992,10 +10000,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
		new_inode->i_ctime = current_time(new_inode);
		if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
			root_objectid = BTRFS_I(new_inode)->location.objectid;
			ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
						new_dentry->d_name.name,
						new_dentry->d_name.len);
			ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
			BUG_ON(new_inode->i_nlink == 0);
		} else {
			ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
+13 −1
Original line number Diff line number Diff line
@@ -4252,7 +4252,19 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
			      &sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
			      0);

	if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
	/*
	 * Copy scrub args to user space even if btrfs_scrub_dev() returned an
	 * error. This is important as it allows user space to know how much
	 * progress scrub has done. For example, if scrub is canceled we get
	 * -ECANCELED from btrfs_scrub_dev() and return that error back to user
	 * space. Later user space can inspect the progress from the structure
	 * btrfs_ioctl_scrub_args and resume scrub from where it left off
	 * previously (btrfs-progs does this).
	 * If we fail to copy the btrfs_ioctl_scrub_args structure to user space
	 * then return -EFAULT to signal the structure was not copied or it may
	 * be corrupt and unreliable due to a partial copy.
	 */
	if (copy_to_user(arg, sa, sizeof(*sa)))
		ret = -EFAULT;

	if (!(sa->flags & BTRFS_SCRUB_READONLY))
+5 −1
Original line number Diff line number Diff line
@@ -2423,8 +2423,12 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
	u64 nr_old_roots = 0;
	int ret = 0;

	/*
	 * If quotas get disabled meanwhile, the resources need to be freed and
	 * we can't just exit here.
	 */
	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
		return 0;
		goto out_free;

	if (new_roots) {
		if (!maybe_fs_roots(new_roots))
+46 −5
Original line number Diff line number Diff line
@@ -517,6 +517,34 @@ static int update_backref_cache(struct btrfs_trans_handle *trans,
	return 1;
}

/*
 * Tell whether BTRFS_ROOT_DEAD_RELOC_TREE is currently set in @root->state.
 *
 * Returns true when the bit is set, false otherwise.
 */
static bool reloc_root_is_dead(struct btrfs_root *root)
{
	/*
	 * Read barrier first: it pairs with the write barriers placed next to
	 * set_bit() in btrfs_update_reloc_root and clear_bit() in
	 * clean_dirty_subvols, so the up-to-date bit is observed before any
	 * caller goes on to look at root->reloc_root.
	 */
	smp_rmb();

	return test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
}

/*
 * Report whether @root owns a reloc tree that is still usable.
 *
 * A reloc tree that has been marked dead (after the swap) is reported the
 * same way as a missing one: both yield false.  Most callers do not need to
 * tell those two cases apart; should_ignore_root() below is the exception
 * and checks the dead state on its own.
 */
static bool have_reloc_root(struct btrfs_root *root)
{
	/*
	 * The dead check must be evaluated first (it issues the read barrier);
	 * only when the tree is not dead is the reloc_root pointer examined.
	 */
	return !reloc_root_is_dead(root) && root->reloc_root;
}

static int should_ignore_root(struct btrfs_root *root)
{
@@ -525,6 +553,10 @@ static int should_ignore_root(struct btrfs_root *root)
	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		return 0;

	/* This root has been merged with its reloc tree, we can ignore it */
	if (reloc_root_is_dead(root))
		return 1;

	reloc_root = root->reloc_root;
	if (!reloc_root)
		return 0;
@@ -1439,7 +1471,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
	 * The subvolume has reloc tree but the swap is finished, no need to
	 * create/update the dead reloc tree
	 */
	if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
	if (reloc_root_is_dead(root))
		return 0;

	if (root->reloc_root) {
@@ -1478,8 +1510,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
	struct btrfs_root_item *root_item;
	int ret;

	if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state) ||
	    !root->reloc_root)
	if (!have_reloc_root(root))
		goto out;

	reloc_root = root->reloc_root;
@@ -1489,6 +1520,11 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
	if (fs_info->reloc_ctl->merge_reloc_tree &&
	    btrfs_root_refs(root_item) == 0) {
		set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
		/*
		 * Mark the tree as dead before we change reloc_root so
		 * have_reloc_root will not touch it from now on.
		 */
		smp_wmb();
		__del_reloc_root(reloc_root);
	}

@@ -2201,6 +2237,11 @@ static int clean_dirty_subvols(struct reloc_control *rc)
				if (ret2 < 0 && !ret)
					ret = ret2;
			}
			/*
			 * Need barrier to ensure clear_bit() only happens after
			 * root->reloc_root = NULL. Pairs with have_reloc_root.
			 */
			smp_wmb();
			clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
			btrfs_put_fs_root(root);
		} else {
@@ -4718,7 +4759,7 @@ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
	struct btrfs_root *root = pending->root;
	struct reloc_control *rc = root->fs_info->reloc_ctl;

	if (!root->reloc_root || !rc)
	if (!rc || !have_reloc_root(root))
		return;

	if (!rc->merge_reloc_tree)
@@ -4752,7 +4793,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
	struct reloc_control *rc = root->fs_info->reloc_ctl;
	int ret;

	if (!root->reloc_root || !rc)
	if (!rc || !have_reloc_root(root))
		return 0;

	rc = root->fs_info->reloc_ctl;
+6 −4
Original line number Diff line number Diff line
@@ -376,11 +376,13 @@ again:
		leaf = path->nodes[0];
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_root_ref);

		WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
		WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
		ptr = (unsigned long)(ref + 1);
		WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
		if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
		    (btrfs_root_ref_name_len(leaf, ref) != name_len) ||
		    memcmp_extent_buffer(leaf, name, ptr, name_len)) {
			err = -ENOENT;
			goto out;
		}
		*sequence = btrfs_root_ref_sequence(leaf, ref);

		ret = btrfs_del_item(trans, tree_root, path);
Loading