Commit a17a3ca5 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull btrfs fixes from David Sterba:
 "A few fixes for various warnings that accumulated over past two weeks:

   - tree-checker: add missing return values for some errors

   - lockdep fixes
      - when reading qgroup config and starting quota rescan
      - reverse order of quota ioctl lock and VFS freeze lock

   - avoid accessing potentially stale fs info during device scan,
     reported by syzbot

   - add scope NOFS protection around qgroup relation changes

   - check for running transaction before flushing qgroups

   - fix tracking of new delalloc ranges for some cases"

* tag 'for-5.10-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix lockdep splat when enabling and disabling qgroups
  btrfs: do nofs allocations when adding and removing qgroup relations
  btrfs: fix lockdep splat when reading qgroup config on mount
  btrfs: tree-checker: add missing returns after data_ref alignment checks
  btrfs: don't access possibly stale fs_info data for printing duplicate device
  btrfs: tree-checker: add missing return after error in root_item
  btrfs: qgroup: don't commit transaction when we already hold the handle
  btrfs: fix missing delalloc new bit for new delalloc ranges
parents d41e9b22 a855fbe6
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -878,7 +878,10 @@ struct btrfs_fs_info {
	 */
	struct ulist *qgroup_ulist;

	/* protect user change for quota operations */
	/*
	 * Protect user change for quota operations. If a transaction is needed,
	 * it must be started before locking this lock.
	 */
	struct mutex qgroup_ioctl_lock;

	/* list of dirty qgroups to be written at next commit */
+0 −57
Original line number Diff line number Diff line
@@ -452,46 +452,6 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
	}
}

static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
					 const u64 start,
					 const u64 len,
					 struct extent_state **cached_state)
{
	u64 search_start = start;
	const u64 end = start + len - 1;

	while (search_start < end) {
		const u64 search_len = end - search_start + 1;
		struct extent_map *em;
		u64 em_len;
		int ret = 0;

		em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
		if (IS_ERR(em))
			return PTR_ERR(em);

		if (em->block_start != EXTENT_MAP_HOLE)
			goto next;

		em_len = em->len;
		if (em->start < search_start)
			em_len -= search_start - em->start;
		if (em_len > search_len)
			em_len = search_len;

		ret = set_extent_bit(&inode->io_tree, search_start,
				     search_start + em_len - 1,
				     EXTENT_DELALLOC_NEW,
				     NULL, cached_state, GFP_NOFS);
next:
		search_start = extent_map_end(em);
		free_extent_map(em);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * after copy_from_user, pages need to be dirtied and we need to make
 * sure holes are created between the current EOF and the start of
@@ -528,23 +488,6 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
			 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			 0, 0, cached);

	if (!btrfs_is_free_space_inode(inode)) {
		if (start_pos >= isize &&
		    !(inode->flags & BTRFS_INODE_PREALLOC)) {
			/*
			 * There can't be any extents following eof in this case
			 * so just set the delalloc new bit for the range
			 * directly.
			 */
			extra_bits |= EXTENT_DELALLOC_NEW;
		} else {
			err = btrfs_find_new_delalloc_bytes(inode, start_pos,
							    num_bytes, cached);
			if (err)
				return err;
		}
	}

	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
					extra_bits, cached);
	if (err)
+58 −0
Original line number Diff line number Diff line
@@ -2253,11 +2253,69 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
	return 0;
}

static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
					 const u64 start,
					 const u64 len,
					 struct extent_state **cached_state)
{
	u64 search_start = start;
	const u64 end = start + len - 1;

	while (search_start < end) {
		const u64 search_len = end - search_start + 1;
		struct extent_map *em;
		u64 em_len;
		int ret = 0;

		em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
		if (IS_ERR(em))
			return PTR_ERR(em);

		if (em->block_start != EXTENT_MAP_HOLE)
			goto next;

		em_len = em->len;
		if (em->start < search_start)
			em_len -= search_start - em->start;
		if (em_len > search_len)
			em_len = search_len;

		ret = set_extent_bit(&inode->io_tree, search_start,
				     search_start + em_len - 1,
				     EXTENT_DELALLOC_NEW,
				     NULL, cached_state, GFP_NOFS);
next:
		search_start = extent_map_end(em);
		free_extent_map(em);
		if (ret)
			return ret;
	}
	return 0;
}

int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
			      unsigned int extra_bits,
			      struct extent_state **cached_state)
{
	WARN_ON(PAGE_ALIGNED(end));

	if (start >= i_size_read(&inode->vfs_inode) &&
	    !(inode->flags & BTRFS_INODE_PREALLOC)) {
		/*
		 * There can't be any extents following eof in this case so just
		 * set the delalloc new bit for the range directly.
		 */
		extra_bits |= EXTENT_DELALLOC_NEW;
	} else {
		int ret;

		ret = btrfs_find_new_delalloc_bytes(inode, start,
						    end + 1 - start,
						    cached_state);
		if (ret)
			return ret;
	}

	return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
				   cached_state);
}
+78 −10
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>
#include <linux/sched/mm.h>

#include "ctree.h"
#include "transaction.h"
@@ -497,13 +498,13 @@ next2:
			break;
	}
out:
	btrfs_free_path(path);
	fs_info->qgroup_flags |= flags;
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
		clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
		 ret >= 0)
		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
	btrfs_free_path(path);

	if (ret < 0) {
		ulist_free(fs_info->qgroup_ulist);
@@ -936,6 +937,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
	struct btrfs_key found_key;
	struct btrfs_qgroup *qgroup = NULL;
	struct btrfs_trans_handle *trans = NULL;
	struct ulist *ulist = NULL;
	int ret = 0;
	int slot;

@@ -943,8 +945,8 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
	if (fs_info->quota_root)
		goto out;

	fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
	if (!fs_info->qgroup_ulist) {
	ulist = ulist_alloc(GFP_KERNEL);
	if (!ulist) {
		ret = -ENOMEM;
		goto out;
	}
@@ -952,6 +954,22 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
	ret = btrfs_sysfs_add_qgroups(fs_info);
	if (ret < 0)
		goto out;

	/*
	 * Unlock qgroup_ioctl_lock before starting the transaction. This is to
	 * avoid lock acquisition inversion problems (reported by lockdep) between
	 * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we
	 * start a transaction.
	 * After we started the transaction lock qgroup_ioctl_lock again and
	 * check if someone else created the quota root in the meanwhile. If so,
	 * just return success and release the transaction handle.
	 *
	 * Also we don't need to worry about someone else calling
	 * btrfs_sysfs_add_qgroups() after we unlock and getting an error because
	 * that function returns 0 (success) when the sysfs entries already exist.
	 */
	mutex_unlock(&fs_info->qgroup_ioctl_lock);

	/*
	 * 1 for quota root item
	 * 1 for BTRFS_QGROUP_STATUS item
@@ -961,12 +979,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
	 * would be a lot of overkill.
	 */
	trans = btrfs_start_transaction(tree_root, 2);

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	if (fs_info->quota_root)
		goto out;

	fs_info->qgroup_ulist = ulist;
	ulist = NULL;

	/*
	 * initially create the quota tree
	 */
@@ -1124,11 +1150,14 @@ out:
	if (ret) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		if (trans)
			btrfs_end_transaction(trans);
		btrfs_sysfs_del_qgroups(fs_info);
	}
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	if (ret && trans)
		btrfs_end_transaction(trans);
	else if (trans)
		ret = btrfs_end_transaction(trans);
	ulist_free(ulist);
	return ret;
}

@@ -1141,19 +1170,29 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root)
		goto out;
	mutex_unlock(&fs_info->qgroup_ioctl_lock);

	/*
	 * 1 For the root item
	 *
	 * We should also reserve enough items for the quota tree deletion in
	 * btrfs_clean_quota_tree but this is not done.
	 *
	 * Also, we must always start a transaction without holding the mutex
	 * qgroup_ioctl_lock, see btrfs_quota_enable().
	 */
	trans = btrfs_start_transaction(fs_info->tree_root, 1);

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	if (!fs_info->quota_root)
		goto out;

	clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
	btrfs_qgroup_wait_for_completion(fs_info, false);
	spin_lock(&fs_info->qgroup_lock);
@@ -1167,13 +1206,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto end_trans;
		goto out;
	}

	ret = btrfs_del_root(trans, &quota_root->root_key);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto end_trans;
		goto out;
	}

	list_del(&quota_root->dirty_list);
@@ -1185,10 +1224,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)

	btrfs_put_root(quota_root);

end_trans:
	ret = btrfs_end_transaction(trans);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	if (ret && trans)
		btrfs_end_transaction(trans);
	else if (trans)
		ret = btrfs_end_transaction(trans);

	return ret;
}

@@ -1324,13 +1366,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	struct ulist *tmp;
	unsigned int nofs_flag;
	int ret = 0;

	/* Check the level of src and dst first */
	if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
		return -EINVAL;

	/* We hold a transaction handle open, must do a NOFS allocation. */
	nofs_flag = memalloc_nofs_save();
	tmp = ulist_alloc(GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);
	if (!tmp)
		return -ENOMEM;

@@ -1387,10 +1433,14 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
	struct btrfs_qgroup_list *list;
	struct ulist *tmp;
	bool found = false;
	unsigned int nofs_flag;
	int ret = 0;
	int ret2;

	/* We hold a transaction handle open, must do a NOFS allocation. */
	nofs_flag = memalloc_nofs_save();
	tmp = ulist_alloc(GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);
	if (!tmp)
		return -ENOMEM;

@@ -3512,6 +3562,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	int ret;
	bool can_commit = true;

	/*
	 * We don't want to run flush again and again, so if there is a running
@@ -3523,6 +3574,20 @@ static int try_flush_qgroup(struct btrfs_root *root)
		return 0;
	}

	/*
	 * If current process holds a transaction, we shouldn't flush, as we
	 * assume all space reservation happens before a transaction handle is
	 * held.
	 *
	 * But there are cases like btrfs_delayed_item_reserve_metadata() where
	 * we try to reserve space with one transction handle already held.
	 * In that case we can't commit transaction, but at least try to end it
	 * and hope the started data writes can free some space.
	 */
	if (current->journal_info &&
	    current->journal_info != BTRFS_SEND_TRANS_STUB)
		can_commit = false;

	ret = btrfs_start_delalloc_snapshot(root);
	if (ret < 0)
		goto out;
@@ -3534,7 +3599,10 @@ static int try_flush_qgroup(struct btrfs_root *root)
		goto out;
	}

	if (can_commit)
		ret = btrfs_commit_transaction(trans);
	else
		ret = btrfs_end_transaction(trans);
out:
	clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
	wake_up(&root->qgroup_flush_wait);
+8 −4
Original line number Diff line number Diff line
@@ -983,7 +983,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
			       BTRFS_MAX_EXTENT_SIZE >> 1,
			       (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
			       EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
			       EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
			       EXTENT_UPTODATE, 0, 0, NULL);
	if (ret) {
		test_err("clear_extent_bit returned %d", ret);
		goto out;
@@ -1050,7 +1051,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
			       BTRFS_MAX_EXTENT_SIZE + sectorsize,
			       BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
			       EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
			       EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
			       EXTENT_UPTODATE, 0, 0, NULL);
	if (ret) {
		test_err("clear_extent_bit returned %d", ret);
		goto out;
@@ -1082,7 +1084,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)

	/* Empty */
	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
			       EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
			       EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
			       EXTENT_UPTODATE, 0, 0, NULL);
	if (ret) {
		test_err("clear_extent_bit returned %d", ret);
		goto out;
@@ -1097,7 +1100,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
out:
	if (ret)
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
				 EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
				 EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
				 EXTENT_UPTODATE, 0, 0, NULL);
	iput(inode);
	btrfs_free_dummy_root(root);
	btrfs_free_dummy_fs_info(fs_info);
Loading