btrfs: migrate inc/dec_block_group_ro code (26ce2095) · Commits · 戴 / test

fs/btrfs/block-group.c

+212 −0

Original line number	Diff line number	Diff line
		@@ -1060,6 +1060,80 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
		num_items, 1);
		}

		/*
		 * Try to switch block group @cache to read-only so that no further writes
		 * land in it.
		 *
		 * A block group that is already read-only only gets its ->ro count bumped.
		 * Otherwise the group becomes read-only when the space_info's used bytes
		 * plus this group's unused bytes still fit within total_bytes.  For
		 * metadata/system space_infos an extra 1M of headroom is required on top of
		 * that unless @force is set; with @force set the extra headroom is waived.
		 *
		 * NOTE: Whether the remaining block groups can hold the data stored in this
		 * one is not checked here; that is left to the relocation code.
		 *
		 * Returns 0 on success, -ENOSPC when the space check does not pass.
		 */
		int __btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
		{
			struct btrfs_space_info *sinfo = cache->space_info;
			const u64 reserve_types = BTRFS_BLOCK_GROUP_SYSTEM |
						  BTRFS_BLOCK_GROUP_METADATA;
			u64 min_allocable_bytes = 0;
			u64 unused_bytes;
			u64 space_used;
			int ret = -ENOSPC;

			/*
			 * Metadata and system space keep some slack for chunk allocation
			 * corner cases, unless the caller forces the transition.
			 */
			if (!force && (sinfo->flags & reserve_types))
				min_allocable_bytes = SZ_1M;

			spin_lock(&sinfo->lock);
			spin_lock(&cache->lock);

			if (cache->ro) {
				/* Already read-only: just take another reference. */
				cache->ro++;
				ret = 0;
			} else {
				unused_bytes = cache->key.offset - cache->reserved -
					       cache->pinned - cache->bytes_super -
					       btrfs_block_group_used(&cache->item);
				space_used = btrfs_space_info_used(sinfo, true);

				/*
				 * space_used + unused_bytes is expected to stay within
				 * sinfo->total_bytes; the extra term enforces the
				 * headroom (when min_allocable_bytes is non-zero).
				 */
				if (space_used + unused_bytes + min_allocable_bytes <=
				    sinfo->total_bytes) {
					sinfo->bytes_readonly += unused_bytes;
					cache->ro++;
					list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
					ret = 0;
				}
			}

			spin_unlock(&cache->lock);
			spin_unlock(&sinfo->lock);

			if (ret != -ENOSPC || !btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG))
				return ret;

			/* Only reached after a failed space check, so both sizes are set. */
			btrfs_info(cache->fs_info,
				   "unable to make block group %llu ro",
				   cache->key.objectid);
			btrfs_info(cache->fs_info,
				   "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
				   space_used, unused_bytes, min_allocable_bytes);
			btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
			return ret;
		}

		/*
		* Process the unused_bgs list and remove any that don't have any allocated
		* space inside of them.
		@@ -1791,3 +1865,141 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
		set_avail_alloc_bits(fs_info, type);
		return 0;
		}

		static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
		{
		u64 num_devices;
		u64 stripped;

		/*
		* if restripe for this chunk_type is on pick target profile and
		* return, otherwise do the usual balance
		*/
		stripped = btrfs_get_restripe_target(fs_info, flags);
		if (stripped)
		return extended_to_chunk(stripped);

		num_devices = fs_info->fs_devices->rw_devices;

		stripped = BTRFS_BLOCK_GROUP_RAID0 \| BTRFS_BLOCK_GROUP_RAID56_MASK \|
		BTRFS_BLOCK_GROUP_RAID1_MASK \| BTRFS_BLOCK_GROUP_RAID10;

		if (num_devices == 1) {
		stripped \|= BTRFS_BLOCK_GROUP_DUP;
		stripped = flags & ~stripped;

		/* turn raid0 into single device chunks */
		if (flags & BTRFS_BLOCK_GROUP_RAID0)
		return stripped;

		/* turn mirroring into duplication */
		if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK \|
		BTRFS_BLOCK_GROUP_RAID10))
		return stripped \| BTRFS_BLOCK_GROUP_DUP;
		} else {
		/* they already had raid on here, just return */
		if (flags & stripped)
		return flags;

		stripped \|= BTRFS_BLOCK_GROUP_DUP;
		stripped = flags & ~stripped;

		/* switch duplicated blocks with raid1 */
		if (flags & BTRFS_BLOCK_GROUP_DUP)
		return stripped \| BTRFS_BLOCK_GROUP_RAID1;

		/* this is drive concat, leave it alone */
		}

		return flags;
		}

		/*
		 * Mark block group @cache read-only from within a joined transaction.
		 *
		 * Joins a transaction on the extent root and, under ro_block_group_mutex,
		 * tries __btrfs_inc_block_group_ro() on @cache without forcing.  If that
		 * first attempt fails, a chunk is force-allocated for the space_info's
		 * allocation profile and the attempt is repeated once.
		 *
		 * Returns 0 on success; otherwise the error from joining the transaction,
		 * from waiting for a commit, from chunk allocation, or the result of the
		 * last __btrfs_inc_block_group_ro() call.
		 */
		int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)

		{
		struct btrfs_fs_info *fs_info = cache->fs_info;
		struct btrfs_trans_handle *trans;
		u64 alloc_flags;
		int ret;

		again:
		trans = btrfs_join_transaction(fs_info->extent_root);
		if (IS_ERR(trans))
		return PTR_ERR(trans);

		/*
		* we're not allowed to set block groups readonly after the dirty
		* block groups cache has started writing. If it already started,
		* back off and let this transaction commit
		*/
		mutex_lock(&fs_info->ro_block_group_mutex);
		if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
		/* Remember the id before the handle is released below. */
		u64 transid = trans->transid;

		mutex_unlock(&fs_info->ro_block_group_mutex);
		btrfs_end_transaction(trans);

		/* Wait for that transaction to commit, then retry from the top. */
		ret = btrfs_wait_for_commit(fs_info, transid);
		if (ret)
		return ret;
		goto again;
		}

		/*
		* if we are changing raid levels, try to allocate a corresponding
		* block group with the new raid level.
		*/
		alloc_flags = update_block_group_flags(fs_info, cache->flags);
		if (alloc_flags != cache->flags) {
		ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
		/*
		* ENOSPC is allowed here, we may have enough space
		* already allocated at the new raid level to
		* carry on
		*/
		if (ret == -ENOSPC)
		ret = 0;
		if (ret < 0)
		goto out;
		}

		/* First attempt; on success skip the extra chunk allocation. */
		ret = __btrfs_inc_block_group_ro(cache, 0);
		if (!ret)
		goto out;
		/* Force-allocate a chunk for this space_info's profile and try once more. */
		alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
		ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
		if (ret < 0)
		goto out;
		ret = __btrfs_inc_block_group_ro(cache, 0);
		out:
		/*
		* Reached on success and on failure alike: for a SYSTEM block group,
		* call check_system_chunk() with chunk_mutex held.
		*/
		if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
		alloc_flags = update_block_group_flags(fs_info, cache->flags);
		mutex_lock(&fs_info->chunk_mutex);
		check_system_chunk(trans, alloc_flags);
		mutex_unlock(&fs_info->chunk_mutex);
		}
		mutex_unlock(&fs_info->ro_block_group_mutex);

		btrfs_end_transaction(trans);
		return ret;
		}

		/*
		 * Drop one read-only reference on block group @cache.
		 *
		 * When the ->ro count reaches zero, the group's unused bytes are removed
		 * from the space_info's bytes_readonly and the group is unlinked from its
		 * ro_list.  Calling this on a group whose ->ro count is already zero
		 * trips BUG_ON().
		 */
		void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
		{
			struct btrfs_space_info *space = cache->space_info;

			BUG_ON(!cache->ro);

			spin_lock(&space->lock);
			spin_lock(&cache->lock);

			cache->ro--;
			if (cache->ro == 0) {
				u64 unused = cache->key.offset - cache->reserved -
					     cache->pinned - cache->bytes_super -
					     btrfs_block_group_used(&cache->item);

				space->bytes_readonly -= unused;
				list_del_init(&cache->ro_list);
			}

			spin_unlock(&cache->lock);
			spin_unlock(&space->lock);
		}

fs/btrfs/block-group.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -187,6 +187,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info);
		int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
		u64 type, u64 chunk_offset, u64 size);
		void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
		int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
		void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);

		static inline int btrfs_block_group_cache_done(
		struct btrfs_block_group_cache *cache)

fs/btrfs/ctree.h

+0 −2

Original line number	Diff line number	Diff line
		@@ -2590,8 +2590,6 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
		bool qgroup_free);

		int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
		int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
		void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
		void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
		u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
		int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,

fs/btrfs/extent-tree.c

+0 −212

Original line number	Diff line number	Diff line
		@@ -6529,198 +6529,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
		return ret;
		}

		static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
		{
		u64 num_devices;
		u64 stripped;

		/*
		* if restripe for this chunk_type is on pick target profile and
		* return, otherwise do the usual balance
		*/
		stripped = btrfs_get_restripe_target(fs_info, flags);
		if (stripped)
		return extended_to_chunk(stripped);

		num_devices = fs_info->fs_devices->rw_devices;

		stripped = BTRFS_BLOCK_GROUP_RAID0 \| BTRFS_BLOCK_GROUP_RAID56_MASK \|
		BTRFS_BLOCK_GROUP_RAID1_MASK \| BTRFS_BLOCK_GROUP_RAID10;

		if (num_devices == 1) {
		stripped \|= BTRFS_BLOCK_GROUP_DUP;
		stripped = flags & ~stripped;

		/* turn raid0 into single device chunks */
		if (flags & BTRFS_BLOCK_GROUP_RAID0)
		return stripped;

		/* turn mirroring into duplication */
		if (flags & (BTRFS_BLOCK_GROUP_RAID1_MASK \|
		BTRFS_BLOCK_GROUP_RAID10))
		return stripped \| BTRFS_BLOCK_GROUP_DUP;
		} else {
		/* they already had raid on here, just return */
		if (flags & stripped)
		return flags;

		stripped \|= BTRFS_BLOCK_GROUP_DUP;
		stripped = flags & ~stripped;

		/* switch duplicated blocks with raid1 */
		if (flags & BTRFS_BLOCK_GROUP_DUP)
		return stripped \| BTRFS_BLOCK_GROUP_RAID1;

		/* this is drive concat, leave it alone */
		}

		return flags;
		}

		/*
		 * Try to switch block group @cache to read-only so that no further writes
		 * land in it.
		 *
		 * A block group that is already read-only only gets its ->ro count bumped.
		 * Otherwise the group becomes read-only when the space_info's used bytes
		 * plus this group's unused bytes still fit within total_bytes.  For
		 * metadata/system space_infos an extra 1M of headroom is required on top of
		 * that unless @force is set; with @force set the extra headroom is waived.
		 *
		 * NOTE: Whether the remaining block groups can hold the data stored in this
		 * one is not checked here; that is left to the relocation code.
		 *
		 * Returns 0 on success, -ENOSPC when the space check does not pass.
		 */
		int __btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
		{
			struct btrfs_space_info *sinfo = cache->space_info;
			const u64 reserve_types = BTRFS_BLOCK_GROUP_SYSTEM |
						  BTRFS_BLOCK_GROUP_METADATA;
			u64 min_allocable_bytes = 0;
			u64 unused_bytes;
			u64 space_used;
			int ret = -ENOSPC;

			/*
			 * Metadata and system space keep some slack for chunk allocation
			 * corner cases, unless the caller forces the transition.
			 */
			if (!force && (sinfo->flags & reserve_types))
				min_allocable_bytes = SZ_1M;

			spin_lock(&sinfo->lock);
			spin_lock(&cache->lock);

			if (cache->ro) {
				/* Already read-only: just take another reference. */
				cache->ro++;
				ret = 0;
			} else {
				unused_bytes = cache->key.offset - cache->reserved -
					       cache->pinned - cache->bytes_super -
					       btrfs_block_group_used(&cache->item);
				space_used = btrfs_space_info_used(sinfo, true);

				/*
				 * space_used + unused_bytes is expected to stay within
				 * sinfo->total_bytes; the extra term enforces the
				 * headroom (when min_allocable_bytes is non-zero).
				 */
				if (space_used + unused_bytes + min_allocable_bytes <=
				    sinfo->total_bytes) {
					sinfo->bytes_readonly += unused_bytes;
					cache->ro++;
					list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
					ret = 0;
				}
			}

			spin_unlock(&cache->lock);
			spin_unlock(&sinfo->lock);

			if (ret != -ENOSPC || !btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG))
				return ret;

			/* Only reached after a failed space check, so both sizes are set. */
			btrfs_info(cache->fs_info,
				   "unable to make block group %llu ro",
				   cache->key.objectid);
			btrfs_info(cache->fs_info,
				   "sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
				   space_used, unused_bytes, min_allocable_bytes);
			btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
			return ret;
		}

		/*
		 * Mark block group @cache read-only from within a joined transaction.
		 *
		 * Joins a transaction on the extent root and, under ro_block_group_mutex,
		 * tries __btrfs_inc_block_group_ro() on @cache without forcing.  If that
		 * first attempt fails, a chunk is force-allocated for the space_info's
		 * allocation profile and the attempt is repeated once.
		 *
		 * Returns 0 on success; otherwise the error from joining the transaction,
		 * from waiting for a commit, from chunk allocation, or the result of the
		 * last __btrfs_inc_block_group_ro() call.
		 */
		int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)

		{
		struct btrfs_fs_info *fs_info = cache->fs_info;
		struct btrfs_trans_handle *trans;
		u64 alloc_flags;
		int ret;

		again:
		trans = btrfs_join_transaction(fs_info->extent_root);
		if (IS_ERR(trans))
		return PTR_ERR(trans);

		/*
		* we're not allowed to set block groups readonly after the dirty
		* block groups cache has started writing. If it already started,
		* back off and let this transaction commit
		*/
		mutex_lock(&fs_info->ro_block_group_mutex);
		if (test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans->transaction->flags)) {
		/* Remember the id before the handle is released below. */
		u64 transid = trans->transid;

		mutex_unlock(&fs_info->ro_block_group_mutex);
		btrfs_end_transaction(trans);

		/* Wait for that transaction to commit, then retry from the top. */
		ret = btrfs_wait_for_commit(fs_info, transid);
		if (ret)
		return ret;
		goto again;
		}

		/*
		* if we are changing raid levels, try to allocate a corresponding
		* block group with the new raid level.
		*/
		alloc_flags = update_block_group_flags(fs_info, cache->flags);
		if (alloc_flags != cache->flags) {
		ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
		/*
		* ENOSPC is allowed here, we may have enough space
		* already allocated at the new raid level to
		* carry on
		*/
		if (ret == -ENOSPC)
		ret = 0;
		if (ret < 0)
		goto out;
		}

		/* First attempt; on success skip the extra chunk allocation. */
		ret = __btrfs_inc_block_group_ro(cache, 0);
		if (!ret)
		goto out;
		/* Force-allocate a chunk for this space_info's profile and try once more. */
		alloc_flags = get_alloc_profile(fs_info, cache->space_info->flags);
		ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
		if (ret < 0)
		goto out;
		ret = __btrfs_inc_block_group_ro(cache, 0);
		out:
		/*
		* Reached on success and on failure alike: for a SYSTEM block group,
		* call check_system_chunk() with chunk_mutex held.
		*/
		if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
		alloc_flags = update_block_group_flags(fs_info, cache->flags);
		mutex_lock(&fs_info->chunk_mutex);
		check_system_chunk(trans, alloc_flags);
		mutex_unlock(&fs_info->chunk_mutex);
		}
		mutex_unlock(&fs_info->ro_block_group_mutex);

		btrfs_end_transaction(trans);
		return ret;
		}

		int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
		{
		u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
		@@ -6763,26 +6571,6 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
		return free_bytes;
		}

		/*
		 * Drop one read-only reference on block group @cache.
		 *
		 * When the ->ro count reaches zero, the group's unused bytes are removed
		 * from the space_info's bytes_readonly and the group is unlinked from its
		 * ro_list.  Calling this on a group whose ->ro count is already zero
		 * trips BUG_ON().
		 */
		void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
		{
			struct btrfs_space_info *space = cache->space_info;

			BUG_ON(!cache->ro);

			spin_lock(&space->lock);
			spin_lock(&cache->lock);

			cache->ro--;
			if (cache->ro == 0) {
				u64 unused = cache->key.offset - cache->reserved -
					     cache->pinned - cache->bytes_super -
					     btrfs_block_group_used(&cache->item);

				space->bytes_readonly -= unused;
				list_del_init(&cache->ro_list);
			}

			spin_unlock(&cache->lock);
			spin_unlock(&space->lock);
		}

		void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
		{
		struct btrfs_block_group_cache *block_group;

Admin message