btrfs: add the beginning of async discard, discard workqueue (b0643e59) · Commits · 戴 / test

fs/btrfs/Makefile

+1 −1

Original line number	Diff line number	Diff line
		@@ -11,7 +11,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
		compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
		reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
		uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
		block-rsv.o delalloc-space.o block-group.o
		block-rsv.o delalloc-space.o block-group.o discard.o

		btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
		btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o

fs/btrfs/block-group.c

+34 −3

Original line number	Diff line number	Diff line
		@@ -14,6 +14,7 @@
		#include "sysfs.h"
		#include "tree-log.h"
		#include "delalloc-space.h"
		#include "discard.h"

		/*
		* Return target flags in extended format or 0 if restripe for this chunk_type
		@@ -131,6 +132,15 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);

		/*
		* A block_group shouldn't be on the discard_list anymore.
		* Remove the block_group from the discard_list to prevent us
		* from causing a panic due to NULL pointer dereference.
		*/
		if (WARN_ON(!list_empty(&cache->discard_list)))
		btrfs_discard_cancel_work(&cache->fs_info->discard_ctl,
		cache);

		/*
		* If not empty, someone is still holding mutex of
		* full_stripe_lock, which can only be released by caller.
		@@ -466,8 +476,8 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
		} else if (extent_start > start && extent_start < end) {
		size = extent_start - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start,
		size);
		ret = btrfs_add_free_space_async_trimmed(block_group,
		start, size);
		BUG_ON(ret); /* -ENOMEM or logic error */
		start = extent_end + 1;
		} else {
		@@ -478,7 +488,8 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
		if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		ret = btrfs_add_free_space_async_trimmed(block_group, start,
		size);
		BUG_ON(ret); /* -ENOMEM or logic error */
		}

		@@ -1258,6 +1269,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		}
		spin_unlock(&fs_info->unused_bgs_lock);

		btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);

		mutex_lock(&fs_info->delete_unused_bgs_mutex);

		/* Don't want to race with allocators so take the groups_sem */
		@@ -1333,6 +1346,23 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
		}
		mutex_unlock(&fs_info->unused_bg_unpin_mutex);

		/*
		* At this point, the block_group is read only and should fail
		* new allocations. However, btrfs_finish_extent_commit() can
		* cause this block_group to be placed back on the discard
		* lists because now the block_group isn't fully discarded.
		* Bail here and try again later after discarding everything.
		*/
		spin_lock(&fs_info->discard_ctl.lock);
		if (!list_empty(&block_group->discard_list)) {
		spin_unlock(&fs_info->discard_ctl.lock);
		btrfs_dec_block_group_ro(block_group);
		btrfs_discard_queue_work(&fs_info->discard_ctl,
		block_group);
		goto end_trans;
		}
		spin_unlock(&fs_info->discard_ctl.lock);

		/* Reset pinned so btrfs_put_block_group doesn't complain */
		spin_lock(&space_info->lock);
		spin_lock(&block_group->lock);
		@@ -1603,6 +1633,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
		INIT_LIST_HEAD(&cache->cluster_list);
		INIT_LIST_HEAD(&cache->bg_list);
		INIT_LIST_HEAD(&cache->ro_list);
		INIT_LIST_HEAD(&cache->discard_list);
		INIT_LIST_HEAD(&cache->dirty_list);
		INIT_LIST_HEAD(&cache->io_list);
		btrfs_init_free_space_ctl(cache);

fs/btrfs/block-group.h

+9 −0

Original line number	Diff line number	Diff line
		@@ -116,7 +116,11 @@ struct btrfs_block_group {
		/* For read-only block groups */
		struct list_head ro_list;

		/* For discard operations */
		atomic_t trimming;
		struct list_head discard_list;
		int discard_index;
		u64 discard_eligible_time;

		/* For dirty block groups */
		struct list_head dirty_list;
		@@ -158,6 +162,11 @@ struct btrfs_block_group {
		struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
		};

		/*
		 * Return the offset just past the end of @block_group, computed as its
		 * start plus its length.
		 */
		static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
		{
			const u64 end = block_group->start + block_group->length;

			return end;
		}

		#ifdef CONFIG_BTRFS_DEBUG
		static inline int btrfs_should_fragment_free_space(
		struct btrfs_block_group *block_group)

fs/btrfs/ctree.h

+21 −0

Original line number	Diff line number	Diff line
		@@ -440,6 +440,21 @@ struct btrfs_full_stripe_locks_tree {
		struct mutex lock;
		};

		/* Discard control. */
		/*
		* Async discard uses multiple lists to differentiate the discard filter
		* parameters.
		*
		* Only a single list is defined at this point; a block group's
		* discard_index selects the list it is placed on.
		*/
		#define BTRFS_NR_DISCARD_LISTS 1

		/*
		* Per-filesystem state for async discard (embedded in struct btrfs_fs_info
		* as discard_ctl).
		*/
		struct btrfs_discard_ctl {
		/* Workqueue the delayed work item below is queued on. */
		struct workqueue_struct *discard_workers;
		/* Delayed work item; its handler is btrfs_discard_workfn(). */
		struct delayed_work work;
		/* Held in discard.c while touching block_group and discard_list[]. */
		spinlock_t lock;
		/*
		* Block group picked by the most recent peek of the discard lists, or
		* NULL if none was eligible; reset to NULL when that block group is
		* removed from the lists.
		*/
		struct btrfs_block_group *block_group;
		/* Block groups queued for discard, linked via their discard_list member. */
		struct list_head discard_list[BTRFS_NR_DISCARD_LISTS];
		};

		/* delayed seq elem */
		struct seq_list {
		struct list_head list;
		@@ -526,6 +541,9 @@ enum {
		* so we don't need to offload checksums to workqueues.
		*/
		BTRFS_FS_CSUM_IMPL_FAST,

		/* Indicate that the discard workqueue can service discards. */
		BTRFS_FS_DISCARD_RUNNING,
		};

		struct btrfs_fs_info {
		@@ -816,6 +834,8 @@ struct btrfs_fs_info {
		struct btrfs_workqueue *scrub_wr_completion_workers;
		struct btrfs_workqueue *scrub_parity_workers;

		struct btrfs_discard_ctl discard_ctl;

		#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
		u32 check_integrity_print_mask;
		#endif
		@@ -1189,6 +1209,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
		#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
		#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
		#define BTRFS_MOUNT_REF_VERIFY (1 << 28)
		#define BTRFS_MOUNT_DISCARD_ASYNC (1 << 29)

		#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
		#define BTRFS_DEFAULT_MAX_INLINE (2048)

fs/btrfs/discard.c

0 → 100644

+273 −0

Original line number	Diff line number	Diff line
		// SPDX-License-Identifier: GPL-2.0

		#include <linux/jiffies.h>
		#include <linux/kernel.h>
		#include <linux/ktime.h>
		#include <linux/list.h>
		#include <linux/sizes.h>
		#include <linux/workqueue.h>
		#include "ctree.h"
		#include "block-group.h"
		#include "discard.h"
		#include "free-space-cache.h"

		/*
		* This is an initial delay to give some chance for block reuse.
		*
		* Expressed in nanoseconds (120 seconds); it is added to ktime_get_ns()
		* to form a block group's discard_eligible_time when the block group is
		* first put on a discard list.
		*/
		#define BTRFS_DISCARD_DELAY (120ULL * NSEC_PER_SEC)

		static struct list_head get_discard_list(struct btrfs_discard_ctl discard_ctl,
		struct btrfs_block_group *block_group)
		{
		return &discard_ctl->discard_list[block_group->discard_index];
		}

		/*
		 * Put @block_group at the tail of its discard list.
		 *
		 * Nothing is done when btrfs_run_discard_work() says discard work should
		 * not run. A block group that is not on any list yet gets its
		 * discard_eligible_time set to the current time plus BTRFS_DISCARD_DELAY;
		 * one that is already queued keeps its time and is only moved to the tail.
		 * The whole operation runs under discard_ctl->lock.
		 */
		static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
						struct btrfs_block_group *block_group)
		{
			spin_lock(&discard_ctl->lock);

			if (btrfs_run_discard_work(discard_ctl)) {
				struct list_head *entry = &block_group->discard_list;

				/* First time on a list: start the reuse grace period. */
				if (list_empty(entry))
					block_group->discard_eligible_time =
						ktime_get_ns() + BTRFS_DISCARD_DELAY;

				list_move_tail(entry,
					       get_discard_list(discard_ctl, block_group));
			}

			spin_unlock(&discard_ctl->lock);
		}

		/*
		 * Take @block_group off the discard lists and zero its
		 * discard_eligible_time, all under discard_ctl->lock.
		 *
		 * Returns true when @block_group was the one recorded in
		 * discard_ctl->block_group (that field is then cleared), false otherwise.
		 */
		static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
						     struct btrfs_block_group *block_group)
		{
			bool was_current;

			spin_lock(&discard_ctl->lock);

			was_current = (discard_ctl->block_group == block_group);
			if (was_current)
				discard_ctl->block_group = NULL;

			list_del_init(&block_group->discard_list);
			block_group->discard_eligible_time = 0;

			spin_unlock(&discard_ctl->lock);

			return was_current;
		}

		/**
		 * find_next_block_group - find block_group that's up next for discarding
		 * @discard_ctl: discard control
		 * @now: current time, in the same units as discard_eligible_time
		 *
		 * Iterate over the discard lists, looking only at the first block_group of
		 * each non-empty list. The scan stops as soon as the current candidate has
		 * a discard_eligible_time earlier than @now; otherwise the candidate with
		 * the smallest discard_eligible_time seen so far is kept.
		 *
		 * Both callers in this file hold discard_ctl->lock around this call.
		 *
		 * Return: the selected block_group, or NULL if every list is empty.
		 */
		static struct btrfs_block_group *find_next_block_group(
							struct btrfs_discard_ctl *discard_ctl,
							u64 now)
		{
			struct btrfs_block_group *ret_block_group = NULL, *block_group;
			int i;

			for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
				struct list_head *discard_list = &discard_ctl->discard_list[i];

				if (!list_empty(discard_list)) {
					block_group = list_first_entry(discard_list,
								       struct btrfs_block_group,
								       discard_list);

					if (!ret_block_group)
						ret_block_group = block_group;

					/* Candidate is already eligible; no need to look further. */
					if (ret_block_group->discard_eligible_time < now)
						break;

					/* Prefer whichever head becomes eligible sooner. */
					if (ret_block_group->discard_eligible_time >
					    block_group->discard_eligible_time)
						ret_block_group = block_group;
				}
			}

			return ret_block_group;
		}

		/**
		 * peek_discard_list - pick the block_group to discard now, if any
		 * @discard_ctl: discard control
		 *
		 * Calls find_next_block_group() under discard_ctl->lock. A result whose
		 * discard_eligible_time is still in the future is treated as "nothing to
		 * do". The outcome (possibly NULL) is recorded in discard_ctl->block_group
		 * and returned.
		 */
		static struct btrfs_block_group *peek_discard_list(
							struct btrfs_discard_ctl *discard_ctl)
		{
			struct btrfs_block_group *next;
			const u64 now = ktime_get_ns();

			spin_lock(&discard_ctl->lock);

			next = find_next_block_group(discard_ctl, now);

			/* Not eligible yet: report nothing. */
			if (next && next->discard_eligible_time > now)
				next = NULL;

			discard_ctl->block_group = next;

			spin_unlock(&discard_ctl->lock);

			return next;
		}

		/**
		 * btrfs_discard_cancel_work - remove a block_group from the discard lists
		 * @discard_ctl: discard control
		 * @block_group: block_group of interest
		 *
		 * Removes @block_group from the discard lists. If it was the block_group
		 * recorded as in use by the discard work, the delayed work is cancelled
		 * synchronously and then scheduled again with the override flag set.
		 */
		void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
					       struct btrfs_block_group *block_group)
		{
			/* Not the block_group the worker picked: nothing more to do. */
			if (!remove_from_discard_list(discard_ctl, block_group))
				return;

			cancel_delayed_work_sync(&discard_ctl->work);
			btrfs_discard_schedule_work(discard_ctl, true);
		}

		/**
		* btrfs_discard_queue_work - handles queuing the block_groups
		* @discard_ctl: discard control
		* @block_group: block_group of interest
		*
		* This maintains the LRU order of the discard lists.
		*/
		void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
		struct btrfs_block_group *block_group)
		{
		if (!block_group \|\| !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
		return;

		add_to_discard_list(discard_ctl, block_group);

		if (!delayed_work_pending(&discard_ctl->work))
		btrfs_discard_schedule_work(discard_ctl, false);
		}

		/**
		 * btrfs_discard_schedule_work - responsible for scheduling the discard work
		 * @discard_ctl: discard control
		 * @override: reprogram the timer even if the work is already pending
		 *
		 * Discards are issued by a delayed workqueue item. Under discard_ctl->lock
		 * this picks the next block_group via find_next_block_group() and
		 * (re)arms the delayed work so that it fires once that block_group's
		 * discard_eligible_time has been reached, or immediately if it already has.
		 *
		 * Nothing is scheduled when btrfs_run_discard_work() returns false, when
		 * the work is already pending and @override is false, or when no
		 * block_group is queued.
		 */
		void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
						 bool override)
		{
			struct btrfs_block_group *next;
			const u64 now = ktime_get_ns();

			spin_lock(&discard_ctl->lock);

			if (btrfs_run_discard_work(discard_ctl) &&
			    (override || !delayed_work_pending(&discard_ctl->work))) {
				next = find_next_block_group(discard_ctl, now);
				if (next) {
					u64 delay = 0;

					/* Wait out whatever remains of the eligibility delay. */
					if (next->discard_eligible_time > now)
						delay = nsecs_to_jiffies(
							next->discard_eligible_time - now);

					mod_delayed_work(discard_ctl->discard_workers,
							 &discard_ctl->work, delay);
				}
			}

			spin_unlock(&discard_ctl->lock);
		}

		/**
		 * btrfs_discard_workfn - discard work function
		 * @work: work
		 *
		 * This finds the next block_group to start discarding and then discards it.
		 *
		 * Returns without doing anything when no block_group is currently eligible
		 * or when btrfs_run_discard_work() says discard work should not run.
		 * Otherwise the whole range of the block_group, from its start to
		 * btrfs_block_group_end(), is passed to btrfs_trim_block_group(); the
		 * block_group is then removed from the discard lists and the work is
		 * rescheduled for the next candidate.
		 */
		static void btrfs_discard_workfn(struct work_struct *work)
		{
			struct btrfs_discard_ctl *discard_ctl;
			struct btrfs_block_group *block_group;
			u64 trimmed = 0;

			discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

			block_group = peek_discard_list(discard_ctl);
			if (!block_group || !btrfs_run_discard_work(discard_ctl))
				return;

			/*
			 * NOTE(review): the return value and the trimmed byte count are not
			 * examined here; the meaning of the trailing 0 argument is defined by
			 * btrfs_trim_block_group(), which is not visible in this file.
			 */
			btrfs_trim_block_group(block_group, &trimmed, block_group->start,
					       btrfs_block_group_end(block_group), 0);

			remove_from_discard_list(discard_ctl, block_group);
			btrfs_discard_schedule_work(discard_ctl, false);
		}

		/**
		* btrfs_run_discard_work - determines if async discard should be running
		* @discard_ctl: discard control
		*
		* Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
		*/
		bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
		{
		struct btrfs_fs_info *fs_info = container_of(discard_ctl,
		struct btrfs_fs_info,
		discard_ctl);

		return (!(fs_info->sb->s_flags & SB_RDONLY) &&
		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
		}

		/*
		 * Enable async discard if the DISCARD_ASYNC mount option is set by setting
		 * BTRFS_FS_DISCARD_RUNNING; otherwise run btrfs_discard_cleanup() so that
		 * the flag is cleared and any pending discard work is cancelled.
		 */
		void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
		{
			if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
				set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
			else
				btrfs_discard_cleanup(fs_info);
		}

		/*
		* Clear BTRFS_FS_DISCARD_RUNNING in @fs_info's flags. After this,
		* btrfs_run_discard_work() returns false, so no new block groups are
		* queued and no new discard work is scheduled. Work that is already
		* pending is not cancelled here.
		*/
		void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
		{
		clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
		}

		/*
		 * Initialise the discard control embedded in @fs_info: its spinlock, the
		 * delayed work item (handler btrfs_discard_workfn()) and every discard
		 * list head. The discard_workers and block_group members are not touched
		 * here.
		 */
		void btrfs_discard_init(struct btrfs_fs_info *fs_info)
		{
			struct btrfs_discard_ctl *ctl = &fs_info->discard_ctl;
			int idx = 0;

			spin_lock_init(&ctl->lock);
			INIT_DELAYED_WORK(&ctl->work, btrfs_discard_workfn);

			while (idx < BTRFS_NR_DISCARD_LISTS) {
				INIT_LIST_HEAD(&ctl->discard_list[idx]);
				idx++;
			}
		}

		void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
		{
		btrfs_discard_stop(fs_info);
		cancel_delayed_work_sync(&fs_info->discard_ctl.work);
		}

Admin message