Commit a2309300 authored by Dennis Zhou, committed by David Sterba
Browse files

btrfs: calculate discard delay based on number of extents



An earlier patch keeps track of discardable_extents. These are
undiscarded extents managed by the free space cache. Here, we will use
this to dynamically calculate the discard delay interval.

There are 3 rates to consider. The first is the target convergence rate,
the rate to discard all discardable_extents over the
BTRFS_DISCARD_TARGET_MSEC time frame. This is clamped by the lower
limit, the iops limit or BTRFS_DISCARD_MIN_DELAY (1ms), and the upper
limit, BTRFS_DISCARD_MAX_DELAY (1s). We reevaluate this delay every
transaction commit.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 5dc7c10b
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -468,6 +468,8 @@ struct btrfs_discard_ctl {
	struct list_head discard_list[BTRFS_NR_DISCARD_LISTS];
	atomic_t discardable_extents;
	atomic64_t discardable_bytes;
	unsigned long delay;
	u32 iops_limit;
};

/* delayed seq elem */
+51 −4
Original line number Diff line number Diff line
@@ -15,6 +15,12 @@
#define BTRFS_DISCARD_DELAY		(120ULL * NSEC_PER_SEC)
#define BTRFS_DISCARD_UNUSED_DELAY	(10ULL * NSEC_PER_SEC)

/*
 * Target completion latency of discarding all discardable extents:
 * 6 hours, expressed in milliseconds.
 */
#define BTRFS_DISCARD_TARGET_MSEC	(6 * 60 * 60UL * MSEC_PER_SEC)
/* Lower and upper bounds, in milliseconds, for the computed discard delay */
#define BTRFS_DISCARD_MIN_DELAY_MSEC	(1UL)
#define BTRFS_DISCARD_MAX_DELAY_MSEC	(1000UL)
/* Value discard_ctl->iops_limit is initialized to in btrfs_discard_init() */
#define BTRFS_DISCARD_MAX_IOPS		(10U)

static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
					  struct btrfs_block_group *block_group)
{
@@ -235,11 +241,17 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,

	block_group = find_next_block_group(discard_ctl, now);
	if (block_group) {
		u64 delay = 0;
		unsigned long delay = discard_ctl->delay;

		if (now < block_group->discard_eligible_time)
			delay = nsecs_to_jiffies(
				block_group->discard_eligible_time - now);
		/*
		 * This timeout is to hopefully prevent immediate discarding
		 * in a recently allocated block group.
		 */
		if (now < block_group->discard_eligible_time) {
			u64 bg_timeout = block_group->discard_eligible_time - now;

			delay = max(delay, nsecs_to_jiffies(bg_timeout));
		}

		mod_delayed_work(discard_ctl->discard_workers,
				 &discard_ctl->work, delay);
@@ -342,6 +354,39 @@ bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}

/**
 * btrfs_discard_calc_delay - recalculate the base delay
 * @discard_ctl: discard control
 *
 * Recalculate the base delay from the total number of discardable_extents:
 * the delay is the interval at which all of them would be discarded over
 * BTRFS_DISCARD_TARGET_MSEC.  The result is clamped between the lower limit
 * (the larger of BTRFS_DISCARD_MIN_DELAY_MSEC and the per-operation interval
 * implied by iops_limit) and the upper limit (BTRFS_DISCARD_MAX_DELAY_MSEC),
 * converted to jiffies and stored in @discard_ctl->delay.
 *
 * If the extent counter reads zero or negative, the previously stored delay
 * is left unchanged.
 */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
{
	s32 discardable_extents;
	u32 iops_limit;
	unsigned long delay;
	unsigned long lower_limit = BTRFS_DISCARD_MIN_DELAY_MSEC;

	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
	/*
	 * The counter is signed.  Zero would divide by zero below, and a
	 * negative value would be converted to a huge unsigned long by the
	 * division, collapsing the delay to the lower limit (i.e. the most
	 * aggressive discard rate).  Neither is a meaningful input, so keep
	 * the current delay.
	 */
	if (discardable_extents <= 0)
		return;

	spin_lock(&discard_ctl->lock);

	/* An iops_limit of 0 means no iops-based lower bound is applied. */
	iops_limit = READ_ONCE(discard_ctl->iops_limit);
	if (iops_limit)
		lower_limit = max_t(unsigned long, lower_limit,
				    MSEC_PER_SEC / iops_limit);

	delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
	delay = clamp(delay, lower_limit, BTRFS_DISCARD_MAX_DELAY_MSEC);
	/* Computed in milliseconds, stored in jiffies. */
	discard_ctl->delay = msecs_to_jiffies(delay);

	spin_unlock(&discard_ctl->lock);
}

/**
 * btrfs_discard_update_discardable - propagate discard counters
 * @block_group: block_group of interest
@@ -464,6 +509,8 @@ void btrfs_discard_init(struct btrfs_fs_info *fs_info)

	atomic_set(&discard_ctl->discardable_extents, 0);
	atomic64_set(&discard_ctl->discardable_bytes, 0);
	discard_ctl->delay = BTRFS_DISCARD_MAX_DELAY_MSEC;
	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
}

void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
+1 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl);

/* Update operations */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl);
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
				      struct btrfs_free_space_ctl *ctl);

+3 −1
Original line number Diff line number Diff line
@@ -2935,8 +2935,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
		cond_resched();
	}

	if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
	if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_calc_delay(&fs_info->discard_ctl);
		btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
	}

	/*
	 * Transaction is finished.  We don't need the lock anymore.  We
+31 −0
Original line number Diff line number Diff line
@@ -366,9 +366,40 @@ static ssize_t btrfs_discardable_extents_show(struct kobject *kobj,
}
BTRFS_ATTR(discard, discardable_extents, btrfs_discardable_extents_show);

/*
 * sysfs show handler for the discard iops_limit attribute.
 *
 * Writes the current iops_limit into @buf as an unsigned decimal followed by
 * a newline and returns the value produced by snprintf().
 */
static ssize_t btrfs_discard_iops_limit_show(struct kobject *kobj,
					     struct kobj_attribute *a,
					     char *buf)
{
	struct btrfs_fs_info *info;
	unsigned int limit;

	info = discard_to_fs_info(kobj);
	/* Single snapshot; the store handler updates it with WRITE_ONCE(). */
	limit = READ_ONCE(info->discard_ctl.iops_limit);

	return snprintf(buf, PAGE_SIZE, "%u\n", limit);
}

/*
 * sysfs store handler for the discard iops_limit attribute.
 *
 * Parses @buf as a base-10 u32 and publishes it as the new iops_limit.
 * Returns @len on success, or -EINVAL if the input cannot be parsed.
 */
static ssize_t btrfs_discard_iops_limit_store(struct kobject *kobj,
					      struct kobj_attribute *a,
					      const char *buf, size_t len)
{
	struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
	u32 new_limit;

	/* Every parse failure is reported to the writer as -EINVAL. */
	if (kstrtou32(buf, 10, &new_limit))
		return -EINVAL;

	WRITE_ONCE(fs_info->discard_ctl.iops_limit, new_limit);

	/* Report the whole write as consumed. */
	return len;
}
BTRFS_ATTR_RW(discard, iops_limit, btrfs_discard_iops_limit_show,
	      btrfs_discard_iops_limit_store);

/*
 * NULL-terminated table of the "discard" attributes defined above:
 * the discardable_bytes and discardable_extents counters and the
 * read/write iops_limit tunable.
 */
static const struct attribute *discard_debug_attrs[] = {
	BTRFS_ATTR_PTR(discard, discardable_bytes),
	BTRFS_ATTR_PTR(discard, discardable_extents),
	BTRFS_ATTR_PTR(discard, iops_limit),
	NULL,
};