Commit 493720a4 authored by Chao Yu's avatar Chao Yu Committed by Jaegeuk Kim
Browse files

f2fs: fix to avoid REQ_TIME and CP_TIME collision



Lei Li reported a issue: if foreground operations are frequent, background
checkpoint may be always skipped due to below check, result in losing more
data after sudden power-cut.

f2fs_balance_fs_bg()
...
	if (!is_idle(sbi, REQ_TIME) &&
		(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
		return;

E.g:
cp_interval = 5 second
idle_interval = 2 second
foreground operation interval = 1 second (append 1 byte per second into file)

In such case, no matter when it calls f2fs_balance_fs_bg(), is_idle(, REQ_TIME)
returns false, result in skipping background checkpoint.

This patch changes as below to make trigger condition being more reasonable:
- trigger sync_fs() if dirty_{nats,nodes} and prefree segs exceeds threshold;
- skip triggering sync_fs() if there is any background inflight IO or there is
foreground operation recently and meanwhile cp_rwsem is being held by someone;

Reported-by: default avatarLei Li <noctis.akm@gmail.com>
Signed-off-by: default avatarChao Yu <yuchao0@huawei.com>
Signed-off-by: default avatarJaegeuk Kim <jaegeuk@kernel.org>
parent 8769918b
Loading
Loading
Loading
Loading
+13 −6
Original line number Diff line number Diff line
@@ -2400,24 +2400,31 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
	return entry;
}

static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type)
{
	if (sbi->gc_mode == GC_URGENT_HIGH)
		return true;

	if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
		get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
		get_pages(sbi, F2FS_WB_CP_DATA) ||
		get_pages(sbi, F2FS_DIO_READ) ||
		get_pages(sbi, F2FS_DIO_WRITE))
		return false;
		return true;

	if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info &&
			atomic_read(&SM_I(sbi)->dcc_info->queued_discard))
		return false;
		return true;

	if (SM_I(sbi) && SM_I(sbi)->fcc_info &&
			atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
		return true;
	return false;
}

static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
{
	if (sbi->gc_mode == GC_URGENT_HIGH)
		return true;

	if (is_inflight_io(sbi, type))
		return false;

	if (sbi->gc_mode == GC_URGENT_LOW &&
+27 −20
Original line number Diff line number Diff line
@@ -529,17 +529,25 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
	else
		f2fs_build_free_nids(sbi, false, false);

	if (!is_idle(sbi, REQ_TIME) &&
		(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
	if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) ||
		excess_prefree_segs(sbi))
		goto do_sync;

	/* there is background inflight IO or foreground operation recently */
	if (is_inflight_io(sbi, REQ_TIME) ||
		(!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem)))
		return;

	/* exceed periodical checkpoint timeout threshold */
	if (f2fs_time_over(sbi, CP_TIME))
		goto do_sync;

	/* checkpoint is the only way to shrink partial cached entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
			!f2fs_available_free_memory(sbi, INO_ENTRIES) ||
			excess_prefree_segs(sbi) ||
			excess_dirty_nats(sbi) ||
			excess_dirty_nodes(sbi) ||
			f2fs_time_over(sbi, CP_TIME)) {
	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
		f2fs_available_free_memory(sbi, INO_ENTRIES))
		return;

do_sync:
	if (test_opt(sbi, DATA_FLUSH) && from_bg) {
		struct blk_plug plug;

@@ -554,7 +562,6 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
	f2fs_sync_fs(sbi->sb, true);
	stat_inc_bg_cp_count(sbi->stat_info);
}
}

static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)