Commit 8e710227 authored by Coly Li's avatar Coly Li Committed by Jens Axboe
Browse files

bcache: make bch_btree_check() to be multithreaded



When registering a cache device, bch_btree_check() is called to check
all btree nodes, to make sure the btree is consistent and not
corrupted.

bch_btree_check() is recursively executed in a single thread, when there
are a lot of data cached and the btree is huge, it may take very long
time to check all the btree nodes. In my testing, I observed it took
around 50 minutes to finish bch_btree_check().

When checking the bcache btree nodes, the cache set is not running yet,
and indeed the whole tree is in read-only state, it is safe to create
multiple threads to check the btree in parallel.

This patch tries to create multiple threads, and each thread tries to
one-by-one check the sub-tree indexed by a key from the btree root node.
The parallel thread number depends on how many keys in the btree root
node. At most BCH_BTR_CHKTHREAD_MAX (64) threads can be created, but in
practice is should be min(cpu-number/2, root-node-keys-number).

Signed-off-by: default avatarColy Li <colyli@suse.de>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent feac1a70
Loading
Loading
Loading
Loading
+166 −3
Original line number Diff line number Diff line
@@ -1897,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
	return ret;
}

int bch_btree_check(struct cache_set *c)

static int bch_btree_check_thread(void *arg)
{
	int ret;
	struct btree_check_info *info = arg;
	struct btree_check_state *check_state = info->state;
	struct cache_set *c = check_state->c;
	struct btree_iter iter;
	struct bkey *k, *p;
	int cur_idx, prev_idx, skip_nr;
	int i, n;

	k = p = NULL;
	i = n = 0;
	cur_idx = prev_idx = 0;
	ret = 0;

	/* root node keys are checked before thread created */
	bch_btree_iter_init(&c->root->keys, &iter, NULL);
	k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
	BUG_ON(!k);

	p = k;
	while (k) {
		/*
		 * Fetch a root node key index, skip the keys which
		 * should be fetched by other threads, then check the
		 * sub-tree indexed by the fetched key.
		 */
		spin_lock(&check_state->idx_lock);
		cur_idx = check_state->key_idx;
		check_state->key_idx++;
		spin_unlock(&check_state->idx_lock);

		skip_nr = cur_idx - prev_idx;

		while (skip_nr) {
			k = bch_btree_iter_next_filter(&iter,
						       &c->root->keys,
						       bch_ptr_bad);
			if (k)
				p = k;
			else {
				/*
				 * No more keys to check in root node,
				 * current checking threads are enough,
				 * stop creating more.
				 */
				atomic_set(&check_state->enough, 1);
				/* Update check_state->enough earlier */
				smp_mb();
				goto out;
			}
			skip_nr--;
			cond_resched();
		}

		if (p) {
			struct btree_op op;

	bch_btree_op_init(&op, SHRT_MAX);
			btree_node_prefetch(c->root, p);
			c->gc_stats.nodes++;
			bch_btree_op_init(&op, 0);
			ret = bcache_btree(check_recurse, p, c->root, &op);
			if (ret)
				goto out;
		}
		p = NULL;
		prev_idx = cur_idx;
		cond_resched();
	}

out:
	info->result = ret;
	/* update check_state->started among all CPUs */
	smp_mb();
	if (atomic_dec_and_test(&check_state->started))
		wake_up(&check_state->wait);

	return ret;
}



	return bcache_btree_root(check_recurse, c, &op);
static int bch_btree_chkthread_nr(void)
{
	int n = num_online_cpus()/2;

	if (n == 0)
		n = 1;
	else if (n > BCH_BTR_CHKTHREAD_MAX)
		n = BCH_BTR_CHKTHREAD_MAX;

	return n;
}

int bch_btree_check(struct cache_set *c)
{
	int ret = 0;
	int i;
	struct bkey *k = NULL;
	struct btree_iter iter;
	struct btree_check_state *check_state;
	char name[32];

	/* check and mark root node keys */
	for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
		bch_initial_mark_key(c, c->root->level, k);

	bch_initial_mark_key(c, c->root->level + 1, &c->root->key);

	if (c->root->level == 0)
		return 0;

	check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
	if (!check_state)
		return -ENOMEM;

	check_state->c = c;
	check_state->total_threads = bch_btree_chkthread_nr();
	check_state->key_idx = 0;
	spin_lock_init(&check_state->idx_lock);
	atomic_set(&check_state->started, 0);
	atomic_set(&check_state->enough, 0);
	init_waitqueue_head(&check_state->wait);

	/*
	 * Run multiple threads to check btree nodes in parallel,
	 * if check_state->enough is non-zero, it means current
	 * running check threads are enough, unncessary to create
	 * more.
	 */
	for (i = 0; i < check_state->total_threads; i++) {
		/* fetch latest check_state->enough earlier */
		smp_mb();
		if (atomic_read(&check_state->enough))
			break;

		check_state->infos[i].result = 0;
		check_state->infos[i].state = check_state;
		snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
		atomic_inc(&check_state->started);

		check_state->infos[i].thread =
			kthread_run(bch_btree_check_thread,
				    &check_state->infos[i],
				    name);
		if (IS_ERR(check_state->infos[i].thread)) {
			pr_err("fails to run thread bch_btrchk[%d]", i);
			for (--i; i >= 0; i--)
				kthread_stop(check_state->infos[i].thread);
			ret = -ENOMEM;
			goto out;
		}
	}

	wait_event_interruptible(check_state->wait,
				 atomic_read(&check_state->started) == 0 ||
				  test_bit(CACHE_SET_IO_DISABLE, &c->flags));

	for (i = 0; i < check_state->total_threads; i++) {
		if (check_state->infos[i].result) {
			ret = check_state->infos[i].result;
			goto out;
		}
	}

out:
	kfree(check_state);
	return ret;
}

void bch_initial_gc_finish(struct cache_set *c)
+22 −0
Original line number Diff line number Diff line
@@ -145,6 +145,9 @@ struct btree {
	struct bio		*bio;
};




#define BTREE_FLAG(flag)						\
static inline bool btree_node_ ## flag(struct btree *b)			\
{	return test_bit(BTREE_NODE_ ## flag, &b->flags); }		\
@@ -216,6 +219,25 @@ struct btree_op {
	unsigned int		insert_collision:1;
};

struct btree_check_state;
struct btree_check_info {
	struct btree_check_state	*state;
	struct task_struct		*thread;
	int				result;
};

#define BCH_BTR_CHKTHREAD_MAX	64
struct btree_check_state {
	struct cache_set		*c;
	int				total_threads;
	int				key_idx;
	spinlock_t			idx_lock;
	atomic_t			started;
	atomic_t			enough;
	wait_queue_head_t		wait;
	struct btree_check_info		infos[BCH_BTR_CHKTHREAD_MAX];
};

static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
{
	memset(op, 0, sizeof(struct btree_op));