Commit 67539e85 authored by Kent Overstreet
Browse files

bcache: Add struct bset_sort_state



More disentangling bset.c from the rest of the bcache code - soon, the
sorting routines won't have any dependencies on any outside structs.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
parent 911c9610
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -187,6 +187,7 @@
#include <linux/types.h>
#include <linux/workqueue.h>

#include "bset.h"
#include "util.h"
#include "closure.h"

@@ -645,8 +646,7 @@ struct cache_set {
	 */
	mempool_t		*fill_iter;

	mempool_t		*sort_pool;
	unsigned		sort_crit_factor;
	struct bset_sort_state	sort;

	/* List of buckets we're currently writing data to */
	struct list_head	data_buckets;
@@ -662,7 +662,6 @@ struct cache_set {
	unsigned		congested_read_threshold_us;
	unsigned		congested_write_threshold_us;

	struct time_stats	sort_time;
	struct time_stats	btree_gc_time;
	struct time_stats	btree_split_time;
	struct time_stats	btree_read_time;
+42 −26
Original line number Diff line number Diff line
@@ -952,6 +952,26 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,

/* Mergesort */

/*
 * Destroy the page mempool owned by @state, if one was created.
 *
 * A NULL pool is tolerated, so this may be called on a state whose
 * initialisation never ran or failed to allocate the pool.
 */
void bch_bset_sort_state_free(struct bset_sort_state *state)
{
	if (!state->pool)
		return;

	mempool_destroy(state->pool);
}

/*
 * Set up @state for use by the sorting routines.
 *
 * Initialises the lock of the embedded time stats, records @page_order,
 * derives crit_factor as the integer square root of (1 << page_order), and
 * creates a page mempool holding at least one allocation of order
 * @page_order.
 *
 * Returns 0 on success, or -ENOMEM if the mempool could not be created.
 */
int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
{
	spin_lock_init(&state->time.lock);

	state->crit_factor = int_sqrt(1 << page_order);
	state->page_order = page_order;

	/* Reserve of a single order-@page_order allocation */
	state->pool = mempool_create_page_pool(1, page_order);

	return state->pool ? 0 : -ENOMEM;
}

static void sort_key_next(struct btree_iter *iter,
			  struct btree_iter_set *i)
{
@@ -1077,22 +1097,24 @@ static void btree_mergesort(struct btree *b, struct bset *out,
}

static void __btree_sort(struct btree *b, struct btree_iter *iter,
			 unsigned start, unsigned order, bool fixup)
			 unsigned start, unsigned order, bool fixup,
			 struct bset_sort_state *state)
{
	uint64_t start_time;
	bool remove_stale = !b->written;
	bool used_mempool = false;
	struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
						     order);
	if (!out) {
		out = page_address(mempool_alloc(b->c->sort_pool, GFP_NOIO));
		BUG_ON(order > state->page_order);

		out = page_address(mempool_alloc(state->pool, GFP_NOIO));
		used_mempool = true;
		order = ilog2(bucket_pages(b->c));
	}

	start_time = local_clock();

	btree_mergesort(b, out, iter, fixup, remove_stale);
	btree_mergesort(b, out, iter, fixup, false);
	b->nsets = start;

	if (!start && order == b->page_order) {
@@ -1113,18 +1135,18 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter,
	}

	if (used_mempool)
		mempool_free(virt_to_page(out), b->c->sort_pool);
		mempool_free(virt_to_page(out), state->pool);
	else
		free_pages((unsigned long) out, order);

	if (b->written)
	bset_build_written_tree(b);

	if (!start)
		bch_time_stats_update(&b->c->sort_time, start_time);
		bch_time_stats_update(&state->time, start_time);
}

void bch_btree_sort_partial(struct btree *b, unsigned start)
void bch_btree_sort_partial(struct btree *b, unsigned start,
			    struct bset_sort_state *state)
{
	size_t order = b->page_order, keys = 0;
	struct btree_iter iter;
@@ -1148,18 +1170,19 @@ void bch_btree_sort_partial(struct btree *b, unsigned start)
			order = ilog2(order);
	}

	__btree_sort(b, &iter, start, order, false);
	__btree_sort(b, &iter, start, order, false, state);

	EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize);
}

void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter)
void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter,
				    struct bset_sort_state *state)
{
	BUG_ON(!b->written);
	__btree_sort(b, iter, 0, b->page_order, true);
	__btree_sort(b, iter, 0, b->page_order, true, state);
}

void bch_btree_sort_into(struct btree *b, struct btree *new)
void bch_btree_sort_into(struct btree *b, struct btree *new,
			 struct bset_sort_state *state)
{
	uint64_t start_time = local_clock();

@@ -1168,15 +1191,14 @@ void bch_btree_sort_into(struct btree *b, struct btree *new)

	btree_mergesort(b, new->sets->data, &iter, false, true);

	bch_time_stats_update(&b->c->sort_time, start_time);
	bch_time_stats_update(&state->time, start_time);

	bkey_copy_key(&new->key, &b->key);
	new->sets->size = 0;
}

#define SORT_CRIT	(4096 / sizeof(uint64_t))

void bch_btree_sort_lazy(struct btree *b)
void bch_btree_sort_lazy(struct btree *b, struct bset_sort_state *state)
{
	unsigned crit = SORT_CRIT;
	int i;
@@ -1185,24 +1207,18 @@ void bch_btree_sort_lazy(struct btree *b)
	if (!b->nsets)
		goto out;

	/* If not a leaf node, always sort */
	if (b->level) {
		bch_btree_sort(b);
		return;
	}

	for (i = b->nsets - 1; i >= 0; --i) {
		crit *= b->c->sort_crit_factor;
		crit *= state->crit_factor;

		if (b->sets[i].data->keys < crit) {
			bch_btree_sort_partial(b, i);
			bch_btree_sort_partial(b, i, state);
			return;
		}
	}

	/* Sort if we'd overflow */
	if (b->nsets + 1 == MAX_BSETS) {
		bch_btree_sort(b);
		bch_btree_sort(b, state);
		return;
	}

+29 −9
Original line number Diff line number Diff line
@@ -3,6 +3,8 @@

#include <linux/slab.h>

#include "util.h" /* for time_stats */

/*
 * BKEYS:
 *
@@ -190,6 +192,33 @@ struct bset_tree {
	struct bset	*data;
};

/* Sorting */

/*
 * State shared by the btree sorting routines; set up by
 * bch_bset_sort_state_init() and torn down by bch_bset_sort_state_free().
 */
struct bset_sort_state {
	/*
	 * Page mempool created with order page_order; __btree_sort() falls
	 * back to it when __get_free_pages() fails.
	 */
	mempool_t		*pool;

	/*
	 * Order of the pages held in pool; __btree_sort() BUG()s if it needs
	 * the pool for a larger order than this.
	 */
	unsigned		page_order;
	/*
	 * int_sqrt(1 << page_order); bch_btree_sort_lazy() multiplies its
	 * key-count threshold by this once per set it examines.
	 */
	unsigned		crit_factor;

	/* Sort timing, updated through bch_time_stats_update() */
	struct time_stats	time;
};

void bch_bset_sort_state_free(struct bset_sort_state *);
int bch_bset_sort_state_init(struct bset_sort_state *, unsigned);
void bch_btree_sort_lazy(struct btree *, struct bset_sort_state *);
void bch_btree_sort_into(struct btree *, struct btree *,
			 struct bset_sort_state *);
void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *,
				    struct bset_sort_state *);
void bch_btree_sort_partial(struct btree *, unsigned,
			    struct bset_sort_state *);

/*
 * Convenience wrapper: bch_btree_sort_partial() with start == 0.
 * NOTE(review): presumably this sorts every set in @b - confirm against
 * bch_btree_sort_partial().
 */
static inline void bch_btree_sort(struct btree *b,
				  struct bset_sort_state *state)
{
	bch_btree_sort_partial(b, 0, state);
}

/* Keylists */

struct keylist {
@@ -374,15 +403,6 @@ static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
})

bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
void bch_btree_sort_lazy(struct btree *);
void bch_btree_sort_into(struct btree *, struct btree *);
void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *);
void bch_btree_sort_partial(struct btree *, unsigned);

static inline void bch_btree_sort(struct btree *b)
{
	bch_btree_sort_partial(b, 0);
}

int bch_bset_print_stats(struct cache_set *, char *);

+10 −4
Original line number Diff line number Diff line
@@ -263,7 +263,7 @@ void bch_btree_node_read_done(struct btree *b)
		if (i->seq == b->sets[0].data->seq)
			goto err;

	bch_btree_sort_and_fix_extents(b, iter);
	bch_btree_sort_and_fix_extents(b, iter, &b->c->sort);

	i = b->sets[0].data;
	err = "short btree key";
@@ -476,7 +476,11 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
	atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
			&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);

	bch_btree_sort_lazy(b);
	/* If not a leaf node, always sort */
	if (b->level && b->nsets)
		bch_btree_sort(b, &b->c->sort);
	else
		bch_btree_sort_lazy(b, &b->c->sort);

	/*
	 * do verify if there was more than one set initially (i.e. we did a
@@ -1125,8 +1129,10 @@ err:
static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait)
{
	struct btree *n = bch_btree_node_alloc(b->c, b->level, wait);
	if (!IS_ERR_OR_NULL(n))
		bch_btree_sort_into(b, n);
	if (!IS_ERR_OR_NULL(n)) {
		bch_btree_sort_into(b, n, &b->c->sort);
		bkey_copy_key(&n->key, &b->key);
	}

	return n;
}
+3 −6
Original line number Diff line number Diff line
@@ -1351,6 +1351,7 @@ static void cache_set_free(struct closure *cl)
		if (ca)
			kobject_put(&ca->kobj);

	bch_bset_sort_state_free(&c->sort);
	free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));

	if (c->bio_split)
@@ -1481,15 +1482,12 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
		c->btree_pages = max_t(int, c->btree_pages / 4,
				       BTREE_MAX_PAGES);

	c->sort_crit_factor = int_sqrt(c->btree_pages);

	sema_init(&c->sb_write_mutex, 1);
	mutex_init(&c->bucket_lock);
	init_waitqueue_head(&c->try_wait);
	init_waitqueue_head(&c->bucket_wait);
	sema_init(&c->uuid_write_mutex, 1);

	spin_lock_init(&c->sort_time.lock);
	spin_lock_init(&c->btree_gc_time.lock);
	spin_lock_init(&c->btree_split_time.lock);
	spin_lock_init(&c->btree_read_time.lock);
@@ -1517,12 +1515,11 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
				bucket_pages(c))) ||
	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
	    !(c->sort_pool = mempool_create_page_pool(1,
				ilog2(bucket_pages(c)))) ||
	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
	    bch_journal_alloc(c) ||
	    bch_btree_cache_alloc(c) ||
	    bch_open_buckets_alloc(c))
	    bch_open_buckets_alloc(c) ||
	    bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
		goto err;

	c->congested_read_threshold_us	= 2000;
Loading