Commit ee811287 authored by Kent Overstreet

bcache: Rename/shuffle various code around



More work to disentangle bset.c from the rest of the code:

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
parent 67539e85
drivers/md/bcache/bcache.h +0 −8
@@ -717,14 +717,6 @@ struct bbio {
#define bucket_bytes(c)		((c)->sb.bucket_size << 9)
#define block_bytes(c)		((c)->sb.block_size << 9)

#define __set_bytes(i, k)	(sizeof(*(i)) + (k) * sizeof(uint64_t))
#define set_bytes(i)		__set_bytes(i, i->keys)

#define __set_blocks(i, k, c)	DIV_ROUND_UP(__set_bytes(i, k), block_bytes(c))
#define set_blocks(i, c)	__set_blocks(i, (i)->keys, c)

#define btree_data_space(b)	(PAGE_SIZE << (b)->page_order)

#define prios_per_bucket(c)				\
	((bucket_bytes(c) - sizeof(struct prio_set)) /	\
	 sizeof(struct bucket_disk))
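The set-size macros move from bcache.h into bset.h below, and __set_blocks()/set_blocks() now take the block size in bytes rather than a struct cache_set, so the bset code no longer reaches into the cache_set. A minimal sketch of the resulting call-site change, using the should_split() caller that appears later in this diff:

	/* before: the macro dug the block size out of the cache_set */
	__set_blocks(i, i->keys + 15, b->c)

	/* after: the caller passes block_bytes() explicitly */
	__set_blocks(i, i->keys + 15, block_bytes(b->c))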
drivers/md/bcache/bset.c +149 −23
@@ -302,6 +302,115 @@ bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r)
	return true;
}

/* Auxiliary search trees */

/* 32 bits total: */
#define BKEY_MID_BITS		3
#define BKEY_EXPONENT_BITS	7
#define BKEY_MANTISSA_BITS	(32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)

struct bkey_float {
	unsigned	exponent:BKEY_EXPONENT_BITS;
	unsigned	m:BKEY_MID_BITS;
	unsigned	mantissa:BKEY_MANTISSA_BITS;
} __packed;

/*
 * BSET_CACHELINE was originally intended to match the hardware cacheline size -
 * it used to be 64, but I realized the lookup code would touch slightly less
 * memory if it was 128.
 *
 * It defines the number of bytes (in struct bset) per struct bkey_float in
 * the auxiliary search tree - when we're done searching the bset_tree we
 * have this many bytes left that we do a linear search over.
 *
 * Since (after level 5) every level of the bset_tree is on a new cacheline,
 * we're touching one fewer cacheline in the bset tree in exchange for one more
 * cacheline in the linear search - but the linear search might stop before it
 * gets to the second cacheline.
 */

#define BSET_CACHELINE		128

/* Space required for the btree node keys */
static inline size_t btree_keys_bytes(struct btree *b)
{
	return PAGE_SIZE << b->page_order;
}

static inline size_t btree_keys_cachelines(struct btree *b)
{
	return btree_keys_bytes(b) / BSET_CACHELINE;
}

/* Space required for the auxiliary search trees */
static inline size_t bset_tree_bytes(struct btree *b)
{
	return btree_keys_cachelines(b) * sizeof(struct bkey_float);
}

/* Space required for the prev pointers */
static inline size_t bset_prev_bytes(struct btree *b)
{
	return btree_keys_cachelines(b) * sizeof(uint8_t);
}
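/*
 * Worked example (editorial, not part of the patch): with 4 KiB pages
 * and page_order = 2, btree_keys_bytes() is 16 KiB, i.e. 16384 / 128 =
 * 128 cachelines of key space.  struct bkey_float packs 7 + 3 + 22 =
 * 32 bits into 4 bytes, so bset_tree_bytes() is 128 * 4 = 512 and
 * bset_prev_bytes() is 128 * 1 = 128 - both under PAGE_SIZE, so in
 * bch_btree_keys_alloc() below both come from kmalloc().
 */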

/* Memory allocation */

void bch_btree_keys_free(struct btree *b)
{
	struct bset_tree *t = b->sets;

	if (bset_prev_bytes(b) < PAGE_SIZE)
		kfree(t->prev);
	else
		free_pages((unsigned long) t->prev,
			   get_order(bset_prev_bytes(b)));

	if (bset_tree_bytes(b) < PAGE_SIZE)
		kfree(t->tree);
	else
		free_pages((unsigned long) t->tree,
			   get_order(bset_tree_bytes(b)));

	free_pages((unsigned long) t->data, b->page_order);

	t->prev = NULL;
	t->tree = NULL;
	t->data = NULL;
}

int bch_btree_keys_alloc(struct btree *b, unsigned page_order, gfp_t gfp)
{
	struct bset_tree *t = b->sets;

	BUG_ON(t->data);

	b->page_order = page_order;

	t->data = (void *) __get_free_pages(gfp, b->page_order);
	if (!t->data)
		goto err;

	t->tree = bset_tree_bytes(b) < PAGE_SIZE
		? kmalloc(bset_tree_bytes(b), gfp)
		: (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
	if (!t->tree)
		goto err;

	t->prev = bset_prev_bytes(b) < PAGE_SIZE
		? kmalloc(bset_prev_bytes(b), gfp)
		: (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
	if (!t->prev)
		goto err;

	return 0;
err:
	bch_btree_keys_free(b);
	return -ENOMEM;
}
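/*
 * Editorial note: the shared error path is safe after a partial
 * failure because kfree(NULL) is a no-op and free_pages() ignores a
 * zero address, so bch_btree_keys_free() can be called no matter which
 * allocation failed.
 */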

/* Binary tree stuff for auxiliary search trees */

static unsigned inorder_next(unsigned j, unsigned size)
@@ -538,21 +647,36 @@ static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
		t++->size = 0;
}

static void bset_build_unwritten_tree(struct btree *b)
static void bch_bset_build_unwritten_tree(struct btree *b)
{
	struct bset_tree *t = b->sets + b->nsets;
	struct bset_tree *t = bset_tree_last(b);

	bset_alloc_tree(b, t);

	if (t->tree != b->sets->tree + bset_tree_space(b)) {
	if (t->tree != b->sets->tree + btree_keys_cachelines(b)) {
		t->prev[0] = bkey_to_cacheline_offset(t->data->start);
		t->size = 1;
	}
}

void bch_bset_init_next(struct btree *b, struct bset *i, uint64_t magic)
{
	if (i != b->sets->data) {
		b->sets[++b->nsets].data = i;
		i->seq = b->sets->data->seq;
	} else
		get_random_bytes(&i->seq, sizeof(uint64_t));

	i->magic	= magic;
	i->version	= 0;
	i->keys		= 0;

	bch_bset_build_unwritten_tree(b);
}
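/*
 * Editorial note: the first bset in a node gets a random seq and every
 * later bset copies it; bch_btree_node_read_done() in btree.c uses the
 * matching seq to decide which on-disk bsets belong to this node.
 */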

static void bset_build_written_tree(struct btree *b)
{
	struct bset_tree *t = b->sets + b->nsets;
	struct bset_tree *t = bset_tree_last(b);
	struct bkey *k = t->data->start;
	unsigned j, cacheline = 1;

@@ -560,7 +684,7 @@ static void bset_build_written_tree(struct btree *b)

	t->size = min_t(unsigned,
			bkey_to_cacheline(t, bset_bkey_last(t->data)),
			b->sets->tree + bset_tree_space(b) - t->tree);
			b->sets->tree + btree_keys_cachelines(b) - t->tree);

	if (t->size < 2) {
		t->size = 0;
@@ -599,7 +723,7 @@ void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k)
	struct bset_tree *t;
	unsigned inorder, j = 1;

	for (t = b->sets; t <= &b->sets[b->nsets]; t++)
	for (t = b->sets; t <= bset_tree_last(b); t++)
		if (k < bset_bkey_last(t->data))
			goto found_set;

@@ -639,9 +763,10 @@ fix_right: do {
		} while (j < t->size);
}

void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
static void bch_bset_fix_lookup_table(struct btree *b,
				      struct bset_tree *t,
				      struct bkey *k)
{
	struct bset_tree *t = &b->sets[b->nsets];
	unsigned shift = bkey_u64s(k);
	unsigned j = bkey_to_cacheline(t, k);

@@ -673,7 +798,7 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
		}
	}

	if (t->size == b->sets->tree + bset_tree_space(b) - t->tree)
	if (t->size == b->sets->tree + btree_keys_cachelines(b) - t->tree)
		return;

	/* Possibly add a new entry to the end of the lookup table */
@@ -687,21 +812,23 @@ void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
		}
}

void bch_bset_init_next(struct btree *b)
void bch_bset_insert(struct btree *b, struct bkey *where,
		     struct bkey *insert)
{
	struct bset *i = write_block(b);
	struct bset_tree *t = bset_tree_last(b);

	if (i != b->sets[0].data) {
		b->sets[++b->nsets].data = i;
		i->seq = b->sets[0].data->seq;
	} else
		get_random_bytes(&i->seq, sizeof(uint64_t));
	BUG_ON(t->data != write_block(b));
	BUG_ON(bset_byte_offset(b, t->data) +
	       __set_bytes(t->data, t->data->keys + bkey_u64s(insert)) >
	       PAGE_SIZE << b->page_order);

	i->magic	= bset_magic(&b->c->sb);
	i->version	= 0;
	i->keys		= 0;
	memmove((uint64_t *) where + bkey_u64s(insert),
		where,
		(void *) bset_bkey_last(t->data) - (void *) where);

	bset_build_unwritten_tree(b);
	t->data->keys += bkey_u64s(insert);
	bkey_copy(where, insert);
	bch_bset_fix_lookup_table(b, t, where);
}
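/*
 * Editorial illustration: inserting a 3-u64 key opens a 3-u64 gap at
 * `where` via the memmove above, bkey_copy() fills the gap, and the
 * lookup table is then repaired for everything that shifted.
 */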

struct bset_search_iter {
@@ -1154,9 +1281,8 @@ void bch_btree_sort_partial(struct btree *b, unsigned start,

	__bch_btree_iter_init(b, &iter, NULL, &b->sets[start]);

	BUG_ON(b->sets[b->nsets].data == write_block(b) &&
	       (b->sets[b->nsets].size || b->nsets));

	BUG_ON(!bset_written(b, bset_tree_last(b)) &&
	       (bset_tree_last(b)->size || b->nsets));

	if (start) {
		unsigned i;
drivers/md/bcache/bset.h +114 −133
@@ -144,22 +144,11 @@
 * first key in that range of bytes again.
 */

struct cache_set;

/* Btree key comparison/iteration */
struct btree;
struct bkey_float;

#define MAX_BSETS		4U

struct btree_iter {
	size_t size, used;
#ifdef CONFIG_BCACHE_DEBUG
	struct btree *b;
#endif
	struct btree_iter_set {
		struct bkey *k, *end;
	} data[MAX_BSETS];
};

struct bset_tree {
	/*
	 * We construct a binary tree in an array as if the array
@@ -192,6 +181,55 @@ struct bset_tree {
	struct bset		*data;
};

#define __set_bytes(i, k)	(sizeof(*(i)) + (k) * sizeof(uint64_t))
#define set_bytes(i)		__set_bytes(i, i->keys)

#define __set_blocks(i, k, block_bytes)				\
	DIV_ROUND_UP(__set_bytes(i, k), block_bytes)
#define set_blocks(i, block_bytes)				\
	__set_blocks(i, (i)->keys, block_bytes)

void bch_btree_keys_free(struct btree *);
int bch_btree_keys_alloc(struct btree *, unsigned, gfp_t);

void bch_bset_fix_invalidated_key(struct btree *, struct bkey *);
void bch_bset_init_next(struct btree *, struct bset *, uint64_t);
void bch_bset_insert(struct btree *, struct bkey *, struct bkey *);

/* Btree key iteration */

struct btree_iter {
	size_t size, used;
#ifdef CONFIG_BCACHE_DEBUG
	struct btree *b;
#endif
	struct btree_iter_set {
		struct bkey *k, *end;
	} data[MAX_BSETS];
};

typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);

struct bkey *bch_btree_iter_next(struct btree_iter *);
struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
					struct btree *, ptr_filter_fn);

void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
struct bkey *bch_btree_iter_init(struct btree *, struct btree_iter *,
				 struct bkey *);

struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
			   const struct bkey *);

/*
 * Returns the first key that is strictly greater than search
 */
static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
					   const struct bkey *search)
{
	return search ? __bch_bset_search(b, t, search) : t->data->start;
}

/* Sorting */

struct bset_sort_state {
@@ -219,6 +257,60 @@ static inline void bch_btree_sort(struct btree *b,
	bch_btree_sort_partial(b, 0, state);
}

/* Bkey utility code */

#define bset_bkey_last(i)	bkey_idx((struct bkey *) (i)->d, (i)->keys)

static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
{
	return bkey_idx(i->start, idx);
}

static inline void bkey_init(struct bkey *k)
{
	*k = ZERO_KEY;
}

static __always_inline int64_t bkey_cmp(const struct bkey *l,
					const struct bkey *r)
{
	return unlikely(KEY_INODE(l) != KEY_INODE(r))
		? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
		: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
}
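/*
 * Editorial example: keys sort by inode first, then offset, so
 * bkey_cmp(&KEY(1, 100, 0), &KEY(1, 200, 0)) is negative; callers only
 * ever test the sign of the result, not its magnitude.
 */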

void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
			      unsigned);
bool __bch_cut_front(const struct bkey *, struct bkey *);
bool __bch_cut_back(const struct bkey *, struct bkey *);

static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
{
	BUG_ON(bkey_cmp(where, k) > 0);
	return __bch_cut_front(where, k);
}

static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
{
	BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0);
	return __bch_cut_back(where, k);
}

#define PRECEDING_KEY(_k)					\
({								\
	struct bkey *_ret = NULL;				\
								\
	if (KEY_INODE(_k) || KEY_OFFSET(_k)) {			\
		_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0);	\
								\
		if (!_ret->low)					\
			_ret->high--;				\
		_ret->low--;					\
	}							\
								\
	_ret;							\
})
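/*
 * Editorial example: PRECEDING_KEY() decrements the (high, low) pair
 * as one 128-bit integer, so the key preceding (inode 1, offset 0) is
 * (inode 0, offset ~0ULL); KEY(0, 0, 0) has no predecessor, so the
 * macro yields NULL for it.
 */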

/* Keylists */

struct keylist {
@@ -282,126 +374,15 @@ struct bkey *bch_keylist_pop(struct keylist *);
void bch_keylist_pop_front(struct keylist *);
int __bch_keylist_realloc(struct keylist *, unsigned);

/* Bkey utility code */

#define bset_bkey_last(i)	bkey_idx((struct bkey *) (i)->d, (i)->keys)

static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
{
	return bkey_idx(i->start, idx);
}

static inline void bkey_init(struct bkey *k)
{
	*k = ZERO_KEY;
}

static __always_inline int64_t bkey_cmp(const struct bkey *l,
					const struct bkey *r)
{
	return unlikely(KEY_INODE(l) != KEY_INODE(r))
		? (int64_t) KEY_INODE(l) - (int64_t) KEY_INODE(r)
		: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
}

void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
			      unsigned);
bool __bch_cut_front(const struct bkey *, struct bkey *);
bool __bch_cut_back(const struct bkey *, struct bkey *);

static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
{
	BUG_ON(bkey_cmp(where, k) > 0);
	return __bch_cut_front(where, k);
}

static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
{
	BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0);
	return __bch_cut_back(where, k);
}

struct cache_set;
const char *bch_ptr_status(struct cache_set *, const struct bkey *);
bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *);
bool bch_btree_ptr_bad(struct btree *, const struct bkey *);
bool bch_extent_ptr_bad(struct btree *, const struct bkey *);

bool bch_ptr_bad(struct btree *, const struct bkey *);

typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);

struct bkey *bch_btree_iter_next(struct btree_iter *);
struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
					struct btree *, ptr_filter_fn);

void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
struct bkey *bch_btree_iter_init(struct btree *, struct btree_iter *,
				 struct bkey *);

/* 32 bits total: */
#define BKEY_MID_BITS		3
#define BKEY_EXPONENT_BITS	7
#define BKEY_MANTISSA_BITS	22
#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)

struct bkey_float {
	unsigned	exponent:BKEY_EXPONENT_BITS;
	unsigned	m:BKEY_MID_BITS;
	unsigned	mantissa:BKEY_MANTISSA_BITS;
} __packed;

/*
 * BSET_CACHELINE was originally intended to match the hardware cacheline size -
 * it used to be 64, but I realized the lookup code would touch slightly less
 * memory if it was 128.
 *
 * It defines the number of bytes (in struct bset) per struct bkey_float in
 * the auxiliary search tree - when we're done searching the bset_tree we
 * have this many bytes left that we do a linear search over.
 *
 * Since (after level 5) every level of the bset_tree is on a new cacheline,
 * we're touching one fewer cacheline in the bset tree in exchange for one more
 * cacheline in the linear search - but the linear search might stop before it
 * gets to the second cacheline.
 */

#define BSET_CACHELINE		128
#define bset_tree_space(b)	(btree_data_space(b) / BSET_CACHELINE)

#define bset_tree_bytes(b)	(bset_tree_space(b) * sizeof(struct bkey_float))
#define bset_prev_bytes(b)	(bset_tree_space(b) * sizeof(uint8_t))

void bch_bset_init_next(struct btree *);

void bch_bset_fix_invalidated_key(struct btree *, struct bkey *);
void bch_bset_fix_lookup_table(struct btree *, struct bkey *);

struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
			   const struct bkey *);

/*
 * Returns the first key that is strictly greater than search
 */
static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
					   const struct bkey *search)
{
	return search ? __bch_bset_search(b, t, search) : t->data->start;
}

#define PRECEDING_KEY(_k)					\
({								\
	struct bkey *_ret = NULL;				\
								\
	if (KEY_INODE(_k) || KEY_OFFSET(_k)) {			\
		_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0);	\
								\
		if (!_ret->low)					\
			_ret->high--;				\
		_ret->low--;					\
	}							\
								\
	_ret;							\
})

bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);

int bch_bset_print_stats(struct cache_set *, char *);
drivers/md/bcache/btree.c +63 −104
@@ -110,7 +110,7 @@ static inline bool should_split(struct btree *b)
{
	struct bset *i = write_block(b);
	return b->written >= btree_blocks(b) ||
		(b->written + __set_blocks(i, i->keys + 15, b->c)
		(b->written + __set_blocks(i, i->keys + 15, block_bytes(b->c))
		 > btree_blocks(b));
}

@@ -206,7 +206,7 @@ static uint64_t btree_csum_set(struct btree *b, struct bset *i)
void bch_btree_node_read_done(struct btree *b)
{
	const char *err = "bad btree header";
	struct bset *i = b->sets[0].data;
	struct bset *i = btree_bset_first(b);
	struct btree_iter *iter;

	iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
@@ -228,7 +228,8 @@ void bch_btree_node_read_done(struct btree *b)
			goto err;

		err = "bad btree header";
		if (b->written + set_blocks(i, b->c) > btree_blocks(b))
		if (b->written + set_blocks(i, block_bytes(b->c)) >
		    btree_blocks(b))
			goto err;

		err = "bad magic";
@@ -253,7 +254,7 @@ void bch_btree_node_read_done(struct btree *b)

		bch_btree_iter_push(iter, i->start, bset_bkey_last(i));

		b->written += set_blocks(i, b->c);
		b->written += set_blocks(i, block_bytes(b->c));
	}

	err = "corrupted btree";
@@ -272,7 +273,7 @@ void bch_btree_node_read_done(struct btree *b)
		goto err;

	if (b->written < btree_blocks(b))
		bch_bset_init_next(b);
		bch_bset_init_next(b, write_block(b), bset_magic(&b->c->sb));
out:
	mempool_free(iter, b->c->fill_iter);
	return;
@@ -393,7 +394,7 @@ static void btree_node_write_endio(struct bio *bio, int error)
static void do_btree_node_write(struct btree *b)
{
	struct closure *cl = &b->io;
	struct bset *i = b->sets[b->nsets].data;
	struct bset *i = btree_bset_last(b);
	BKEY_PADDED(key) k;

	i->version	= BCACHE_BSET_VERSION;
@@ -405,7 +406,7 @@ static void do_btree_node_write(struct btree *b)
	b->bio->bi_end_io	= btree_node_write_endio;
	b->bio->bi_private	= cl;
	b->bio->bi_rw		= REQ_META|WRITE_SYNC|REQ_FUA;
	b->bio->bi_iter.bi_size	= set_blocks(i, b->c) * block_bytes(b->c);
	b->bio->bi_iter.bi_size	= roundup(set_bytes(i), block_bytes(b->c));
	bch_bio_map(b->bio, i);

	/*
@@ -424,7 +425,8 @@ static void do_btree_node_write(struct btree *b)
	 */

	bkey_copy(&k.key, &b->key);
	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i));
	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
		       bset_sector_offset(b, i));

	if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
		int j;
@@ -451,14 +453,14 @@ static void do_btree_node_write(struct btree *b)

void bch_btree_node_write(struct btree *b, struct closure *parent)
{
	struct bset *i = b->sets[b->nsets].data;
	struct bset *i = btree_bset_last(b);

	trace_bcache_btree_write(b);

	BUG_ON(current->bio_list);
	BUG_ON(b->written >= btree_blocks(b));
	BUG_ON(b->written && !i->keys);
	BUG_ON(b->sets->data->seq != i->seq);
	BUG_ON(btree_bset_first(b)->seq != i->seq);
	bch_check_keys(b, "writing");

	cancel_delayed_work(&b->work);
@@ -472,8 +474,8 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)

	do_btree_node_write(b);

	b->written += set_blocks(i, b->c);
	atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
	b->written += set_blocks(i, block_bytes(b->c));
	atomic_long_add(set_blocks(i, block_bytes(b->c)) * b->c->sb.block_size,
			&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);

	/* If not a leaf node, always sort */
@@ -490,7 +492,7 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
		bch_btree_verify(b);

	if (b->written < btree_blocks(b))
		bch_bset_init_next(b);
		bch_bset_init_next(b, write_block(b), bset_magic(&b->c->sb));
}

static void bch_btree_node_write_sync(struct btree *b)
@@ -515,7 +517,7 @@ static void btree_node_write_work(struct work_struct *w)

static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
{
	struct bset *i = b->sets[b->nsets].data;
	struct bset *i = btree_bset_last(b);
	struct btree_write *w = btree_current_write(b);

	BUG_ON(!b->written);
@@ -575,29 +577,12 @@ static void mca_reinit(struct btree *b)

static void mca_data_free(struct btree *b)
{
	struct bset_tree *t = b->sets;

	BUG_ON(b->io_mutex.count != 1);

	if (bset_prev_bytes(b) < PAGE_SIZE)
		kfree(t->prev);
	else
		free_pages((unsigned long) t->prev,
			   get_order(bset_prev_bytes(b)));

	if (bset_tree_bytes(b) < PAGE_SIZE)
		kfree(t->tree);
	else
		free_pages((unsigned long) t->tree,
			   get_order(bset_tree_bytes(b)));
	free_pages((unsigned long) t->data, b->page_order);

	t->prev = NULL;
	t->tree = NULL;
	t->data = NULL;
	list_move(&b->list, &b->c->btree_cache_freed);
	bch_btree_keys_free(b);

	b->c->bucket_cache_used--;
	list_move(&b->list, &b->c->btree_cache_freed);
}

static void mca_bucket_free(struct btree *b)
@@ -616,34 +601,16 @@ static unsigned btree_order(struct bkey *k)

static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
{
	struct bset_tree *t = b->sets;
	BUG_ON(t->data);

	b->page_order = max_t(unsigned,
			      ilog2(b->c->btree_pages),
			      btree_order(k));

	t->data = (void *) __get_free_pages(gfp, b->page_order);
	if (!t->data)
		goto err;

	t->tree = bset_tree_bytes(b) < PAGE_SIZE
		? kmalloc(bset_tree_bytes(b), gfp)
		: (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
	if (!t->tree)
		goto err;

	t->prev = bset_prev_bytes(b) < PAGE_SIZE
		? kmalloc(bset_prev_bytes(b), gfp)
		: (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
	if (!t->prev)
		goto err;

	list_move(&b->list, &b->c->btree_cache);
	b->c->bucket_cache_used++;
	return;
err:
	mca_data_free(b);
	if (!bch_btree_keys_alloc(b,
				  max_t(unsigned,
					ilog2(b->c->btree_pages),
					btree_order(k)),
				  gfp)) {
		b->c->bucket_cache_used++;
		list_move(&b->list, &b->c->btree_cache);
	} else {
		list_move(&b->list, &b->c->btree_cache_freed);
	}
}

static struct btree *mca_bucket_alloc(struct cache_set *c,
@@ -1111,7 +1078,7 @@ retry:
	}

	b->accessed = 1;
	bch_bset_init_next(b);
	bch_bset_init_next(b, b->sets->data, bset_magic(&b->c->sb));

	mutex_unlock(&c->bucket_lock);

@@ -1298,7 +1265,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
	blocks = btree_default_blocks(b->c) * 2 / 3;

	if (nodes < 2 ||
	    __set_blocks(b->sets[0].data, keys, b->c) > blocks * (nodes - 1))
	    __set_blocks(b->sets[0].data, keys,
			 block_bytes(b->c)) > blocks * (nodes - 1))
		return 0;

	for (i = 0; i < nodes; i++) {
@@ -1308,8 +1276,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
	}

	for (i = nodes - 1; i > 0; --i) {
		struct bset *n1 = new_nodes[i]->sets->data;
		struct bset *n2 = new_nodes[i - 1]->sets->data;
		struct bset *n1 = btree_bset_first(new_nodes[i]);
		struct bset *n2 = btree_bset_first(new_nodes[i - 1]);
		struct bkey *k, *last = NULL;

		keys = 0;
@@ -1319,7 +1287,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
			     k < bset_bkey_last(n2);
			     k = bkey_next(k)) {
				if (__set_blocks(n1, n1->keys + keys +
						 bkey_u64s(k), b->c) > blocks)
						 bkey_u64s(k),
						 block_bytes(b->c)) > blocks)
					break;

				last = k;
@@ -1335,7 +1304,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
			 * though)
			 */
			if (__set_blocks(n1, n1->keys + n2->keys,
					 b->c) > btree_blocks(new_nodes[i]))
					 block_bytes(b->c)) >
			    btree_blocks(new_nodes[i]))
				goto out_nocoalesce;

			keys = n2->keys;
@@ -1343,8 +1313,8 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
			last = &r->b->key;
		}

		BUG_ON(__set_blocks(n1, n1->keys + keys,
				    b->c) > btree_blocks(new_nodes[i]));
		BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c)) >
		       btree_blocks(new_nodes[i]));

		if (last)
			bkey_copy_key(&new_nodes[i]->key, last);
@@ -1380,7 +1350,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
	}

	/* We emptied out this node */
	BUG_ON(new_nodes[0]->sets->data->keys);
	BUG_ON(btree_bset_first(new_nodes[0])->keys);
	btree_node_free(new_nodes[0]);
	rw_unlock(true, new_nodes[0]);

@@ -1831,19 +1801,6 @@ err:

/* Btree insertion */

static void shift_keys(struct btree *b, struct bkey *where, struct bkey *insert)
{
	struct bset *i = b->sets[b->nsets].data;

	memmove((uint64_t *) where + bkey_u64s(insert),
		where,
		(void *) bset_bkey_last(i) - (void *) where);

	i->keys += bkey_u64s(insert);
	bkey_copy(where, insert);
	bch_bset_fix_lookup_table(b, where);
}
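/*
 * Editorial note: shift_keys() above is the code that moved to bset.c
 * as bch_bset_insert() earlier in this diff, picking up BUG_ONs that
 * check the write block still has room for the shifted keys.
 */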

static bool fix_overlapping_extents(struct btree *b, struct bkey *insert,
				    struct btree_iter *iter,
				    struct bkey *replace_key)
@@ -1944,13 +1901,13 @@ static bool fix_overlapping_extents(struct btree *b, struct bkey *insert,
				 * depends on us inserting a new key for the top
				 * here.
				 */
				top = bch_bset_search(b, &b->sets[b->nsets],
				top = bch_bset_search(b, bset_tree_last(b),
						      insert);
				shift_keys(b, top, k);
				bch_bset_insert(b, top, k);
			} else {
				BKEY_PADDED(key) temp;
				bkey_copy(&temp.key, k);
				shift_keys(b, k, &temp.key);
				bch_bset_insert(b, k, &temp.key);
				top = bkey_next(k);
			}

@@ -1999,7 +1956,7 @@ check_failed:
static bool btree_insert_key(struct btree *b, struct btree_op *op,
			     struct bkey *k, struct bkey *replace_key)
{
	struct bset *i = b->sets[b->nsets].data;
	struct bset *i = btree_bset_last(b);
	struct bkey *m, *prev;
	unsigned status = BTREE_INSERT_STATUS_INSERT;

@@ -2051,10 +2008,10 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
			goto copy;
	} else {
		BUG_ON(replace_key);
		m = bch_bset_search(b, &b->sets[b->nsets], k);
		m = bch_bset_search(b, bset_tree_last(b), k);
	}

insert:	shift_keys(b, m, k);
insert:	bch_bset_insert(b, m, k);
copy:	bkey_copy(m, k);
merged:
	bch_check_keys(b, "%u for %s", status,
@@ -2079,8 +2036,9 @@ static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op,
		struct bset *i = write_block(b);
		struct bkey *k = insert_keys->keys;

		if (b->written + __set_blocks(i, i->keys + bkey_u64s(k), b->c)
		    > btree_blocks(b))
		if (b->written +
		    __set_blocks(i, i->keys + bkey_u64s(k),
				 block_bytes(b->c)) > btree_blocks(b))
			break;

		if (bkey_cmp(k, &b->key) <= 0) {
@@ -2130,12 +2088,13 @@ static int btree_split(struct btree *b, struct btree_op *op,
	if (IS_ERR(n1))
		goto err;

	split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5;
	split = set_blocks(btree_bset_first(n1),
			   block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;

	if (split) {
		unsigned keys = 0;

		trace_bcache_btree_node_split(b, n1->sets[0].data->keys);
		trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);

		n2 = bch_btree_node_alloc(b->c, b->level, true);
		if (IS_ERR(n2))
@@ -2154,20 +2113,20 @@ static int btree_split(struct btree *b, struct btree_op *op,
		 * search tree yet
		 */

		while (keys < (n1->sets[0].data->keys * 3) / 5)
			keys += bkey_u64s(bset_bkey_idx(n1->sets[0].data,
		while (keys < (btree_bset_first(n1)->keys * 3) / 5)
			keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1),
							keys));

		bkey_copy_key(&n1->key,
			      bset_bkey_idx(n1->sets[0].data, keys));
		keys += bkey_u64s(bset_bkey_idx(n1->sets[0].data, keys));
			      bset_bkey_idx(btree_bset_first(n1), keys));
		keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1), keys));

		n2->sets[0].data->keys = n1->sets[0].data->keys - keys;
		n1->sets[0].data->keys = keys;
		btree_bset_first(n2)->keys = btree_bset_first(n1)->keys - keys;
		btree_bset_first(n1)->keys = keys;

		memcpy(n2->sets[0].data->start,
		       bset_bkey_last(n1->sets[0].data),
		       n2->sets[0].data->keys * sizeof(uint64_t));
		memcpy(btree_bset_first(n2)->start,
		       bset_bkey_last(btree_bset_first(n1)),
		       btree_bset_first(n2)->keys * sizeof(uint64_t));

		bkey_copy_key(&n2->key, &b->key);

@@ -2175,7 +2134,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
		bch_btree_node_write(n2, &cl);
		rw_unlock(true, n2);
	} else {
		trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
		trace_bcache_btree_node_compact(b, btree_bset_first(n1)->keys);

		bch_btree_insert_keys(n1, op, insert_keys, replace_key);
	}
@@ -2256,7 +2215,7 @@ static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
				-EINTR;
		}
	} else {
		BUG_ON(write_block(b) != b->sets[b->nsets].data);
		BUG_ON(write_block(b) != btree_bset_last(b));

		if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) {
			if (!b->level)
drivers/md/bcache/btree.h +7 −2
@@ -180,9 +180,9 @@ static inline struct btree_write *btree_prev_write(struct btree *b)
	return b->writes + (btree_node_write_idx(b) ^ 1);
}

static inline unsigned bset_offset(struct btree *b, struct bset *i)
static inline struct bset_tree *bset_tree_last(struct btree *b)
{
	return (((size_t) i) - ((size_t) b->sets->data)) >> 9;
	return b->sets + b->nsets;
}

static inline struct bset *btree_bset_first(struct btree *b)
@@ -190,6 +190,11 @@ static inline struct bset *btree_bset_first(struct btree *b)
	return b->sets->data;
}

static inline struct bset *btree_bset_last(struct btree *b)
{
	return bset_tree_last(b)->data;
}
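/*
 * Editorial note: btree_bset_last(b) and bset_tree_last(b) replace the
 * open-coded b->sets[b->nsets].data and b->sets + b->nsets patterns
 * that this patch removes throughout btree.c and bset.c.
 */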

static inline unsigned bset_byte_offset(struct btree *b, struct bset *i)
{
	return ((size_t) i) - ((size_t) b->sets->data);