Commit 22a8f39c authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-5.6/drivers-2020-01-27' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "Like the core side, not a lot of changes here, just two main items:

   - Series of patches (via Coly) with fixes for bcache (Coly,
     Christoph)

   - MD pull request from Song"

* tag 'for-5.6/drivers-2020-01-27' of git://git.kernel.dk/linux-block: (31 commits)
  bcache: reap from tail of c->btree_cache in bch_mca_scan()
  bcache: reap c->btree_cache_freeable from the tail in bch_mca_scan()
  bcache: remove member accessed from struct btree
  bcache: print written and keys in trace_bcache_btree_write
  bcache: avoid unnecessary btree nodes flushing in btree_flush_write()
  bcache: add code comments for state->pool in __btree_sort()
  lib: crc64: include <linux/crc64.h> for 'crc64_be'
  bcache: use read_cache_page_gfp to read the superblock
  bcache: store a pointer to the on-disk sb in the cache and cached_dev structures
  bcache: return a pointer to the on-disk sb from read_super
  bcache: transfer the sb_page reference to register_{bdev,cache}
  bcache: fix use-after-free in register_bcache()
  bcache: properly initialize 'path' and 'err' in register_bcache()
  bcache: rework error unwinding in register_bcache
  bcache: use a separate data structure for the on-disk super block
  bcache: cached_dev_free needs to put the sb page
  md/raid1: introduce wait_for_serialization
  md/raid1: use bucket based mechanism for IO serialization
  md: introduce a new struct for IO serialization
  md: don't destroy serial_info_pool if serialize_policy is true
  ...
parents 48b4b4ff e3de0446
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -301,6 +301,7 @@ struct cached_dev {
	struct block_device	*bdev;

	struct cache_sb		sb;
	struct cache_sb_disk	*sb_disk;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];
	struct closure		sb_write;
@@ -403,6 +404,7 @@ enum alloc_reserve {
struct cache {
	struct cache_set	*set;
	struct cache_sb		sb;
	struct cache_sb_disk	*sb_disk;
	struct bio		sb_bio;
	struct bio_vec		sb_bv[1];

+5 −0
Original line number Diff line number Diff line
@@ -1257,6 +1257,11 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
		 * Our temporary buffer is the same size as the btree node's
		 * buffer, we can just swap buffers instead of doing a big
		 * memcpy()
		 *
		 * Don't worry even if 'out' is allocated from a mempool; it can
		 * still be swapped here, because state->pool is a page mempool
		 * created by mempool_init_page_pool(), which allocates
		 * pages by alloc_pages() indeed.
		 */

		out->magic	= b->set->data->magic;
+10 −14
Original line number Diff line number Diff line
@@ -734,34 +734,32 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,

	i = 0;
	btree_cache_used = c->btree_cache_used;
	list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
	list_for_each_entry_safe_reverse(b, t, &c->btree_cache_freeable, list) {
		if (nr <= 0)
			goto out;

		if (++i > 3 &&
		    !mca_reap(b, 0, false)) {
		if (!mca_reap(b, 0, false)) {
			mca_data_free(b);
			rw_unlock(true, b);
			freed++;
		}
		nr--;
		i++;
	}

	for (;  (nr--) && i < btree_cache_used; i++) {
		if (list_empty(&c->btree_cache))
	list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
		if (nr <= 0 || i >= btree_cache_used)
			goto out;

		b = list_first_entry(&c->btree_cache, struct btree, list);
		list_rotate_left(&c->btree_cache);

		if (!b->accessed &&
		    !mca_reap(b, 0, false)) {
		if (!mca_reap(b, 0, false)) {
			mca_bucket_free(b);
			mca_data_free(b);
			rw_unlock(true, b);
			freed++;
		} else
			b->accessed = 0;
		}

		nr--;
		i++;
	}
out:
	mutex_unlock(&c->bucket_lock);
@@ -1069,7 +1067,6 @@ retry:
	BUG_ON(!b->written);

	b->parent = parent;
	b->accessed = 1;

	for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
		prefetch(b->keys.set[i].tree);
@@ -1160,7 +1157,6 @@ retry:
		goto retry;
	}

	b->accessed = 1;
	b->parent = parent;
	bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));

+0 −2
Original line number Diff line number Diff line
@@ -121,8 +121,6 @@ struct btree {
	/* Key/pointer for this btree node */
	BKEY_PADDED(key);

	/* Single bit - set when accessed, cleared by shrinker */
	unsigned long		accessed;
	unsigned long		seq;
	struct rw_semaphore	lock;
	struct cache_set	*c;
+75 −5
Original line number Diff line number Diff line
@@ -417,10 +417,14 @@ err:

/* Journalling */

#define nr_to_fifo_front(p, front_p, mask)	(((p) - (front_p)) & (mask))

static void btree_flush_write(struct cache_set *c)
{
	struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
	unsigned int i, n;
	unsigned int i, nr, ref_nr;
	atomic_t *fifo_front_p, *now_fifo_front_p;
	size_t mask;

	if (c->journal.btree_flushing)
		return;
@@ -433,12 +437,50 @@ static void btree_flush_write(struct cache_set *c)
	c->journal.btree_flushing = true;
	spin_unlock(&c->journal.flush_write_lock);

	/* get the oldest journal entry and check its refcount */
	spin_lock(&c->journal.lock);
	fifo_front_p = &fifo_front(&c->journal.pin);
	ref_nr = atomic_read(fifo_front_p);
	if (ref_nr <= 0) {
		/*
		 * do nothing if no btree node references
		 * the oldest journal entry
		 */
		spin_unlock(&c->journal.lock);
		goto out;
	}
	spin_unlock(&c->journal.lock);

	mask = c->journal.pin.mask;
	nr = 0;
	atomic_long_inc(&c->flush_write);
	memset(btree_nodes, 0, sizeof(btree_nodes));
	n = 0;

	mutex_lock(&c->bucket_lock);
	list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
		/*
		 * It is safe to get now_fifo_front_p without holding
		 * c->journal.lock here, because we don't need the
		 * exact value; we only check whether the front
		 * pointer of c->journal.pin has changed.
		 */
		now_fifo_front_p = &fifo_front(&c->journal.pin);
		/*
		 * If the oldest journal entry is reclaimed and front
		 * pointer of c->journal.pin changes, it is unnecessary
		 * to scan c->btree_cache anymore, just quit the loop and
		 * flush out what we have already.
		 */
		if (now_fifo_front_p != fifo_front_p)
			break;
		/*
		 * quit this loop if all matching btree nodes are
		 * scanned and record in btree_nodes[] already.
		 */
		ref_nr = atomic_read(fifo_front_p);
		if (nr >= ref_nr)
			break;

		if (btree_node_journal_flush(b))
			pr_err("BUG: flush_write bit should not be set here!");

@@ -454,17 +496,44 @@ static void btree_flush_write(struct cache_set *c)
			continue;
		}

		/*
		 * Only select the btree node which exactly references
		 * the oldest journal entry.
		 *
		 * If the journal entry pointed by fifo_front_p is
		 * reclaimed in parallel, don't worry:
		 * - the list_for_each_xxx loop will quit when checking
		 *   next now_fifo_front_p.
		 * - If there are matched nodes recorded in btree_nodes[],
		 *   they are clean now (this is why and how the oldest
		 *   journal entry can be reclaimed). These selected nodes
		 *   will be ignored and skipped in the following for-loop.
		 */
		if (nr_to_fifo_front(btree_current_write(b)->journal,
				     fifo_front_p,
				     mask) != 0) {
			mutex_unlock(&b->write_lock);
			continue;
		}

		set_btree_node_journal_flush(b);

		mutex_unlock(&b->write_lock);

		btree_nodes[n++] = b;
		if (n == BTREE_FLUSH_NR)
		btree_nodes[nr++] = b;
		/*
		 * To avoid holding c->bucket_lock too long time,
		 * only scan for BTREE_FLUSH_NR matched btree nodes
		 * at most. If there are more btree nodes reference
		 * the oldest journal entry, try to flush them next
		 * time when btree_flush_write() is called.
		 */
		if (nr == BTREE_FLUSH_NR)
			break;
	}
	mutex_unlock(&c->bucket_lock);

	for (i = 0; i < n; i++) {
	for (i = 0; i < nr; i++) {
		b = btree_nodes[i];
		if (!b) {
			pr_err("BUG: btree_nodes[%d] is NULL", i);
@@ -497,6 +566,7 @@ static void btree_flush_write(struct cache_set *c)
		mutex_unlock(&b->write_lock);
	}

out:
	spin_lock(&c->journal.flush_write_lock);
	c->journal.btree_flushing = false;
	spin_unlock(&c->journal.flush_write_lock);
Loading