Commit 314b6dd0 authored by Johannes Thumshirn, committed by David Sterba
Browse files

btrfs: use bios instead of buffer_heads from super block writeout



Similar to the superblock read path, change the write path to use bios
and pages instead of buffer_heads. This allows us to bypass the
buffer_head code when writing the superblock to disk.

This is based on a patch originally authored by Nikolay Borisov.

Co-developed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 8f32380d
Loading
Loading
Loading
Loading
+73 −54
Original line number Diff line number Diff line
@@ -7,7 +7,6 @@
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/slab.h>
@@ -3395,25 +3394,34 @@ fail:
}
ALLOW_ERROR_INJECTION(open_ctree, ERRNO);

static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
static void btrfs_end_super_write(struct bio *bio)
{
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		struct btrfs_device *device = (struct btrfs_device *)
			bh->b_private;
	struct btrfs_device *device = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;
	struct page *page;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		page = bvec->bv_page;

		if (bio->bi_status) {
			btrfs_warn_rl_in_rcu(device->fs_info,
				"lost page write due to IO error on %s",
					  rcu_str_deref(device->name));
		/* note, we don't set_buffer_write_io_error because we have
		 * our own ways of dealing with the IO errors
		 */
		clear_buffer_uptodate(bh);
		btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
				"lost page write due to IO error on %s (%d)",
				rcu_str_deref(device->name),
				blk_status_to_errno(bio->bi_status));
			ClearPageUptodate(page);
			SetPageError(page);
			btrfs_dev_stat_inc_and_print(device,
						     BTRFS_DEV_STAT_WRITE_ERRS);
		} else {
			SetPageUptodate(page);
		}
	unlock_buffer(bh);
	put_bh(bh);

		put_page(page);
		unlock_page(page);
	}

	bio_put(bio);
}

struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
@@ -3473,25 +3481,23 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)

/*
 * Write superblock @sb to the @device. Do not wait for completion, all the
 * buffer heads we write are pinned.
 * pages we use for writing are locked.
 *
 * Write @max_mirrors copies of the superblock, where 0 means default that fit
 * the expected device size at commit time. Note that max_mirrors must be
 * same for write and wait phases.
 *
 * Return number of errors when buffer head is not found or submission fails.
 * Return number of errors when page is not found or submission fails.
 */
static int write_dev_supers(struct btrfs_device *device,
			    struct btrfs_super_block *sb, int max_mirrors)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct address_space *mapping = device->bdev->bd_inode->i_mapping;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	struct buffer_head *bh;
	int i;
	int ret;
	int errors = 0;
	u64 bytenr;
	int op_flags;

	if (max_mirrors == 0)
		max_mirrors = BTRFS_SUPER_MIRROR_MAX;
@@ -3499,6 +3505,10 @@ static int write_dev_supers(struct btrfs_device *device,
	shash->tfm = fs_info->csum_shash;

	for (i = 0; i < max_mirrors; i++) {
		struct page *page;
		struct bio *bio;
		struct btrfs_super_block *disk_super;

		bytenr = btrfs_sb_offset(i);
		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
		    device->commit_total_bytes)
@@ -3511,37 +3521,45 @@ static int write_dev_supers(struct btrfs_device *device,
				    BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
		crypto_shash_final(shash, sb->csum);

		/* One reference for us, and we leave it for the caller */
		bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
			      BTRFS_SUPER_INFO_SIZE);
		if (!bh) {
		page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
					   GFP_NOFS);
		if (!page) {
			btrfs_err(device->fs_info,
			    "couldn't get super buffer head for bytenr %llu",
			    "couldn't get super block page for bytenr %llu",
			    bytenr);
			errors++;
			continue;
		}

		memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
		/* Bump the refcount for wait_dev_supers() */
		get_page(page);

		/* one reference for submit_bh */
		get_bh(bh);
		disk_super = page_address(page);
		memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);

		set_buffer_uptodate(bh);
		lock_buffer(bh);
		bh->b_end_io = btrfs_end_buffer_write_sync;
		bh->b_private = device;
		/*
		 * Directly use bios here instead of relying on the page cache
		 * to do I/O, so we don't lose the ability to do integrity
		 * checking.
		 */
		bio = bio_alloc(GFP_NOFS, 1);
		bio_set_dev(bio, device->bdev);
		bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
		bio->bi_private = device;
		bio->bi_end_io = btrfs_end_super_write;
		__bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
			       offset_in_page(bytenr));

		/*
		 * we fua the first super.  The others we allow
		 * to go down lazy.
		 * We FUA only the first super block.  The others we allow to
		 * go down lazy and there's a short window where the on-disk
		 * copies might still contain the older version.
		 */
		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
		bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO;
		if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
			op_flags |= REQ_FUA;
		ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
		if (ret)
			errors++;
			bio->bi_opf |= REQ_FUA;

		btrfsic_submit_bio(bio);
	}
	return errors < i ? 0 : -1;
}
@@ -3550,12 +3568,11 @@ static int write_dev_supers(struct btrfs_device *device,
 * Wait for write completion of superblocks done by write_dev_supers,
 * @max_mirrors same for write and wait phases.
 *
 * Return number of errors when buffer head is not found or not marked up to
 * Return number of errors when page is not found or not marked up to
 * date.
 */
static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
{
	struct buffer_head *bh;
	int i;
	int errors = 0;
	bool primary_failed = false;
@@ -3565,32 +3582,34 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
		max_mirrors = BTRFS_SUPER_MIRROR_MAX;

	for (i = 0; i < max_mirrors; i++) {
		struct page *page;

		bytenr = btrfs_sb_offset(i);
		if (bytenr + BTRFS_SUPER_INFO_SIZE >=
		    device->commit_total_bytes)
			break;

		bh = __find_get_block(device->bdev,
				      bytenr / BTRFS_BDEV_BLOCKSIZE,
				      BTRFS_SUPER_INFO_SIZE);
		if (!bh) {
		page = find_get_page(device->bdev->bd_inode->i_mapping,
				     bytenr >> PAGE_SHIFT);
		if (!page) {
			errors++;
			if (i == 0)
				primary_failed = true;
			continue;
		}
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
		/* Page is submitted locked and unlocked once the IO completes */
		wait_on_page_locked(page);
		if (PageError(page)) {
			errors++;
			if (i == 0)
				primary_failed = true;
		}

		/* drop our reference */
		brelse(bh);
		/* Drop our reference */
		put_page(page);

		/* drop the reference from the writing run */
		brelse(bh);
		/* Drop the reference from the writing run */
		put_page(page);
	}

	/* log error, force error return */