Commit 67f2a930 authored by Linus Torvalds

Merge tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix DM cache metadata to verify that a cache has blocks before
   trying to continue with operations that require them.

 - Fix bio-based DM core's dm_make_request() to properly impose device
   limits on individual bios by making use of blk_queue_split().

 - Fix a long-standing race in which DM thinp notified userspace of a
   thin-pool mode change before the change was actually made (a minimal
   sketch of the corrected ordering follows this list).

 - Fix the zoned target's bio completion handling; this is a fairly
   invasive fix at this stage but it is localized to the zoned target.
   Any zoned target users will benefit from this fix.
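The thin-pool fix is, at bottom, an ordering change. The following minimal userspace C sketch (hypothetical names, not the driver's code) models why the order matters: if the event is sent before the state is updated, a listener that queries pool status in response to the event still sees the stale mode.

#include <stdio.h>

/* Hypothetical stand-ins for the pool state and the dm event. */
enum pool_mode { PM_WRITE, PM_OUT_OF_DATA_SPACE, PM_READ_ONLY, PM_FAIL };

static enum pool_mode current_mode = PM_WRITE;

/* Models userspace reacting to an event by querying pool status. */
static void notify_userspace(void)
{
	printf("event: pool mode is now %d\n", current_mode);
}

static void set_pool_mode(enum pool_mode new_mode)
{
	enum pool_mode old_mode = current_mode;

	/*
	 * Buggy ordering (what the race fix removes): notifying first
	 * means a listener querying status on the event sees old_mode.
	 *
	 *	notify_userspace();
	 *	current_mode = new_mode;
	 *
	 * Fixed ordering: make the change, then send the event.
	 */
	current_mode = new_mode;
	if (old_mode != new_mode)
		notify_userspace();
}

int main(void)
{
	set_pool_mode(PM_OUT_OF_DATA_SPACE);	/* prints the new mode */
	return 0;
}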

* tag 'for-4.20/dm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm thin: bump target version
  dm thin: send event about thin-pool state change _after_ making it
  dm zoned: Fix target BIO completion handling
  dm: call blk_queue_split() to impose device limits on bios
  dm cache metadata: verify cache has blocks in blocks_are_clean_separate_dirty()
parents 14a996c3 2af6c070
drivers/md/dm-cache-metadata.c +4 −0
@@ -930,6 +930,10 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
	bool dirty_flag;
	*result = true;

+	if (from_cblock(cmd->cache_blocks) == 0)
+		/* Nothing to do */
+		return 0;
+
	r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
				   from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
	if (r) {
drivers/md/dm-thin.c +37 −35
@@ -195,7 +195,7 @@ static void throttle_unlock(struct throttle *t)
struct dm_thin_new_mapping;

/*
- * The pool runs in 4 modes.  Ordered in degraded order for comparisons.
+ * The pool runs in various modes.  Ordered in degraded order for comparisons.
 */
enum pool_mode {
	PM_WRITE,		/* metadata may be changed */
@@ -282,9 +282,38 @@ struct pool {
	mempool_t mapping_pool;
};

-static enum pool_mode get_pool_mode(struct pool *pool);
static void metadata_operation_failed(struct pool *pool, const char *op, int r);

+static enum pool_mode get_pool_mode(struct pool *pool)
+{
+	return pool->pf.mode;
+}
+
+static void notify_of_pool_mode_change(struct pool *pool)
+{
+	const char *descs[] = {
+		"write",
+		"out-of-data-space",
+		"read-only",
+		"read-only",
+		"fail"
+	};
+	const char *extra_desc = NULL;
+	enum pool_mode mode = get_pool_mode(pool);
+
+	if (mode == PM_OUT_OF_DATA_SPACE) {
+		if (!pool->pf.error_if_no_space)
+			extra_desc = " (queue IO)";
+		else
+			extra_desc = " (error IO)";
+	}
+
+	dm_table_event(pool->ti->table);
+	DMINFO("%s: switching pool to %s%s mode",
+	       dm_device_name(pool->pool_md),
+	       descs[(int)mode], extra_desc ? : "");
+}
+
/*
 * Target context for a pool.
 */
@@ -2351,8 +2380,6 @@ static void do_waker(struct work_struct *ws)
	queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
}

-static void notify_of_pool_mode_change_to_oods(struct pool *pool);
-
/*
 * We're holding onto IO to allow userland time to react.  After the
 * timeout either the pool will have been resized (and thus back in
@@ -2365,7 +2392,7 @@ static void do_no_space_timeout(struct work_struct *ws)

	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
		pool->pf.error_if_no_space = true;
-		notify_of_pool_mode_change_to_oods(pool);
+		notify_of_pool_mode_change(pool);
		error_retry_list_with_code(pool, BLK_STS_NOSPC);
	}
}
@@ -2433,26 +2460,6 @@ static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))

/*----------------------------------------------------------------*/

-static enum pool_mode get_pool_mode(struct pool *pool)
-{
-	return pool->pf.mode;
-}
-
-static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode)
-{
-	dm_table_event(pool->ti->table);
-	DMINFO("%s: switching pool to %s mode",
-	       dm_device_name(pool->pool_md), new_mode);
-}
-
-static void notify_of_pool_mode_change_to_oods(struct pool *pool)
-{
-	if (!pool->pf.error_if_no_space)
-		notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
-	else
-		notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
-}
-
static bool passdown_enabled(struct pool_c *pt)
{
	return pt->adjusted_pf.discard_passdown;
@@ -2501,8 +2508,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)

	switch (new_mode) {
	case PM_FAIL:
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change(pool, "failure");
		dm_pool_metadata_read_only(pool->pmd);
		pool->process_bio = process_bio_fail;
		pool->process_discard = process_bio_fail;
@@ -2516,8 +2521,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)

	case PM_OUT_OF_METADATA_SPACE:
	case PM_READ_ONLY:
-		if (!is_read_only_pool_mode(old_mode))
-			notify_of_pool_mode_change(pool, "read-only");
		dm_pool_metadata_read_only(pool->pmd);
		pool->process_bio = process_bio_read_only;
		pool->process_discard = process_bio_success;
@@ -2538,8 +2541,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
		 * alarming rate.  Adjust your low water mark if you're
		 * frequently seeing this mode.
		 */
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change_to_oods(pool);
		pool->out_of_data_space = true;
		pool->process_bio = process_bio_read_only;
		pool->process_discard = process_discard_bio;
@@ -2552,8 +2553,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
		break;

	case PM_WRITE:
-		if (old_mode != new_mode)
-			notify_of_pool_mode_change(pool, "write");
		if (old_mode == PM_OUT_OF_DATA_SPACE)
			cancel_delayed_work_sync(&pool->no_space_timeout);
		pool->out_of_data_space = false;
@@ -2573,6 +2572,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
	 * doesn't cause an unexpected mode transition on resume.
	 */
	pt->adjusted_pf.mode = new_mode;
+
+	if (old_mode != new_mode)
+		notify_of_pool_mode_change(pool);
}

static void abort_transaction(struct pool *pool)
@@ -4023,7 +4025,7 @@ static struct target_type pool_target = {
	.name = "thin-pool",
	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
		    DM_TARGET_IMMUTABLE,
-	.version = {1, 20, 0},
+	.version = {1, 21, 0},
	.module = THIS_MODULE,
	.ctr = pool_ctr,
	.dtr = pool_dtr,
@@ -4397,7 +4399,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)

static struct target_type thin_target = {
	.name = "thin",
-	.version = {1, 20, 0},
+	.version = {1, 21, 0},
	.module	= THIS_MODULE,
	.ctr = thin_ctr,
	.dtr = thin_dtr,
drivers/md/dm-zoned-target.c +38 −84
@@ -20,7 +20,6 @@ struct dmz_bioctx {
	struct dm_zone		*zone;
	struct bio		*bio;
	refcount_t		ref;
-	blk_status_t		status;
};

/*
@@ -78,65 +77,66 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
{
	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));

-	if (bioctx->status == BLK_STS_OK && status != BLK_STS_OK)
-		bioctx->status = status;
-	bio_endio(bio);
+	if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
+		bio->bi_status = status;
+
+	if (refcount_dec_and_test(&bioctx->ref)) {
+		struct dm_zone *zone = bioctx->zone;
+
+		if (zone) {
+			if (bio->bi_status != BLK_STS_OK &&
+			    bio_op(bio) == REQ_OP_WRITE &&
+			    dmz_is_seq(zone))
+				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
+			dmz_deactivate_zone(zone);
+		}
+		bio_endio(bio);
+	}
}

/*
- * Partial clone read BIO completion callback. This terminates the
+ * Completion callback for an internally cloned target BIO. This terminates the
 * target BIO when there are no more references to its context.
 */
-static void dmz_read_bio_end_io(struct bio *bio)
+static void dmz_clone_endio(struct bio *clone)
{
-	struct dmz_bioctx *bioctx = bio->bi_private;
-	blk_status_t status = bio->bi_status;
+	struct dmz_bioctx *bioctx = clone->bi_private;
+	blk_status_t status = clone->bi_status;

-	bio_put(bio);
+	bio_put(clone);
	dmz_bio_endio(bioctx->bio, status);
}

/*
- * Issue a BIO to a zone. The BIO may only partially process the
+ * Issue a clone of a target BIO. The clone may only partially process the
 * original target BIO.
 */
-static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
-			       struct bio *bio, sector_t chunk_block,
-			       unsigned int nr_blocks)
+static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
+			  struct bio *bio, sector_t chunk_block,
+			  unsigned int nr_blocks)
{
	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-	sector_t sector;
	struct bio *clone;

-	/* BIO remap sector */
-	sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
-
-	/* If the read is not partial, there is no need to clone the BIO */
-	if (nr_blocks == dmz_bio_blocks(bio)) {
-		/* Setup and submit the BIO */
-		bio->bi_iter.bi_sector = sector;
-		refcount_inc(&bioctx->ref);
-		generic_make_request(bio);
-		return 0;
-	}
-
-	/* Partial BIO: we need to clone the BIO */
	clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
	if (!clone)
		return -ENOMEM;

-	/* Setup the clone */
-	clone->bi_iter.bi_sector = sector;
+	bio_set_dev(clone, dmz->dev->bdev);
+	clone->bi_iter.bi_sector =
+		dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
	clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
-	clone->bi_end_io = dmz_read_bio_end_io;
+	clone->bi_end_io = dmz_clone_endio;
	clone->bi_private = bioctx;

	bio_advance(bio, clone->bi_iter.bi_size);

-	/* Submit the clone */
	refcount_inc(&bioctx->ref);
	generic_make_request(clone);

+	if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
+		zone->wp_block += nr_blocks;
+
	return 0;
}

@@ -214,7 +214,7 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
		if (nr_blocks) {
			/* Valid blocks found: read them */
			nr_blocks = min_t(unsigned int, nr_blocks, end_block - chunk_block);
-			ret = dmz_submit_read_bio(dmz, rzone, bio, chunk_block, nr_blocks);
+			ret = dmz_submit_bio(dmz, rzone, bio, chunk_block, nr_blocks);
			if (ret)
				return ret;
			chunk_block += nr_blocks;
@@ -228,25 +228,6 @@ static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
	return 0;
}

-/*
- * Issue a write BIO to a zone.
- */
-static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone,
-				 struct bio *bio, sector_t chunk_block,
-				 unsigned int nr_blocks)
-{
-	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-
-	/* Setup and submit the BIO */
-	bio_set_dev(bio, dmz->dev->bdev);
-	bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
-	refcount_inc(&bioctx->ref);
-	generic_make_request(bio);
-
-	if (dmz_is_seq(zone))
-		zone->wp_block += nr_blocks;
-}
-
/*
 * Write blocks directly in a data zone, at the write pointer.
 * If a buffer zone is assigned, invalidate the blocks written
@@ -265,7 +246,9 @@ static int dmz_handle_direct_write(struct dmz_target *dmz,
		return -EROFS;

	/* Submit write */
-	dmz_submit_write_bio(dmz, zone, bio, chunk_block, nr_blocks);
+	ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks);
+	if (ret)
+		return ret;

	/*
	 * Validate the blocks in the data zone and invalidate
@@ -301,7 +284,9 @@ static int dmz_handle_buffered_write(struct dmz_target *dmz,
		return -EROFS;

	/* Submit write */
-	dmz_submit_write_bio(dmz, bzone, bio, chunk_block, nr_blocks);
+	ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks);
+	if (ret)
+		return ret;

	/*
	 * Validate the blocks in the buffer zone
@@ -600,7 +585,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
	bioctx->zone = NULL;
	bioctx->bio = bio;
	refcount_set(&bioctx->ref, 1);
-	bioctx->status = BLK_STS_OK;

	/* Set the BIO pending in the flush list */
	if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
@@ -623,35 +607,6 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
	return DM_MAPIO_SUBMITTED;
}

-/*
- * Completed target BIO processing.
- */
-static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
-{
-	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
-
-	if (bioctx->status == BLK_STS_OK && *error)
-		bioctx->status = *error;
-
-	if (!refcount_dec_and_test(&bioctx->ref))
-		return DM_ENDIO_INCOMPLETE;
-
-	/* Done */
-	bio->bi_status = bioctx->status;
-
-	if (bioctx->zone) {
-		struct dm_zone *zone = bioctx->zone;
-
-		if (*error && bio_op(bio) == REQ_OP_WRITE) {
-			if (dmz_is_seq(zone))
-				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
-		}
-		dmz_deactivate_zone(zone);
-	}
-
-	return DM_ENDIO_DONE;
-}
-
/*
 * Get zoned device information.
 */
@@ -946,7 +901,6 @@ static struct target_type dmz_type = {
	.ctr		 = dmz_ctr,
	.dtr		 = dmz_dtr,
	.map		 = dmz_map,
-	.end_io		 = dmz_end_io,
	.io_hints	 = dmz_io_hints,
	.prepare_ioctl	 = dmz_prepare_ioctl,
	.postsuspend	 = dmz_suspend,
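The dm-zoned change above drops the target's ->end_io hook and moves completion into the target itself via reference counting: the parent BIO's context starts with one reference, each submitted clone takes another, and whichever completion drops the last reference records the first error and finishes the parent. A minimal userspace C analogue of that pattern follows; req_ctx and req_put are illustrative names, not kernel APIs, and the sketch is single-threaded.

#include <stdatomic.h>
#include <stdio.h>

/* Stands in for struct dmz_bioctx: one context per parent request,
 * shared by all clones issued on its behalf. */
struct req_ctx {
	atomic_int ref;		/* one ref per in-flight clone + submitter */
	int status;		/* first error wins; 0 means success */
};

static void req_complete(struct req_ctx *ctx)
{
	printf("parent completes with status %d\n", ctx->status);
}

/* Called once per clone completion and once by the submitter. */
static void req_put(struct req_ctx *ctx, int status)
{
	if (status && !ctx->status)
		ctx->status = status;
	if (atomic_fetch_sub(&ctx->ref, 1) == 1)
		req_complete(ctx);	/* last reference dropped */
}

int main(void)
{
	struct req_ctx ctx = { 1, 0 };	/* submitter holds the first ref */

	atomic_fetch_add(&ctx.ref, 1);	/* issue clone 1 */
	atomic_fetch_add(&ctx.ref, 1);	/* issue clone 2 */

	req_put(&ctx, 0);	/* clone 1 succeeds */
	req_put(&ctx, -5);	/* clone 2 fails; error is recorded */
	req_put(&ctx, 0);	/* submitter's put: parent completes, status -5 */
	return 0;
}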
drivers/md/dm.c +2 −0
@@ -1593,6 +1593,8 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
		return ret;
	}

+	blk_queue_split(md->queue, &bio);
+
	init_clone_info(&ci, md, map, bio);

	if (bio->bi_opf & REQ_PREFLUSH) {
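The two added lines above are the whole dm.c fix: split the incoming bio against the queue limits before any processing begins, rather than relying on later stages. As a rough userspace illustration of what blk_queue_split() accomplishes for a bio-based driver (the toy types and the 128-sector limit are invented for the example):

#include <stdio.h>

/* Toy model of an incoming I/O and a device's per-request limit. */
struct toy_bio {
	unsigned long long sector;
	unsigned int nr_sectors;
};

static void process_chunk(struct toy_bio *bio)
{
	printf("process %u sectors at %llu\n", bio->nr_sectors, bio->sector);
}

static void toy_make_request(struct toy_bio *bio, unsigned int max_sectors)
{
	/* Split first, as the dm.c fix does, so no later stage ever
	 * sees an I/O that exceeds the device limits. */
	while (bio->nr_sectors > max_sectors) {
		struct toy_bio split = { bio->sector, max_sectors };

		process_chunk(&split);
		bio->sector += max_sectors;
		bio->nr_sectors -= max_sectors;
	}
	process_chunk(bio);
}

int main(void)
{
	struct toy_bio bio = { 0, 300 };

	toy_make_request(&bio, 128);	/* yields 128 + 128 + 44 sectors */
	return 0;
}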