Commit b3b25b1d authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-4.16/dm-fixes-2' of...

Merge tag 'for-4.16/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix a false-positive uninitialized-variable warning in dm bufio

 - Fix DM's passthrough ioctl support to be race free against an
   underlying device being removed.

 - Fix a corner case in DM raid resync reporting when the raid becomes
   degraded during resync; otherwise automated raid repair will fail.

 - A few DM multipath fixes to make the non-SCSI optimizations that were
   introduced during the 4.16 merge useful for all non-SCSI devices,
   rather than narrowly defining this non-SCSI mode in terms of "nvme".

   This allows the removal of "queue_mode nvme", which never really
   needed to be introduced. Instead, DM core will determine internally
   whether nvme-specific IO submission optimizations are possible, and
   DM multipath will only perform SCSI-specific device handler
   operations if SCSI is in use.

* tag 'for-4.16/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm table: allow upgrade from bio-based to specialized bio-based variant
  dm mpath: remove unnecessary NVMe branching in favor of scsi_dh checks
  dm table: fix "nvme" test
  dm raid: fix incorrect sync_ratio when degraded
  dm: use blkdev_get rather than bdgrab when issuing pass-through ioctl
  dm bufio: avoid false-positive Wmaybe-uninitialized warning
parents 2f64e70c c934edad
Loading
Loading
Loading
Loading
+6 −10
Original line number Diff line number Diff line
@@ -386,9 +386,6 @@ static void __cache_size_refresh(void)
static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
			       enum data_mode *data_mode)
{
	unsigned noio_flag;
	void *ptr;

	if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
		*data_mode = DATA_MODE_SLAB;
		return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
@@ -412,18 +409,17 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
	 * all allocations done by this process (including pagetables) are done
	 * as if GFP_NOIO was specified.
	 */
	if (gfp_mask & __GFP_NORETRY) {
		unsigned noio_flag = memalloc_noio_save();
		void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);

	if (gfp_mask & __GFP_NORETRY)
		noio_flag = memalloc_noio_save();

	ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);

	if (gfp_mask & __GFP_NORETRY)
		memalloc_noio_restore(noio_flag);

		return ptr;
	}

	return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
}

/*
 * Free buffer's data.
 */
+29 −37
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include <linux/time.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dh.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
@@ -211,26 +212,14 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
		else
			m->queue_mode = DM_TYPE_REQUEST_BASED;

	} else if (m->queue_mode == DM_TYPE_BIO_BASED ||
		   m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
	} else if (m->queue_mode == DM_TYPE_BIO_BASED) {
		INIT_WORK(&m->process_queued_bios, process_queued_bios);

		if (m->queue_mode == DM_TYPE_BIO_BASED) {
		/*
		 * bio-based doesn't support any direct scsi_dh management;
		 * it just discovers if a scsi_dh is attached.
		 */
		set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
	}
	}

	if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
		set_bit(MPATHF_QUEUE_IO, &m->flags);
		atomic_set(&m->pg_init_in_progress, 0);
		atomic_set(&m->pg_init_count, 0);
		m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
		init_waitqueue_head(&m->pg_init_wait);
	}

	dm_table_set_type(ti->table, m->queue_mode);

@@ -337,14 +326,12 @@ static void __switch_pg(struct multipath *m, struct priority_group *pg)
{
	m->current_pg = pg;

	if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
		return;

	/* Must we initialise the PG first, and queue I/O till it's ready? */
	if (m->hw_handler_name) {
		set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
		set_bit(MPATHF_QUEUE_IO, &m->flags);
	} else {
		/* FIXME: not needed if no scsi_dh is attached */
		clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
		clear_bit(MPATHF_QUEUE_IO, &m->flags);
	}
@@ -385,7 +372,6 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
	unsigned bypassed = 1;

	if (!atomic_read(&m->nr_valid_paths)) {
		if (m->queue_mode != DM_TYPE_NVME_BIO_BASED)
		clear_bit(MPATHF_QUEUE_IO, &m->flags);
		goto failed;
	}
@@ -599,7 +585,7 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
	return pgpath;
}

static struct pgpath *__map_bio_nvme(struct multipath *m, struct bio *bio)
static struct pgpath *__map_bio_fast(struct multipath *m, struct bio *bio)
{
	struct pgpath *pgpath;
	unsigned long flags;
@@ -634,8 +620,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio,
{
	struct pgpath *pgpath;

	if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
		pgpath = __map_bio_nvme(m, bio);
	if (!m->hw_handler_name)
		pgpath = __map_bio_fast(m, bio);
	else
		pgpath = __map_bio(m, bio);

@@ -675,8 +661,7 @@ static void process_queued_io_list(struct multipath *m)
{
	if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
		dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
	else if (m->queue_mode == DM_TYPE_BIO_BASED ||
		 m->queue_mode == DM_TYPE_NVME_BIO_BASED)
	else if (m->queue_mode == DM_TYPE_BIO_BASED)
		queue_work(kmultipathd, &m->process_queued_bios);
}

@@ -838,6 +823,16 @@ retain:
			 */
			kfree(m->hw_handler_name);
			m->hw_handler_name = attached_handler_name;

			/*
			 * Init fields that are only used when a scsi_dh is attached
			 */
			if (!test_and_set_bit(MPATHF_QUEUE_IO, &m->flags)) {
				atomic_set(&m->pg_init_in_progress, 0);
				atomic_set(&m->pg_init_count, 0);
				m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
				init_waitqueue_head(&m->pg_init_wait);
			}
		}
	}

@@ -873,6 +868,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
	int r;
	struct pgpath *p;
	struct multipath *m = ti->private;
	struct scsi_device *sdev;

	/* we need at least a path arg */
	if (as->argc < 1) {
@@ -891,7 +887,9 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
		goto bad;
	}

	if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
	sdev = scsi_device_from_queue(bdev_get_queue(p->path.dev->bdev));
	if (sdev) {
		put_device(&sdev->sdev_gendev);
		INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
		r = setup_scsi_dh(p->path.dev->bdev, m, &ti->error);
		if (r) {
@@ -1001,8 +999,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
	if (!hw_argc)
		return 0;

	if (m->queue_mode == DM_TYPE_BIO_BASED ||
	    m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
	if (m->queue_mode == DM_TYPE_BIO_BASED) {
		dm_consume_args(as, hw_argc);
		DMERR("bio-based multipath doesn't allow hardware handler args");
		return 0;
@@ -1091,8 +1088,6 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)

			if (!strcasecmp(queue_mode_name, "bio"))
				m->queue_mode = DM_TYPE_BIO_BASED;
			else if (!strcasecmp(queue_mode_name, "nvme"))
				m->queue_mode = DM_TYPE_NVME_BIO_BASED;
			else if (!strcasecmp(queue_mode_name, "rq"))
				m->queue_mode = DM_TYPE_REQUEST_BASED;
			else if (!strcasecmp(queue_mode_name, "mq"))
@@ -1193,7 +1188,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
	ti->num_discard_bios = 1;
	ti->num_write_same_bios = 1;
	ti->num_write_zeroes_bios = 1;
	if (m->queue_mode == DM_TYPE_BIO_BASED || m->queue_mode == DM_TYPE_NVME_BIO_BASED)
	if (m->queue_mode == DM_TYPE_BIO_BASED)
		ti->per_io_data_size = multipath_per_bio_data_size();
	else
		ti->per_io_data_size = sizeof(struct dm_mpath_io);
@@ -1730,9 +1725,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
			case DM_TYPE_BIO_BASED:
				DMEMIT("queue_mode bio ");
				break;
			case DM_TYPE_NVME_BIO_BASED:
				DMEMIT("queue_mode nvme ");
				break;
			case DM_TYPE_MQ_REQUEST_BASED:
				DMEMIT("queue_mode mq ");
				break;
+4 −3
Original line number Diff line number Diff line
@@ -3408,9 +3408,10 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
		set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);

	} else {
		if (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
		if (!test_bit(MD_RECOVERY_INTR, &recovery) &&
		    (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
		     test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
		    test_bit(MD_RECOVERY_RUNNING, &recovery))
		     test_bit(MD_RECOVERY_RUNNING, &recovery)))
			r = mddev->curr_resync_completed;
		else
			r = mddev->recovery_cp;
+6 −10
Original line number Diff line number Diff line
@@ -942,17 +942,12 @@ static int dm_table_determine_type(struct dm_table *t)

	if (t->type != DM_TYPE_NONE) {
		/* target already set the table's type */
		if (t->type == DM_TYPE_BIO_BASED)
			return 0;
		else if (t->type == DM_TYPE_NVME_BIO_BASED) {
			if (!dm_table_does_not_support_partial_completion(t)) {
				DMERR("nvme bio-based is only possible with devices"
				      " that don't support partial completion");
				return -EINVAL;
			}
			/* Fallthru, also verify all devices are blk-mq */
		if (t->type == DM_TYPE_BIO_BASED) {
			/* possibly upgrade to a variant of bio-based */
			goto verify_bio_based;
		}
		BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
		BUG_ON(t->type == DM_TYPE_NVME_BIO_BASED);
		goto verify_rq_based;
	}

@@ -985,6 +980,7 @@ static int dm_table_determine_type(struct dm_table *t)
	}

	if (bio_based) {
verify_bio_based:
		/* We must use this table as bio-based */
		t->type = DM_TYPE_BIO_BASED;
		if (dm_table_supports_dax(t) ||
@@ -1755,7 +1751,7 @@ static int device_no_partial_completion(struct dm_target *ti, struct dm_dev *dev
	char b[BDEVNAME_SIZE];

	/* For now, NVMe devices are the only devices of this class */
	return (strncmp(bdevname(dev->bdev, b), "nvme", 3) == 0);
	return (strncmp(bdevname(dev->bdev, b), "nvme", 4) == 0);
}

static bool dm_table_does_not_support_partial_completion(struct dm_table *t)
+20 −15
Original line number Diff line number Diff line
@@ -458,7 +458,9 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
	return dm_get_geometry(md, geo);
}

static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
static char *_dm_claim_ptr = "I belong to device-mapper";

static int dm_get_bdev_for_ioctl(struct mapped_device *md,
				 struct block_device **bdev,
				 fmode_t *mode)
{
@@ -490,6 +492,10 @@ retry:
		goto out;

	bdgrab(*bdev);
	r = blkdev_get(*bdev, *mode, _dm_claim_ptr);
	if (r < 0)
		goto out;

	dm_put_live_table(md, srcu_idx);
	return r;

@@ -508,7 +514,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
	struct mapped_device *md = bdev->bd_disk->private_data;
	int r;

	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
	r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
	if (r < 0)
		return r;

@@ -528,7 +534,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,

	r =  __blkdev_driver_ioctl(bdev, mode, cmd, arg);
out:
	bdput(bdev);
	blkdev_put(bdev, mode);
	return r;
}

@@ -708,14 +714,13 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
static int open_table_device(struct table_device *td, dev_t dev,
			     struct mapped_device *md)
{
	static char *_claim_ptr = "I belong to device-mapper";
	struct block_device *bdev;

	int r;

	BUG_ON(td->dm_dev.bdev);

	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

@@ -3011,7 +3016,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
	fmode_t mode;
	int r;

	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
	r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
	if (r < 0)
		return r;

@@ -3021,7 +3026,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
	else
		r = -EOPNOTSUPP;

	bdput(bdev);
	blkdev_put(bdev, mode);
	return r;
}

@@ -3032,7 +3037,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
	fmode_t mode;
	int r;

	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
	r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
	if (r < 0)
		return r;

@@ -3042,7 +3047,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
	else
		r = -EOPNOTSUPP;

	bdput(bdev);
	blkdev_put(bdev, mode);
	return r;
}

@@ -3054,7 +3059,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
	fmode_t mode;
	int r;

	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
	r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
	if (r < 0)
		return r;

@@ -3064,7 +3069,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
	else
		r = -EOPNOTSUPP;

	bdput(bdev);
	blkdev_put(bdev, mode);
	return r;
}

@@ -3075,7 +3080,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
	fmode_t mode;
	int r;

	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
	r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
	if (r < 0)
		return r;

@@ -3085,7 +3090,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
	else
		r = -EOPNOTSUPP;

	bdput(bdev);
	blkdev_put(bdev, mode);
	return r;
}