Commit 3b9351f0 authored by Jens Axboe

Merge tag 'nvme-5.11-20201202' of git://git.infradead.org/nvme into for-5.11/drivers

Pull NVMe updates from Christoph:

"nvme updates for 5.11

 - nvmet passthrough improvements (Chaitanya Kulkarni)
 - fcloop error injection support (James Smart)
 - read-only support for zoned namespaces without Zone Append
   (Javier González)
 - improve some error messages (Minwoo Im)
 - reject I/O to offline fabrics namespaces (Victor Gladkov)
 - PCI queue allocation cleanups (Niklas Schnelle)
 - remove an unused allocation in nvmet (Amit Engel)
 - a Kconfig spelling fix (Colin Ian King)
 - nvme_req_qid simplification (Baolin Wang)"

* tag 'nvme-5.11-20201202' of git://git.infradead.org/nvme: (23 commits)
  nvme: export zoned namespaces without Zone Append support read-only
  nvme: rename bdev operations
  nvme: rename controller base dev_t char device
  nvme: remove unnecessary return values
  nvme: print a warning for when listing active namespaces fails
  nvme: improve an error message on Identify failure
  nvme-fabrics: reject I/O to offline device
  nvmet: fix a spelling mistake "incuding" -> "including" in Kconfig
  nvmet: make sure discovery change log event is protected
  nvmet: remove unused ctrl->cqs
  nvme-pci: don't allocate unused I/O queues
  nvme-pci: drop min() from nr_io_queues assignment
  nvmet: use inline bio for passthru fast path
  nvmet: use blk_rq_bio_prep instead of blk_rq_append_bio
  nvmet: remove op_flags for passthru commands
  nvme: split nvme_alloc_request()
  block: move blk_rq_bio_prep() to linux/blk-mq.h
  nvmet: add passthru io timeout value attr
  nvmet: add passthru admin timeout value attr
  nvme: use consistent macro name for timeout
  ...
parents 48332ff2 2f4c9ba2
block/blk.h +0 −12
@@ -91,18 +91,6 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
 	return __bvec_gap_to_prev(q, bprv, offset);
 }
 
-static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
-		unsigned int nr_segs)
-{
-	rq->nr_phys_segments = nr_segs;
-	rq->__data_len = bio->bi_iter.bi_size;
-	rq->bio = rq->biotail = bio;
-	rq->ioprio = bio_prio(bio);
-
-	if (bio->bi_disk)
-		rq->rq_disk = bio->bi_disk;
-}
-
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
 bool __bio_integrity_endio(struct bio *);
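
The helper removed here is not deleted outright: per "block: move blk_rq_bio_prep() to linux/blk-mq.h" in the shortlog, it moves into <linux/blk-mq.h> so drivers can attach a fully built bio to a request directly, which the nvmet passthru fast path relies on. Below is a minimal sketch of that pattern; the context struct, inline bvec count, and function names are hypothetical, not the actual nvmet code.

#include <linux/bio.h>
#include <linux/blk-mq.h>

#define EXAMPLE_INLINE_BVECS	8	/* assumed small-I/O threshold */

/* Hypothetical per-request context embedding an inline bio. */
struct example_req_ctx {
	struct bio	inline_bio;
	struct bio_vec	inline_bvec[EXAMPLE_INLINE_BVECS];
};

/*
 * Small transfers reuse the embedded bio and bvec array instead of
 * calling bio_alloc(); either way the bio is attached to the request
 * with blk_rq_bio_prep() rather than the heavier blk_rq_append_bio().
 */
static void example_attach_bio(struct example_req_ctx *ctx,
		struct request *rq, unsigned int nr_segs)
{
	struct bio *bio;

	if (nr_segs <= EXAMPLE_INLINE_BVECS) {
		bio = &ctx->inline_bio;
		bio_init(bio, ctx->inline_bvec, EXAMPLE_INLINE_BVECS);
	} else {
		bio = bio_alloc(GFP_KERNEL, nr_segs);
		bio->bi_end_io = bio_put;
	}

	/* ... fill the bio with bio_add_page() here ... */

	blk_rq_bio_prep(rq, bio, nr_segs);
}
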
drivers/nvme/host/core.c +112 −38
@@ -85,7 +85,7 @@ static LIST_HEAD(nvme_subsystems);
 static DEFINE_MUTEX(nvme_subsystems_lock);
 
 static DEFINE_IDA(nvme_instance_ida);
-static dev_t nvme_chr_devt;
+static dev_t nvme_ctrl_base_chr_devt;
 static struct class *nvme_class;
 static struct class *nvme_subsys_class;
 
@@ -148,6 +148,38 @@ int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
 
+static void nvme_failfast_work(struct work_struct *work)
+{
+	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+			struct nvme_ctrl, failfast_work);
+
+	if (ctrl->state != NVME_CTRL_CONNECTING)
+		return;
+
+	set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+	dev_info(ctrl->device, "failfast expired\n");
+	nvme_kick_requeue_lists(ctrl);
+}
+
+static inline void nvme_start_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (!ctrl->opts || ctrl->opts->fast_io_fail_tmo == -1)
+		return;
+
+	schedule_delayed_work(&ctrl->failfast_work,
+			      ctrl->opts->fast_io_fail_tmo * HZ);
+}
+
+static inline void nvme_stop_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (!ctrl->opts)
+		return;
+
+	cancel_delayed_work_sync(&ctrl->failfast_work);
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+}
+
+
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 {
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -433,8 +465,17 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	}
 
 	spin_unlock_irqrestore(&ctrl->lock, flags);
-	if (changed && ctrl->state == NVME_CTRL_LIVE)
+	if (!changed)
+		return false;
+
+	if (ctrl->state == NVME_CTRL_LIVE) {
+		if (old_state == NVME_CTRL_CONNECTING)
+			nvme_stop_failfast_work(ctrl);
 		nvme_kick_requeue_lists(ctrl);
+	} else if (ctrl->state == NVME_CTRL_CONNECTING &&
+		old_state == NVME_CTRL_RESETTING) {
+		nvme_start_failfast_work(ctrl);
+	}
 	return changed;
 }
 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
@@ -518,29 +559,49 @@ static inline void nvme_clear_nvme_request(struct request *req)
 	}
 }
 
-struct request *nvme_alloc_request(struct request_queue *q,
-		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
+static inline unsigned int nvme_req_op(struct nvme_command *cmd)
 {
-	unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
-	struct request *req;
-
-	if (qid == NVME_QID_ANY) {
-		req = blk_mq_alloc_request(q, op, flags);
-	} else {
-		req = blk_mq_alloc_request_hctx(q, op, flags,
-				qid ? qid - 1 : 0);
-	}
-	if (IS_ERR(req))
-		return req;
+	return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
+}
 
+static inline void nvme_init_request(struct request *req,
+		struct nvme_command *cmd)
+{
+	if (req->q->queuedata)
+		req->timeout = NVME_IO_TIMEOUT;
+	else /* no queuedata implies admin queue */
+		req->timeout = NVME_ADMIN_TIMEOUT;
+
 	req->cmd_flags |= REQ_FAILFAST_DRIVER;
 	nvme_clear_nvme_request(req);
 	nvme_req(req)->cmd = cmd;
+}
 
+struct request *nvme_alloc_request(struct request_queue *q,
+		struct nvme_command *cmd, blk_mq_req_flags_t flags)
+{
+	struct request *req;
+
+	req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags);
+	if (!IS_ERR(req))
+		nvme_init_request(req, cmd);
 	return req;
 }
 EXPORT_SYMBOL_GPL(nvme_alloc_request);
 
+struct request *nvme_alloc_request_qid(struct request_queue *q,
+		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
+{
+	struct request *req;
+
+	req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags,
+			qid ? qid - 1 : 0);
+	if (!IS_ERR(req))
+		nvme_init_request(req, cmd);
+	return req;
+}
+EXPORT_SYMBOL_GPL(nvme_alloc_request_qid);
+
 static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
 {
 	struct nvme_command c;
@@ -897,11 +958,15 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	struct request *req;
 	int ret;
 
-	req = nvme_alloc_request(q, cmd, flags, qid);
+	if (qid == NVME_QID_ANY)
+		req = nvme_alloc_request(q, cmd, flags);
+	else
+		req = nvme_alloc_request_qid(q, cmd, flags, qid);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+	if (timeout)
+		req->timeout = timeout;
 
 	if (buffer && bufflen) {
 		ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
@@ -1067,11 +1132,12 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 	void *meta = NULL;
 	int ret;
 
-	req = nvme_alloc_request(q, cmd, 0, NVME_QID_ANY);
+	req = nvme_alloc_request(q, cmd, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+	if (timeout)
+		req->timeout = timeout;
 	nvme_req(req)->flags |= NVME_REQ_USERCMD;
 
 	if (ubuffer && bufflen) {
@@ -1141,8 +1207,8 @@ static int nvme_keep_alive(struct nvme_ctrl *ctrl)
 {
 	struct request *rq;
 
-	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED,
-			NVME_QID_ANY);
+	rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
+			BLK_MQ_REQ_RESERVED);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -1302,7 +1368,8 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
 				      NVME_IDENTIFY_DATA_SIZE);
 	if (status) {
 		dev_warn(ctrl->device,
-			"Identify Descriptors failed (%d)\n", status);
+			"Identify Descriptors failed (nsid=%u, status=0x%x)\n",
+			nsid, status);
 		goto free_data;
 	}
 
@@ -2058,7 +2125,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	nvme_config_discard(disk, ns);
 	nvme_config_write_zeroes(disk, ns);
 
-	if (id->nsattr & NVME_NS_ATTR_RO)
+	if ((id->nsattr & NVME_NS_ATTR_RO) ||
+	    test_bit(NVME_NS_FORCE_RO, &ns->flags))
 		set_disk_ro(disk, true);
 }
 
@@ -2261,13 +2329,13 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 	cmd.common.cdw10 = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8);
 	cmd.common.cdw11 = cpu_to_le32(len);
 
-	return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len,
-				      ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0, false);
+	return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, 0,
+			NVME_QID_ANY, 1, 0, false);
 }
 EXPORT_SYMBOL_GPL(nvme_sec_submit);
 #endif /* CONFIG_BLK_SED_OPAL */
 
-static const struct block_device_operations nvme_fops = {
+static const struct block_device_operations nvme_bdev_ops = {
 	.owner		= THIS_MODULE,
 	.ioctl		= nvme_ioctl,
 	.compat_ioctl	= nvme_compat_ioctl,
@@ -3275,7 +3343,7 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
 {
 	struct gendisk *disk = dev_to_disk(dev);
 
-	if (disk->fops == &nvme_fops)
+	if (disk->fops == &nvme_bdev_ops)
 		return nvme_get_ns_from_dev(dev)->head;
 	else
 		return disk->private_data;
@@ -3384,7 +3452,7 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
 	}
 #ifdef CONFIG_NVME_MULTIPATH
 	if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
-		if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
+		if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */
 			return 0;
 		if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
 			return 0;
@@ -3805,7 +3873,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	struct gendisk *disk;
 	struct nvme_id_ns *id;
 	char disk_name[DISK_NAME_LEN];
-	int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT, ret;
+	int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT;
 
 	if (nvme_identify_ns(ctrl, nsid, ids, &id))
 		return;
@@ -3829,8 +3897,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	ns->ctrl = ctrl;
 	kref_init(&ns->kref);
 
-	ret = nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED);
-	if (ret)
+	if (nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED))
 		goto out_free_queue;
 	nvme_set_disk_name(disk_name, ns, ctrl, &flags);
 
@@ -3838,7 +3905,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 	if (!disk)
 		goto out_unlink_ns;
 
-	disk->fops = &nvme_fops;
+	disk->fops = &nvme_bdev_ops;
 	disk->private_data = ns;
 	disk->queue = ns->queue;
 	disk->flags = flags;
@@ -3849,8 +3916,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 		goto out_put_disk;
 
 	if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
-		ret = nvme_nvm_register(ns, disk_name, node);
-		if (ret) {
+		if (nvme_nvm_register(ns, disk_name, node)) {
 			dev_warn(ctrl->device, "LightNVM init failure\n");
 			goto out_put_disk;
 		}
@@ -4043,8 +4109,11 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)

 		ret = nvme_submit_sync_cmd(ctrl->admin_q, &cmd, ns_list,
 					    NVME_IDENTIFY_DATA_SIZE);
-		if (ret)
+		if (ret) {
+			dev_warn(ctrl->device,
+				"Identify NS List failed (status=0x%x)\n", ret);
 			goto free;
+		}
 
 		for (i = 0; i < nr_entries; i++) {
 			u32 nsid = le32_to_cpu(ns_list[i]);
@@ -4347,6 +4416,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
 	nvme_mpath_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
+	nvme_stop_failfast_work(ctrl);
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fw_act_work);
 }
@@ -4412,6 +4482,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	int ret;
 
 	ctrl->state = NVME_CTRL_NEW;
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
 	spin_lock_init(&ctrl->lock);
 	mutex_init(&ctrl->scan_lock);
 	INIT_LIST_HEAD(&ctrl->namespaces);
@@ -4428,6 +4499,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	init_waitqueue_head(&ctrl->state_wq);
 
 	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
+	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
 	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
 	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
 
@@ -4446,7 +4518,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,

 	device_initialize(&ctrl->ctrl_device);
 	ctrl->device = &ctrl->ctrl_device;
-	ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance);
+	ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
+			ctrl->instance);
 	ctrl->device->class = nvme_class;
 	ctrl->device->parent = ctrl->dev;
 	ctrl->device->groups = nvme_dev_attr_groups;
@@ -4655,7 +4728,8 @@ static int __init nvme_core_init(void)
 	if (!nvme_delete_wq)
 		goto destroy_reset_wq;
 
-	result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme");
+	result = alloc_chrdev_region(&nvme_ctrl_base_chr_devt, 0,
+			NVME_MINORS, "nvme");
 	if (result < 0)
 		goto destroy_delete_wq;
 
@@ -4676,7 +4750,7 @@ static int __init nvme_core_init(void)
 destroy_class:
 	class_destroy(nvme_class);
 unregister_chrdev:
-	unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
 destroy_delete_wq:
 	destroy_workqueue(nvme_delete_wq);
 destroy_reset_wq:
@@ -4691,7 +4765,7 @@ static void __exit nvme_core_exit(void)
 {
 	class_destroy(nvme_subsys_class);
 	class_destroy(nvme_class);
-	unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+	unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
 	destroy_workqueue(nvme_delete_wq);
 	destroy_workqueue(nvme_reset_wq);
 	destroy_workqueue(nvme_wq);
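
Taken together, the core.c hunks split the old four-argument nvme_alloc_request() into a queue-agnostic allocator plus an explicit nvme_alloc_request_qid(), with nvme_init_request() seeding req->timeout from the NVME_IO_TIMEOUT/NVME_ADMIN_TIMEOUT defaults so callers only override a nonzero timeout of their own. A sketch of a caller built on the split pair; the helper below is hypothetical, loosely modeled on the __nvme_submit_sync_cmd conversion above.

/* Hypothetical synchronous submit helper using the split allocators. */
static int example_submit_cmd(struct request_queue *q,
		struct nvme_command *cmd, int qid)
{
	struct request *req;
	int ret;

	if (qid == NVME_QID_ANY)
		req = nvme_alloc_request(q, cmd, 0);	/* blk-mq picks the queue */
	else
		req = nvme_alloc_request_qid(q, cmd, 0, qid);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* nvme_init_request() already set the admin/I/O timeout default. */
	blk_execute_rq(q, NULL, req, 0);
	ret = nvme_req(req)->status;
	blk_mq_free_request(req);
	return ret;
}
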
drivers/nvme/host/fabrics.c +22 −3
@@ -549,6 +549,7 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
 {
 	if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
 	    ctrl->state != NVME_CTRL_DEAD &&
+	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;
 
@@ -615,6 +616,7 @@ static const match_table_t opt_tokens = {
 	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
 	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
 	{ NVMF_OPT_TOS,			"tos=%d"		},
+	{ NVMF_OPT_FAIL_FAST_TMO,	"fast_io_fail_tmo=%d"	},
 	{ NVMF_OPT_ERR,			NULL			}
 };
 
@@ -634,6 +636,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
 	opts->kato = NVME_DEFAULT_KATO;
 	opts->duplicate_connect = false;
+	opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
 	opts->hdr_digest = false;
 	opts->data_digest = false;
 	opts->tos = -1; /* < 0 == use transport default */
@@ -754,6 +757,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
			ctrl_loss_tmo = token;
			break;
		case NVMF_OPT_FAIL_FAST_TMO:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}

			if (token >= 0)
				pr_warn("I/O fail on reconnect controller after %d sec\n",
					token);
			opts->fast_io_fail_tmo = token;
			break;
		case NVMF_OPT_HOSTNQN:
			if (opts->host) {
				pr_err("hostnqn already user-assigned: %s\n",
@@ -884,11 +898,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 		opts->nr_poll_queues = 0;
 		opts->duplicate_connect = true;
 	}
-	if (ctrl_loss_tmo < 0)
+	if (ctrl_loss_tmo < 0) {
 		opts->max_reconnects = -1;
-	else
+	} else {
 		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
 						opts->reconnect_delay);
+		if (ctrl_loss_tmo < opts->fast_io_fail_tmo)
+			pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n",
+				opts->fast_io_fail_tmo, ctrl_loss_tmo);
+	}
 
 	if (!opts->host) {
 		kref_get(&nvmf_default_host->ref);
@@ -988,7 +1006,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
 				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
 				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
-				 NVMF_OPT_DISABLE_SQFLOW)
+				 NVMF_OPT_DISABLE_SQFLOW |\
+				 NVMF_OPT_FAIL_FAST_TMO)
 
 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf)
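
In practice the new token arrives through the connect string written to /dev/nvme-fabrics, e.g. (illustrative values and placeholder NQN) "transport=tcp,traddr=192.168.1.10,trsvcid=4420,nqn=<subsys-nqn>,fast_io_fail_tmo=30". While a controller is reconnecting, nvmf_fail_nonready_command() keeps returning BLK_STS_RESOURCE so incoming I/O is requeued; once the failfast timer added in core.c fires and sets NVME_CTRL_FAILFAST_EXPIRED, the extra test above lets that I/O fail immediately instead of waiting out the full ctrl_loss_tmo window.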
drivers/nvme/host/fabrics.h +5 −0
@@ -15,6 +15,8 @@
 #define NVMF_DEF_RECONNECT_DELAY	10
 /* default to 600 seconds of reconnect attempts before giving up */
 #define NVMF_DEF_CTRL_LOSS_TMO		600
+/* default is -1: the fail fast mechanism is disabled  */
+#define NVMF_DEF_FAIL_FAST_TMO		-1
 
 /*
  * Define a host as seen by the target.  We allocate one at boot, but also
@@ -56,6 +58,7 @@ enum {
 	NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
 	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
 	NVMF_OPT_TOS		= 1 << 19,
+	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
 };
 
 /**
@@ -89,6 +92,7 @@ enum {
  * @nr_write_queues: number of queues for write I/O
  * @nr_poll_queues: number of queues for polling I/O
  * @tos: type of service
+ * @fast_io_fail_tmo: Fast I/O fail timeout in seconds
  */
 struct nvmf_ctrl_options {
 	unsigned		mask;
@@ -111,6 +115,7 @@ struct nvmf_ctrl_options {
 	unsigned int		nr_write_queues;
 	unsigned int		nr_poll_queues;
 	int			tos;
+	int			fast_io_fail_tmo;
 };
 
 /*
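
A worked example of how these defaults interact: with NVMF_DEF_RECONNECT_DELAY of 10 and NVMF_DEF_CTRL_LOSS_TMO of 600, fabrics.c computes max_reconnects = DIV_ROUND_UP(600, 10) = 60 attempts. The fast_io_fail_tmo default of -1 means nvme_start_failfast_work() never arms the timer, preserving the old wait-for-reconnect behavior, while an explicit fast_io_fail_tmo=30 fails pending I/O after 30 seconds even though reconnect attempts continue; only a value larger than ctrl_loss_tmo trips the new pr_warn() in nvmf_parse_options().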
drivers/nvme/host/fc.c +1 −1
@@ -3479,7 +3479,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 			    ctrl->lport->ops->fcprqst_priv_sz);
 	ctrl->admin_tag_set.driver_data = ctrl;
 	ctrl->admin_tag_set.nr_hw_queues = 1;
-	ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
+	ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT;
 	ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED;
 
 	ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);