Commit 5d220bcd authored by Jens Axboe

Merge branch 'nvme-5.9-rc' of git://git.infradead.org/nvme into block-5.9

Pull NVMe fixes from Sagi:

"- instance leak and io boundary fixes from Keith
 - fc locking fix from Christophe
 - various tcp/rdma reset during traffic fixes from Me
 - pci use-after-free fix from Tong
 - tcp target null deref fix from Ziye"

* 'nvme-5.9-rc' of git://git.infradead.org/nvme:
  nvme-pci: cancel nvme device request before disabling
  nvme: only use power of two io boundaries
  nvme: fix controller instance leak
  nvmet-fc: Fix a missed _irqsave version of spin_lock in 'nvmet_fc_fod_op_done()'
  nvme: Fix NULL dereference for pci nvme controllers
  nvme-rdma: fix reset hang if controller died in the middle of a reset
  nvme-rdma: fix timeout handler
  nvme-rdma: serialize controller teardown sequences
  nvme-tcp: fix reset hang if controller died in the middle of a reset
  nvme-tcp: fix timeout handler
  nvme-tcp: serialize controller teardown sequences
  nvme: have nvme_wait_freeze_timeout return if it timed out
  nvme-fabrics: don't check state NVME_CTRL_NEW for request acceptance
  nvmet-tcp: Fix NULL dereference when a connect data comes in h2cdata pdu
parents a433d721 7ad92f65
drivers/nvme/host/core.c +45 −11
@@ -2026,13 +2026,49 @@ static void nvme_update_disk_info(struct gendisk *disk,
	blk_mq_unfreeze_queue(disk->queue);
}

static inline bool nvme_first_scan(struct gendisk *disk)
{
	/* nvme_alloc_ns() scans the disk prior to adding it */
	return !(disk->flags & GENHD_FL_UP);
}

static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	u32 iob;

	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
	    is_power_of_2(ctrl->max_hw_sectors))
		iob = ctrl->max_hw_sectors;
	else
		iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));

	if (!iob)
		return;

	if (!is_power_of_2(iob)) {
		if (nvme_first_scan(ns->disk))
			pr_warn("%s: ignoring unaligned IO boundary:%u\n",
				ns->disk->disk_name, iob);
		return;
	}

	if (blk_queue_is_zoned(ns->disk->queue)) {
		if (nvme_first_scan(ns->disk))
			pr_warn("%s: ignoring zoned namespace IO boundary\n",
				ns->disk->disk_name);
		return;
	}

	blk_queue_chunk_sectors(ns->queue, iob);
}

static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
	unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	struct nvme_ns *ns = disk->private_data;
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;
	u32 iob;

	/*
	 * If identify namespace failed, use default 512 byte block size so
@@ -2060,12 +2096,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
		return -ENODEV;
	}

	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
	    is_power_of_2(ctrl->max_hw_sectors))
		iob = ctrl->max_hw_sectors;
	else
		iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));

	ns->features = 0;
	ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
	/* the PI implementation requires metadata equal t10 pi tuple size */
@@ -2097,8 +2127,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
		}
	}

	if (iob && !blk_queue_is_zoned(ns->queue))
		blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
	nvme_set_chunk_sectors(ns, id);
	nvme_update_disk_info(disk, ns, id);
#ifdef CONFIG_NVME_MULTIPATH
	if (ns->head->disk) {
@@ -3676,6 +3705,10 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
		return 0;
	if (a == &dev_attr_hostid.attr && !ctrl->opts)
		return 0;
	if (a == &dev_attr_ctrl_loss_tmo.attr && !ctrl->opts)
		return 0;
	if (a == &dev_attr_reconnect_delay.attr && !ctrl->opts)
		return 0;

	return a->mode;
}
@@ -4390,7 +4423,7 @@ static void nvme_free_ctrl(struct device *dev)
	struct nvme_subsystem *subsys = ctrl->subsys;
	struct nvme_cel *cel, *next;

	if (subsys && ctrl->instance != subsys->instance)
	if (!subsys || ctrl->instance != subsys->instance)
		ida_simple_remove(&nvme_instance_ida, ctrl->instance);

	list_for_each_entry_safe(cel, next, &ctrl->cels, entry) {
@@ -4534,7 +4567,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_unfreeze);

void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
{
	struct nvme_ns *ns;

@@ -4545,6 +4578,7 @@ void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
			break;
	}
	up_read(&ctrl->namespaces_rwsem);
	return timeout;
}
EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);

drivers/nvme/host/fabrics.c +0 −1
@@ -576,7 +576,6 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
	 * which is require to set the queue live in the appropinquate states.
	 */
	switch (ctrl->state) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_CONNECTING:
		if (nvme_is_fabrics(req->cmd) &&
		    req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
drivers/nvme/host/nvme.h +1 −1
@@ -605,7 +605,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl);
void nvme_sync_queues(struct nvme_ctrl *ctrl);
void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
void nvme_start_freeze(struct nvme_ctrl *ctrl);

#define NVME_QID_ANY -1
drivers/nvme/host/pci.c +2 −2
@@ -1249,8 +1249,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
		dev_warn_ratelimited(dev->ctrl.device,
			 "I/O %d QID %d timeout, disable controller\n",
			 req->tag, nvmeq->qid);
		nvme_dev_disable(dev, true);
		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
		nvme_dev_disable(dev, true);
		return BLK_EH_DONE;
	case NVME_CTRL_RESETTING:
		return BLK_EH_RESET_TIMER;
@@ -1267,10 +1267,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
		dev_warn(dev->ctrl.device,
			 "I/O %d QID %d timeout, reset controller\n",
			 req->tag, nvmeq->qid);
		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
		nvme_dev_disable(dev, false);
		nvme_reset_ctrl(&dev->ctrl);

		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
		return BLK_EH_DONE;
	}

drivers/nvme/host/rdma.c +51 −17
@@ -122,6 +122,7 @@ struct nvme_rdma_ctrl {
	struct sockaddr_storage src_addr;

	struct nvme_ctrl	ctrl;
	struct mutex		teardown_lock;
	bool			use_inline_data;
	u32			io_queues[HCTX_MAX_TYPES];
};
@@ -975,7 +976,15 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)

	if (!new) {
		nvme_start_queues(&ctrl->ctrl);
		nvme_wait_freeze(&ctrl->ctrl);
		if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
			/*
			 * If we timed out waiting for freeze we are likely to
			 * be stuck.  Fail the controller initialization just
			 * to be safe.
			 */
			ret = -ENODEV;
			goto out_wait_freeze_timed_out;
		}
		blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
			ctrl->ctrl.queue_count - 1);
		nvme_unfreeze(&ctrl->ctrl);
@@ -983,6 +992,9 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)

	return 0;

out_wait_freeze_timed_out:
	nvme_stop_queues(&ctrl->ctrl);
	nvme_rdma_stop_io_queues(ctrl);
out_cleanup_connect_q:
	if (new)
		blk_cleanup_queue(ctrl->ctrl.connect_q);
@@ -997,6 +1009,7 @@ out_free_io_queues:
static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
		bool remove)
{
	mutex_lock(&ctrl->teardown_lock);
	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
	nvme_rdma_stop_queue(&ctrl->queues[0]);
	if (ctrl->ctrl.admin_tagset) {
@@ -1007,11 +1020,13 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
	if (remove)
		blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
	nvme_rdma_destroy_admin_queue(ctrl, remove);
	mutex_unlock(&ctrl->teardown_lock);
}

static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
		bool remove)
{
	mutex_lock(&ctrl->teardown_lock);
	if (ctrl->ctrl.queue_count > 1) {
		nvme_start_freeze(&ctrl->ctrl);
		nvme_stop_queues(&ctrl->ctrl);
@@ -1025,6 +1040,7 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
			nvme_start_queues(&ctrl->ctrl);
		nvme_rdma_destroy_io_queues(ctrl, remove);
	}
	mutex_unlock(&ctrl->teardown_lock);
}

static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
@@ -1180,6 +1196,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
		return;

	dev_warn(ctrl->ctrl.device, "starting error recovery\n");
	queue_work(nvme_reset_wq, &ctrl->err_work);
}

@@ -1946,6 +1963,22 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
	return 0;
}

static void nvme_rdma_complete_timed_out(struct request *rq)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	struct nvme_rdma_queue *queue = req->queue;
	struct nvme_rdma_ctrl *ctrl = queue->ctrl;

	/* fence other contexts that may complete the command */
	mutex_lock(&ctrl->teardown_lock);
	nvme_rdma_stop_queue(queue);
	if (!blk_mq_request_completed(rq)) {
		nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
		blk_mq_complete_request(rq);
	}
	mutex_unlock(&ctrl->teardown_lock);
}

static enum blk_eh_timer_return
nvme_rdma_timeout(struct request *rq, bool reserved)
{
@@ -1956,29 +1989,29 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
	dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
		 rq->tag, nvme_rdma_queue_idx(queue));

	/*
	 * Restart the timer if a controller reset is already scheduled. Any
	 * timed out commands would be handled before entering the connecting
	 * state.
	 */
	if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
		return BLK_EH_RESET_TIMER;

	if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
		/*
		 * Teardown immediately if controller times out while starting
		 * or we are already started error recovery. all outstanding
		 * requests are completed on shutdown, so we return BLK_EH_DONE.
		 * If we are resetting, connecting or deleting we should
		 * complete immediately because we may block controller
		 * teardown or setup sequence
		 * - ctrl disable/shutdown fabrics requests
		 * - connect requests
		 * - initialization admin requests
		 * - I/O requests that entered after unquiescing and
		 *   the controller stopped responding
		 *
		 * All other requests should be cancelled by the error
		 * recovery work, so it's fine that we fail it here.
		 */
		flush_work(&ctrl->err_work);
		nvme_rdma_teardown_io_queues(ctrl, false);
		nvme_rdma_teardown_admin_queue(ctrl, false);
		nvme_rdma_complete_timed_out(rq);
		return BLK_EH_DONE;
	}

	dev_warn(ctrl->ctrl.device, "starting error recovery\n");
	/*
	 * LIVE state should trigger the normal error recovery which will
	 * handle completing this request.
	 */
	nvme_rdma_error_recovery(ctrl);

	return BLK_EH_RESET_TIMER;
}

@@ -2278,6 +2311,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
		return ERR_PTR(-ENOMEM);
	ctrl->ctrl.opts = opts;
	INIT_LIST_HEAD(&ctrl->list);
	mutex_init(&ctrl->teardown_lock);

	if (!(opts->mask & NVMF_OPT_TRSVCID)) {
		opts->trsvcid =