Commit a9e8e18a authored by Jens Axboe

Merge branch 'nvme-5.9' of git://git.infradead.org/nvme into for-5.9/drivers

Pull NVMe updates from Christoph.

* 'nvme-5.9' of git://git.infradead.org/nvme: (30 commits)
  nvme-loop: remove extra variable in create ctrl
  nvme-loop: set ctrl state connecting after init
  nvme-multipath: do not fall back to __nvme_find_path() for non-optimized paths
  nvme-multipath: fix logic for non-optimized paths
  nvme-rdma: fix controller reset hang during traffic
  nvme-tcp: fix controller reset hang during traffic
  nvmet: introduce the passthru Kconfig option
  nvmet: introduce the passthru configfs interface
  nvmet: Add passthru enable/disable helpers
  nvmet: add passthru code to process commands
  nvme: export nvme_find_get_ns() and nvme_put_ns()
  nvme: introduce nvme_ctrl_get_by_path()
  nvme: introduce nvme_execute_passthru_rq to call nvme_passthru_[start|end]()
  nvme: create helper function to obtain command effects
  nvme: clear any SGL flags in passthru commands
  nvmet-fc: remove redundant del_work_active flag
  nvmet-fc: check successful reference in nvmet_fc_find_target_assoc
  nvme-fc: set max_segments to lldd max value
  nvme-fc: drop a duplicated word in a comment
  nvme-hwmon: log the controller device name
  ...
parents c5be1f2c b6cec06d
drivers/acpi/property.c  +3 −0
@@ -45,6 +45,9 @@ static const guid_t prp_guids[] = {
	/* Thunderbolt GUID for WAKE_SUPPORTED: 6c501103-c189-4296-ba72-9bf5a26ebe5d */
	GUID_INIT(0x6c501103, 0xc189, 0x4296,
		  0xba, 0x72, 0x9b, 0xf5, 0xa2, 0x6e, 0xbe, 0x5d),
+	/* Storage device needs D3 GUID: 5025030f-842f-4ab4-a561-99a5189762d0 */
+	GUID_INIT(0x5025030f, 0x842f, 0x4ab4,
+		  0xa5, 0x61, 0x99, 0xa5, 0x18, 0x97, 0x62, 0xd0),
};

/* ACPI _DSD data subnodes GUID: dbb8e3e6-5886-4ba6-8795-1319f52a966b */
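
The three added lines register the GUID under which firmware exposes the StorageD3Enable _DSD property, which tells storage drivers that the platform wants the device put into D3 on suspend. As a rough illustration (not part of this commit), a driver holding an ACPI-backed struct device could query the property through the generic device-property API; the helper name and error handling below are placeholders:

#include <linux/property.h>

/*
 * Illustrative sketch only: check whether firmware requests D3 for a
 * storage device via the StorageD3Enable _DSD property whose GUID is
 * registered above.
 */
static bool storage_d3_requested(struct device *dev)
{
	u8 val;

	if (device_property_read_u8(dev, "StorageD3Enable", &val))
		return false;	/* property absent or unreadable */
	return val == 1;
}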
drivers/nvme/host/core.c  +175 −119
@@ -366,6 +366,16 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
			break;
		}
		break;
+	case NVME_CTRL_DELETING_NOIO:
+		switch (old_state) {
+		case NVME_CTRL_DELETING:
+		case NVME_CTRL_DEAD:
+			changed = true;
+			/* FALLTHRU */
+		default:
+			break;
+		}
+		break;
	case NVME_CTRL_DEAD:
		switch (old_state) {
		case NVME_CTRL_DELETING:
@@ -403,6 +413,7 @@ static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
	case NVME_CTRL_CONNECTING:
		return false;
	case NVME_CTRL_DELETING:
+	case NVME_CTRL_DELETING_NOIO:
	case NVME_CTRL_DEAD:
		return true;
	default:
@@ -454,10 +465,11 @@ static void nvme_free_ns(struct kref *kref)
	kfree(ns);
}

-static void nvme_put_ns(struct nvme_ns *ns)
+void nvme_put_ns(struct nvme_ns *ns)
{
	kref_put(&ns->kref, nvme_free_ns);
}
+EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU);

static inline void nvme_clear_nvme_request(struct request *req)
{
@@ -593,6 +605,14 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
		req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
}

+static void nvme_setup_passthrough(struct request *req,
+		struct nvme_command *cmd)
+{
+	memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd));
+	/* passthru commands should let the driver set the SGL flags */
+	cmd->common.flags &= ~NVME_CMD_SGL_ALL;
+}
+
static inline void nvme_setup_flush(struct nvme_ns *ns,
		struct nvme_command *cmnd)
{
@@ -758,7 +778,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
	switch (req_op(req)) {
	case REQ_OP_DRV_IN:
	case REQ_OP_DRV_OUT:
-		memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd));
+		nvme_setup_passthrough(req, cmd);
		break;
	case REQ_OP_FLUSH:
		nvme_setup_flush(ns, cmd);
@@ -909,6 +929,120 @@ out:
	return ERR_PTR(ret);
}

+static u32 nvme_known_admin_effects(u8 opcode)
+{
+	switch (opcode) {
+	case nvme_admin_format_nvm:
+		return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
+			NVME_CMD_EFFECTS_CSE_MASK;
+	case nvme_admin_sanitize_nvm:
+		return NVME_CMD_EFFECTS_CSE_MASK;
+	default:
+		break;
+	}
+	return 0;
+}
+
+u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
+{
+	u32 effects = 0;
+
+	if (ns) {
+		if (ns->head->effects)
+			effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
+		if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
+			dev_warn(ctrl->device,
+				 "IO command:%02x has unhandled effects:%08x\n",
+				 opcode, effects);
+		return 0;
+	}
+
+	if (ctrl->effects)
+		effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+	effects |= nvme_known_admin_effects(opcode);
+
+	return effects;
+}
+EXPORT_SYMBOL_NS_GPL(nvme_command_effects, NVME_TARGET_PASSTHRU);
+
+static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+			       u8 opcode)
+{
+	u32 effects = nvme_command_effects(ctrl, ns, opcode);
+
+	/*
+	 * For simplicity, IO to all namespaces is quiesced even if the command
+	 * effects say only one namespace is affected.
+	 */
+	if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
+		mutex_lock(&ctrl->scan_lock);
+		mutex_lock(&ctrl->subsys->lock);
+		nvme_mpath_start_freeze(ctrl->subsys);
+		nvme_mpath_wait_freeze(ctrl->subsys);
+		nvme_start_freeze(ctrl);
+		nvme_wait_freeze(ctrl);
+	}
+	return effects;
+}
+
+static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
+{
+	struct nvme_ns *ns;
+
+	down_read(&ctrl->namespaces_rwsem);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		if (_nvme_revalidate_disk(ns->disk))
+			nvme_set_queue_dying(ns);
+		else if (blk_queue_is_zoned(ns->disk->queue)) {
+			/*
+			 * IO commands are required to fully revalidate a zoned
+			 * device. Force the command effects to trigger rescan
+			 * work so report zones can run in a context with
+			 * unfrozen IO queues.
+			 */
+			*effects |= NVME_CMD_EFFECTS_NCC;
+		}
+	up_read(&ctrl->namespaces_rwsem);
+}
+
+static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
+{
+	/*
+	 * Revalidate LBA changes prior to unfreezing. This is necessary to
+	 * prevent memory corruption if a logical block size was changed by
+	 * this command.
+	 */
+	if (effects & NVME_CMD_EFFECTS_LBCC)
+		nvme_update_formats(ctrl, &effects);
+	if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
+		nvme_unfreeze(ctrl);
+		nvme_mpath_unfreeze(ctrl->subsys);
+		mutex_unlock(&ctrl->subsys->lock);
+		nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
+		mutex_unlock(&ctrl->scan_lock);
+	}
+	if (effects & NVME_CMD_EFFECTS_CCC)
+		nvme_init_identify(ctrl);
+	if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
+		nvme_queue_scan(ctrl);
+		flush_work(&ctrl->scan_work);
+	}
+}
+
+void nvme_execute_passthru_rq(struct request *rq)
+{
+	struct nvme_command *cmd = nvme_req(rq)->cmd;
+	struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl;
+	struct nvme_ns *ns = rq->q->queuedata;
+	struct gendisk *disk = ns ? ns->disk : NULL;
+	u32 effects;
+
+	effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
+	blk_execute_rq(rq->q, disk, rq, 0);
+	nvme_passthru_end(ctrl, effects);
+}
+EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU);
+
static int nvme_submit_user_cmd(struct request_queue *q,
		struct nvme_command *cmd, void __user *ubuffer,
		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
@@ -947,7 +1081,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
		}
	}

-	blk_execute_rq(req->q, disk, req, 0);
+	nvme_execute_passthru_rq(req);
	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
		ret = -EINTR;
	else
@@ -1375,105 +1509,12 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
			metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
}

-static u32 nvme_known_admin_effects(u8 opcode)
-{
-	switch (opcode) {
-	case nvme_admin_format_nvm:
-		return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
-					NVME_CMD_EFFECTS_CSE_MASK;
-	case nvme_admin_sanitize_nvm:
-		return NVME_CMD_EFFECTS_CSE_MASK;
-	default:
-		break;
-	}
-	return 0;
-}
-
-static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-								u8 opcode)
-{
-	u32 effects = 0;
-
-	if (ns) {
-		if (ns->head->effects)
-			effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
-		if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
-			dev_warn(ctrl->device,
-				 "IO command:%02x has unhandled effects:%08x\n",
-				 opcode, effects);
-		return 0;
-	}
-
-	if (ctrl->effects)
-		effects = le32_to_cpu(ctrl->effects->acs[opcode]);
-	effects |= nvme_known_admin_effects(opcode);
-
-	/*
-	 * For simplicity, IO to all namespaces is quiesced even if the command
-	 * effects say only one namespace is affected.
-	 */
-	if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
-		mutex_lock(&ctrl->scan_lock);
-		mutex_lock(&ctrl->subsys->lock);
-		nvme_mpath_start_freeze(ctrl->subsys);
-		nvme_mpath_wait_freeze(ctrl->subsys);
-		nvme_start_freeze(ctrl);
-		nvme_wait_freeze(ctrl);
-	}
-	return effects;
-}
-
-static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
-{
-	struct nvme_ns *ns;
-
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
-		if (_nvme_revalidate_disk(ns->disk))
-			nvme_set_queue_dying(ns);
-		else if (blk_queue_is_zoned(ns->disk->queue)) {
-			/*
-			 * IO commands are required to fully revalidate a zoned
-			 * device. Force the command effects to trigger rescan
-			 * work so report zones can run in a context with
-			 * unfrozen IO queues.
-			 */
-			*effects |= NVME_CMD_EFFECTS_NCC;
-		}
-	up_read(&ctrl->namespaces_rwsem);
-}
-
-static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
-{
-	/*
-	 * Revalidate LBA changes prior to unfreezing. This is necessary to
-	 * prevent memory corruption if a logical block size was changed by
-	 * this command.
-	 */
-	if (effects & NVME_CMD_EFFECTS_LBCC)
-		nvme_update_formats(ctrl, &effects);
-	if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
-		nvme_unfreeze(ctrl);
-		nvme_mpath_unfreeze(ctrl->subsys);
-		mutex_unlock(&ctrl->subsys->lock);
-		nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
-		mutex_unlock(&ctrl->scan_lock);
-	}
-	if (effects & NVME_CMD_EFFECTS_CCC)
-		nvme_init_identify(ctrl);
-	if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
-		nvme_queue_scan(ctrl);
-		flush_work(&ctrl->scan_work);
-	}
-}
-
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
			struct nvme_passthru_cmd __user *ucmd)
{
	struct nvme_passthru_cmd cmd;
	struct nvme_command c;
	unsigned timeout = 0;
-	u32 effects;
	u64 result;
	int status;

@@ -1500,12 +1541,10 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);

-	effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			nvme_to_user_ptr(cmd.addr), cmd.data_len,
			nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
			0, &result, timeout);
-	nvme_passthru_end(ctrl, effects);

	if (status >= 0) {
		if (put_user(result, &ucmd->result))
@@ -1521,7 +1560,6 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
	struct nvme_passthru_cmd64 cmd;
	struct nvme_command c;
	unsigned timeout = 0;
-	u32 effects;
	int status;

	if (!capable(CAP_SYS_ADMIN))
@@ -1547,12 +1585,10 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);

-	effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			nvme_to_user_ptr(cmd.addr), cmd.data_len,
			nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
			0, &cmd.result, timeout);
-	nvme_passthru_end(ctrl, effects);

	if (status >= 0) {
		if (put_user(cmd.result, &ucmd->result))
@@ -2345,12 +2381,7 @@ EXPORT_SYMBOL_GPL(nvme_disable_ctrl);

int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
{
-	/*
-	 * Default to a 4K page size, with the intention to update this
-	 * path in the future to accomodate architectures with differing
-	 * kernel and IO page sizes.
-	 */
-	unsigned dev_page_min, page_shift = 12;
+	unsigned dev_page_min;
	int ret;

	ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
@@ -2360,20 +2391,18 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
	}
	dev_page_min = NVME_CAP_MPSMIN(ctrl->cap) + 12;

-	if (page_shift < dev_page_min) {
+	if (NVME_CTRL_PAGE_SHIFT < dev_page_min) {
		dev_err(ctrl->device,
			"Minimum device page size %u too large for host (%u)\n",
-			1 << dev_page_min, 1 << page_shift);
+			1 << dev_page_min, 1 << NVME_CTRL_PAGE_SHIFT);
		return -ENODEV;
	}

-	ctrl->page_size = 1 << page_shift;
-
	if (NVME_CAP_CSS(ctrl->cap) & NVME_CAP_CSS_CSI)
		ctrl->ctrl_config = NVME_CC_CSS_CSI;
	else
		ctrl->ctrl_config = NVME_CC_CSS_NVM;
-	ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
+	ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
	ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
	ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
	ctrl->ctrl_config |= NVME_CC_ENABLE;
@@ -2423,13 +2452,13 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,

	if (ctrl->max_hw_sectors) {
		u32 max_segments =
-			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
+			(ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> 9)) + 1;

		max_segments = min_not_zero(max_segments, ctrl->max_segments);
		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
	}
-	blk_queue_virt_boundary(q, ctrl->page_size - 1);
+	blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
	blk_queue_dma_alignment(q, 7);
	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
		vwc = true;
@@ -3483,6 +3512,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
		[NVME_CTRL_RESETTING]	= "resetting",
		[NVME_CTRL_CONNECTING]	= "connecting",
		[NVME_CTRL_DELETING]	= "deleting",
+		[NVME_CTRL_DELETING_NOIO]= "deleting (no IO)",
		[NVME_CTRL_DEAD]	= "dead",
	};

@@ -3586,8 +3616,8 @@ static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev,
	int err;

	err = kstrtou32(buf, 10, &v);
-	if (err || v > UINT_MAX)
-		return -EINVAL;
+	if (err)
+		return err;

	ctrl->opts->reconnect_delay = v;
	return count;
@@ -3798,7 +3828,7 @@ static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
	return nsa->head->ns_id - nsb->head->ns_id;
}

-static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
	struct nvme_ns *ns, *ret = NULL;

@@ -3816,6 +3846,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
	up_read(&ctrl->namespaces_rwsem);
	return ret;
}
+EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);

static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
@@ -4119,6 +4150,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
	if (ctrl->state == NVME_CTRL_DEAD)
		nvme_kill_queues(ctrl);

+	/* this is a no-op when called from the controller reset handler */
+	nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
+
	down_write(&ctrl->namespaces_rwsem);
	list_splice_init(&ctrl->namespaces, &ns_list);
	up_write(&ctrl->namespaces_rwsem);
@@ -4313,7 +4347,6 @@ EXPORT_SYMBOL_GPL(nvme_stop_ctrl);

void nvme_start_ctrl(struct nvme_ctrl *ctrl)
{
-	if (ctrl->kato)
-		nvme_start_keep_alive(ctrl);
+	nvme_start_keep_alive(ctrl);

	nvme_enable_aen(ctrl);
@@ -4559,6 +4592,29 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_sync_queues);

+struct nvme_ctrl *nvme_ctrl_get_by_path(const char *path)
+{
+	struct nvme_ctrl *ctrl;
+	struct file *f;
+
+	f = filp_open(path, O_RDWR, 0);
+	if (IS_ERR(f))
+		return ERR_CAST(f);
+
+	if (f->f_op != &nvme_dev_fops) {
+		ctrl = ERR_PTR(-EINVAL);
+		goto out_close;
+	}
+
+	ctrl = f->private_data;
+	nvme_get_ctrl(ctrl);
+
+out_close:
+	filp_close(f, NULL);
+	return ctrl;
+}
+EXPORT_SYMBOL_NS_GPL(nvme_ctrl_get_by_path, NVME_TARGET_PASSTHRU);
+
/*
 * Check we didn't inadvertently grow the command structure sizes:
 */
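
The core.c changes above export three entry points in the NVME_TARGET_PASSTHRU symbol namespace: nvme_ctrl_get_by_path(), nvme_find_get_ns()/nvme_put_ns(), and nvme_execute_passthru_rq(). A minimal sketch of how an in-tree consumer (such as the new nvmet passthru backend) might take and drop the references is shown below; it assumes the file is built in-tree so it can include drivers/nvme/host/nvme.h, and the "/dev/nvme0" path and NSID 1 are placeholders:

// SPDX-License-Identifier: GPL-2.0
/*
 * Minimal sketch of a consumer of the NVME_TARGET_PASSTHRU exports.
 * Assumes an in-tree build with access to drivers/nvme/host/nvme.h.
 */
#include <linux/module.h>
#include <linux/err.h>
#include "nvme.h"	/* drivers/nvme/host/nvme.h */

MODULE_IMPORT_NS(NVME_TARGET_PASSTHRU);

static struct nvme_ctrl *ctrl;

static int __init passthru_demo_init(void)
{
	struct nvme_ns *ns;

	/* Resolve a controller from its character device path. */
	ctrl = nvme_ctrl_get_by_path("/dev/nvme0");
	if (IS_ERR(ctrl))
		return PTR_ERR(ctrl);

	/* Take and immediately drop a namespace reference by NSID. */
	ns = nvme_find_get_ns(ctrl, 1);
	if (ns)
		nvme_put_ns(ns);

	return 0;
}

static void __exit passthru_demo_exit(void)
{
	/* Drop the controller reference taken by nvme_ctrl_get_by_path(). */
	nvme_put_ctrl(ctrl);
}

module_init(passthru_demo_init);
module_exit(passthru_demo_exit);
MODULE_LICENSE("GPL");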
drivers/nvme/host/fabrics.c  +1 −1
@@ -547,7 +547,7 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
		struct request *rq)
{
-	if (ctrl->state != NVME_CTRL_DELETING &&
+	if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
	    ctrl->state != NVME_CTRL_DEAD &&
	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
		return BLK_STS_RESOURCE;
drivers/nvme/host/fabrics.h  +2 −1
@@ -182,7 +182,8 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
		bool queue_live)
{
-	if (likely(ctrl->state == NVME_CTRL_LIVE))
+	if (likely(ctrl->state == NVME_CTRL_LIVE ||
+		   ctrl->state == NVME_CTRL_DELETING))
		return true;
	return __nvmf_check_ready(ctrl, rq, queue_live);
}
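
Read together with the fabrics.c hunk above, the intent appears to be a division of labor between the two deleting states: nvmf_check_ready() now treats DELETING like LIVE so that requests already in flight can still make progress while a controller is being torn down, while nvmf_fail_nonready_command() only fast-fails once the controller reaches DELETING_NOIO or DEAD, i.e. once IO is no longer expected to complete.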
drivers/nvme/host/fc.c  +4 −2
@@ -826,6 +826,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
		break;

	case NVME_CTRL_DELETING:
+	case NVME_CTRL_DELETING_NOIO:
	default:
		/* no action to take - let it delete */
		break;
@@ -3001,8 +3002,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
	if (ret)
		goto out_disconnect_admin_queue;

-	ctrl->ctrl.max_hw_sectors =
-		(ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9);
+	ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments;
+	ctrl->ctrl.max_hw_sectors = ctrl->ctrl.max_segments <<
+						(ilog2(SZ_4K) - 9);

	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);

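The last hunk stops deriving the FC transfer cap from the host PAGE_SHIFT and instead advertises the LLDD segment limit directly, sizing max_hw_sectors in 4K NVMe controller pages. As a worked example with an illustrative number (not from the diff): for max_sgl_segments = 256, max_hw_sectors becomes 256 << (ilog2(SZ_4K) - 9) = 256 << 3 = 2048 512-byte sectors, i.e. 1 MiB.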