Commit 4f5735f3 authored by Jens Axboe

Merge branch 'nvme-4.20' of git://git.infradead.org/nvme into for-4.20/block

Pull NVMe updates from Christoph:

"A relatively boring merge window:

 - better AEN tracing (Chaitanya)
 - NUMA aware PCIe multipathing (me)
 - RDMA workqueue fixes (Sagi)
 - better bio usage in the target (Sagi)
 - FC rework for target removal (James)
 - better multipath handling of ->queue_rq failures (James)
 - various cleanups (Milan)"

* 'nvme-4.20' of git://git.infradead.org/nvme:
  nvmet-rdma: use a private workqueue for delete
  nvme: take node locality into account when selecting a path
  nvmet: don't split large I/Os unconditionally
  nvme: call nvme_complete_rq when nvmf_check_ready fails for mpath I/O
  nvme-core: add async event trace helper
  nvme_fc: add 'nvme_discovery' sysfs attribute to fc transport device
  nvmet_fc: support target port removal with nvmet layer
  nvme-fc: fix for a minor typos
  nvmet: remove redundant module prefix
  nvme: fix typo in nvme_identify_ns_descs
parents 9305455a 2acf70ad
Loading
Loading
Loading
Loading
+16 −4
Original line number Diff line number Diff line
@@ -971,7 +971,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
			uuid_copy(&ids->uuid, data + pos + sizeof(*cur));
			break;
		default:
			/* Skip unnkown types */
			/* Skip unknown types */
			len = cur->nidl;
			break;
		}
@@ -2908,9 +2908,14 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
		unsigned nsid, struct nvme_id_ns *id)
{
	struct nvme_ns_head *head;
	size_t size = sizeof(*head);
	int ret = -ENOMEM;

	head = kzalloc(sizeof(*head), GFP_KERNEL);
#ifdef CONFIG_NVME_MULTIPATH
	size += num_possible_nodes() * sizeof(struct nvme_ns *);
#endif

	head = kzalloc(size, GFP_KERNEL);
	if (!head)
		goto out;
	ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);
@@ -3408,16 +3413,21 @@ static void nvme_fw_act_work(struct work_struct *work)

static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
{
	switch ((result & 0xff00) >> 8) {
	u32 aer_notice_type = (result & 0xff00) >> 8;

	switch (aer_notice_type) {
	case NVME_AER_NOTICE_NS_CHANGED:
		trace_nvme_async_event(ctrl, aer_notice_type);
		set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
		nvme_queue_scan(ctrl);
		break;
	case NVME_AER_NOTICE_FW_ACT_STARTING:
		trace_nvme_async_event(ctrl, aer_notice_type);
		queue_work(nvme_wq, &ctrl->fw_act_work);
		break;
#ifdef CONFIG_NVME_MULTIPATH
	case NVME_AER_NOTICE_ANA:
		trace_nvme_async_event(ctrl, aer_notice_type);
		if (!ctrl->ana_log_buf)
			break;
		queue_work(nvme_wq, &ctrl->ana_work);
@@ -3432,11 +3442,12 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
		volatile union nvme_result *res)
{
	u32 result = le32_to_cpu(res->u32);
	u32 aer_type = result & 0x07;

	if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
		return;

	switch (result & 0x7) {
	switch (aer_type) {
	case NVME_AER_NOTICE:
		nvme_handle_aen_notice(ctrl, result);
		break;
@@ -3444,6 +3455,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
	case NVME_AER_SMART:
	case NVME_AER_CSS:
	case NVME_AER_VS:
		trace_nvme_async_event(ctrl, aer_type);
		ctrl->aen_result = result;
		break;
	default:
+5 −2
Original line number Diff line number Diff line
@@ -552,8 +552,11 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
	    ctrl->state != NVME_CTRL_DEAD &&
	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
		return BLK_STS_RESOURCE;
	nvme_req(rq)->status = NVME_SC_ABORT_REQ;
	return BLK_STS_IOERR;

	nvme_req(rq)->status = NVME_SC_HOST_PATH_ERROR;
	blk_mq_start_request(rq);
	nvme_complete_rq(rq);
	return BLK_STS_OK;
}
EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);

+97 −11
Original line number Diff line number Diff line
@@ -122,6 +122,7 @@ struct nvme_fc_rport {
	struct list_head		endp_list; /* for lport->endp_list */
	struct list_head		ctrl_list;
	struct list_head		ls_req_list;
	struct list_head		disc_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_lport		*lport;
	spinlock_t			lock;
@@ -210,7 +211,6 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
 * These items are short-term. They will eventually be moved into
 * a generic FC class. See comments in module init.
 */
static struct class *fc_class;
static struct device *fc_udev_device;


@@ -507,6 +507,7 @@ nvme_fc_free_rport(struct kref *ref)
	list_del(&rport->endp_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	WARN_ON(!list_empty(&rport->disc_list));
	ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);

	kfree(rport);
@@ -694,6 +695,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
	INIT_LIST_HEAD(&newrec->endp_list);
	INIT_LIST_HEAD(&newrec->ctrl_list);
	INIT_LIST_HEAD(&newrec->ls_req_list);
	INIT_LIST_HEAD(&newrec->disc_list);
	kref_init(&newrec->ref);
	atomic_set(&newrec->act_ctrl_cnt, 0);
	spin_lock_init(&newrec->lock);
@@ -1385,7 +1387,7 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)

	__nvme_fc_finish_ls_req(lsop);

	/* fc-nvme iniator doesn't care about success or failure of cmd */
	/* fc-nvme initiator doesn't care about success or failure of cmd */

	kfree(lsop);
}
@@ -3159,7 +3161,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen)
	substring_t wwn = { name, &name[sizeof(name)-1] };
	int nnoffset, pnoffset;

	/* validate it string one of the 2 allowed formats */
	/* validate if string is one of the 2 allowed formats */
	if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
			!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
			!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
@@ -3254,6 +3256,90 @@ static struct nvmf_transport_ops nvme_fc_transport = {
	.create_ctrl	= nvme_fc_create_ctrl,
};

/* Arbitrary successive failures max. With lots of subsystems could be high */
#define DISCOVERY_MAX_FAIL	20

static ssize_t nvme_fc_nvme_discovery_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	unsigned long flags;
	LIST_HEAD(local_disc_list);
	struct nvme_fc_lport *lport;
	struct nvme_fc_rport *rport;
	int failcnt = 0;

	spin_lock_irqsave(&nvme_fc_lock, flags);
restart:
	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
		list_for_each_entry(rport, &lport->endp_list, endp_list) {
			if (!nvme_fc_lport_get(lport))
				continue;
			if (!nvme_fc_rport_get(rport)) {
				/*
				 * This is a temporary condition. Upon restart
				 * this rport will be gone from the list.
				 *
				 * Revert the lport put and retry.  Anything
				 * added to the list already will be skipped (as
				 * they are no longer list_empty).  Loops should
				 * resume at rports that were not yet seen.
				 */
				nvme_fc_lport_put(lport);

				if (failcnt++ < DISCOVERY_MAX_FAIL)
					goto restart;

				pr_err("nvme_discovery: too many reference "
				       "failures\n");
				goto process_local_list;
			}
			if (list_empty(&rport->disc_list))
				list_add_tail(&rport->disc_list,
					      &local_disc_list);
		}
	}

process_local_list:
	while (!list_empty(&local_disc_list)) {
		rport = list_first_entry(&local_disc_list,
					 struct nvme_fc_rport, disc_list);
		list_del_init(&rport->disc_list);
		spin_unlock_irqrestore(&nvme_fc_lock, flags);

		lport = rport->lport;
		/* signal discovery. Won't hurt if it repeats */
		nvme_fc_signal_discovery_scan(lport, rport);
		nvme_fc_rport_put(rport);
		nvme_fc_lport_put(lport);

		spin_lock_irqsave(&nvme_fc_lock, flags);
	}
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	return count;
}
static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);

static struct attribute *nvme_fc_attrs[] = {
	&dev_attr_nvme_discovery.attr,
	NULL
};

static struct attribute_group nvme_fc_attr_group = {
	.attrs = nvme_fc_attrs,
};

static const struct attribute_group *nvme_fc_attr_groups[] = {
	&nvme_fc_attr_group,
	NULL
};

static struct class fc_class = {
	.name = "fc",
	.dev_groups = nvme_fc_attr_groups,
	.owner = THIS_MODULE,
};

static int __init nvme_fc_init_module(void)
{
	int ret;
@@ -3272,16 +3358,16 @@ static int __init nvme_fc_init_module(void)
	 * put in place, this code will move to a more generic
	 * location for the class.
	 */
	fc_class = class_create(THIS_MODULE, "fc");
	if (IS_ERR(fc_class)) {
	ret = class_register(&fc_class);
	if (ret) {
		pr_err("couldn't register class fc\n");
		return PTR_ERR(fc_class);
		return ret;
	}

	/*
	 * Create a device for the FC-centric udev events
	 */
	fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL,
	fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL,
				"fc_udev_device");
	if (IS_ERR(fc_udev_device)) {
		pr_err("couldn't create fc_udev device!\n");
@@ -3296,9 +3382,9 @@ static int __init nvme_fc_init_module(void)
	return 0;

out_destroy_device:
	device_destroy(fc_class, MKDEV(0, 0));
	device_destroy(&fc_class, MKDEV(0, 0));
out_destroy_class:
	class_destroy(fc_class);
	class_unregister(&fc_class);
	return ret;
}

@@ -3313,8 +3399,8 @@ static void __exit nvme_fc_exit_module(void)
	ida_destroy(&nvme_fc_local_port_cnt);
	ida_destroy(&nvme_fc_ctrl_cnt);

	device_destroy(fc_class, MKDEV(0, 0));
	class_destroy(fc_class);
	device_destroy(&fc_class, MKDEV(0, 0));
	class_unregister(&fc_class);
}

module_init(nvme_fc_init_module);
+46 −11
Original line number Diff line number Diff line
@@ -77,6 +77,13 @@ void nvme_failover_req(struct request *req)
			queue_work(nvme_wq, &ns->ctrl->ana_work);
		}
		break;
	case NVME_SC_HOST_PATH_ERROR:
		/*
		 * Temporary transport disruption in talking to the controller.
		 * Try to send on a new path.
		 */
		nvme_mpath_clear_current_path(ns);
		break;
	default:
		/*
		 * Reset the controller for any non-ANA error as we don't know
@@ -110,29 +117,55 @@ static const char *nvme_ana_state_names[] = {
	[NVME_ANA_CHANGE]		= "change",
};

static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;
	int node;

	if (!head)
		return;

	for_each_node(node) {
		if (ns == rcu_access_pointer(head->current_path[node]))
			rcu_assign_pointer(head->current_path[node], NULL);
	}
}

static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
{
	struct nvme_ns *ns, *fallback = NULL;
	int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
	struct nvme_ns *found = NULL, *fallback = NULL, *ns;

	list_for_each_entry_rcu(ns, &head->list, siblings) {
		if (ns->ctrl->state != NVME_CTRL_LIVE ||
		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
			continue;

		distance = node_distance(node, dev_to_node(ns->ctrl->dev));

		switch (ns->ana_state) {
		case NVME_ANA_OPTIMIZED:
			rcu_assign_pointer(head->current_path, ns);
			return ns;
			if (distance < found_distance) {
				found_distance = distance;
				found = ns;
			}
			break;
		case NVME_ANA_NONOPTIMIZED:
			if (distance < fallback_distance) {
				fallback_distance = distance;
				fallback = ns;
			}
			break;
		default:
			break;
		}
	}

	if (fallback)
		rcu_assign_pointer(head->current_path, fallback);
	return fallback;
	if (!found)
		found = fallback;
	if (found)
		rcu_assign_pointer(head->current_path[node], found);
	return found;
}

static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
@@ -143,10 +176,12 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns)

inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);
	int node = numa_node_id();
	struct nvme_ns *ns;

	ns = srcu_dereference(head->current_path[node], &head->srcu);
	if (unlikely(!ns || !nvme_path_is_optimized(ns)))
		ns = __nvme_find_path(head);
		ns = __nvme_find_path(head, node);
	return ns;
}

@@ -193,7 +228,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
	int srcu_idx;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = srcu_dereference(head->current_path, &head->srcu);
	ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu);
	if (likely(ns && nvme_path_is_optimized(ns)))
		found = ns->queue->poll_fn(q, qc);
	srcu_read_unlock(&head->srcu, srcu_idx);
+9 −16
Original line number Diff line number Diff line
@@ -277,14 +277,6 @@ struct nvme_ns_ids {
 * only ever has a single entry for private namespaces.
 */
struct nvme_ns_head {
#ifdef CONFIG_NVME_MULTIPATH
	struct gendisk		*disk;
	struct nvme_ns __rcu	*current_path;
	struct bio_list		requeue_list;
	spinlock_t		requeue_lock;
	struct work_struct	requeue_work;
	struct mutex		lock;
#endif
	struct list_head	list;
	struct srcu_struct      srcu;
	struct nvme_subsystem	*subsys;
@@ -293,6 +285,14 @@ struct nvme_ns_head {
	struct list_head	entry;
	struct kref		ref;
	int			instance;
#ifdef CONFIG_NVME_MULTIPATH
	struct gendisk		*disk;
	struct bio_list		requeue_list;
	spinlock_t		requeue_lock;
	struct work_struct	requeue_work;
	struct mutex		lock;
	struct nvme_ns __rcu	*current_path[];
#endif
};

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
@@ -474,14 +474,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);

static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;

	if (head && ns == rcu_access_pointer(head->current_path))
		rcu_assign_pointer(head->current_path, NULL);
}
void nvme_mpath_clear_current_path(struct nvme_ns *ns);
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);

static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
Loading