Commit 1b52671d authored by Jens Axboe
Browse files

Merge branch 'nvme-5.8' of git://git.infradead.org/nvme into block-5.8

Pull NVMe fixes from Christoph.

* 'nvme-5.8' of git://git.infradead.org/nvme:
  nvme-multipath: fix bogus request queue reference put
  nvme-multipath: fix deadlock due to head->lock
  nvme: don't protect ns mutation with ns->head->lock
  nvme-multipath: fix deadlock between ana_work and scan_work
  nvme: fix possible deadlock when I/O is blocked
  nvme-rdma: assign completion vector correctly
  nvme-loop: initialize tagset numa value to the value of the ctrl
  nvme-tcp: initialize tagset numa value to the value of the ctrl
  nvme-pci: initialize tagset numa value to the value of the ctrl
  nvme-pci: override the value of the controller's numa node
  nvme: set initial value for controller's numa node
parents 0b8eb629 c3124466
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1974,7 +1974,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
	if (ns->head->disk) {
		nvme_update_disk_info(ns->head->disk, ns, id);
		blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
		revalidate_disk(ns->head->disk);
	}
#endif
	return 0;
@@ -4174,6 +4173,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
	ctrl->dev = dev;
	ctrl->ops = ops;
	ctrl->quirks = quirks;
	ctrl->numa_node = NUMA_NO_NODE;
	INIT_WORK(&ctrl->scan_work, nvme_scan_work);
	INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
	INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
+29 −17
Original line number Diff line number Diff line
@@ -409,15 +409,14 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;

	lockdep_assert_held(&ns->head->lock);

	if (!head->disk)
		return;

	if (!(head->disk->flags & GENHD_FL_UP))
	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
		device_add_disk(&head->subsys->dev, head->disk,
				nvme_ns_id_attr_groups);

	mutex_lock(&head->lock);
	if (nvme_path_is_optimized(ns)) {
		int node, srcu_idx;

@@ -426,9 +425,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
			__nvme_find_path(head, node);
		srcu_read_unlock(&head->srcu, srcu_idx);
	}
	mutex_unlock(&head->lock);

	synchronize_srcu(&ns->head->srcu);
	kblockd_schedule_work(&ns->head->requeue_work);
	synchronize_srcu(&head->srcu);
	kblockd_schedule_work(&head->requeue_work);
}

static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
@@ -483,14 +483,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state)
static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
		struct nvme_ns *ns)
{
	mutex_lock(&ns->head->lock);
	ns->ana_grpid = le32_to_cpu(desc->grpid);
	ns->ana_state = desc->state;
	clear_bit(NVME_NS_ANA_PENDING, &ns->flags);

	if (nvme_state_is_live(ns->ana_state))
		nvme_mpath_set_live(ns);
	mutex_unlock(&ns->head->lock);
}

static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
@@ -640,31 +638,37 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
}
DEVICE_ATTR_RO(ana_state);

static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
		struct nvme_ana_group_desc *desc, void *data)
{
	struct nvme_ns *ns = data;

	if (ns->ana_grpid == le32_to_cpu(desc->grpid)) {
		nvme_update_ns_ana_state(desc, ns);
		return -ENXIO; /* just break out of the loop */
	}
	struct nvme_ana_group_desc *dst = data;

	if (desc->grpid != dst->grpid)
		return 0;

	*dst = *desc;
	return -ENXIO; /* just break out of the loop */
}

void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
{
	if (nvme_ctrl_use_ana(ns->ctrl)) {
		struct nvme_ana_group_desc desc = {
			.grpid = id->anagrpid,
			.state = 0,
		};

		mutex_lock(&ns->ctrl->ana_lock);
		ns->ana_grpid = le32_to_cpu(id->anagrpid);
		nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
		nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
		mutex_unlock(&ns->ctrl->ana_lock);
		if (desc.state) {
			/* found the group desc: update */
			nvme_update_ns_ana_state(&desc, ns);
		}
	} else {
		mutex_lock(&ns->head->lock);
		ns->ana_state = NVME_ANA_OPTIMIZED; 
		nvme_mpath_set_live(ns);
		mutex_unlock(&ns->head->lock);
	}

	if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
@@ -686,6 +690,14 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
	kblockd_schedule_work(&head->requeue_work);
	flush_work(&head->requeue_work);
	blk_cleanup_queue(head->disk->queue);
	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
		/*
		 * If device_add_disk() was never called, prevent the
		 * disk release from putting a bogus reference on the
		 * request queue.
		 */
		head->disk->queue = NULL;
	}
	put_disk(head->disk);
}

+2 −0
Original line number Diff line number Diff line
@@ -364,6 +364,8 @@ struct nvme_ns_head {
	spinlock_t		requeue_lock;
	struct work_struct	requeue_work;
	struct mutex		lock;
	unsigned long		flags;
#define NVME_NSHEAD_DISK_LIVE	0
	struct nvme_ns __rcu	*current_path[];
#endif
};
+4 −2
Original line number Diff line number Diff line
@@ -1593,7 +1593,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)

		dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
		dev->admin_tagset.numa_node = dev_to_node(dev->dev);
		dev->admin_tagset.numa_node = dev->ctrl.numa_node;
		dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
		dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
		dev->admin_tagset.driver_data = dev;
@@ -1669,6 +1669,8 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
	if (result)
		return result;

	dev->ctrl.numa_node = dev_to_node(dev->dev);

	nvmeq = &dev->queues[0];
	aqa = nvmeq->q_depth - 1;
	aqa |= aqa << 16;
@@ -2257,7 +2259,7 @@ static void nvme_dev_add(struct nvme_dev *dev)
		if (dev->io_queues[HCTX_TYPE_POLL])
			dev->tagset.nr_maps++;
		dev->tagset.timeout = NVME_IO_TIMEOUT;
		dev->tagset.numa_node = dev_to_node(dev->dev);
		dev->tagset.numa_node = dev->ctrl.numa_node;
		dev->tagset.queue_depth =
				min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
		dev->tagset.cmd_size = sizeof(struct nvme_iod);
+1 −1
Original line number Diff line number Diff line
@@ -470,7 +470,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
	 * Spread I/O queues completion vectors according to their queue index.
	 * Admin queues can always go on completion vector 0.
	 */
	comp_vector = idx == 0 ? idx : idx - 1;
	comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;

	/* Polling queues need direct cq polling context */
	if (nvme_rdma_poll_queue(queue))
Loading