Commit 103e515e authored by Hannes Reinecke, committed by Jens Axboe
Browse files

nvme: add a numa_node field to struct nvme_ctrl



Instead of directly poking into the struct device, add a new numa_node
field to struct nvme_ctrl.  This allows fabrics drivers, where ctrl->dev
is a virtual device, to support NUMA affinity as well.

Also expose the field as a sysfs attribute, and populate it for the
RDMA and FC transports.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 11902035
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -2766,6 +2766,7 @@ static ssize_t field##_show(struct device *dev, \
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);

nvme_show_int_function(cntlid);
nvme_show_int_function(numa_node);

static ssize_t nvme_sysfs_delete(struct device *dev,
				struct device_attribute *attr, const char *buf,
@@ -2845,6 +2846,7 @@ static struct attribute *nvme_dev_attrs[] = {
	&dev_attr_subsysnqn.attr,
	&dev_attr_address.attr,
	&dev_attr_state.attr,
	&dev_attr_numa_node.attr,
	NULL
};

@@ -3055,7 +3057,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
	struct gendisk *disk;
	struct nvme_id_ns *id;
	char disk_name[DISK_NAME_LEN];
-	int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
+	int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT;

	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
	if (!ns)
+3 −2
Original line number Diff line number Diff line
@@ -2425,7 +2425,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
	ctrl->tag_set.ops = &nvme_fc_mq_ops;
	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
-	ctrl->tag_set.numa_node = NUMA_NO_NODE;
+	ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	ctrl->tag_set.cmd_size =
		struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
@@ -3018,6 +3018,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,

	ctrl->ctrl.opts = opts;
	ctrl->ctrl.nr_reconnects = 0;
	ctrl->ctrl.numa_node = dev_to_node(lport->dev);
	INIT_LIST_HEAD(&ctrl->ctrl_list);
	ctrl->lport = lport;
	ctrl->rport = rport;
@@ -3058,7 +3059,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
-	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
+	ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
	ctrl->admin_tag_set.cmd_size =
		struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
			    ctrl->lport->ops->fcprqst_priv_sz);
+2 −2
Original line number Diff line number Diff line
@@ -141,7 +141,7 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
			continue;

-		distance = node_distance(node, dev_to_node(ns->ctrl->dev));
+		distance = node_distance(node, ns->ctrl->numa_node);

		switch (ns->ana_state) {
		case NVME_ANA_OPTIMIZED:
@@ -261,7 +261,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
	if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
		return 0;

-	q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
+	q = blk_alloc_queue_node(GFP_KERNEL, ctrl->numa_node);
	if (!q)
		goto out;
	q->queuedata = head;
+1 −0
Original line number Diff line number Diff line
@@ -153,6 +153,7 @@ struct nvme_ctrl {
	struct request_queue *connect_q;
	struct device *dev;
	int instance;
	int numa_node;
	struct blk_mq_tag_set *tagset;
	struct blk_mq_tag_set *admin_tagset;
	struct list_head namespaces;
+3 −2
Original line number Diff line number Diff line
@@ -694,7 +694,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
		set->ops = &nvme_rdma_admin_mq_ops;
		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
		set->reserved_tags = 2; /* connect + keep-alive */
-		set->numa_node = NUMA_NO_NODE;
+		set->numa_node = nctrl->numa_node;
		set->cmd_size = sizeof(struct nvme_rdma_request) +
			SG_CHUNK_SIZE * sizeof(struct scatterlist);
		set->driver_data = ctrl;
@@ -707,7 +707,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
		set->ops = &nvme_rdma_mq_ops;
		set->queue_depth = nctrl->sqsize + 1;
		set->reserved_tags = 1; /* fabric connect */
-		set->numa_node = NUMA_NO_NODE;
+		set->numa_node = nctrl->numa_node;
		set->flags = BLK_MQ_F_SHOULD_MERGE;
		set->cmd_size = sizeof(struct nvme_rdma_request) +
			SG_CHUNK_SIZE * sizeof(struct scatterlist);
@@ -763,6 +763,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
		return error;

	ctrl->device = ctrl->queues[0].device;
	ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);

	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);