Commit d92f79a5 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge branch 'nvme-5.8' of git://git.infradead.org/nvme into for-5.8/drivers

Pull NVMe updates from Christoph:

"The second large batch of nvme updates:

 - t10 protection information support for nvme-rdma and nvmet-rdma
   (Israel Rukshin and Max Gurtovoy)
 - target side AEN improvements (Chaitanya Kulkarni)
 - various fixes and minor improvements all over, including the nvme part
   of the lpfc driver"

* 'nvme-5.8' of git://git.infradead.org/nvme: (38 commits)
  lpfc: Fix return value in __lpfc_nvme_ls_abort
  lpfc: fix axchg pointer reference after free and double frees
  lpfc: Fix pointer checks and comments in LS receive refactoring
  nvme: set dma alignment to qword
  nvmet: cleanups the loop in nvmet_async_events_process
  nvmet: fix memory leak when removing namespaces and controllers concurrently
  nvmet-rdma: add metadata/T10-PI support
  nvmet: add metadata support for block devices
  nvmet: add metadata/T10-PI support
  nvme: add Metadata Capabilities enumerations
  nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len
  nvmet: rename nvmet_rw_len to nvmet_rw_data_len
  nvmet: add metadata characteristics for a namespace
  nvme-rdma: add metadata/T10-PI support
  nvme-rdma: introduce nvme_rdma_sgl structure
  nvme: introduce NVME_INLINE_METADATA_SG_CNT
  nvme: enforce extended LBA format for fabrics metadata
  nvme: introduce max_integrity_segments ctrl attribute
  nvme: make nvme_ns_has_pi accessible to transports
  nvme: introduce NVME_NS_METADATA_SUPPORTED flag
  ...
parents 263c6158 6b6e8963
Loading
Loading
Loading
Loading
+70 −27
Original line number Diff line number Diff line
@@ -19,7 +19,6 @@
#include <linux/pr.h>
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <linux/pm_qos.h>
#include <asm/unaligned.h>

@@ -204,11 +203,6 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
	nvme_put_ctrl(ctrl);
}

static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
{
	return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
}

static blk_status_t nvme_error_status(u16 status)
{
	switch (status & 0x7ff) {
@@ -552,19 +546,22 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)

	ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
	if (ret)
		return ret;
		goto out_disable_stream;

	ctrl->nssa = le16_to_cpu(s.nssa);
	if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
		dev_info(ctrl->device, "too few streams (%u) available\n",
					ctrl->nssa);
		nvme_disable_streams(ctrl);
		return 0;
		goto out_disable_stream;
	}

	ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
	dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
	return 0;

out_disable_stream:
	nvme_disable_streams(ctrl);
	return ret;
}

/*
@@ -1302,7 +1299,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
	meta_len = (io.nblocks + 1) * ns->ms;
	metadata = nvme_to_user_ptr(io.metadata);

	if (ns->ext) {
	if (ns->features & NVME_NS_EXT_LBAS) {
		length += meta_len;
		meta_len = 0;
	} else if (meta_len) {
@@ -1696,7 +1693,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
				u32 max_integrity_segments)
{
	struct blk_integrity integrity;

@@ -1719,10 +1717,11 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
	}
	integrity.tuple_size = ms;
	blk_integrity_register(disk, &integrity);
	blk_queue_max_integrity_segments(disk->queue, 1);
	blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
}
#else
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
				u32 max_integrity_segments)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
@@ -1842,7 +1841,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
{
	sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
	unsigned short bs = 1 << ns->lba_shift;
	u32 atomic_bs, phys_bs, io_opt;
	u32 atomic_bs, phys_bs, io_opt = 0;

	if (ns->lba_shift > PAGE_SHIFT) {
		/* unsupported block size, set capacity to 0 later */
@@ -1851,7 +1850,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
	blk_mq_freeze_queue(disk->queue);
	blk_integrity_unregister(disk);

	atomic_bs = phys_bs = io_opt = bs;
	atomic_bs = phys_bs = bs;
	nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt);
	if (id->nabo == 0) {
		/*
@@ -1882,12 +1881,27 @@ static void nvme_update_disk_info(struct gendisk *disk,
	blk_queue_io_min(disk->queue, phys_bs);
	blk_queue_io_opt(disk->queue, io_opt);

	if (ns->ms && !ns->ext &&
	    (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
		nvme_init_integrity(disk, ns->ms, ns->pi_type);
	if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
	    ns->lba_shift > PAGE_SHIFT)
	/*
	 * The block layer can't support LBA sizes larger than the page size
	 * yet, so catch this early and don't allow block I/O.
	 */
	if (ns->lba_shift > PAGE_SHIFT)
		capacity = 0;

	/*
	 * Register a metadata profile for PI, or the plain non-integrity NVMe
	 * metadata masquerading as Type 0 if supported, otherwise reject block
	 * I/O to namespaces with metadata except when the namespace supports
	 * PI, as it can strip/insert in that case.
	 */
	if (ns->ms) {
		if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
		    (ns->features & NVME_NS_METADATA_SUPPORTED))
			nvme_init_integrity(disk, ns->ms, ns->pi_type,
					    ns->ctrl->max_integrity_segments);
		else if (!nvme_ns_has_pi(ns))
			capacity = 0;
	}

	set_capacity_revalidate_and_notify(disk, capacity, false);

@@ -1902,9 +1916,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
	blk_mq_unfreeze_queue(disk->queue);
}

static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
	struct nvme_ns *ns = disk->private_data;
	struct nvme_ctrl *ctrl = ns->ctrl;
	u32 iob;

	/*
@@ -1915,20 +1930,43 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
	if (ns->lba_shift == 0)
		ns->lba_shift = 9;

	if ((ns->ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
	    is_power_of_2(ns->ctrl->max_hw_sectors))
		iob = ns->ctrl->max_hw_sectors;
	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
	    is_power_of_2(ctrl->max_hw_sectors))
		iob = ctrl->max_hw_sectors;
	else
		iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));

	ns->features = 0;
	ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
	/* the PI implementation requires metadata equal t10 pi tuple size */
	if (ns->ms == sizeof(struct t10_pi_tuple))
		ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
	else
		ns->pi_type = 0;

	if (ns->ms) {
		/*
		 * For PCIe only the separate metadata pointer is supported,
		 * as the block layer supplies metadata in a separate bio_vec
		 * chain. For Fabrics, only metadata as part of extended data
		 * LBA is supported on the wire per the Fabrics specification,
		 * but the HBA/HCA will do the remapping from the separate
		 * metadata buffers for us.
		 */
		if (id->flbas & NVME_NS_FLBAS_META_EXT) {
			ns->features |= NVME_NS_EXT_LBAS;
			if ((ctrl->ops->flags & NVME_F_FABRICS) &&
			    (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) &&
			    ctrl->max_integrity_segments)
				ns->features |= NVME_NS_METADATA_SUPPORTED;
		} else {
			if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS))
				return -EINVAL;
			if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
				ns->features |= NVME_NS_METADATA_SUPPORTED;
		}
	}

	if (iob)
		blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
	nvme_update_disk_info(disk, ns, id);
@@ -1939,6 +1977,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
		revalidate_disk(ns->head->disk);
	}
#endif
	return 0;
}

static int nvme_revalidate_disk(struct gendisk *disk)
@@ -1974,7 +2013,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
		goto free_id;
	}

	__nvme_revalidate_disk(disk, id);
	ret = __nvme_revalidate_disk(disk, id);
free_id:
	kfree(id);
out:
@@ -2283,6 +2322,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
	}
	blk_queue_virt_boundary(q, ctrl->page_size - 1);
	blk_queue_dma_alignment(q, 7);
	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
		vwc = true;
	blk_queue_write_cache(q, vwc, vwc);
@@ -3628,7 +3668,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
	memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
	ns->disk = disk;

	__nvme_revalidate_disk(disk, id);
	if (__nvme_revalidate_disk(disk, id))
		goto out_free_disk;

	if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
		ret = nvme_nvm_register(ns, disk_name, node);
@@ -3655,6 +3696,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
	/* prevent double queue cleanup */
	ns->disk->queue = NULL;
	put_disk(ns->disk);
 out_free_disk:
	del_gendisk(ns->disk);
 out_unlink_ns:
	mutex_lock(&ctrl->subsys->lock);
	list_del_rcu(&ns->siblings);
+4 −2
Original line number Diff line number Diff line
@@ -108,7 +108,7 @@ struct nvme_fc_fcp_op {
struct nvme_fcp_op_w_sgl {
	struct nvme_fc_fcp_op	op;
	struct scatterlist	sgl[NVME_INLINE_SG_CNT];
	uint8_t			priv[0];
	uint8_t			priv[];
};

struct nvme_fc_lport {
@@ -3246,7 +3246,9 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
			dev_warn(ctrl->ctrl.device,
				"NVME-FC{%d}: dev_loss_tmo (%d) expired "
				"while waiting for remoteport connectivity.\n",
				ctrl->cnum, portptr->dev_loss_tmo);
				ctrl->cnum, min_t(int, portptr->dev_loss_tmo,
					(ctrl->ctrl.opts->max_reconnects *
					 ctrl->ctrl.opts->reconnect_delay)));
		WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
	}
}
+5 −2
Original line number Diff line number Diff line
@@ -171,7 +171,7 @@ struct nvme_nvm_bb_tbl {
	__le32	tdresv;
	__le32	thresv;
	__le32	rsvd2[8];
	__u8	blk[0];
	__u8	blk[];
};

struct nvme_nvm_id20_addrf {
@@ -961,7 +961,10 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
	geo = &dev->geo;
	geo->csecs = 1 << ns->lba_shift;
	geo->sos = ns->ms;
	geo->ext = ns->ext;
	if (ns->features & NVME_NS_EXT_LBAS)
		geo->ext = true;
	else
		geo->ext = false;
	geo->mdts = ns->ctrl->max_hw_sectors;

	dev->q = q;
+16 −2
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
#include <linux/wait.h>
#include <linux/t10-pi.h>

#include <trace/events/block.h>

@@ -30,8 +31,10 @@ extern unsigned int admin_timeout;

#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define  NVME_INLINE_SG_CNT  0
#define  NVME_INLINE_METADATA_SG_CNT  0
#else
#define  NVME_INLINE_SG_CNT  2
#define  NVME_INLINE_METADATA_SG_CNT  1
#endif

extern struct workqueue_struct *nvme_wq;
@@ -228,6 +231,7 @@ struct nvme_ctrl {
	u32 page_size;
	u32 max_hw_sectors;
	u32 max_segments;
	u32 max_integrity_segments;
	u16 crdt[3];
	u16 oncs;
	u16 oacs;
@@ -364,6 +368,11 @@ struct nvme_ns_head {
#endif
};

enum nvme_ns_features {
	NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
	NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
};

struct nvme_ns {
	struct list_head list;

@@ -383,8 +392,8 @@ struct nvme_ns {
	u16 ms;
	u16 sgs;
	u32 sws;
	bool ext;
	u8 pi_type;
	unsigned long features;
	unsigned long flags;
#define NVME_NS_REMOVING	0
#define NVME_NS_DEAD     	1
@@ -394,6 +403,12 @@ struct nvme_ns {

};

/* NVMe ns supports metadata actions by the controller (generate/strip) */
static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
{
	return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
}

struct nvme_ctrl_ops {
	const char *name;
	struct module *module;
@@ -497,7 +512,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);

void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
+24 −4
Original line number Diff line number Diff line
@@ -68,14 +68,30 @@ static int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");

static int io_queue_count_set(const char *val, const struct kernel_param *kp)
{
	unsigned int n;
	int ret;

	ret = kstrtouint(val, 10, &n);
	if (ret != 0 || n > num_possible_cpus())
		return -EINVAL;
	return param_set_uint(val, kp);
}

static const struct kernel_param_ops io_queue_count_ops = {
	.set = io_queue_count_set,
	.get = param_get_uint,
};

static unsigned int write_queues;
module_param(write_queues, uint, 0644);
module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644);
MODULE_PARM_DESC(write_queues,
	"Number of queues to use for writes. If not set, reads and writes "
	"will share a queue set.");

static unsigned int poll_queues;
module_param(poll_queues, uint, 0644);
module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");

struct nvme_dev;
@@ -2549,6 +2565,12 @@ static void nvme_reset_work(struct work_struct *work)
		goto out;
	}

	/*
	 * We do not support an SGL for metadata (yet), so we are limited to a
	 * single integrity segment for the separate metadata pointer.
	 */
	dev->ctrl.max_integrity_segments = 1;

	result = nvme_init_identify(&dev->ctrl);
	if (result)
		goto out;
@@ -3118,8 +3140,6 @@ static int __init nvme_init(void)
	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
	BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);

	write_queues = min(write_queues, num_possible_cpus());
	poll_queues = min(poll_queues, num_possible_cpus());
	return pci_register_driver(&nvme_driver);
}

Loading