Commit c4bd70e8 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-linus-2019-10-03' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - Mandate timespec64 for the io_uring timeout ABI (Arnd)

 - Set of NVMe changes via Sagi:
     - controller removal race fix from Balbir
     - quirk additions from Gabriel and Jian-Hong
     - nvme-pci power state save fix from Mario
     - Add 64bit user commands (for 64bit registers) from Marta
     - nvme-rdma/nvme-tcp fixes from Max, Mark and Me
     - Minor cleanups and nits from James, Dan and John

 - Two s390 dasd fixes (Jan, Stefan)

 - Have loop change block size in DIO mode (Martijn)

 - paride pg header ifdef guard (Masahiro)

 - Two blk-mq queue scheduler tweaks, fixing an ordering issue on zoned
   devices and suboptimal performance on others (Ming)

* tag 'for-linus-2019-10-03' of git://git.kernel.dk/linux-block: (22 commits)
  block: sed-opal: fix sparse warning: convert __be64 data
  block: sed-opal: fix sparse warning: obsolete array init.
  block: pg: add header include guard
  Revert "s390/dasd: Add discard support for ESE volumes"
  s390/dasd: Fix error handling during online processing
  io_uring: use __kernel_timespec in timeout ABI
  loop: change queue block size to match when using DIO
  blk-mq: apply normal plugging for HDD
  blk-mq: honor IO scheduler for multiqueue devices
  nvme-rdma: fix possible use-after-free in connect timeout
  nvme: Move ctrl sqsize to generic space
  nvme: Add ctrl attributes for queue_count and sqsize
  nvme: allow 64-bit results in passthru commands
  nvme: Add quirk for Kingston NVME SSD running FW E8FK11.T
  nvmet-tcp: remove superflous check on request sgl
  Added QUIRKs for ADATA XPG SX8200 Pro 512GB
  nvme-rdma: Fix max_hw_sectors calculation
  nvme: fix an error code in nvme_init_subsystem()
  nvme-pci: Save PCI state before putting drive into deepest state
  nvme-tcp: fix wrong stop condition in io_work
  ...
parents cc3a7bfe a9eb49c9
Loading
Loading
Loading
Loading
+9 −3
Original line number Diff line number Diff line
@@ -1992,10 +1992,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
		/* bypass scheduler for flush rq */
		blk_insert_flush(rq);
		blk_mq_run_hw_queue(data.hctx, true);
	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
				!blk_queue_nonrot(q))) {
		/*
		 * Use plugging if we have a ->commit_rqs() hook as well, as
		 * we know the driver uses bd->last in a smart fashion.
		 *
		 * Use normal plugging if this disk is slow HDD, as sequential
		 * IO may benefit a lot from plug merging.
		 */
		unsigned int request_count = plug->rq_count;
		struct request *last = NULL;
@@ -2012,6 +2016,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
		}

		blk_add_rq_to_plug(plug, rq);
	} else if (q->elevator) {
		blk_mq_sched_insert_request(rq, false, true, true);
	} else if (plug && !blk_queue_nomerges(q)) {
		/*
		 * We do limited plugging. If the bio can be merged, do that.
@@ -2035,8 +2041,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
			blk_mq_try_issue_directly(data.hctx, same_queue_rq,
					&cookie);
		}
	} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
			!data.hctx->dispatch_busy)) {
	} else if ((q->nr_hw_queues > 1 && is_sync) ||
			!data.hctx->dispatch_busy) {
		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
	} else {
		blk_mq_sched_insert_request(rq, false, true, true);
+3 −3
Original line number Diff line number Diff line
@@ -129,7 +129,7 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
		{ 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 },

	/* tables */
	[OPAL_TABLE_TABLE]
	[OPAL_TABLE_TABLE] =
		{ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01 },
	[OPAL_LOCKINGRANGE_GLOBAL] =
		{ 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 },
@@ -372,8 +372,8 @@ static void check_geometry(struct opal_dev *dev, const void *data)
{
	const struct d0_geometry_features *geo = data;

	dev->align = geo->alignment_granularity;
	dev->lowest_lba = geo->lowest_aligned_lba;
	dev->align = be64_to_cpu(geo->alignment_granularity);
	dev->lowest_lba = be64_to_cpu(geo->lowest_aligned_lba);
}

static int execute_step(struct opal_dev *dev,
+10 −0
Original line number Diff line number Diff line
@@ -994,6 +994,16 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
	if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
		blk_queue_write_cache(lo->lo_queue, true, false);

	if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) {
		/* In case of direct I/O, match underlying block size */
		unsigned short bsize = bdev_logical_block_size(
			inode->i_sb->s_bdev);

		blk_queue_logical_block_size(lo->lo_queue, bsize);
		blk_queue_physical_block_size(lo->lo_queue, bsize);
		blk_queue_io_min(lo->lo_queue, bsize);
	}

	loop_update_rotational(lo);
	loop_update_dio(lo);
	set_capacity(lo->lo_disk, size);
+113 −19
Original line number Diff line number Diff line
@@ -102,10 +102,13 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
	 */
	if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
		return;
	revalidate_disk(ns->disk);
	blk_set_queue_dying(ns->queue);
	/* Forcibly unquiesce queues to avoid blocking dispatch */
	blk_mq_unquiesce_queue(ns->queue);
	/*
	 * Revalidate after unblocking dispatchers that may be holding bd_butex
	 */
	revalidate_disk(ns->disk);
}

static void nvme_queue_scan(struct nvme_ctrl *ctrl)
@@ -847,7 +850,7 @@ out:
static int nvme_submit_user_cmd(struct request_queue *q,
		struct nvme_command *cmd, void __user *ubuffer,
		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
		u32 meta_seed, u32 *result, unsigned timeout)
		u32 meta_seed, u64 *result, unsigned timeout)
{
	bool write = nvme_is_write(cmd);
	struct nvme_ns *ns = q->queuedata;
@@ -888,7 +891,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
	else
		ret = nvme_req(req)->status;
	if (result)
		*result = le32_to_cpu(nvme_req(req)->result.u32);
		*result = le64_to_cpu(nvme_req(req)->result.u64);
	if (meta && !ret && !write) {
		if (copy_to_user(meta_buffer, meta, meta_len))
			ret = -EFAULT;
@@ -1335,6 +1338,54 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
	struct nvme_command c;
	unsigned timeout = 0;
	u32 effects;
	u64 result;
	int status;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;
	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
		return -EFAULT;
	if (cmd.flags)
		return -EINVAL;

	memset(&c, 0, sizeof(c));
	c.common.opcode = cmd.opcode;
	c.common.flags = cmd.flags;
	c.common.nsid = cpu_to_le32(cmd.nsid);
	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
	c.common.cdw10 = cpu_to_le32(cmd.cdw10);
	c.common.cdw11 = cpu_to_le32(cmd.cdw11);
	c.common.cdw12 = cpu_to_le32(cmd.cdw12);
	c.common.cdw13 = cpu_to_le32(cmd.cdw13);
	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
	c.common.cdw15 = cpu_to_le32(cmd.cdw15);

	if (cmd.timeout_ms)
		timeout = msecs_to_jiffies(cmd.timeout_ms);

	effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
			(void __user *)(uintptr_t)cmd.metadata,
			cmd.metadata_len, 0, &result, timeout);
	nvme_passthru_end(ctrl, effects);

	if (status >= 0) {
		if (put_user(result, &ucmd->result))
			return -EFAULT;
	}

	return status;
}

static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
			struct nvme_passthru_cmd64 __user *ucmd)
{
	struct nvme_passthru_cmd64 cmd;
	struct nvme_command c;
	unsigned timeout = 0;
	u32 effects;
	int status;

	if (!capable(CAP_SYS_ADMIN))
@@ -1405,6 +1456,41 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
		srcu_read_unlock(&head->srcu, idx);
}

static bool is_ctrl_ioctl(unsigned int cmd)
{
	if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
		return true;
	if (is_sed_ioctl(cmd))
		return true;
	return false;
}

static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
				  void __user *argp,
				  struct nvme_ns_head *head,
				  int srcu_idx)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;

	nvme_get_ctrl(ns->ctrl);
	nvme_put_ns_from_disk(head, srcu_idx);

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		ret = nvme_user_cmd(ctrl, NULL, argp);
		break;
	case NVME_IOCTL_ADMIN64_CMD:
		ret = nvme_user_cmd64(ctrl, NULL, argp);
		break;
	default:
		ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
		break;
	}
	nvme_put_ctrl(ctrl);
	return ret;
}

static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
		unsigned int cmd, unsigned long arg)
{
@@ -1422,20 +1508,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
	 * seperately and drop the ns SRCU reference early.  This avoids a
	 * deadlock when deleting namespaces using the passthrough interface.
	 */
	if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
		struct nvme_ctrl *ctrl = ns->ctrl;

		nvme_get_ctrl(ns->ctrl);
		nvme_put_ns_from_disk(head, srcu_idx);

		if (cmd == NVME_IOCTL_ADMIN_CMD)
			ret = nvme_user_cmd(ctrl, NULL, argp);
		else
			ret = sed_ioctl(ctrl->opal_dev, cmd, argp);

		nvme_put_ctrl(ctrl);
		return ret;
	}
	if (is_ctrl_ioctl(cmd))
		return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);

	switch (cmd) {
	case NVME_IOCTL_ID:
@@ -1448,6 +1522,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
	case NVME_IOCTL_SUBMIT_IO:
		ret = nvme_submit_io(ns, argp);
		break;
	case NVME_IOCTL_IO64_CMD:
		ret = nvme_user_cmd64(ns->ctrl, ns, argp);
		break;
	default:
		if (ns->ndev)
			ret = nvme_nvm_ioctl(ns, cmd, arg);
@@ -2289,6 +2366,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = {
		.vid = 0x14a4,
		.fr = "22301111",
		.quirks = NVME_QUIRK_SIMPLE_SUSPEND,
	},
	{
		/*
		 * This Kingston E8FK11.T firmware version has no interrupt
		 * after resume with actions related to suspend to idle
		 * https://bugzilla.kernel.org/show_bug.cgi?id=204887
		 */
		.vid = 0x2646,
		.fr = "E8FK11.T",
		.quirks = NVME_QUIRK_SIMPLE_SUSPEND,
	}
};

@@ -2540,8 +2627,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
		list_add_tail(&subsys->entry, &nvme_subsystems);
	}

	if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
			dev_name(ctrl->device))) {
	ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
				dev_name(ctrl->device));
	if (ret) {
		dev_err(ctrl->device,
			"failed to create sysfs link from subsystem.\n");
		goto out_put_subsystem;
@@ -2838,6 +2926,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		return nvme_user_cmd(ctrl, NULL, argp);
	case NVME_IOCTL_ADMIN64_CMD:
		return nvme_user_cmd64(ctrl, NULL, argp);
	case NVME_IOCTL_IO_CMD:
		return nvme_dev_user_cmd(ctrl, argp);
	case NVME_IOCTL_RESET:
@@ -3045,6 +3135,8 @@ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);

nvme_show_int_function(cntlid);
nvme_show_int_function(numa_node);
nvme_show_int_function(queue_count);
nvme_show_int_function(sqsize);

static ssize_t nvme_sysfs_delete(struct device *dev,
				struct device_attribute *attr, const char *buf,
@@ -3125,6 +3217,8 @@ static struct attribute *nvme_dev_attrs[] = {
	&dev_attr_address.attr,
	&dev_attr_state.attr,
	&dev_attr_numa_node.attr,
	&dev_attr_queue_count.attr,
	&dev_attr_sqsize.attr,
	NULL
};

+1 −1
Original line number Diff line number Diff line
@@ -221,6 +221,7 @@ struct nvme_ctrl {
	u16 oacs;
	u16 nssa;
	u16 nr_streams;
	u16 sqsize;
	u32 max_namespaces;
	atomic_t abort_limit;
	u8 vwc;
@@ -269,7 +270,6 @@ struct nvme_ctrl {
	u16 hmmaxd;

	/* Fabrics only */
	u16 sqsize;
	u32 ioccsz;
	u32 iorcsz;
	u16 icdoff;
Loading