Commit ddc62910 authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge tag 'nvme-5.10-2020-10-23' of git://git.infradead.org/nvme into block-5.10

Pull NVMe fixes from Christoph:

"nvme fixes for 5.10

 - rdma error handling fixes (Chao Leng)
 - fc error handling and reconnect fixes (James Smart)
 - fix the qid displayed when tracing ioctl commands (Keith Busch)
 - don't use BLK_MQ_REQ_NOWAIT for passthru (Chaitanya Kulkarni)
 - fix MDTS for passthru (Logan Gunthorpe)
 - blacklist Write Same on more devices (Kai-Heng Feng)
 - fix an uninitialized work struct (zhenwei pi)"

* tag 'nvme-5.10-2020-10-23' of git://git.infradead.org/nvme:
  nvme-fc: shorten reconnect delay if possible for FC
  nvme-fc: wait for queues to freeze before calling update_hr_hw_queues
  nvme-fc: fix error loop in create_hw_io_queues
  nvme-fc: fix io timeout to abort I/O
  nvmet: don't use BLK_MQ_REQ_NOWAIT for passthru
  nvmet: cleanup nvmet_passthru_map_sg()
  nvmet: limit passthru MTDS by BIO_MAX_PAGES
  nvmet: fix uninitialized work for zero kato
  nvme-pci: disable Write Zeroes on Sandisk Skyhawk
  nvme: use queuedata for nvme_req_qid
  nvme-rdma: fix crash due to incorrect cqe
  nvme-rdma: fix crash when connect rejected
parents fd78874b f673714a
Loading
Loading
Loading
Loading
+94 −44
Original line number Diff line number Diff line
@@ -26,6 +26,10 @@ enum nvme_fc_queue_flags {
};

#define NVME_FC_DEFAULT_DEV_LOSS_TMO	60	/* seconds */
#define NVME_FC_DEFAULT_RECONNECT_TMO	2	/* delay between reconnects
						 * when connected and a
						 * connection failure.
						 */

struct nvme_fc_queue {
	struct nvme_fc_ctrl	*ctrl;
@@ -1837,8 +1841,10 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
	opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
	if (opstate != FCPOP_STATE_ACTIVE)
		atomic_set(&op->state, opstate);
	else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
	else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
		op->flags |= FCOP_FLAGS_TERMIO;
		ctrl->iocnt++;
	}
	spin_unlock_irqrestore(&ctrl->lock, flags);

	if (opstate != FCPOP_STATE_ACTIVE)
@@ -1874,7 +1880,8 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,

	if (opstate == FCPOP_STATE_ABORTED) {
		spin_lock_irqsave(&ctrl->lock, flags);
		if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
		if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
		    op->flags & FCOP_FLAGS_TERMIO) {
			if (!--ctrl->iocnt)
				wake_up(&ctrl->ioabort_wait);
		}
@@ -2314,7 +2321,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
	return 0;

delete_queues:
	for (; i >= 0; i--)
	for (; i > 0; i--)
		__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
	return ret;
}
@@ -2433,7 +2440,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
		return;

	dev_warn(ctrl->ctrl.device,
		"NVME-FC{%d}: transport association error detected: %s\n",
		"NVME-FC{%d}: transport association event: %s\n",
		ctrl->cnum, errmsg);
	dev_warn(ctrl->ctrl.device,
		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
@@ -2446,15 +2453,20 @@ nvme_fc_timeout(struct request *rq, bool reserved)
{
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	struct nvme_fc_ctrl *ctrl = op->ctrl;
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
	struct nvme_command *sqe = &cmdiu->sqe;

	/*
	 * we can't individually ABTS an io without affecting the queue,
	 * thus killing the queue, and thus the association.
	 * So resolve by performing a controller reset, which will stop
	 * the host/io stack, terminate the association on the link,
	 * and recreate an association on the link.
	 * Attempt to abort the offending command. Command completion
	 * will detect the aborted io and will fail the connection.
	 */
	nvme_fc_error_recovery(ctrl, "io timeout error");
	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
		"x%08x/x%08x\n",
		ctrl->cnum, op->queue->qnum, sqe->common.opcode,
		sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
	if (__nvme_fc_abort_op(ctrl, op))
		nvme_fc_error_recovery(ctrl, "io timeout abort failed");

	/*
	 * the io abort has been initiated. Have the reset timer
@@ -2726,6 +2738,7 @@ nvme_fc_complete_rq(struct request *rq)
	struct nvme_fc_ctrl *ctrl = op->ctrl;

	atomic_set(&op->state, FCPOP_STATE_IDLE);
	op->flags &= ~FCOP_FLAGS_TERMIO;

	nvme_fc_unmap_data(ctrl, rq, op);
	nvme_complete_rq(rq);
@@ -2876,11 +2889,14 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
	if (ret)
		goto out_delete_hw_queues;

	if (prior_ioq_cnt != nr_io_queues)
	if (prior_ioq_cnt != nr_io_queues) {
		dev_info(ctrl->ctrl.device,
			"reconnect: revising io queue count from %d to %d\n",
			prior_ioq_cnt, nr_io_queues);
		nvme_wait_freeze(&ctrl->ctrl);
		blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
		nvme_unfreeze(&ctrl->ctrl);
	}

	return 0;

@@ -3090,26 +3106,19 @@ out_free_queue:
	return ret;
}


/*
 * This routine stops operation of the controller on the host side.
 * On the host os stack side: Admin and IO queues are stopped,
 *   outstanding ios on them terminated via FC ABTS.
 * On the link side: the association is terminated.
 * This routine runs through all outstanding commands on the association
 * and aborts them.  This routine is typically called by the
 * delete_association routine. It is also called due to an error during
 * reconnect. In that scenario, it is most likely a command that initializes
 * the controller, including fabric Connect commands on io queues, that
 * may have timed out or failed thus the io must be killed for the connect
 * thread to see the error.
 */
static void
nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
{
	struct nvmefc_ls_rcv_op *disls = NULL;
	unsigned long flags;

	if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
		return;

	spin_lock_irqsave(&ctrl->lock, flags);
	set_bit(FCCTRL_TERMIO, &ctrl->flags);
	ctrl->iocnt = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	/*
	 * If io queues are present, stop them and terminate all outstanding
	 * ios on them. As FC allocates FC exchange for each io, the
@@ -3127,6 +3136,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
		blk_mq_tagset_busy_iter(&ctrl->tag_set,
				nvme_fc_terminate_exchange, &ctrl->ctrl);
		blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
		if (start_queues)
			nvme_start_queues(&ctrl->ctrl);
	}

	/*
@@ -3143,13 +3154,34 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)

	/*
	 * clean up the admin queue. Same thing as above.
	 * use blk_mq_tagset_busy_iter() and the transport routine to
	 * terminate the exchanges.
	 */
	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
				nvme_fc_terminate_exchange, &ctrl->ctrl);
	blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
}

/*
 * This routine stops operation of the controller on the host side.
 * On the host os stack side: Admin and IO queues are stopped,
 *   outstanding ios on them terminated via FC ABTS.
 * On the link side: the association is terminated.
 */
static void
nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
{
	struct nvmefc_ls_rcv_op *disls = NULL;
	unsigned long flags;

	if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
		return;

	spin_lock_irqsave(&ctrl->lock, flags);
	set_bit(FCCTRL_TERMIO, &ctrl->flags);
	ctrl->iocnt = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	__nvme_fc_abort_outstanding_ios(ctrl, false);

	/* kill the aens as they are a separate path */
	nvme_fc_abort_aen_ops(ctrl);
@@ -3263,21 +3295,26 @@ static void
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
{
	/*
	 * if state is connecting - the error occurred as part of a
	 * reconnect attempt. The create_association error paths will
	 * clean up any outstanding io.
	 *
	 * if it's a different state - ensure all pending io is
	 * terminated. Given this can delay while waiting for the
	 * aborted io to return, we recheck adapter state below
	 * before changing state.
	 * if state is CONNECTING - the error occurred as part of a
	 * reconnect attempt. Abort any ios on the association and
	 * let the create_association error paths resolve things.
	 */
	if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
		__nvme_fc_abort_outstanding_ios(ctrl, true);
		return;
	}

	/*
	 * For any other state, kill the association. As this routine
	 * is a common io abort routine for resetting and such, after
	 * the association is terminated, ensure that the state is set
	 * to CONNECTING.
	 */
	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {

	nvme_stop_keep_alive(&ctrl->ctrl);

	/* will block while waiting for io to terminate */
	nvme_fc_delete_association(ctrl);
	}

	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
@@ -3403,7 +3440,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
{
	struct nvme_fc_ctrl *ctrl;
	unsigned long flags;
	int ret, idx;
	int ret, idx, ctrl_loss_tmo;

	if (!(rport->remoteport.port_role &
	    (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -3429,6 +3466,19 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
		goto out_free_ctrl;
	}

	/*
	 * if ctrl_loss_tmo is being enforced and the default reconnect delay
	 * is being used, change to a shorter reconnect delay for FC.
	 */
	if (opts->max_reconnects != -1 &&
	    opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY &&
	    opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) {
		ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay;
		opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO;
		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
						opts->reconnect_delay);
	}

	ctrl->ctrl.opts = opts;
	ctrl->ctrl.nr_reconnects = 0;
	if (lport->dev)
+1 −1
Original line number Diff line number Diff line
@@ -176,7 +176,7 @@ static inline struct nvme_request *nvme_req(struct request *req)

static inline u16 nvme_req_qid(struct request *req)
{
	if (!req->rq_disk)
	if (!req->q->queuedata)
		return 0;
	return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
}
+2 −0
Original line number Diff line number Diff line
@@ -3185,6 +3185,8 @@ static const struct pci_device_id nvme_id_table[] = {
				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
	{ PCI_DEVICE(0x1c5c, 0x1504),   /* SK Hynix PC400 */
		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
	{ PCI_DEVICE(0x15b7, 0x2001),   /*  Sandisk Skyhawk */
		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
		.driver_data = NVME_QUIRK_SINGLE_VECTOR },
	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
+3 −3
Original line number Diff line number Diff line
@@ -1730,10 +1730,11 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
	req->result = cqe->result;

	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
		if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) {
		if (unlikely(!req->mr ||
			     wc->ex.invalidate_rkey != req->mr->rkey)) {
			dev_err(queue->ctrl->ctrl.device,
				"Bogus remote invalidation for rkey %#x\n",
				req->mr->rkey);
				req->mr ? req->mr->rkey : 0);
			nvme_rdma_error_recovery(queue->ctrl);
		}
	} else if (req->mr) {
@@ -1926,7 +1927,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
		complete(&queue->cm_done);
		return 0;
	case RDMA_CM_EVENT_REJECTED:
		nvme_rdma_destroy_queue_ib(queue);
		cm_error = nvme_rdma_conn_rejected(queue, ev);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
+2 −1
Original line number Diff line number Diff line
@@ -1126,6 +1126,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
	 * in case a host died before it enabled the controller.  Hence, simply
	 * reset the keep alive timer when the controller is enabled.
	 */
	if (ctrl->kato)
		mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

Loading