nvme-fc: fix io timeout to abort I/O (52793d62) · Commits · 戴 / test

drivers/nvme/host/fc.c

+69 −39

Original line number	Diff line number	Diff line
		@@ -1837,8 +1837,10 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
		opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
		if (opstate != FCPOP_STATE_ACTIVE)
		atomic_set(&op->state, opstate);
		else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
		else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
		op->flags |= FCOP_FLAGS_TERMIO;
		ctrl->iocnt++;
		}
		spin_unlock_irqrestore(&ctrl->lock, flags);

		if (opstate != FCPOP_STATE_ACTIVE)
		@@ -1874,7 +1876,8 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,

		if (opstate == FCPOP_STATE_ABORTED) {
		spin_lock_irqsave(&ctrl->lock, flags);
		if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
		if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
		op->flags & FCOP_FLAGS_TERMIO) {
		if (!--ctrl->iocnt)
		wake_up(&ctrl->ioabort_wait);
		}
		@@ -2446,15 +2449,20 @@ nvme_fc_timeout(struct request *rq, bool reserved)
		{
		struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
		struct nvme_fc_ctrl *ctrl = op->ctrl;
		struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
		struct nvme_command *sqe = &cmdiu->sqe;

		/*
		* we can't individually ABTS an io without affecting the queue,
		* thus killing the queue, and thus the association.
		* So resolve by performing a controller reset, which will stop
		* the host/io stack, terminate the association on the link,
		* and recreate an association on the link.
		* Attempt to abort the offending command. Command completion
		* will detect the aborted io and will fail the connection.
		*/
		nvme_fc_error_recovery(ctrl, "io timeout error");
		dev_info(ctrl->ctrl.device,
		"NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
		"x%08x/x%08x\n",
		ctrl->cnum, op->queue->qnum, sqe->common.opcode,
		sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
		if (__nvme_fc_abort_op(ctrl, op))
		nvme_fc_error_recovery(ctrl, "io timeout abort failed");

		/*
		* the io abort has been initiated. Have the reset timer
		@@ -2726,6 +2734,7 @@ nvme_fc_complete_rq(struct request *rq)
		struct nvme_fc_ctrl *ctrl = op->ctrl;

		atomic_set(&op->state, FCPOP_STATE_IDLE);
		op->flags &= ~FCOP_FLAGS_TERMIO;

		nvme_fc_unmap_data(ctrl, rq, op);
		nvme_complete_rq(rq);
		@@ -3090,26 +3099,19 @@ out_free_queue:
		return ret;
		}


		/*
		* This routine stops operation of the controller on the host side.
		* On the host os stack side: Admin and IO queues are stopped,
		* outstanding ios on them terminated via FC ABTS.
		* On the link side: the association is terminated.
		* This routine runs through all outstanding commands on the association
		* and aborts them. This routine is typically called by the
		* delete_association routine. It is also called due to an error during
		* reconnect. In that scenario, it is most likely a command that initializes
		* the controller, including fabric Connect commands on io queues, that
		* may have timed out or failed thus the io must be killed for the connect
		* thread to see the error.
		*/
		static void
		nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
		__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
		{
		struct nvmefc_ls_rcv_op *disls = NULL;
		unsigned long flags;

		if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
		return;

		spin_lock_irqsave(&ctrl->lock, flags);
		set_bit(FCCTRL_TERMIO, &ctrl->flags);
		ctrl->iocnt = 0;
		spin_unlock_irqrestore(&ctrl->lock, flags);

		/*
		* If io queues are present, stop them and terminate all outstanding
		* ios on them. As FC allocates FC exchange for each io, the
		@@ -3127,6 +3129,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
		blk_mq_tagset_busy_iter(&ctrl->tag_set,
		nvme_fc_terminate_exchange, &ctrl->ctrl);
		blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
		if (start_queues)
		nvme_start_queues(&ctrl->ctrl);
		}

		/*
		@@ -3143,13 +3147,34 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)

		/*
		* clean up the admin queue. Same thing as above.
		* use blk_mq_tagset_busy_iter() and the transport routine to
		* terminate the exchanges.
		*/
		blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
		blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
		nvme_fc_terminate_exchange, &ctrl->ctrl);
		blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
		}

		/*
		* This routine stops operation of the controller on the host side.
		* On the host os stack side: Admin and IO queues are stopped,
		* outstanding ios on them terminated via FC ABTS.
		* On the link side: the association is terminated.
		*/
		static void
		nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
		{
		struct nvmefc_ls_rcv_op *disls = NULL;
		unsigned long flags;

		if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
		return;

		spin_lock_irqsave(&ctrl->lock, flags);
		set_bit(FCCTRL_TERMIO, &ctrl->flags);
		ctrl->iocnt = 0;
		spin_unlock_irqrestore(&ctrl->lock, flags);

		__nvme_fc_abort_outstanding_ios(ctrl, false);

		/* kill the aens as they are a separate path */
		nvme_fc_abort_aen_ops(ctrl);
		@@ -3263,21 +3288,26 @@ static void
		__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
		{
		/*
		* if state is connecting - the error occurred as part of a
		* reconnect attempt. The create_association error paths will
		* clean up any outstanding io.
		*
		* if it's a different state - ensure all pending io is
		* terminated. Given this can delay while waiting for the
		* aborted io to return, we recheck adapter state below
		* before changing state.
		* if state is CONNECTING - the error occurred as part of a
		* reconnect attempt. Abort any ios on the association and
		* let the create_association error paths resolve things.
		*/
		if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
		if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
		__nvme_fc_abort_outstanding_ios(ctrl, true);
		return;
		}

		/*
		* For any other state, kill the association. As this routine
		* is a common io abort routine for resetting and such, after
		* the association is terminated, ensure that the state is set
		* to CONNECTING.
		*/

		nvme_stop_keep_alive(&ctrl->ctrl);

		/* will block while waiting for io to terminate */
		nvme_fc_delete_association(ctrl);
		}

		if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
		!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))

Admin message