Below is a stack trace for an issue that was reported. What is happening is that the nvmet layer had its controller kato timeout fire, which causes it to schedule its fatal error handler via the fatal_err_work element. The error handler is invoked, which calls the transport delete_ctrl() entry point, and as the transport tears down the controller, nvmet_sq_destroy() ends up doing the final put on the ctrl, causing it to enter its free routine. The ctrl free routine does a cancel_work_sync() on the fatal_err_work element, which then does a flush_work and wait_for_completion. But, as the wait is in the context of the work element being flushed, it's in a catch-22 and the thread hangs.

[ 326.903131] nvmet: ctrl 1 keep-alive timer (15 seconds) expired!
[ 326.909832] nvmet: ctrl 1 fatal error occurred!
[ 327.643100] lpfc 0000:04:00.0: 0:6313 NVMET Defer ctx release xri x114 flg x2
[ 494.582064] INFO: task kworker/0:2:243 blocked for more than 120 seconds.
[ 494.589638] Not tainted 4.14.0-rc1.James+ #1
[ 494.594986] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 494.603718] kworker/0:2 D 0 243 2 0x80000000
[ 494.609839] Workqueue: events nvmet_fatal_error_handler [nvmet]
[ 494.616447] Call Trace:
[ 494.619177] __schedule+0x28d/0x890
[ 494.623070] schedule+0x36/0x80
[ 494.626571] schedule_timeout+0x1dd/0x300
[ 494.631044] ? dequeue_task_fair+0x592/0x840
[ 494.635810] ? pick_next_task_fair+0x23b/0x5c0
[ 494.640756] wait_for_completion+0x121/0x180
[ 494.645521] ? wake_up_q+0x80/0x80
[ 494.649315] flush_work+0x11d/0x1a0
[ 494.653206] ? wake_up_worker+0x30/0x30
[ 494.657484] __cancel_work_timer+0x10b/0x190
[ 494.662249] cancel_work_sync+0x10/0x20
[ 494.666525] nvmet_ctrl_put+0xa3/0x100 [nvmet]
[ 494.671482] nvmet_sq_destroy+0x64/0xd0 [nvmet]
[ 494.676540] nvmet_fc_delete_target_queue+0x202/0x220 [nvmet_fc]
[ 494.683245] nvmet_fc_delete_target_assoc+0x6d/0xc0 [nvmet_fc]
[ 494.689743] nvmet_fc_delete_ctrl+0x137/0x1a0 [nvmet_fc]
[ 494.695673] nvmet_fatal_error_handler+0x30/0x40 [nvmet]
[ 494.701589] process_one_work+0x149/0x360
[ 494.706064] worker_thread+0x4d/0x3c0
[ 494.710148] kthread+0x109/0x140
[ 494.713751] ? rescuer_thread+0x380/0x380
[ 494.718214] ? kthread_park+0x60/0x60

Correct by having the fc transport convert to a different workq context for the actual controller teardown, which may call the cancel_work_sync.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
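To make the catch-22 concrete, below is a minimal sketch of the pattern, not the actual nvmet/nvmet-fc code: the demo_ctrl, fatal_err_fn_*, and teardown_work names are hypothetical. The real patch moves the controller teardown to a different workq context; the sketch approximates that with a second work item.

/*
 * Sketch of the self-cancellation deadlock and the workqueue-hop fix.
 * Hypothetical names; not the actual nvmet-fc implementation.
 */
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_ctrl {
	struct kref		ref;
	struct work_struct	fatal_err_work;	/* fatal error handler */
	struct work_struct	teardown_work;	/* fix: separate context */
};

static void demo_ctrl_free(struct kref *ref)
{
	struct demo_ctrl *ctrl = container_of(ref, struct demo_ctrl, ref);

	/*
	 * Deadlocks if reached from fatal_err_work's own context:
	 * flush_work() then waits for the work item we are running in.
	 */
	cancel_work_sync(&ctrl->fatal_err_work);
	kfree(ctrl);
}

/* BROKEN: teardown (and thus the final kref put) runs inside the
 * fatal_err_work item itself, so the free routine's cancel_work_sync()
 * flushes *this* work item and hangs, as in the trace above. */
static void fatal_err_fn_broken(struct work_struct *work)
{
	struct demo_ctrl *ctrl =
		container_of(work, struct demo_ctrl, fatal_err_work);

	kref_put(&ctrl->ref, demo_ctrl_free);	/* last put -> deadlock */
}

/* FIXED: fatal_err_work only bounces to a different work context and
 * returns; the final put then happens in teardown_work, where flushing
 * fatal_err_work is safe because it is no longer executing. */
static void teardown_fn(struct work_struct *work)
{
	struct demo_ctrl *ctrl =
		container_of(work, struct demo_ctrl, teardown_work);

	kref_put(&ctrl->ref, demo_ctrl_free);	/* safe in this context */
}

static void fatal_err_fn_fixed(struct work_struct *work)
{
	struct demo_ctrl *ctrl =
		container_of(work, struct demo_ctrl, fatal_err_work);

	schedule_work(&ctrl->teardown_work);
}

The point of the hop is that cancel_work_sync() is never called from within the work item it targets; waiting on some other work item from a (non-ordered) workqueue is fine.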