Commit c30d5dc6 authored by Chris Wilson's avatar Chris Wilson
Browse files

drm/i915/gt: Push engine stopping into reset-prepare



Push the engine stop into the back reset_prepare (where it already was!)
This allows us to avoid dangerously setting the RING registers to 0 for
logical contexts. If we clear the register on a live context, those
invalid register values are recorded in the logical context state and
replayed (with hilarious results).

Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190716124931.5870-2-chris@chris-wilson.co.uk
parent fff8102a
Loading
Loading
Loading
Loading
+14 −2
Original line number Diff line number Diff line
@@ -2183,11 +2183,23 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
	__tasklet_disable_sync_once(&execlists->tasklet);
	GEM_BUG_ON(!reset_in_progress(execlists));

	intel_engine_stop_cs(engine);

	/* And flush any current direct submission. */
	spin_lock_irqsave(&engine->active.lock, flags);
	spin_unlock_irqrestore(&engine->active.lock, flags);

	/*
	 * We stop engines, otherwise we might get failed reset and a
	 * dead gpu (on elk). Also as modern gpu as kbl can suffer
	 * from system hang if batchbuffer is progressing when
	 * the reset is issued, regardless of READY_TO_RESET ack.
	 * Thus assume it is best to stop engines on all gens
	 * where we have a gpu reset.
	 *
	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
	 *
	 * FIXME: Wa for more modern gens needs to be validated
	 */
	intel_engine_stop_cs(engine);
}

static void reset_csb_pointers(struct intel_engine_cs *engine)
+0 −58
Original line number Diff line number Diff line
@@ -135,47 +135,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
	}
}

static void gen3_stop_engine(struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;

	GEM_TRACE("%s\n", engine->name);

	if (intel_engine_stop_cs(engine))
		GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);

	intel_uncore_write_fw(uncore,
			      RING_HEAD(base),
			      intel_uncore_read_fw(uncore, RING_TAIL(base)));
	intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */

	intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
	intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
	intel_uncore_posting_read_fw(uncore, RING_TAIL(base));

	/* The ring must be empty before it is disabled */
	intel_uncore_write_fw(uncore, RING_CTL(base), 0);

	/* Check acts as a post */
	if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
		GEM_TRACE("%s: ring head [%x] not parked\n",
			  engine->name,
			  intel_uncore_read_fw(uncore, RING_HEAD(base)));
}

static void stop_engines(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp;

	if (INTEL_GEN(gt->i915) < 3)
		return;

	for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
		gen3_stop_engine(engine);
}

static bool i915_in_reset(struct pci_dev *pdev)
{
	u8 gdrst;
@@ -607,23 +566,6 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
	 */
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
	for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
		/*
		 * We stop engines, otherwise we might get failed reset and a
		 * dead gpu (on elk). Also as modern gpu as kbl can suffer
		 * from system hang if batchbuffer is progressing when
		 * the reset is issued, regardless of READY_TO_RESET ack.
		 * Thus assume it is best to stop engines on all gens
		 * where we have a gpu reset.
		 *
		 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
		 *
		 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
		 *
		 * FIXME: Wa for more modern gens needs to be validated
		 */
		if (retry)
			stop_engines(gt, engine_mask);

		GEM_TRACE("engine_mask=%x\n", engine_mask);
		preempt_disable();
		ret = reset(gt, engine_mask, retry);
+39 −1
Original line number Diff line number Diff line
@@ -739,7 +739,45 @@ out:

static void reset_prepare(struct intel_engine_cs *engine)
{
	intel_engine_stop_cs(engine);
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;

	/*
	 * We stop engines, otherwise we might get failed reset and a
	 * dead gpu (on elk). Also as modern gpu as kbl can suffer
	 * from system hang if batchbuffer is progressing when
	 * the reset is issued, regardless of READY_TO_RESET ack.
	 * Thus assume it is best to stop engines on all gens
	 * where we have a gpu reset.
	 *
	 * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
	 *
	 * WaMediaResetMainRingCleanup:ctg,elk (presumably)
	 *
	 * FIXME: Wa for more modern gens needs to be validated
	 */
	GEM_TRACE("%s\n", engine->name);

	if (intel_engine_stop_cs(engine))
		GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);

	intel_uncore_write_fw(uncore,
			      RING_HEAD(base),
			      intel_uncore_read_fw(uncore, RING_TAIL(base)));
	intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */

	intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
	intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
	intel_uncore_posting_read_fw(uncore, RING_TAIL(base));

	/* The ring must be empty before it is disabled */
	intel_uncore_write_fw(uncore, RING_CTL(base), 0);

	/* Check acts as a post */
	if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
		GEM_TRACE("%s: ring head [%x] not parked\n",
			  engine->name,
			  intel_uncore_read_fw(uncore, RING_HEAD(base)));
}

static void reset_ring(struct intel_engine_cs *engine, bool stalled)