Commit 3f04bdce authored by Michał Winiarski, committed by Chris Wilson
Browse files

drm/i915: Reboot CI if we get wedged during driver init



Getting a wedged device on driver init is pretty much unrecoverable.
Since we're running various scenarios that may potentially hit this in
CI (module reload / selftests / hotunplug), and if it happens, it means
that we can't trust any subsequent CI results, we should just apply the
taint to let the CI know that it should reboot (CI checks taint between
test runs).

v2: Comment that WEDGED_ON_INIT is non-recoverable, distinguish
    WEDGED_ON_INIT from WEDGED_ON_FINI (Chris)
v3: Appease checkpatch, fixup search-replace logic expression mindbomb
    in assert (Chris)

Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Petri Latvala <petri.latvala@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20200706144107.204821-1-michal@hardline.pl
parent d3913019
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -201,7 +201,7 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
				     uabi_node);
		char old[sizeof(engine->name)];

		if (intel_gt_has_init_error(engine->gt))
		if (intel_gt_has_unrecoverable_error(engine->gt))
			continue; /* ignore incomplete engines */

		GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
+1 −1
Original line number Diff line number Diff line
@@ -510,7 +510,7 @@ static int __engines_verify_workarounds(struct intel_gt *gt)

static void __intel_gt_disable(struct intel_gt *gt)
{
	intel_gt_set_wedged_on_init(gt);
	intel_gt_set_wedged_on_fini(gt);

	intel_gt_suspend_prepare(gt);
	intel_gt_suspend_late(gt);
+8 −4
Original line number Diff line number Diff line
@@ -58,14 +58,18 @@ static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
	return i915_ggtt_offset(gt->scratch) + field;
}

static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
static inline bool intel_gt_has_unrecoverable_error(const struct intel_gt *gt)
{
	return __intel_reset_failed(&gt->reset);
	return test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags) ||
	       test_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
}

static inline bool intel_gt_has_init_error(const struct intel_gt *gt)
static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
{
	return test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
	GEM_BUG_ON(intel_gt_has_unrecoverable_error(gt) &&
		   !test_bit(I915_WEDGED, &gt->reset.flags));

	return unlikely(test_bit(I915_WEDGED, &gt->reset.flags));
}

#endif /* __INTEL_GT_H__ */
+1 −1
Original line number Diff line number Diff line
@@ -188,7 +188,7 @@ int intel_gt_resume(struct intel_gt *gt)
	enum intel_engine_id id;
	int err;

	err = intel_gt_has_init_error(gt);
	err = intel_gt_has_unrecoverable_error(gt);
	if (err)
		return err;

+11 −2
Original line number Diff line number Diff line
@@ -880,7 +880,7 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
		return true;

	/* Never fully initialised, recovery impossible */
	if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
	if (intel_gt_has_unrecoverable_error(gt))
		return false;

	GT_TRACE(gt, "start\n");
@@ -1342,7 +1342,7 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
	if (!intel_gt_is_wedged(gt))
		return 0;

	if (intel_gt_has_init_error(gt))
	if (intel_gt_has_unrecoverable_error(gt))
		return -EIO;

	/* Reset still in progress? Maybe we will recover? */
@@ -1360,6 +1360,15 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
		     I915_WEDGED_ON_INIT);
	intel_gt_set_wedged(gt);
	set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);

	/* Wedged on init is non-recoverable */
	add_taint_for_CI(TAINT_WARN);
}

/*
 * Wedge the GT and tag it with I915_WEDGED_ON_FINI.
 *
 * The flag is one of the two bits tested by
 * intel_gt_has_unrecoverable_error(), so once it is set
 * __intel_gt_unset_wedged() refuses to recover the GT and
 * intel_gt_terminally_wedged() reports -EIO.
 *
 * Unlike intel_gt_set_wedged_on_init(), this path adds no taint for CI.
 */
void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
{
	/*
	 * Wedge first: intel_gt_is_wedged() asserts that an unrecoverable
	 * flag is never seen without I915_WEDGED (presumably set by
	 * intel_gt_set_wedged() - TODO confirm).
	 */
	intel_gt_set_wedged(gt);
	set_bit(I915_WEDGED_ON_FINI, &gt->reset.flags);
}

void intel_gt_init_reset(struct intel_gt *gt)
Loading