Commit 280e285d authored by Chris Wilson
Browse files

drm/i915/selftests: Be a little more lenient for reset workers



Give the reset worker a kick before losing hope when waiting for hang
recovery, as the CPU scheduler is a little unreliable.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200227085723.1961649-15-chris@chris-wilson.co.uk
parent 834c6bb7
Loading
Loading
Loading
Loading
+53 −22
Original line number Diff line number Diff line
@@ -90,6 +90,49 @@ static int wait_for_submit(struct intel_engine_cs *engine,
	return -ETIME;
}

static int wait_for_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq,
			  unsigned long timeout)
{
	timeout += jiffies;

	do {
		cond_resched();
		intel_engine_flush_submission(engine);

		if (READ_ONCE(engine->execlists.pending[0]))
			continue;

		if (i915_request_completed(rq))
			break;

		if (READ_ONCE(rq->fence.error))
			break;
	} while (time_before(jiffies, timeout));

	flush_scheduled_work();

	if (rq->fence.error != -EIO) {
		pr_err("%s: hanging request %llx:%lld not reset\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -EINVAL;
	}

	/* Give the request a jiffie to complete after flushing the worker */
	if (i915_request_wait(rq, 0,
			      max(0l, (long)(timeout - jiffies)) + 1) < 0) {
		pr_err("%s: hanging request %llx:%lld did not complete\n",
		       engine->name,
		       rq->fence.context,
		       rq->fence.seqno);
		return -ETIME;
	}

	return 0;
}

static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
@@ -1805,14 +1848,9 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
	if (err)
		goto out;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -EIO;
		goto out;
	}

	if (rq->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
		err = -EINVAL;
	err = wait_for_reset(arg->engine, rq, HZ / 2);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}

@@ -1870,10 +1908,9 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
		goto out;

	igt_spinner_end(&arg->a.spin);
	if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
		err = -EIO;
	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
	if (err)
		goto out;
	}

	if (rq[0]->fence.error != 0) {
		pr_err("Normal inflight0 request did not complete\n");
@@ -1953,10 +1990,9 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
	if (err)
		goto out;

	if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
		err = -EIO;
	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
	if (err)
		goto out;
	}

	if (rq[0]->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
@@ -2014,14 +2050,9 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
	if (err)
		goto out;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -EIO;
		goto out;
	}

	if (rq->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
		err = -EINVAL;
	err = wait_for_reset(arg->engine, rq, HZ / 2);
	if (err) {
		pr_err("Cancelled inflight0 request did not reset\n");
		goto out;
	}