Commit b5e8e954 authored by Chris Wilson's avatar Chris Wilson
Browse files

drm/i915/gt: Introduce barrier pulses along engines



To flush idle barriers, and even inflight requests, we want to send a
preemptive 'pulse' along an engine. We use a no-op request along the
pinned kernel_context at high priority so that it should run or else
kick off the stuck requests. We can use this to ensure idle barriers are
immediately flushed, as part of a context cancellation mechanism, or as
part of a heartbeat mechanism to detect and reset a stuck GPU.

Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191021174339.5389-1-chris@chris-wilson.co.uk
parent 928da10c
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -78,8 +78,9 @@ gt-y += \
	gt/intel_breadcrumbs.o \
	gt/intel_context.o \
	gt/intel_engine_cs.o \
	gt/intel_engine_pool.o \
	gt/intel_engine_heartbeat.o \
	gt/intel_engine_pm.o \
	gt/intel_engine_pool.o \
	gt/intel_engine_user.o \
	gt/intel_gt.o \
	gt/intel_gt_irq.o \
+77 −0
Original line number Diff line number Diff line
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_request.h"

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_engine.h"
#include "intel_gt.h"

static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
{
	engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
	i915_request_add_active_barriers(rq);
}

int intel_engine_pulse(struct intel_engine_cs *engine)
{
	struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
	struct intel_context *ce = engine->kernel_context;
	struct i915_request *rq;
	int err = 0;

	if (!intel_engine_has_preemption(engine))
		return -ENODEV;

	if (!intel_engine_pm_get_if_awake(engine))
		return 0;

	if (mutex_lock_interruptible(&ce->timeline->mutex))
		goto out_rpm;

	intel_context_enter(ce);
	rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
	intel_context_exit(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unlock;
	}

	rq->flags |= I915_REQUEST_SENTINEL;
	idle_pulse(engine, rq);

	__i915_request_commit(rq);
	__i915_request_queue(rq, &attr);

out_unlock:
	mutex_unlock(&ce->timeline->mutex);
out_rpm:
	intel_engine_pm_put(engine);
	return err;
}

int intel_engine_flush_barriers(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	if (llist_empty(&engine->barrier_tasks))
		return 0;

	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	idle_pulse(engine, rq);
	i915_request_add(rq);

	return 0;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_heartbeat.c"
#endif
+15 −0
Original line number Diff line number Diff line
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#ifndef INTEL_ENGINE_HEARTBEAT_H
#define INTEL_ENGINE_HEARTBEAT_H

struct intel_engine_cs;

int intel_engine_pulse(struct intel_engine_cs *engine);
int intel_engine_flush_barriers(struct intel_engine_cs *engine);

#endif /* INTEL_ENGINE_HEARTBEAT_H */
+1 −1
Original line number Diff line number Diff line
@@ -111,7 +111,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
	i915_request_add_active_barriers(rq);

	/* Install ourselves as a preemption barrier */
	rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE;
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	__i915_request_commit(rq);

	/* Release our exclusive hold on the engine */
+159 −0
Original line number Diff line number Diff line
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include "i915_drv.h"

#include "intel_gt_requests.h"
#include "i915_selftest.h"

struct pulse {
	struct i915_active active;
	struct kref kref;
};

static int pulse_active(struct i915_active *active)
{
	kref_get(&container_of(active, struct pulse, active)->kref);
	return 0;
}

static void pulse_free(struct kref *kref)
{
	kfree(container_of(kref, struct pulse, kref));
}

static void pulse_put(struct pulse *p)
{
	kref_put(&p->kref, pulse_free);
}

static void pulse_retire(struct i915_active *active)
{
	pulse_put(container_of(active, struct pulse, active));
}

static struct pulse *pulse_create(void)
{
	struct pulse *p;

	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return p;

	kref_init(&p->kref);
	i915_active_init(&p->active, pulse_active, pulse_retire);

	return p;
}

static int __live_idle_pulse(struct intel_engine_cs *engine,
			     int (*fn)(struct intel_engine_cs *cs))
{
	struct pulse *p;
	int err;

	p = pulse_create();
	if (!p)
		return -ENOMEM;

	err = i915_active_acquire(&p->active);
	if (err)
		goto out;

	err = i915_active_acquire_preallocate_barrier(&p->active, engine);
	if (err) {
		i915_active_release(&p->active);
		goto out;
	}

	i915_active_acquire_barrier(&p->active);
	i915_active_release(&p->active);

	GEM_BUG_ON(i915_active_is_idle(&p->active));

	err = fn(engine);
	if (err)
		goto out;

	if (intel_gt_retire_requests_timeout(engine->gt, HZ / 5)) {
		err = -ETIME;
		goto out;
	}

	if (!i915_active_is_idle(&p->active)) {
		pr_err("%s: heartbeat pulse did not flush idle tasks\n",
		       engine->name);
		err = -EINVAL;
		goto out;
	}

out:
	pulse_put(p);
	return err;
}

static int live_idle_flush(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that we can flush the idle barriers */

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);
		err = __live_idle_pulse(engine, intel_engine_flush_barriers);
		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

static int live_idle_pulse(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that heartbeat pulses flush the idle barriers */

	for_each_engine(engine, gt, id) {
		intel_engine_pm_get(engine);
		err = __live_idle_pulse(engine, intel_engine_pulse);
		intel_engine_pm_put(engine);
		if (err && err != -ENODEV)
			break;

		err = 0;
	}

	return err;
}

int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_idle_flush),
		SUBTEST(live_idle_pulse),
	};
	int saved_hangcheck;
	int err;

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	saved_hangcheck = i915_modparams.enable_hangcheck;
	i915_modparams.enable_hangcheck = INT_MAX;

	err =  intel_gt_live_subtests(tests, &i915->gt);

	i915_modparams.enable_hangcheck = saved_hangcheck;
	return err;
}
Loading