Commit 1883a0a4 authored by Tvrtko Ursulin's avatar Tvrtko Ursulin Committed by Chris Wilson
Browse files

drm/i915: Track hw reported context runtime



GPU saves accumulated context runtime (in CS timestamp units) in PPHWSP
which will be useful for us in cases when we are not able to track context
busyness ourselves (like with GuC). Keep a copy of this in struct
intel_context from where it can be easily read even if the context is not
pinned.

v2:
 (Chris)
 * Do not store pphwsp address in intel_context.
 * Log CS wrap-around.
 * Simplify calculation by relying on integer wraparound.
v3:
 * Include total/avg in traces and error state for debugging

Signed-off-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20200216133620.394962-1-chris@chris-wilson.co.uk
parent 0f8839f5
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -220,7 +220,9 @@ static void __intel_context_retire(struct i915_active *active)
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

	CE_TRACE(ce, "retire\n");
	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
		 intel_context_get_total_runtime_ns(ce),
		 intel_context_get_avg_runtime_ns(ce));

	set_bit(CONTEXT_VALID_BIT, &ce->flags);
	if (ce->state)
@@ -281,6 +283,8 @@ intel_context_init(struct intel_context *ce,
	ce->sseu = engine->sseu;
	ce->ring = __intel_context_ring_size(SZ_4K);

	ewma_runtime_init(&ce->runtime.avg);

	ce->vm = i915_vm_get(engine->gt->vm);

	INIT_LIST_HEAD(&ce->signal_link);
+17 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include <linux/types.h>

#include "i915_active.h"
#include "i915_drv.h"
#include "intel_context_types.h"
#include "intel_engine_types.h"
#include "intel_ring_types.h"
@@ -227,4 +228,20 @@ intel_context_clear_nopreempt(struct intel_context *ce)
	clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
}

static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
{
	const u32 period =
		RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;

	return READ_ONCE(ce->runtime.total) * period;
}

static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
{
	const u32 period =
		RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;

	return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
}

#endif /* __INTEL_CONTEXT_H__ */
+12 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#ifndef __INTEL_CONTEXT_TYPES__
#define __INTEL_CONTEXT_TYPES__

#include <linux/average.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -19,6 +20,8 @@

#define CONTEXT_REDZONE POISON_INUSE

DECLARE_EWMA(runtime, 3, 8);

struct i915_gem_context;
struct i915_vma;
struct intel_context;
@@ -68,6 +71,15 @@ struct intel_context {
	u64 lrc_desc;
	u32 tag; /* cookie passed to HW to track this context on submission */

	/* Time on GPU as tracked by the hw. */
	struct {
		struct ewma_runtime avg;
		u64 total;
		u32 last;
		I915_SELFTEST_DECLARE(u32 num_underflow);
		I915_SELFTEST_DECLARE(u32 max_underflow);
	} runtime;

	unsigned int active_count; /* protected by timeline->mutex */

	atomic_t pin_count;
+44 −2
Original line number Diff line number Diff line
@@ -1195,6 +1195,42 @@ static void reset_active(struct i915_request *rq,
	ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
}

static u32 intel_context_get_runtime(const struct intel_context *ce)
{
	/*
	 * We can use either ppHWSP[16] which is recorded before the context
	 * switch (and so excludes the cost of context switches) or use the
	 * value from the context image itself, which is saved/restored earlier
	 * and so includes the cost of the save.
	 */
	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
}

static void intel_context_update_runtime(struct intel_context *ce)
{
	u32 old;
	s32 dt;

	if (intel_context_is_barrier(ce))
		return;

	old = ce->runtime.last;
	ce->runtime.last = intel_context_get_runtime(ce);
	dt = ce->runtime.last - old;

	if (unlikely(dt <= 0)) {
		CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
			 old, ce->runtime.last, dt);
		I915_SELFTEST_ONLY(ce->runtime.num_underflow += dt < 0);
		I915_SELFTEST_ONLY(ce->runtime.max_underflow =
				   max_t(u32, ce->runtime.max_underflow, -dt));
		return;
	}

	ewma_runtime_add(&ce->runtime.avg, dt);
	ce->runtime.total += dt;
}

static inline struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
@@ -1278,6 +1314,7 @@ __execlists_schedule_out(struct i915_request *rq,
	    i915_request_completed(rq))
		intel_engine_add_retire(engine, ce->timeline);

	intel_context_update_runtime(ce);
	intel_engine_context_out(engine);
	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
	intel_gt_pm_put_async(engine->gt);
@@ -4607,8 +4644,13 @@ populate_lr_context(struct intel_context *ce,
		inhibit = false;
	}

	/* The second page of the context object contains some fields which must
	 * be set up prior to the first execution. */
	/* Clear the ppHWSP (inc. per-context counters) */
	memset(vaddr, 0, PAGE_SIZE);

	/*
	 * The second page of the context object contains some registers which
	 * must be set up prior to the first execution.
	 */
	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
				 ce, engine, ring, inhibit);

+1 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#define CTX_RING_CTL			(0x0a + 1)
#define CTX_BB_STATE			(0x10 + 1)
#define CTX_BB_PER_CTX_PTR		(0x18 + 1)
#define CTX_TIMESTAMP			(0x22 + 1)
#define CTX_PDP3_UDW			(0x24 + 1)
#define CTX_PDP3_LDW			(0x26 + 1)
#define CTX_PDP2_UDW			(0x28 + 1)
Loading