Commit ccdeed49 authored by Umesh Nerlige Ramappa, committed by Lionel Landwerlin
Browse files

drm/i915/perf: Configure OAR for specific context



Gen12 supports saving/restoring render counters per context. Apply OAR
configuration only for the context that is passed in to perf.

v2:
- Fix OACTXCONTROL value to only stop/resume counters.
- Remove gen12_update_reg_state_unlocked as power state is already
  applied by the caller.

v3: (Lionel)
- Move register initialization into the array
- Assume a valid oa_config in enable_metric_set

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Fixes: 00a7f0d7 ("drm/i915/tgl: Add perf support on TGL")
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191206194339.31356-2-umesh.nerlige.ramappa@intel.com
parent 322d56aa
Loading
Loading
Loading
Loading
+112 −87
Original line number Original line Diff line number Diff line
@@ -2082,20 +2082,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
	u32 *reg_state = ce->lrc_reg_state;
	u32 *reg_state = ce->lrc_reg_state;
	int i;
	int i;


	if (IS_GEN(stream->perf->i915, 12)) {
		u32 format = stream->oa_buffer.format;

		reg_state[ctx_oactxctrl + 1] =
			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
			(stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
	} else {
	reg_state[ctx_oactxctrl + 1] =
	reg_state[ctx_oactxctrl + 1] =
		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
		GEN8_OA_COUNTER_RESUME;
		GEN8_OA_COUNTER_RESUME;
	}


	for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
	for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
		reg_state[ctx_flexeu0 + i * 2 + 1] =
		reg_state[ctx_flexeu0 + i * 2 + 1] =
			oa_config_flex_reg(stream->oa_config, flex_regs[i]);
			oa_config_flex_reg(stream->oa_config, flex_regs[i]);


@@ -2228,34 +2220,51 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
	return err;
	return err;
}
}


static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
{
{
	struct i915_request *rq;
	int err;
	u32 *cs;
	struct intel_context *ce = stream->pinned_ctx;
	int err = 0;
	u32 format = stream->oa_buffer.format;

	struct flex regs_context[] = {
	rq = i915_request_create(ce);
		{
	if (IS_ERR(rq))
			GEN8_OACTXCONTROL,
		return PTR_ERR(rq);
			stream->perf->ctx_oactxctrl_offset + 1,

			enable ? GEN8_OA_COUNTER_RESUME : 0,
	cs = intel_ring_begin(rq, 4);
		},
	if (IS_ERR(cs)) {
	};
		err = PTR_ERR(cs);
	/* Offsets in regs_lri are not used since this configuration is only
		goto out;
	 * applied using LRI. Initialize the correct offsets for posterity.
	}
	 */

#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	struct flex regs_lri[] = {
	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
		{
	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
			GEN12_OAR_OACONTROL,
			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
			GEN12_OAR_OACONTROL_OFFSET + 1,
	*cs++ = MI_NOOP;
			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |

			(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
	intel_ring_advance(rq, cs);
		},
		{
			RING_CONTEXT_CONTROL(ce->engine->mmio_base),
			CTX_CONTEXT_CONTROL,
			_MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
				      enable ?
				      GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
				      0)
		},
	};


out:
	/* Modify the context image of pinned context with regs_context*/
	i915_request_add(rq);
	err = intel_context_lock_pinned(ce);
	if (err)
		return err;


	err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
	intel_context_unlock_pinned(ce);
	if (err)
		return err;
		return err;

	/* Apply regs_lri using LRI with pinned context */
	return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
}
}


/*
/*
@@ -2281,53 +2290,16 @@ out:
 *   per-context OA state.
 *   per-context OA state.
 *
 *
 * Note: it's only the RCS/Render context that has any OA state.
 * Note: it's only the RCS/Render context that has any OA state.
 * Note: the first flex register passed must always be R_PWR_CLK_STATE
 */
 */
static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
static int oa_configure_all_contexts(struct i915_perf_stream *stream,
				      const struct i915_oa_config *oa_config)
				     struct flex *regs,
				     size_t num_regs)
{
{
	struct drm_i915_private *i915 = stream->perf->i915;
	struct drm_i915_private *i915 = stream->perf->i915;
	/* The MMIO offsets for Flex EU registers aren't contiguous */
	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
	struct flex regs[] = {
		{
			GEN8_R_PWR_CLK_STATE,
			CTX_R_PWR_CLK_STATE,
		},
		{
			IS_GEN(i915, 12) ?
			GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
			stream->perf->ctx_oactxctrl_offset + 1,
		},
		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
	};
#undef ctx_flexeuN
	struct intel_engine_cs *engine;
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx, *cn;
	struct i915_gem_context *ctx, *cn;
	size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
	int err;
	int i, err;

	if (IS_GEN(i915, 12)) {
		u32 format = stream->oa_buffer.format;

		regs[1].value =
			(format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
			(oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
	} else {
		regs[1].value =
			(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
			(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
			GEN8_OA_COUNTER_RESUME;
	}

	for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);


	lockdep_assert_held(&stream->perf->lock);
	lockdep_assert_held(&stream->perf->lock);


@@ -2357,7 +2329,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,


		spin_unlock(&i915->gem.contexts.lock);
		spin_unlock(&i915->gem.contexts.lock);


		err = gen8_configure_context(ctx, regs, array_size);
		err = gen8_configure_context(ctx, regs, num_regs);
		if (err) {
		if (err) {
			i915_gem_context_put(ctx);
			i915_gem_context_put(ctx);
			return err;
			return err;
@@ -2382,7 +2354,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,


		regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
		regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);


		err = gen8_modify_self(ce, regs, array_size);
		err = gen8_modify_self(ce, regs, num_regs);
		if (err)
		if (err)
			return err;
			return err;
	}
	}
@@ -2390,6 +2362,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
	return 0;
	return 0;
}
}


/*
 * Gen12 flavour of the "configure all contexts" step.
 *
 * Only a single register entry (R_PWR_CLK_STATE) is handed to
 * oa_configure_all_contexts(); no OA control or flex EU entries are
 * included here.
 *
 * @stream:    perf stream, forwarded unchanged to oa_configure_all_contexts()
 * @oa_config: not referenced in this function body; callers pass either a
 *             config or NULL.
 *
 * Returns whatever oa_configure_all_contexts() returns.
 */
static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
					const struct i915_oa_config *oa_config)
{
	/*
	 * The value member is not initialised explicitly here (so it starts
	 * out zero). NOTE(review): oa_configure_all_contexts() appears to
	 * fill in regs[0].value from intel_sseu_make_rpcs() for the kernel
	 * contexts - confirm how it is set for user contexts.
	 */
	struct flex regs[] = {
		{
			GEN8_R_PWR_CLK_STATE,
			CTX_R_PWR_CLK_STATE,
		},
	};

	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
}

/*
 * Build the full register list (R_PWR_CLK_STATE, OACTXCONTROL and the seven
 * EU_PERF_CNTL flex registers) and pass it to oa_configure_all_contexts().
 *
 * @stream:    perf stream; supplies the context image offsets
 *             (ctx_flexeu0_offset, ctx_oactxctrl_offset) and the timer
 *             settings (period_exponent, periodic).
 * @oa_config: passed to oa_config_flex_reg() to obtain each flex register
 *             value; callers may pass NULL (handling of NULL is up to
 *             oa_config_flex_reg(), which is not shown here).
 *
 * Returns whatever oa_configure_all_contexts() returns.
 */
static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
				      const struct i915_oa_config *oa_config)
{
	/* The MMIO offsets for Flex EU registers aren't contiguous */
	const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
/* Context image index for flex EU register N: two dwords per entry, +1. */
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
	/*
	 * Entry order matters: index 1 is patched below as OACTXCONTROL and
	 * indices 2.. are treated as flex registers by the loop.
	 */
	struct flex regs[] = {
		{
			GEN8_R_PWR_CLK_STATE,
			CTX_R_PWR_CLK_STATE,
		},
		{
			GEN8_OACTXCONTROL,
			stream->perf->ctx_oactxctrl_offset + 1,
		},
		{ EU_PERF_CNTL0, ctx_flexeuN(0) },
		{ EU_PERF_CNTL1, ctx_flexeuN(1) },
		{ EU_PERF_CNTL2, ctx_flexeuN(2) },
		{ EU_PERF_CNTL3, ctx_flexeuN(3) },
		{ EU_PERF_CNTL4, ctx_flexeuN(4) },
		{ EU_PERF_CNTL5, ctx_flexeuN(5) },
		{ EU_PERF_CNTL6, ctx_flexeuN(6) },
	};
#undef ctx_flexeuN
	int i;

	/*
	 * OACTXCONTROL value: timer period exponent, timer enable only for
	 * periodic streams, and the counter-resume bit always set.
	 */
	regs[1].value =
		(stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
		(stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
		GEN8_OA_COUNTER_RESUME;

	/* Start at 2 to skip R_PWR_CLK_STATE and OACTXCONTROL. */
	for (i = 2; i < ARRAY_SIZE(regs); i++)
		regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);

	return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
}

static int gen8_enable_metric_set(struct i915_perf_stream *stream)
static int gen8_enable_metric_set(struct i915_perf_stream *stream)
{
{
	struct intel_uncore *uncore = stream->uncore;
	struct intel_uncore *uncore = stream->uncore;
@@ -2473,7 +2495,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
	 * to make sure all slices/subslices are ON before writing to NOA
	 * to make sure all slices/subslices are ON before writing to NOA
	 * registers.
	 * registers.
	 */
	 */
	ret = lrc_configure_all_contexts(stream, oa_config);
	ret = gen12_configure_all_contexts(stream, oa_config);
	if (ret)
	if (ret)
		return ret;
		return ret;


@@ -2483,8 +2505,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
	 * requested this.
	 * requested this.
	 */
	 */
	if (stream->ctx) {
	if (stream->ctx) {
		ret = gen12_emit_oar_config(stream->pinned_ctx,
		ret = gen12_configure_oar_context(stream, true);
					    oa_config != NULL);
		if (ret)
		if (ret)
			return ret;
			return ret;
	}
	}
@@ -2518,11 +2539,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
	struct intel_uncore *uncore = stream->uncore;
	struct intel_uncore *uncore = stream->uncore;


	/* Reset all contexts' slices/subslices configurations. */
	/* Reset all contexts' slices/subslices configurations. */
	lrc_configure_all_contexts(stream, NULL);
	gen12_configure_all_contexts(stream, NULL);


	/* disable the context save/restore or OAR counters */
	/* disable the context save/restore or OAR counters */
	if (stream->ctx)
	if (stream->ctx)
		gen12_emit_oar_config(stream->pinned_ctx, false);
		gen12_configure_oar_context(stream, false);


	/* Make sure we disable noa to save power. */
	/* Make sure we disable noa to save power. */
	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2864,7 +2885,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
		return;
		return;


	stream = engine->i915->perf.exclusive_stream;
	stream = engine->i915->perf.exclusive_stream;
	if (stream)
	/*
	 * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
	 * is already doing that, so nothing to be done for gen12 here.
	 */
	if (stream && INTEL_GEN(stream->perf->i915) < 12)
		gen8_update_reg_state_unlocked(ce, stream);
		gen8_update_reg_state_unlocked(ce, stream);
}
}