Commit 70b73f9a authored by Chris Wilson's avatar Chris Wilson
Browse files

drm/i915/ringbuffer: Delay after invalidating gen6+ xcs

During stress testing of full-ppgtt (on Baytrail at least), we found
that the invalidation around a context/mm switch was insufficient (writes
would go astray). Adding a second MI_FLUSH_DW barrier prevents this, but
it is unclear as to whether this is merely a delaying tactic or if it is
truly serialising with the TLB invalidation. Either way, it is
empirically required.

v2: Avoid the loop for readability;

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107715
References: https://bugs.freedesktop.org/show_bug.cgi?id=107759


Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: default avatarVille Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180830161042.29193-1-chris@chris-wilson.co.uk
parent 09605548
Loading
Loading
Loading
Loading
+34 −35
Original line number Diff line number Diff line
@@ -1944,7 +1944,7 @@ static void gen6_bsd_submit_request(struct i915_request *request)
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}

static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
static int emit_mi_flush_dw(struct i915_request *rq, u32 flags)
{
	u32 cmd, *cs;

@@ -1954,7 +1954,8 @@ static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)

	cmd = MI_FLUSH_DW;

	/* We always require a command barrier so that subsequent
	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
@@ -1962,22 +1963,49 @@ static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	/*
	 * Bspec vol 1c.5 - video engine command streamer:
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
	cmd |= flags;

	*cs++ = cmd;
	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
{
	int err;

	/*
	 * Not only do we need a full barrier (post-sync write) after
	 * invalidating the TLBs, but we need to wait a little bit
	 * longer. Whether this is merely delaying us, or the
	 * subsequent flush is a key part of serialising with the
	 * post-sync op, this extra pass appears vital before a
	 * mm switch!
	 */
	if (mode & EMIT_INVALIDATE) {
		err = emit_mi_flush_dw(rq, invflags);
		if (err)
			return err;
	}

	return emit_mi_flush_dw(rq, 0);
}

static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
{
	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
}

static int
hsw_emit_bb_start(struct i915_request *rq,
		  u64 offset, u32 len,
@@ -2022,36 +2050,7 @@ gen6_emit_bb_start(struct i915_request *rq,

static int gen6_ring_flush(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW;

	/* We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_INVALIDATE_TLB;
	*cs++ = cmd;
	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
}

static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,