Commit 866bc2d3 authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge tag 'drm-intel-fixes-2020-11-05' of...

Merge tag 'drm-intel-fixes-2020-11-05' of git://anongit.freedesktop.org/drm/drm-intel

 into drm-fixes

- GVT fixes including vGPU suspend/resume fixes and workaround for APL guest GPU hang.
- Fix set domain's cache coherency (Chris)
- Fixes around breadcrumbs (Chris)
- Fix encoder lookup during PSR atomic (Imre)
- Hold onto an explicit ref to i915_vma_work.pinned (Chris)

Signed-off-by: default avatarDave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201105173026.GA858446@intel.com
parents 53aa37fb 537457a9
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1754,7 +1754,7 @@ void intel_psr_atomic_check(struct drm_connector *connector,
		return;

	intel_connector = to_intel_connector(connector);
	dig_port = enc_to_dig_port(intel_attached_encoder(intel_connector));
	dig_port = enc_to_dig_port(to_intel_encoder(new_state->best_encoder));
	if (dev_priv->psr.dp != &dig_port->dp)
		return;

+13 −15
Original line number Diff line number Diff line
@@ -508,21 +508,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
	if (!obj)
		return -ENOENT;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains) {
		err = 0;
		goto out;
	}

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
@@ -560,6 +545,19 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
	if (err)
		goto out;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out_unpin;
+35 −20
Original line number Diff line number Diff line
@@ -245,22 +245,14 @@ static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u
}

static inline u32 *
__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
__gen8_emit_write_rcs(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1)
{
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	/* w/a for post sync ops following a GPGPU operation we
	 * need a prior CS_STALL, which is emitted by the flush
	 * following the batch.
	 */
	*cs++ = GFX_OP_PIPE_CONTROL(6) | flags0;
	*cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
	*cs++ = gtt_offset;
	*cs++ = flags1 | PIPE_CONTROL_QW_WRITE;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = value;
	/* We're thrashing one dword of HWS. */
	*cs++ = 0;
	*cs++ = 0; /* We're thrashing one extra dword. */

	return cs;
}
@@ -268,13 +260,38 @@ __gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 f
static inline u32*
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
	return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags);
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	return __gen8_emit_write_rcs(cs,
				     value,
				     gtt_offset,
				     0,
				     flags | PIPE_CONTROL_GLOBAL_GTT_IVB);
}

static inline u32*
gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
{
	return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1);
	/* We're using qword write, offset should be aligned to 8 bytes. */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	return __gen8_emit_write_rcs(cs,
				     value,
				     gtt_offset,
				     flags0,
				     flags1 | PIPE_CONTROL_GLOBAL_GTT_IVB);
}

static inline u32 *
__gen8_emit_flush_dw(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
	*cs++ = (MI_FLUSH_DW + 1) | flags;
	*cs++ = gtt_offset;
	*cs++ = 0;
	*cs++ = value;

	return cs;
}

static inline u32 *
@@ -285,12 +302,10 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));

	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags;
	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
	*cs++ = 0;
	*cs++ = value;

	return cs;
	return __gen8_emit_flush_dw(cs,
				    value,
				    gtt_offset | MI_FLUSH_DW_USE_GTT,
				    flags | MI_FLUSH_DW_OP_STOREDW);
}

static inline void __intel_engine_reset(struct intel_engine_cs *engine,
+22 −9
Original line number Diff line number Diff line
@@ -3547,6 +3547,19 @@ static const struct intel_context_ops execlists_context_ops = {
	.destroy = execlists_context_destroy,
};

static u32 hwsp_offset(const struct i915_request *rq)
{
	const struct intel_timeline_cacheline *cl;

	/* Before the request is executed, the timeline/cachline is fixed */

	cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
	if (cl)
		return cl->ggtt_offset;

	return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
}

static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
	u32 *cs;
@@ -3569,7 +3582,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
	*cs++ = MI_NOOP;

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_request_timeline(rq)->hwsp_offset;
	*cs++ = hwsp_offset(rq);
	*cs++ = 0;
	*cs++ = rq->fence.seqno - 1;

@@ -4886,11 +4899,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
	return gen8_emit_wa_tail(request, cs);
}

static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
	u32 addr = i915_request_active_timeline(request)->hwsp_offset;

	return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}

static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
@@ -4909,7 +4920,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
	cs = gen8_emit_ggtt_write_rcs(cs,
				      request->fence.seqno,
				      i915_request_active_timeline(request)->hwsp_offset,
				      hwsp_offset(request),
				      PIPE_CONTROL_FLUSH_ENABLE |
				      PIPE_CONTROL_CS_STALL);

@@ -4921,7 +4932,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
	cs = gen8_emit_ggtt_write_rcs(cs,
				      request->fence.seqno,
				      i915_request_active_timeline(request)->hwsp_offset,
				      hwsp_offset(request),
				      PIPE_CONTROL_CS_STALL |
				      PIPE_CONTROL_TILE_CACHE_FLUSH |
				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -4983,7 +4994,9 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)

static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
	/* XXX Stalling flush before seqno write; post-sync not */
	cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}

static u32 *
@@ -4991,7 +5004,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
	cs = gen12_emit_ggtt_write_rcs(cs,
				       request->fence.seqno,
				       i915_request_active_timeline(request)->hwsp_offset,
				       hwsp_offset(request),
				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
				       PIPE_CONTROL_CS_STALL |
				       PIPE_CONTROL_TILE_CACHE_FLUSH |
+10 −8
Original line number Diff line number Diff line
@@ -188,9 +188,13 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
	return cl;
}

static void cacheline_acquire(struct intel_timeline_cacheline *cl)
static void cacheline_acquire(struct intel_timeline_cacheline *cl,
			      u32 ggtt_offset)
{
	if (cl)
	if (!cl)
		return;

	cl->ggtt_offset = ggtt_offset;
	i915_active_acquire(&cl->active);
}

@@ -340,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	cacheline_acquire(tl->hwsp_cacheline);
	cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
	if (atomic_fetch_inc(&tl->pin_count)) {
		cacheline_release(tl->hwsp_cacheline);
		__i915_vma_unpin(tl->hwsp_ggtt);
@@ -515,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
	GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
		 tl->fence_context, tl->hwsp_offset);

	cacheline_acquire(cl);
	cacheline_acquire(cl, tl->hwsp_offset);
	tl->hwsp_cacheline = cl;

	*seqno = timeline_advance(tl);
@@ -573,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
	if (err)
		goto out;

	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;

	*hwsp = cl->ggtt_offset;
out:
	i915_active_release(&cl->active);
	return err;
Loading