Commit 8a9a9827 authored by Daniele Ceraolo Spurio, committed by Chris Wilson

drm/i915: use a separate context for gpu relocs

The CS pre-parser can pre-fetch commands across memory sync points and,
starting from gen12, it can also pre-fetch across BB_START and BB_END
boundaries, so when we emit gpu relocs the pre-parser might fetch the
target location of the reloc before the memory write lands.
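
For reference, a gpu reloc here is just a dword store emitted into a
pool batch; roughly what the gen8+ GPU path of relocate_entry()
produces (a sketch only; addr is the relocated target address, and
error handling and the pre-gen8 variants are elided):

	u32 *batch;

	/* Reserve four dwords in the reloc pool batch. */
	batch = reloc_gpu(eb, vma, 4);

	/* Store the relocated address into the target batch... */
	*batch++ = MI_STORE_DWORD_IMM_GEN4;
	*batch++ = lower_32_bits(addr);
	*batch++ = upper_32_bits(addr);
	*batch++ = target_offset;
	/* ...a write the gen12+ pre-parser may race with. */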

The parser can't pre-fetch across the ctx switch, so we use a separate
context to guarantee that the memory is synchronized before the parser
can get to it.
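
Concretely, the reloc cache now picks the context it emits from
lazily: a fresh context on the same engine for gen12+, the user's own
context otherwise (condensed from the reloc_gpu() hunk below):

	if (cache->gen >= 12)
		ce = intel_context_create(eb->context->gem_context,
					  eb->engine);
	else
		ce = intel_context_get(eb->context);
	...
	cache->ce = ce;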

Note that there is no risk of the CS doing a lite restore from the reloc
context to the user context, even if the two have the same hw_id,
because, since gen11, the CS also checks the LRCA when deciding whether
it can lite-restore.

v2: limit new context to gen12+, release in eb_destroy, add a comment
    in emit_fini_breadcrumb (Chris).

Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20190827185805.21799-1-daniele.ceraolospurio@intel.com
parent 45e9c829
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c  +29 −1
@@ -252,6 +252,7 @@ struct i915_execbuffer {
 		bool has_fence : 1;
 		bool needs_unfenced : 1;
 
+		struct intel_context *ce;
 		struct i915_request *rq;
 		u32 *rq_cmd;
 		unsigned int rq_size;
@@ -880,6 +881,9 @@ static void eb_destroy(const struct i915_execbuffer *eb)
 {
 	GEM_BUG_ON(eb->reloc_cache.rq);
 
+	if (eb->reloc_cache.ce)
+		intel_context_put(eb->reloc_cache.ce);
+
 	if (eb->lut_size > 0)
 		kfree(eb->buckets);
 }
@@ -903,6 +907,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->has_fence = cache->gen < 4;
 	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->node.allocated = false;
+	cache->ce = NULL;
 	cache->rq = NULL;
 	cache->rq_size = 0;
 }
@@ -1168,7 +1173,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_unmap;
 
-	rq = i915_request_create(eb->context);
+	rq = intel_context_create_request(cache->ce);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
@@ -1239,6 +1244,29 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
 		if (!intel_engine_can_store_dword(eb->engine))
 			return ERR_PTR(-ENODEV);
 
+		if (!cache->ce) {
+			struct intel_context *ce;
+
+			/*
+			 * The CS pre-parser can pre-fetch commands across
+			 * memory sync points and starting gen12 it is able to
+			 * pre-fetch across BB_START and BB_END boundaries
+			 * (within the same context). We therefore use a
+			 * separate context gen12+ to guarantee that the reloc
+			 * writes land before the parser gets to the target
+			 * memory location.
+			 */
+			if (cache->gen >= 12)
+				ce = intel_context_create(eb->context->gem_context,
+							  eb->engine);
+			else
+				ce = intel_context_get(eb->context);
+			if (IS_ERR(ce))
+				return ERR_CAST(ce);
+
+			cache->ce = ce;
+		}
+
 		err = __reloc_gpu_alloc(eb, vma, len);
 		if (unlikely(err))
 			return ERR_PTR(err);
drivers/gpu/drm/i915/gt/intel_lrc.c  +18 −0
@@ -2931,6 +2931,24 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 	return gen8_emit_fini_breadcrumb_footer(request, cs);
 }
 
+/*
+ * Note that the CS instruction pre-parser will not stall on the breadcrumb
+ * flush and will continue pre-fetching the instructions after it before the
+ * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
+ * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
+ * of the next request before the memory has been flushed, we're guaranteed that
+ * we won't access the batch itself too early.
+ * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
+ * so, if the current request is modifying an instruction in the next request on
+ * the same intel_context, we might pre-fetch and then execute the pre-update
+ * instruction. To avoid this, the users of self-modifying code should either
+ * disable the parser around the code emitting the memory writes, via a new flag
+ * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
+ * the in-kernel use-cases we've opted to use a separate context, see
+ * reloc_gpu() as an example.
+ * All the above applies only to the instructions themselves. Non-inline data
+ * used by the instructions is not pre-fetched.
+ */
 static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
 					   u32 *cs)
 {
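
As a rough illustration of the "different intel_context" option from the
comment above, here is a hypothetical helper built only from the calls
used in this patch (the function name and out-parameter are
illustrative, not part of the patch; error handling is trimmed):

	/*
	 * Emit self-modifying writes from a separate context on the same
	 * engine, so the target context's pre-parser cannot pre-fetch past
	 * them on gen12+. Mirrors the reloc_gpu()/eb_destroy() pattern:
	 * the caller keeps the context reference until the writes land.
	 */
	static struct i915_request *
	smc_writes_request(struct intel_context *target,
			   struct intel_context **out_ce)
	{
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(target->gem_context, target->engine);
		if (IS_ERR(ce))
			return ERR_CAST(ce);

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			intel_context_put(ce);
			return rq;
		}

		*out_ce = ce;	/* drop with intel_context_put() when done */
		return rq;
	}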