drm/i915: Reorder await_execution before await_request (dd873dd5) · Commits · 戴 / test

drivers/gpu/drm/i915/i915_request.c

+132 −132

Original line number	Diff line number	Diff line
		@@ -1053,37 +1053,91 @@ await_fence:
		I915_FENCE_GFP);
		}

		/*
		* Ask __intel_timeline_sync_is_later() about the point
		* (fence->context, fence->seqno - 1) on timeline @tl and return its
		* answer unchanged.
		*
		* NOTE(review): the caller in this file uses a true result to skip
		* adding the same dependency again, and elsewhere seqno - 1 is the value
		* waited upon for the "start" of a request -- the exact comparison
		* semantics live in __intel_timeline_sync_is_later(); confirm there.
		*/
		static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
		struct dma_fence *fence)
		{
		return __intel_timeline_sync_is_later(tl,
		fence->context,
		fence->seqno - 1);
		}

		/*
		* Pass (fence->context, fence->seqno - 1) for timeline @tl to
		* __intel_timeline_sync_set() and return its int result unchanged.
		*
		* This is the counterpart of intel_timeline_sync_has_start(): both use
		* the same context and seqno - 1 pair.
		*
		* NOTE(review): presumably 0 on success and a negative error code on
		* failure, as the caller returns this value directly -- confirm against
		* __intel_timeline_sync_set().
		*/
		static int intel_timeline_sync_set_start(struct intel_timeline *tl,
		const struct dma_fence *fence)
		{
		return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
		}

		static int
		i915_request_await_request(struct i915_request *to, struct i915_request *from)
		__i915_request_await_execution(struct i915_request *to,
		struct i915_request *from,
		void (*hook)(struct i915_request *rq,
		struct dma_fence *signal))
		{
		int ret;
		int err;

		GEM_BUG_ON(to == from);
		GEM_BUG_ON(to->timeline == from->timeline);
		GEM_BUG_ON(intel_context_is_barrier(from->context));

		if (i915_request_completed(from)) {
		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
		/* Submit both requests at the same time */
		err = __await_execution(to, from, hook, I915_FENCE_GFP);
		if (err)
		return err;

		/* Squash repeated dependencies to the same timelines */
		if (intel_timeline_sync_has_start(i915_request_timeline(to),
		&from->fence))
		return 0;

		/*
		* Wait until the start of this request.
		*
		* The execution cb fires when we submit the request to HW. But in
		* many cases this may be long before the request itself is ready to
		* run (consider that we submit 2 requests for the same context, where
		* the request of interest is behind an indefinite spinner). So we hook
		* up to both to reduce our queues and keep the execution lag minimised
		* in the worst case, though we hope that the await_start is elided.
		*/
		err = i915_request_await_start(to, from);
		if (err < 0)
		return err;

		/*
		* Ensure both start together [after all semaphores in signal]
		*
		* Now that we are queued to the HW at roughly the same time (thanks
		* to the execute cb) and are ready to run at roughly the same time
		* (thanks to the await start), our signaler may still be indefinitely
		* delayed by waiting on a semaphore from a remote engine. If our
		* signaler depends on a semaphore, so indirectly do we, and we do not
		* want to start our payload until our signaler also starts theirs.
		* So we wait.
		*
		* However, there is also a second condition for which we need to wait
		* for the precise start of the signaler. Consider that the signaler
		* was submitted in a chain of requests following another context
		* (with just an ordinary intra-engine fence dependency between the
		* two). In this case the signaler is queued to HW, but not for
		* immediate execution, and so we must wait until it reaches the
		* active slot.
		*/
		if (intel_engine_has_semaphores(to->engine) &&
		!i915_request_has_initial_breadcrumb(to)) {
		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
		if (err < 0)
		return err;
		}

		/* Couple the dependency tree for PI on this exposed to->fence */
		if (to->engine->schedule) {
		ret = i915_sched_node_add_dependency(&to->sched,
		err = i915_sched_node_add_dependency(&to->sched,
		&from->sched,
		I915_DEPENDENCY_EXTERNAL);
		if (ret < 0)
		return ret;
		I915_DEPENDENCY_WEAK);
		if (err < 0)
		return err;
		}

		if (to->engine == from->engine)
		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
		&from->submit,
		I915_FENCE_GFP);
		else
		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
		if (ret < 0)
		return ret;

		return 0;
		return intel_timeline_sync_set_start(i915_request_timeline(to),
		&from->fence);
		}

		static void mark_external(struct i915_request *rq)
		@@ -1136,23 +1190,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
		}

		int
		i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
		i915_request_await_execution(struct i915_request *rq,
		struct dma_fence *fence,
		void (*hook)(struct i915_request *rq,
		struct dma_fence *signal))
		{
		struct dma_fence **child = &fence;
		unsigned int nchild = 1;
		int ret;

		/*
		* Note that if the fence-array was created in signal-on-any mode,
		* we should not decompose it into its individual fences. However,
		* we don't currently store which mode the fence-array is operating
		* in. Fortunately, the only user of signal-on-any is private to
		* amdgpu and we should not see any incoming fence-array from
		* sync-file being in signal-on-any mode.
		*/
		if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		/* XXX Error for signal-on-any fence arrays */

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
		@@ -1165,138 +1216,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
		continue;
		}

		/*
		* Requests on the same timeline are explicitly ordered, along
		* with their dependencies, by i915_request_add() which ensures
		* that requests are submitted in-order through each ring.
		*/
		if (fence->context == rq->fence.context)
		continue;

		/* Squash repeated waits to the same timelines */
		if (fence->context &&
		intel_timeline_sync_is_later(i915_request_timeline(rq),
		fence))
		continue;
		/*
		* We don't squash repeated fence dependencies here as we
		* want to run our callback in all cases.
		*/

		if (dma_fence_is_i915(fence))
		ret = i915_request_await_request(rq, to_request(fence));
		ret = __i915_request_await_execution(rq,
		to_request(fence),
		hook);
		else
		ret = i915_request_await_external(rq, fence);
		if (ret < 0)
		return ret;

		/* Record the latest fence used against each timeline */
		if (fence->context)
		intel_timeline_sync_set(i915_request_timeline(rq),
		fence);
		} while (--nchild);

		return 0;
		}

		/*
		* Ask __intel_timeline_sync_is_later() about the point
		* (fence->context, fence->seqno - 1) on timeline @tl and return its
		* answer unchanged.
		*
		* NOTE(review): the caller in this file uses a true result to skip
		* adding the same dependency again, and elsewhere seqno - 1 is the value
		* waited upon for the "start" of a request -- the exact comparison
		* semantics live in __intel_timeline_sync_is_later(); confirm there.
		*/
		static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
		struct dma_fence *fence)
		{
		return __intel_timeline_sync_is_later(tl,
		fence->context,
		fence->seqno - 1);
		}

		/*
		* Pass (fence->context, fence->seqno - 1) for timeline @tl to
		* __intel_timeline_sync_set() and return its int result unchanged.
		*
		* This is the counterpart of intel_timeline_sync_has_start(): both use
		* the same context and seqno - 1 pair.
		*
		* NOTE(review): presumably 0 on success and a negative error code on
		* failure, as the caller returns this value directly -- confirm against
		* __intel_timeline_sync_set().
		*/
		static int intel_timeline_sync_set_start(struct intel_timeline *tl,
		const struct dma_fence *fence)
		{
		return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
		}

		static int
		__i915_request_await_execution(struct i915_request *to,
		struct i915_request *from,
		void (*hook)(struct i915_request *rq,
		struct dma_fence *signal))
		i915_request_await_request(struct i915_request *to, struct i915_request *from)
		{
		int err;

		GEM_BUG_ON(intel_context_is_barrier(from->context));
		int ret;

		/* Submit both requests at the same time */
		err = __await_execution(to, from, hook, I915_FENCE_GFP);
		if (err)
		return err;
		GEM_BUG_ON(to == from);
		GEM_BUG_ON(to->timeline == from->timeline);

		/* Squash repeated dependencies to the same timelines */
		if (intel_timeline_sync_has_start(i915_request_timeline(to),
		&from->fence))
		if (i915_request_completed(from)) {
		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
		return 0;

		/*
		* Wait until the start of this request.
		*
		* The execution cb fires when we submit the request to HW. But in
		* many cases this may be long before the request itself is ready to
		* run (consider that we submit 2 requests for the same context, where
		* the request of interest is behind an indefinite spinner). So we hook
		* up to both to reduce our queues and keep the execution lag minimised
		* in the worst case, though we hope that the await_start is elided.
		*/
		err = i915_request_await_start(to, from);
		if (err < 0)
		return err;

		/*
		* Ensure both start together [after all semaphores in signal]
		*
		* Now that we are queued to the HW at roughly the same time (thanks
		* to the execute cb) and are ready to run at roughly the same time
		* (thanks to the await start), our signaler may still be indefinitely
		* delayed by waiting on a semaphore from a remote engine. If our
		* signaler depends on a semaphore, so indirectly do we, and we do not
		* want to start our payload until our signaler also starts theirs.
		* So we wait.
		*
		* However, there is also a second condition for which we need to wait
		* for the precise start of the signaler. Consider that the signaler
		* was submitted in a chain of requests following another context
		* (with just an ordinary intra-engine fence dependency between the
		* two). In this case the signaler is queued to HW, but not for
		* immediate execution, and so we must wait until it reaches the
		* active slot.
		*/
		if (intel_engine_has_semaphores(to->engine) &&
		!i915_request_has_initial_breadcrumb(to)) {
		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
		if (err < 0)
		return err;
		}

		/* Couple the dependency tree for PI on this exposed to->fence */
		if (to->engine->schedule) {
		err = i915_sched_node_add_dependency(&to->sched,
		ret = i915_sched_node_add_dependency(&to->sched,
		&from->sched,
		I915_DEPENDENCY_WEAK);
		if (err < 0)
		return err;
		I915_DEPENDENCY_EXTERNAL);
		if (ret < 0)
		return ret;
		}

		return intel_timeline_sync_set_start(i915_request_timeline(to),
		&from->fence);
		if (to->engine == READ_ONCE(from->engine))
		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
		&from->submit,
		I915_FENCE_GFP);
		else
		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
		if (ret < 0)
		return ret;

		return 0;
		}

		int
		i915_request_await_execution(struct i915_request *rq,
		struct dma_fence *fence,
		void (*hook)(struct i915_request *rq,
		struct dma_fence *signal))
		i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
		{
		struct dma_fence **child = &fence;
		unsigned int nchild = 1;
		int ret;

		/*
		* Note that if the fence-array was created in signal-on-any mode,
		* we should not decompose it into its individual fences. However,
		* we don't currently store which mode the fence-array is operating
		* in. Fortunately, the only user of signal-on-any is private to
		* amdgpu and we should not see any incoming fence-array from
		* sync-file being in signal-on-any mode.
		*/
		if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		/* XXX Error for signal-on-any fence arrays */

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
		@@ -1309,22 +1300,31 @@ i915_request_await_execution(struct i915_request *rq,
		continue;
		}

		/*
		* Requests on the same timeline are explicitly ordered, along
		* with their dependencies, by i915_request_add() which ensures
		* that requests are submitted in-order through each ring.
		*/
		if (fence->context == rq->fence.context)
		continue;

		/*
		* We don't squash repeated fence dependencies here as we
		* want to run our callback in all cases.
		*/
		/* Squash repeated waits to the same timelines */
		if (fence->context &&
		intel_timeline_sync_is_later(i915_request_timeline(rq),
		fence))
		continue;

		if (dma_fence_is_i915(fence))
		ret = __i915_request_await_execution(rq,
		to_request(fence),
		hook);
		ret = i915_request_await_request(rq, to_request(fence));
		else
		ret = i915_request_await_external(rq, fence);
		if (ret < 0)
		return ret;

		/* Record the latest fence used against each timeline */
		if (fence->context)
		intel_timeline_sync_set(i915_request_timeline(rq),
		fence);
		} while (--nchild);

		return 0;

Admin message