Commit a8385f0c authored by Chris Wilson, committed by Rodrigo Vivi

drm/i915: Only enqueue already completed requests



If we are asked to submit a completed request, just move it onto the
active-list without modifying its payload. If we try to emit the
modified payload of a completed request, we risk racing with the
ring->head update during retirement which may advance the head past our
breadcrumb and so we generate a warning for the emission being behind
the RING_HEAD.

v2: Commentary for the sneaky, shared responsibility between functions.
v3: Spelling mistakes and bonus assertion

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190923110056.15176-3-chris@chris-wilson.co.uk


(cherry picked from commit c0bb487d)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent 6535a4b3
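
Before the diff, a minimal illustrative sketch of the rule the commit message describes. All names here (struct request, request_completed(), emit_payload(), move_to_active_list(), submit_request()) are hypothetical stand-ins, not the i915 API: a request that has already completed is only moved onto the active list, so its payload is never re-emitted and emission cannot race with the ring->head update performed at retirement.

#include <stdbool.h>

struct request { bool completed; };	/* hypothetical stand-in for i915_request */

static bool request_completed(const struct request *rq) { return rq->completed; }
static void move_to_active_list(struct request *rq) { (void)rq; }	/* bookkeeping stub */
static void emit_payload(struct request *rq) { (void)rq; }		/* ring-write stub */

static bool submit_request(struct request *rq)
{
	if (request_completed(rq)) {
		move_to_active_list(rq);	/* track for retirement, touch nothing else */
		return false;			/* nothing was written behind RING_HEAD */
	}

	emit_payload(rq);			/* safe: rq cannot retire concurrently */
	move_to_active_list(rq);
	return true;				/* caller may treat rq as live on HW */
}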
drivers/gpu/drm/i915/gt/intel_lrc.c +41 −25
@@ -796,6 +796,17 @@ static bool can_merge_rq(const struct i915_request *prev,
 	GEM_BUG_ON(prev == next);
 	GEM_BUG_ON(!assert_priority_queue(prev, next));
 
+	/*
+	 * We do not submit known completed requests. Therefore if the next
+	 * request is already completed, we can pretend to merge it in
+	 * with the previous context (and we will skip updating the ELSP
+	 * and tracking). Thus hopefully keeping the ELSP full with active
+	 * contexts, despite the best efforts of preempt-to-busy to confuse
+	 * us.
+	 */
+	if (i915_request_completed(next))
+		return true;
+
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
 		return false;

@@ -1171,21 +1182,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				continue;
 			}
 
-			if (i915_request_completed(rq)) {
-				ve->request = NULL;
-				ve->base.execlists.queue_priority_hint = INT_MIN;
-				rb_erase_cached(rb, &execlists->virtual);
-				RB_CLEAR_NODE(rb);
-
-				rq->engine = engine;
-				__i915_request_submit(rq);
-
-				spin_unlock(&ve->base.active.lock);
-
-				rb = rb_first_cached(&execlists->virtual);
-				continue;
-			}
-
 			if (last && !can_merge_rq(last, rq)) {
 				spin_unlock(&ve->base.active.lock);
 				return; /* leave this for another */
@@ -1236,11 +1232,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(ve->siblings[0] != engine);
 			}
 
-			__i915_request_submit(rq);
-			if (!i915_request_completed(rq)) {
+			if (__i915_request_submit(rq)) {
 				submit = true;
 				last = rq;
 			}
+
+			/*
+			 * Hmm, we have a bunch of virtual engine requests,
+			 * but the first one was already completed (thanks
+			 * preempt-to-busy!). Keep looking at the veng queue
+			 * until we have no more relevant requests (i.e.
+			 * the normal submit queue has higher priority).
+			 */
+			if (!submit) {
+				spin_unlock(&ve->base.active.lock);
+				rb = rb_first_cached(&execlists->virtual);
+				continue;
+			}
 		}
 
 		spin_unlock(&ve->base.active.lock);
@@ -1253,8 +1261,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		int i;
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
-			if (i915_request_completed(rq))
-				goto skip;
+			bool merge = true;
 
 			/*
 			 * Can we combine this request with the current port?
@@ -1295,14 +1302,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				    ctx_single_port_submission(rq->hw_context))
 					goto done;
 
-				*port = execlists_schedule_in(last, port - execlists->pending);
-				port++;
+				merge = false;
 			}
 
-			last = rq;
-			submit = true;
-skip:
-			__i915_request_submit(rq);
+			if (__i915_request_submit(rq)) {
+				if (!merge) {
+					*port = execlists_schedule_in(last, port - execlists->pending);
+					port++;
+					last = NULL;
+				}
+
+				GEM_BUG_ON(last &&
+					   !can_merge_ctx(last->hw_context,
+							  rq->hw_context));
+
+				submit = true;
+				last = rq;
+			}
 		}
 
 		rb_erase_cached(&p->node, &execlists->queue);
drivers/gpu/drm/i915/i915_request.c +32 −12
@@ -377,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq,
 	return 0;
 }
 
-void __i915_request_submit(struct i915_request *request)
+bool __i915_request_submit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
+	bool result = false;
 
 	GEM_TRACE("%s fence %llx:%lld, current %d\n",
 		  engine->name,
@@ -389,6 +390,25 @@ void __i915_request_submit(struct i915_request *request)
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&engine->active.lock);
 
+	/*
+	 * With the advent of preempt-to-busy, we frequently encounter
+	 * requests that we have unsubmitted from HW, but left running
+	 * until the next ack and so have completed in the meantime. On
+	 * resubmission of that completed request, we can skip
+	 * updating the payload, and execlists can even skip submitting
+	 * the request.
+	 *
+	 * We must remove the request from the caller's priority queue,
+	 * and the caller must only call us when the request is in their
+	 * priority queue, under the active.lock. This ensures that the
+	 * request has *not* yet been retired and we can safely move
+	 * the request into the engine->active.list where it will be
+	 * dropped upon retiring. (Otherwise if resubmit a *retired*
+	 * request, this would be a horrible use-after-free.)
+	 */
+	if (i915_request_completed(request))
+		goto xfer;
+
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
 
@@ -412,14 +432,19 @@ void __i915_request_submit(struct i915_request *request)
 	    i915_sw_fence_signaled(&request->semaphore))
 		engine->saturated |= request->sched.semaphores;
 
-	/* We may be recursing from the signal callback of another i915 fence */
-	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+	engine->emit_fini_breadcrumb(request,
+				     request->ring->vaddr + request->postfix);
 
-	list_move_tail(&request->sched.link, &engine->active.requests);
+	trace_i915_request_execute(request);
+	engine->serial++;
+	result = true;
+
+xfer:	/* We may be recursing from the signal callback of another i915 fence */
+	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
-	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
-	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+	if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
+		list_move_tail(&request->sched.link, &engine->active.requests);
 
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
 	    !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
 	    !i915_request_enable_breadcrumb(request))
@@ -429,12 +454,7 @@ void __i915_request_submit(struct i915_request *request)
 
 	spin_unlock(&request->lock);
 
-	engine->emit_fini_breadcrumb(request,
-				     request->ring->vaddr + request->postfix);
-
-	engine->serial++;
-
-	trace_i915_request_execute(request);
+	return result;
 }
 
 void i915_request_submit(struct i915_request *request)
drivers/gpu/drm/i915/i915_request.h +1 −1
@@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq,
 
 void i915_request_add(struct i915_request *rq);
 
-void __i915_request_submit(struct i915_request *request);
+bool __i915_request_submit(struct i915_request *request);
 void i915_request_submit(struct i915_request *request);
 
 void i915_request_skip(struct i915_request *request, int error);
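
To close, a hedged sketch of the caller contract created by the new boolean return of __i915_request_submit(), mirroring the execlists_dequeue() hunks above. The names (struct request, submit_request(), dequeue()) are again illustrative stand-ins, not the kernel code itself: only a request whose payload actually reached the hardware becomes the new 'last' used for ELSP merging, while already completed requests are quietly tracked and skipped.

#include <stdbool.h>
#include <stddef.h>

struct request { bool completed; };	/* hypothetical stand-in for i915_request */

/* Stand-in for bool __i915_request_submit(): true only if the payload
 * was actually emitted to the hardware ring. */
static bool submit_request(struct request *rq)
{
	return !rq->completed;
}

static void dequeue(struct request **queue, int count)
{
	struct request *last = NULL;
	bool submit = false;

	for (int i = 0; i < count; i++) {
		/* Completed requests return false and never become 'last'. */
		if (submit_request(queue[i])) {
			submit = true;
			last = queue[i];
		}
	}

	(void)last;
	(void)submit;	/* a real caller would now update the ELSP ports */
}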