Commit 18e4af04 authored by Chris Wilson's avatar Chris Wilson
Browse files

drm/i915: Drop no-semaphore boosting



Now that we have fast timeslicing on semaphores, we no longer need to
prioritise non-semaphore work as we will yield any work blocked on a
semaphore to the next in the queue. Previously with no timeslicing,
blocking on the semaphore caused extremely bad scheduling with multiple
clients utilising multiple rings. Now, there is no impact and we can
remove the complication.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200513173504.28322-1-chris@chris-wilson.co.uk
parent 701f0265
Loading
Loading
Loading
Loading
+0 −15
Original line number Diff line number Diff line
@@ -2582,21 +2582,6 @@ static void eb_request_add(struct i915_execbuffer *eb)
	/* Check that the context wasn't destroyed before submission */
	if (likely(!intel_context_is_closed(eb->context))) {
		attr = eb->gem_context->sched;

		/*
		 * Boost actual workloads past semaphores!
		 *
		 * With semaphores we spin on one engine waiting for another,
		 * simply to reduce the latency of starting our work when
		 * the signaler completes. However, if there is any other
		 * work that we could be doing on this engine instead, that
		 * is better utilisation and will reduce the overall duration
		 * of the current work. To avoid PI boosting a semaphore
		 * far in the distance past over useful work, we keep a history
		 * of any semaphore use along our dependency chain.
		 */
		if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
	} else {
		/* Serialise with context_close via the add_to_timeline */
		i915_request_set_error_once(rq, -ENOENT);
+0 −9
Original line number Diff line number Diff line
@@ -429,15 +429,6 @@ static int effective_prio(const struct i915_request *rq)
	if (i915_request_has_nopreempt(rq))
		prio = I915_PRIORITY_UNPREEMPTABLE;

	/*
	 * On unwinding the active request, we give it a priority bump
	 * if it has completed waiting on any semaphore. If we know that
	 * the request has already started, we can prevent an unwanted
	 * preempt-to-idle cycle by taking that into account now.
	 */
	if (__i915_request_has_started(rq))
		prio |= I915_PRIORITY_NOSEMAPHORE;

	return prio;
}

+1 −0
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ static int request_sync(struct i915_request *rq)

	/* Opencode i915_request_add() so we can keep the timeline locked. */
	__i915_request_commit(rq);
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	__i915_request_queue(rq, NULL);

	timeout = i915_request_wait(rq, 0, HZ / 10);
+1 −3
Original line number Diff line number Diff line
@@ -24,14 +24,12 @@ enum {
	I915_PRIORITY_DISPLAY,
};

#define I915_USER_PRIORITY_SHIFT 1
#define I915_USER_PRIORITY_SHIFT 0
#define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)

#define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
#define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)

#define I915_PRIORITY_NOSEMAPHORE	((u8)BIT(0))

/* Smallest priority value that cannot be bumped. */
#define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK)

+4 −36
Original line number Diff line number Diff line
@@ -368,8 +368,6 @@ __await_execution(struct i915_request *rq,
	}
	spin_unlock_irq(&signal->lock);

	/* Copy across semaphore status as we need the same behaviour */
	rq->sched.flags |= signal->sched.flags;
	return 0;
}

@@ -537,10 +535,8 @@ void __i915_request_unsubmit(struct i915_request *request)
	spin_unlock(&request->lock);

	/* We've already spun, don't charge on resubmitting. */
	if (request->sched.semaphores && i915_request_started(request)) {
		request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
	if (request->sched.semaphores && i915_request_started(request))
		request->sched.semaphores = 0;
	}

	/*
	 * We don't need to wake_up any waiters on request->execute, they
@@ -598,15 +594,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
	return NOTIFY_DONE;
}

static void irq_semaphore_cb(struct irq_work *wrk)
{
	struct i915_request *rq =
		container_of(wrk, typeof(*rq), semaphore_work);

	i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE);
	i915_request_put(rq);
}

static int __i915_sw_fence_call
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
@@ -614,11 +601,6 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)

	switch (state) {
	case FENCE_COMPLETE:
		if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) {
			i915_request_get(rq);
			init_irq_work(&rq->semaphore_work, irq_semaphore_cb);
			irq_work_queue(&rq->semaphore_work);
		}
		break;

	case FENCE_FREE:
@@ -997,6 +979,7 @@ emit_semaphore_wait(struct i915_request *to,
		    gfp_t gfp)
{
	const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
	struct i915_sw_fence *wait = &to->submit;

	if (!intel_context_use_semaphores(to->context))
		goto await_fence;
@@ -1031,11 +1014,10 @@ emit_semaphore_wait(struct i915_request *to,
		goto await_fence;

	to->sched.semaphores |= mask;
	to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
	return 0;
	wait = &to->semaphore;

await_fence:
	return i915_sw_fence_await_dma_fence(&to->submit,
	return i915_sw_fence_await_dma_fence(wait,
					     &from->fence, 0,
					     I915_FENCE_GFP);
}
@@ -1070,17 +1052,6 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
	if (ret < 0)
		return ret;

	if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
		ret = i915_sw_fence_await_dma_fence(&to->semaphore,
						    &from->fence, 0,
						    I915_FENCE_GFP);
		if (ret < 0)
			return ret;
	}

	if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
		to->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN;

	return 0;
}

@@ -1528,9 +1499,6 @@ void i915_request_add(struct i915_request *rq)
		attr = ctx->sched;
	rcu_read_unlock();

	if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
		attr.priority |= I915_PRIORITY_NOSEMAPHORE;

	__i915_request_queue(rq, &attr);

	mutex_unlock(&tl->mutex);
Loading