Commit a47cd45a authored by Chris Wilson's avatar Chris Wilson
Browse files

drm/i915/selftests: Basic stress test for rapid context switching



We need to exercise the HW and submission paths for switching contexts
rapidly to check that features such as execlists' wa_tail are adequate.
Plus it's an interesting baseline latency metric.

v2: Check the initial request for allocation errors
v3: Use finite waits for more robust handling of broken code

Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: default avatarMika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180920105809.1872-1-chris@chris-wilson.co.uk
parent 82c7c4fc
Loading
Loading
Loading
Loading
+195 −0
Original line number Diff line number Diff line
@@ -22,6 +22,8 @@
 *
 */

#include <linux/prime_numbers.h>

#include "../i915_selftest.h"
#include "i915_random.h"
#include "igt_flush_test.h"
@@ -32,6 +34,198 @@

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

struct live_test {
	struct drm_i915_private *i915;
	const char *func;
	const char *name;

	unsigned int reset_count;
};

static int begin_live_test(struct live_test *t,
			   struct drm_i915_private *i915,
			   const char *func,
			   const char *name)
{
	int err;

	t->i915 = i915;
	t->func = func;
	t->name = name;

	err = i915_gem_wait_for_idle(i915,
				     I915_WAIT_LOCKED,
				     MAX_SCHEDULE_TIMEOUT);
	if (err) {
		pr_err("%s(%s): failed to idle before, with err=%d!",
		       func, name, err);
		return err;
	}

	i915->gpu_error.missed_irq_rings = 0;
	t->reset_count = i915_reset_count(&i915->gpu_error);

	return 0;
}

static int end_live_test(struct live_test *t)
{
	struct drm_i915_private *i915 = t->i915;

	if (igt_flush_test(i915, I915_WAIT_LOCKED))
		return -EIO;

	if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
		pr_err("%s(%s): GPU was reset %d times!\n",
		       t->func, t->name,
		       i915_reset_count(&i915->gpu_error) - t->reset_count);
		return -EIO;
	}

	if (i915->gpu_error.missed_irq_rings) {
		pr_err("%s(%s): Missed interrupts on engines %lx\n",
		       t->func, t->name, i915->gpu_error.missed_irq_rings);
		return -EIO;
	}

	return 0;
}

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	enum intel_engine_id id;
	struct drm_file *file;
	struct live_test t;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as very simple stress test for submission and HW switching
	 * between contexts.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	mutex_lock(&i915->drm.struct_mutex);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_unlock;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = i915_gem_create_context(i915, file->driver_priv);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_unlock;
		}
	}

	for_each_engine(engine, i915, id) {
		struct i915_request *rq;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			rq = i915_request_alloc(engine, ctx[n]);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out_unlock;
			}
			i915_request_add(rq);
		}
		if (i915_request_wait(rq,
				      I915_WAIT_LOCKED,
				      HZ / 5) < 0) {
			pr_err("Failed to populated %d contexts\n", nctx);
			i915_gem_set_wedged(i915);
			err = -EIO;
			goto out_unlock;
		}

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = begin_live_test(&t, i915, __func__, engine->name);
		if (err)
			goto out_unlock;

		end_time = jiffies + i915_selftest.timeout_jiffies;
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				rq = i915_request_alloc(engine, ctx[n % nctx]);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					goto out_unlock;
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(rq);
			}
			if (i915_request_wait(rq,
					      I915_WAIT_LOCKED,
					      HZ / 5) < 0) {
				pr_err("Switching between %ld contexts timed out\n",
				       prime);
				i915_gem_set_wedged(i915);
				break;
			}

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = end_live_test(&t);
		if (err)
			goto out_unlock;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	mock_file_free(i915, file);
	return err;
}

static struct i915_vma *
gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
{
@@ -714,6 +908,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_switch_to_kernel_context),
		SUBTEST(live_nop_switch),
		SUBTEST(igt_ctx_exec),
		SUBTEST(igt_ctx_readonly),
	};