Commit 01f624f0 authored by Chris Wilson
Browse files

drm/i915: Ratelimit i915_globals_park

When doing our global park, we like to be a good citizen and shrink our
slab caches (of which we have quite a few now), but each
kmem_cache_shrink() incurs a stop_machine() and so ends up being quite
expensive, causing machine-wide stalls. While ideally we would like to
throw away unused pages in our slab caches whenever it appears that we
are idling, doing so will require a much cheaper mechanism. In the
meantime use a delayed work to impose a rate-limit that means we have
to have been idle for more than 2 seconds before we start shrinking.

References: https://gitlab.freedesktop.org/drm/intel/issues/848


Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191218094057.3510459-1-chris@chris-wilson.co.uk
parent 54400257
Loading
Loading
Loading
Loading
+44 −9
Original line number Diff line number Diff line
@@ -20,7 +20,10 @@ static LIST_HEAD(globals);
static atomic_t active;
static atomic_t epoch;
/*
 * State for the deferred "park" request; a single file-scope instance,
 * park, is defined here.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers, so
 * two members named "work" appear below (an rcu_work and a delayed_work).
 * Presumably only one of them exists in the real file -- confirm against
 * the tree before relying on this text.
 */
static struct park_work {
	struct rcu_work work;
	/* Armed by schedule_delayed_work(); handler is __i915_globals_park() */
	struct delayed_work work;
	/* Passed to call_rcu() so the work is only armed after a grace period */
	struct rcu_head rcu;
	/* Bit flags, manipulated with test_and_set_bit()/clear_bit() */
	unsigned long flags;
/* Bit index in flags: set while a park request is outstanding */
#define PENDING 0
	/* Value of the global epoch counter sampled when the request was queued */
	int epoch;
} park;

@@ -37,10 +40,32 @@ static void i915_globals_shrink(void)
		global->shrink();
}

/*
 * RCU callback issued by __i915_globals_queue_rcu(): once invoked, arm
 * park.work so that it runs after a further delay.
 *
 * @rcu: the rcu_head that was passed to call_rcu(); not used here.
 */
static void __i915_globals_grace(struct rcu_head *rcu)
{
	/* Shrinking is quite slow, so ratelimit parking to roughly 2 seconds */
	const unsigned long delay = round_jiffies_up_relative(2 * HZ);

	schedule_delayed_work(&park.work, delay);
}

/*
 * Record a fresh epoch for the park request and, provided nothing is
 * currently active, ask RCU to invoke __i915_globals_grace().
 */
static void __i915_globals_queue_rcu(void)
{
	/* Bump the global counter and remember the value we were given */
	park.epoch = atomic_inc_return(&epoch);

	/* Something is active again: do not issue the RCU callback */
	if (atomic_read(&active))
		return;

	init_rcu_head(&park.rcu);
	call_rcu(&park.rcu, __i915_globals_grace);
}

/*
 * Work handler for park.work (installed in i915_globals_init()).
 *
 * Compares the epoch recorded when the request was queued against the
 * global epoch counter. If they differ, the request is requeued through
 * __i915_globals_queue_rcu(); otherwise the PENDING bit is cleared and
 * i915_globals_shrink() is called.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers. The
 * two consecutive epoch tests below look like the old ("==") and new
 * ("!=") variants of the same line rather than an intentional nested
 * condition -- confirm against the tree.
 *
 * @work: the work item being run; not used here, state lives in park.
 */
static void __i915_globals_park(struct work_struct *work)
{
	/* Pairs with init_rcu_head() in __i915_globals_queue_rcu() */
	destroy_rcu_head(&park.rcu);

	/* Confirm nothing woke up in the last grace period */
	if (park.epoch == atomic_read(&epoch))
	if (park.epoch != atomic_read(&epoch)) {
		/* The epoch moved on: take a new snapshot and try again later */
		__i915_globals_queue_rcu();
		return;
	}

	/* Let the next i915_globals_park() queue a new request */
	clear_bit(PENDING, &park.flags);
	i915_globals_shrink();
}

@@ -85,7 +110,7 @@ int __init i915_globals_init(void)
		}
	}

	INIT_RCU_WORK(&park.work, __i915_globals_park);
	INIT_DELAYED_WORK(&park.work, __i915_globals_park);
	return 0;
}

@@ -103,8 +128,9 @@ void i915_globals_park(void)
	if (!atomic_dec_and_test(&active))
		return;

	park.epoch = atomic_inc_return(&epoch);
	queue_rcu_work(system_wq, &park.work);
	/* Queue cleanup after the next RCU grace period has freed slabs */
	if (!test_and_set_bit(PENDING, &park.flags))
		__i915_globals_queue_rcu();
}

void i915_globals_unpark(void)
@@ -113,12 +139,21 @@ void i915_globals_unpark(void)
	atomic_inc(&active);
}

/*
 * Drain any outstanding park request; called from i915_globals_exit().
 *
 * The order of the steps matters: active is raised first, then pending
 * RCU callbacks are waited for, then the delayed work is flushed, and
 * only afterwards is active dropped again.
 */
static void __exit __i915_globals_flush(void)
{
	/*
	 * While active is non-zero, __i915_globals_queue_rcu() will not issue
	 * a new call_rcu().
	 */
	atomic_inc(&active); /* skip shrinking */

	rcu_barrier(); /* wait for the work to be queued */
	/* Run the delayed work, if armed, and wait for it to complete */
	flush_delayed_work(&park.work);

	/* Undo the temporary reference taken above */
	atomic_dec(&active);
}

void __exit i915_globals_exit(void)
{
	/* Flush any residual park_work */
	atomic_inc(&epoch);
	flush_rcu_work(&park.work);
	GEM_BUG_ON(atomic_read(&active));

	__i915_globals_flush();
	__i915_globals_cleanup();

	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */