Commit c1d51f68 authored by Rafael J. Wysocki
Browse files

cpuidle: Use nanoseconds as the unit of time



Currently, the cpuidle subsystem uses microseconds as the unit of
time which (among other things) causes the idle loop to incur some
integer division overhead for no clear benefit.

In order to allow cpuidle to measure time in nanoseconds, add two
new fields, exit_latency_ns and target_residency_ns, to represent the
exit latency and target residency of an idle state in nanoseconds,
respectively, to struct cpuidle_state and initialize them with the
help of the corresponding values in microseconds provided by drivers.
Additionally, change cpuidle_governor_latency_req() to return the
idle state exit latency constraint in nanoseconds.

Also measure idle state residency (last_residency_ns in struct
cpuidle_device and time_ns in its per-state states_usage entries) in
nanoseconds and update the cpuidle core and governors accordingly.

However, the menu governor still computes typical intervals in
microseconds to avoid integer overflows.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Doug Smythies <dsmythies@telus.net>
Tested-by: Doug Smythies <dsmythies@telus.net>
parent 99e98d3f
Loading
Loading
Loading
Loading
+17 −19
Original line number Diff line number Diff line
@@ -75,24 +75,24 @@ int cpuidle_play_dead(void)

static int find_deepest_state(struct cpuidle_driver *drv,
			      struct cpuidle_device *dev,
			      unsigned int max_latency,
			      u64 max_latency_ns,
			      unsigned int forbidden_flags,
			      bool s2idle)
{
	unsigned int latency_req = 0;
	u64 latency_req = 0;
	int i, ret = 0;

	for (i = 1; i < drv->state_count; i++) {
		struct cpuidle_state *s = &drv->states[i];

		if (dev->states_usage[i].disable ||
		    s->exit_latency <= latency_req ||
		    s->exit_latency > max_latency ||
		    s->exit_latency_ns <= latency_req ||
		    s->exit_latency_ns > max_latency_ns ||
		    (s->flags & forbidden_flags) ||
		    (s2idle && !s->enter_s2idle))
			continue;

		latency_req = s->exit_latency;
		latency_req = s->exit_latency_ns;
		ret = i;
	}
	return ret;
@@ -124,7 +124,7 @@ void cpuidle_use_deepest_state(bool enable)
int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
			       struct cpuidle_device *dev)
{
	return find_deepest_state(drv, dev, UINT_MAX, 0, false);
	return find_deepest_state(drv, dev, U64_MAX, 0, false);
}

#ifdef CONFIG_SUSPEND
@@ -180,7 +180,7 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
	 * that interrupts won't be enabled when it exits and allows the tick to
	 * be frozen safely.
	 */
	index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
	if (index > 0)
		enter_s2idle_proper(drv, dev, index);

@@ -209,7 +209,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
	 * CPU as a broadcast timer, this call may fail if it is not available.
	 */
	if (broadcast && tick_broadcast_enter()) {
		index = find_deepest_state(drv, dev, target_state->exit_latency,
		index = find_deepest_state(drv, dev, target_state->exit_latency_ns,
					   CPUIDLE_FLAG_TIMER_STOP, false);
		if (index < 0) {
			default_idle_call();
@@ -247,7 +247,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
		local_irq_enable();

	if (entered_state >= 0) {
		s64 diff, delay = drv->states[entered_state].exit_latency;
		s64 diff, delay = drv->states[entered_state].exit_latency_ns;
		int i;

		/*
@@ -255,15 +255,13 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
		 * This can be moved to within driver enter routine,
		 * but that results in multiple copies of same code.
		 */
		diff = ktime_us_delta(time_end, time_start);
		if (diff > INT_MAX)
			diff = INT_MAX;
		diff = ktime_sub(time_end, time_start);

		dev->last_residency = (int)diff;
		dev->states_usage[entered_state].time += dev->last_residency;
		dev->last_residency_ns = diff;
		dev->states_usage[entered_state].time_ns += diff;
		dev->states_usage[entered_state].usage++;

		if (diff < drv->states[entered_state].target_residency) {
		if (diff < drv->states[entered_state].target_residency_ns) {
			for (i = entered_state - 1; i >= 0; i--) {
				if (dev->states_usage[i].disable)
					continue;
@@ -281,14 +279,14 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
				 * Update if a deeper state would have been a
				 * better match for the observed idle duration.
				 */
				if (diff - delay >= drv->states[i].target_residency)
				if (diff - delay >= drv->states[i].target_residency_ns)
					dev->states_usage[entered_state].below++;

				break;
			}
		}
	} else {
		dev->last_residency = 0;
		dev->last_residency_ns = 0;
	}

	return entered_state;
@@ -381,7 +379,7 @@ u64 cpuidle_poll_time(struct cpuidle_driver *drv,
		if (dev->states_usage[i].disable)
			continue;

		limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
		limit_ns = (u64)drv->states[i].target_residency_ns;
	}

	dev->poll_limit_ns = limit_ns;
@@ -552,7 +550,7 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev)
static void __cpuidle_device_init(struct cpuidle_device *dev)
{
	memset(dev->states_usage, 0, sizeof(dev->states_usage));
	dev->last_residency = 0;
	dev->last_residency_ns = 0;
	dev->next_hrtimer = 0;
}

+20 −9
Original line number Diff line number Diff line
@@ -165,16 +165,27 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
	if (!drv->cpumask)
		drv->cpumask = (struct cpumask *)cpu_possible_mask;

	for (i = 0; i < drv->state_count; i++) {
		struct cpuidle_state *s = &drv->states[i];

		/*
	 * Look for the timer stop flag in the different states, so that we know
	 * if the broadcast timer has to be set up.  The loop is in the reverse
	 * order, because usually one of the deeper states have this flag set.
		 * Look for the timer stop flag in the different states and if
		 * it is found, indicate that the broadcast timer has to be set
		 * up.
		 */
	for (i = drv->state_count - 1; i >= 0 ; i--) {
		if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) {
		if (s->flags & CPUIDLE_FLAG_TIMER_STOP)
			drv->bctimer = 1;
			break;
		}

		/*
		 * The core will use the target residency and exit latency
		 * values in nanoseconds, but allow drivers to provide them in
		 * microseconds too.
		 */
		if (s->target_residency > 0)
			s->target_residency_ns = s->target_residency * NSEC_PER_USEC;

		if (s->exit_latency > 0)
			s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
	}
}

+5 −2
Original line number Diff line number Diff line
@@ -107,11 +107,14 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
 * cpuidle_governor_latency_req - Compute a latency constraint for CPU
 * @cpu: Target CPU
 */
int cpuidle_governor_latency_req(unsigned int cpu)
s64 cpuidle_governor_latency_req(unsigned int cpu)
{
	int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
	struct device *device = get_cpu_device(cpu);
	int device_req = dev_pm_qos_raw_resume_latency(device);

	return device_req < global_req ? device_req : global_req;
	if (device_req > global_req)
		device_req = global_req;

	return (s64)device_req * NSEC_PER_USEC;
}
+3 −4
Original line number Diff line number Diff line
@@ -49,7 +49,7 @@ static int haltpoll_select(struct cpuidle_driver *drv,
			   struct cpuidle_device *dev,
			   bool *stop_tick)
{
	int latency_req = cpuidle_governor_latency_req(dev->cpu);
	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);

	if (!drv->state_count || latency_req == 0) {
		*stop_tick = false;
@@ -75,10 +75,9 @@ static int haltpoll_select(struct cpuidle_driver *drv,
	return 0;
}

static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us)
static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
{
	unsigned int val;
	u64 block_ns = block_us*NSEC_PER_USEC;

	/* Grow cpu_halt_poll_us if
	 * cpu_halt_poll_us < block_ns < guest_halt_poll_us
@@ -115,7 +114,7 @@ static void haltpoll_reflect(struct cpuidle_device *dev, int index)
	dev->last_state_idx = index;

	if (index != 0)
		adjust_poll_limit(dev, dev->last_residency);
		adjust_poll_limit(dev, dev->last_residency_ns);
}

/**
+13 −12
Original line number Diff line number Diff line
@@ -27,8 +27,8 @@ struct ladder_device_state {
	struct {
		u32 promotion_count;
		u32 demotion_count;
		u32 promotion_time;
		u32 demotion_time;
		u64 promotion_time_ns;
		u64 demotion_time_ns;
	} threshold;
	struct {
		int promotion_count;
@@ -68,9 +68,10 @@ static int ladder_select_state(struct cpuidle_driver *drv,
{
	struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
	struct ladder_device_state *last_state;
	int last_residency, last_idx = dev->last_state_idx;
	int last_idx = dev->last_state_idx;
	int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
	int latency_req = cpuidle_governor_latency_req(dev->cpu);
	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
	s64 last_residency;

	/* Special case when user has set very strict latency requirement */
	if (unlikely(latency_req == 0)) {
@@ -80,13 +81,13 @@ static int ladder_select_state(struct cpuidle_driver *drv,

	last_state = &ldev->states[last_idx];

	last_residency = dev->last_residency - drv->states[last_idx].exit_latency;
	last_residency = dev->last_residency_ns - drv->states[last_idx].exit_latency_ns;

	/* consider promotion */
	if (last_idx < drv->state_count - 1 &&
	    !dev->states_usage[last_idx + 1].disable &&
	    last_residency > last_state->threshold.promotion_time &&
	    drv->states[last_idx + 1].exit_latency <= latency_req) {
	    last_residency > last_state->threshold.promotion_time_ns &&
	    drv->states[last_idx + 1].exit_latency_ns <= latency_req) {
		last_state->stats.promotion_count++;
		last_state->stats.demotion_count = 0;
		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
@@ -98,11 +99,11 @@ static int ladder_select_state(struct cpuidle_driver *drv,
	/* consider demotion */
	if (last_idx > first_idx &&
	    (dev->states_usage[last_idx].disable ||
	    drv->states[last_idx].exit_latency > latency_req)) {
	    drv->states[last_idx].exit_latency_ns > latency_req)) {
		int i;

		for (i = last_idx - 1; i > first_idx; i--) {
			if (drv->states[i].exit_latency <= latency_req)
			if (drv->states[i].exit_latency_ns <= latency_req)
				break;
		}
		ladder_do_selection(dev, ldev, last_idx, i);
@@ -110,7 +111,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
	}

	if (last_idx > first_idx &&
	    last_residency < last_state->threshold.demotion_time) {
	    last_residency < last_state->threshold.demotion_time_ns) {
		last_state->stats.demotion_count++;
		last_state->stats.promotion_count = 0;
		if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
@@ -150,9 +151,9 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
		lstate->threshold.demotion_count = DEMOTION_COUNT;

		if (i < drv->state_count - 1)
			lstate->threshold.promotion_time = state->exit_latency;
			lstate->threshold.promotion_time_ns = state->exit_latency_ns;
		if (i > first_idx)
			lstate->threshold.demotion_time = state->exit_latency;
			lstate->threshold.demotion_time_ns = state->exit_latency_ns;
	}

	return 0;
Loading