Commit f48627e6 authored by Jason Low, committed by Ingo Molnar

sched/balancing: Periodically decay max cost of idle balance



This patch builds on patch 2 and periodically decays each sched domain's
max cost of newidle balancing (max_newidle_lb_cost) by approximately 1%
per second. The rq's max_idle_balance_cost value is decayed along with it.
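
For scale: one decay step multiplies the stored maximum by 253/256, and
rebalance_domains() applies it at most once per second per domain, so the
value loses roughly 1% per second and halves in about a minute, since
(253/256)^59 ≈ 0.5. A minimal user-space sketch of just that arithmetic,
with a made-up starting cost; this is an illustration, not code from the
patch:

#include <stdio.h>
#include <stdint.h>

/* One decay step, as in the patch: new = old * 253 / 256 (~1%/second). */
static uint64_t decay_cost(uint64_t cost)
{
	return (cost * 253) / 256;
}

int main(void)
{
	uint64_t cost = 500000;	/* hypothetical starting max cost, in ns */
	int sec;

	for (sec = 1; sec <= 60; sec++) {
		cost = decay_cost(cost);
		if (sec % 15 == 0)
			printf("after %2ds: %llu ns\n",
			       sec, (unsigned long long)cost);
	}
	return 0;
}

Because 253/256 is a fixed-point fraction with a power-of-two denominator,
each step is a multiply and a shift; no floating point is needed.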

Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379096813-3032-4-git-send-email-jason.low2@hp.com


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 9bd721c5
arch/metag/include/asm/topology.h +1 −0
@@ -27,6 +27,7 @@
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
 	.max_newidle_lb_cost	= 0,			\
+	.next_decay_max_lb_cost	= jiffies,		\
 }
 
 #define cpu_to_node(cpu)	((void)(cpu), 0)
include/linux/sched.h +3 −0
@@ -810,7 +810,10 @@ struct sched_domain {
 	unsigned int nr_balance_failed; /* initialise to 0 */
 
 	u64 last_update;
+
+	/* idle_balance() stats */
 	u64 max_newidle_lb_cost;
+	unsigned long next_decay_max_lb_cost;
 
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
include/linux/topology.h +3 −0
@@ -107,6 +107,7 @@ int arch_update_cpu_topology(void);
 	.balance_interval	= 1,					\
 	.smt_gain		= 1178,	/* 15% */			\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
 #endif /* CONFIG_SCHED_SMT */
@@ -137,6 +138,7 @@ int arch_update_cpu_topology(void);
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
 #endif /* CONFIG_SCHED_MC */
@@ -169,6 +171,7 @@ int arch_update_cpu_topology(void);
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
kernel/sched/fair.c +31 −7
@@ -5681,15 +5681,39 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
-	int need_serialize;
+	int need_serialize, need_decay = 0;
+	u64 max_cost = 0;
 
 	update_blocked_averages(cpu);
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
+		/*
+		 * Decay the newidle max times here because this is a regular
+		 * visit to all the domains. Decay ~1% per second.
+		 */
+		if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
+			sd->max_newidle_lb_cost =
+				(sd->max_newidle_lb_cost * 253) / 256;
+			sd->next_decay_max_lb_cost = jiffies + HZ;
+			need_decay = 1;
+		}
+		max_cost += sd->max_newidle_lb_cost;
+
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
+		/*
+		 * Stop the load balance at this level. There is another
+		 * CPU in our sched group which is doing load balancing more
+		 * actively.
+		 */
+		if (!continue_balancing) {
+			if (need_decay)
+				continue;
+			break;
+		}
+
 		interval = sd->balance_interval;
 		if (idle != CPU_IDLE)
 			interval *= sd->busy_factor;
@@ -5723,14 +5747,14 @@ out:
 			next_balance = sd->last_balance + interval;
 			update_next_balance = 1;
 		}
-
+	}
+	if (need_decay) {
 		/*
-		 * Stop the load balance at this level. There is another
-		 * CPU in our sched group which is doing load balancing more
-		 * actively.
+		 * Ensure the rq-wide value also decays but keep it at a
+		 * reasonable floor to avoid funnies with rq->avg_idle.
 		 */
-		if (!continue_balancing)
-			break;
+		rq->max_idle_balance_cost =
+			max((u64)sysctl_sched_migration_cost, max_cost);
 	}
 	rcu_read_unlock();
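
Reading the need_decay block above: max_cost accumulates every domain's
freshly decayed max_newidle_lb_cost, and the rq-wide max_idle_balance_cost
follows that sum downward but is floored at sysctl_sched_migration_cost
(500000 ns by default) so that comparisons against rq->avg_idle in
idle_balance() keep a sane lower bound. A hedged user-space sketch of that
aggregation; the per-domain values are invented for illustration:

#include <stdio.h>
#include <stdint.h>

#define NR_DOMAINS 3

/* stand-in for the kernel sysctl default: 500 us, in ns */
static const uint64_t sysctl_sched_migration_cost = 500000ULL;

static uint64_t max_u64(uint64_t a, uint64_t b)
{
	return a > b ? a : b;
}

int main(void)
{
	/* invented per-domain max_newidle_lb_cost values (ns): SMT, MC, NUMA */
	uint64_t lb_cost[NR_DOMAINS] = { 30000, 120000, 900000 };
	uint64_t max_cost = 0;
	uint64_t rq_max_idle_balance_cost;
	int i;

	/* mirror the rebalance_domains() loop: decay each level, then sum */
	for (i = 0; i < NR_DOMAINS; i++) {
		lb_cost[i] = (lb_cost[i] * 253) / 256;
		max_cost += lb_cost[i];
	}

	/* the rq-wide value decays with the sum but never below the floor */
	rq_max_idle_balance_cost =
		max_u64(sysctl_sched_migration_cost, max_cost);

	printf("max_cost = %llu ns, rq->max_idle_balance_cost = %llu ns\n",
	       (unsigned long long)max_cost,
	       (unsigned long long)rq_max_idle_balance_cost);
	return 0;
}

Summing the per-domain maxima approximates what one newly-idle balance pass
can cost in the worst case, since idle_balance() walks every domain level;
flooring at the migration cost keeps the bound meaningful after long decay.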