Commit f48627e6 authored by Jason Low, committed by Ingo Molnar

sched/balancing: Periodically decay max cost of idle balance



This patch builds on patch 2 and periodically decays each sched domain's
max cost of newidle balancing (max_newidle_lb_cost) by approximately 1%
per second. The rq's max_idle_balance_cost value is decayed along with it.
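
For scale: one decay step multiplies the stored maximum by 253/256, and
rebalance_domains() applies it at most once per second per domain, so the
value loses roughly 1% per second and halves in about a minute, since
(253/256)^59 ≈ 0.5. A minimal user-space sketch of just that arithmetic,
with a made-up starting cost; this is an illustration, not code from the
patch:

#include <stdio.h>
#include <stdint.h>

/* One decay step, as in the patch: new = old * 253 / 256 (~1%/second). */
static uint64_t decay_cost(uint64_t cost)
{
	return (cost * 253) / 256;
}

int main(void)
{
	uint64_t cost = 500000;	/* hypothetical starting max cost, in ns */
	int sec;

	for (sec = 1; sec <= 60; sec++) {
		cost = decay_cost(cost);
		if (sec % 15 == 0)
			printf("after %2ds: %llu ns\n",
			       sec, (unsigned long long)cost);
	}
	return 0;
}

Because 253/256 is a fixed-point fraction with a power-of-two denominator,
each step is a multiply and a shift; no floating point is needed.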

Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379096813-3032-4-git-send-email-jason.low2@hp.com


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 9bd721c5
arch/metag/include/asm/topology.h +1 −0
@@ -27,6 +27,7 @@
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
 	.max_newidle_lb_cost	= 0,			\
+	.next_decay_max_lb_cost	= jiffies,		\
 }
 
 #define cpu_to_node(cpu)	((void)(cpu), 0)
include/linux/sched.h +3 −0
@@ -810,7 +810,10 @@ struct sched_domain {
 	unsigned int nr_balance_failed; /* initialise to 0 */
 
 	u64 last_update;
+
+	/* idle_balance() stats */
 	u64 max_newidle_lb_cost;
+	unsigned long next_decay_max_lb_cost;
 
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
include/linux/topology.h +3 −0
@@ -107,6 +107,7 @@ int arch_update_cpu_topology(void);
 	.balance_interval	= 1,					\
 	.smt_gain		= 1178,	/* 15% */			\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
 #endif /* CONFIG_SCHED_SMT */
@@ -137,6 +138,7 @@ int arch_update_cpu_topology(void);
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
 #endif /* CONFIG_SCHED_MC */
@@ -169,6 +171,7 @@ int arch_update_cpu_topology(void);
 	.last_balance		= jiffies,				\
 	.balance_interval	= 1,					\
 	.max_newidle_lb_cost	= 0,					\
+	.next_decay_max_lb_cost	= jiffies,				\
 }
 #endif
kernel/sched/fair.c +31 −7
@@ -5681,15 +5681,39 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
-	int need_serialize;
+	int need_serialize, need_decay = 0;
+	u64 max_cost = 0;
 
 	update_blocked_averages(cpu);
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
+		/*
+		 * Decay the newidle max times here because this is a regular
+		 * visit to all the domains. Decay ~1% per second.
+		 */
+		if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
+			sd->max_newidle_lb_cost =
+				(sd->max_newidle_lb_cost * 253) / 256;
+			sd->next_decay_max_lb_cost = jiffies + HZ;
+			need_decay = 1;
+		}
+		max_cost += sd->max_newidle_lb_cost;
+
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
+		/*
+		 * Stop the load balance at this level. There is another
+		 * CPU in our sched group which is doing load balancing more
+		 * actively.
+		 */
+		if (!continue_balancing) {
+			if (need_decay)
+				continue;
+			break;
+		}
+
 		interval = sd->balance_interval;
 		if (idle != CPU_IDLE)
 			interval *= sd->busy_factor;
@@ -5723,14 +5747,14 @@ out:
 			next_balance = sd->last_balance + interval;
 			update_next_balance = 1;
 		}
-
+	}
+	if (need_decay) {
 		/*
-		 * Stop the load balance at this level. There is another
-		 * CPU in our sched group which is doing load balancing more
-		 * actively.
+		 * Ensure the rq-wide value also decays but keep it at a
+		 * reasonable floor to avoid funnies with rq->avg_idle.
 		 */
-		if (!continue_balancing)
-			break;
+		rq->max_idle_balance_cost =
+			max((u64)sysctl_sched_migration_cost, max_cost);
 	}
 	rcu_read_unlock();
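
Reading the need_decay block above: max_cost accumulates every domain's
freshly decayed max_newidle_lb_cost, and the rq-wide max_idle_balance_cost
follows that sum downward but is floored at sysctl_sched_migration_cost
(500000 ns by default) so that comparisons against rq->avg_idle in
idle_balance() keep a sane lower bound. A hedged user-space sketch of that
aggregation; the per-domain values are invented for illustration:

#include <stdio.h>
#include <stdint.h>

#define NR_DOMAINS 3

/* stand-in for the kernel sysctl default: 500 us, in ns */
static const uint64_t sysctl_sched_migration_cost = 500000ULL;

static uint64_t max_u64(uint64_t a, uint64_t b)
{
	return a > b ? a : b;
}

int main(void)
{
	/* invented per-domain max_newidle_lb_cost values (ns): SMT, MC, NUMA */
	uint64_t lb_cost[NR_DOMAINS] = { 30000, 120000, 900000 };
	uint64_t max_cost = 0;
	uint64_t rq_max_idle_balance_cost;
	int i;

	/* mirror the rebalance_domains() loop: decay each level, then sum */
	for (i = 0; i < NR_DOMAINS; i++) {
		lb_cost[i] = (lb_cost[i] * 253) / 256;
		max_cost += lb_cost[i];
	}

	/* the rq-wide value decays with the sum but never below the floor */
	rq_max_idle_balance_cost =
		max_u64(sysctl_sched_migration_cost, max_cost);

	printf("max_cost = %llu ns, rq->max_idle_balance_cost = %llu ns\n",
	       (unsigned long long)max_cost,
	       (unsigned long long)rq_max_idle_balance_cost);
	return 0;
}

Summing the per-domain maxima approximates what one newly-idle balance pass
can cost in the worst case, since idle_balance() walks every domain level;
flooring at the migration cost keeps the bound meaningful after long decay.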