Commit f7f4e7fc authored by Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 - power-aware scheduling improvements (Patrick Bellasi)

 - NUMA balancing improvements (Mel Gorman)

 - vCPU scheduling fixes (Rohit Jain)

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Update util_est before updating schedutil
  sched/cpufreq: Modify aggregate utilization to always include blocked FAIR utilization
  sched/deadline/Documentation: Add overrun signal and GRUB-PA documentation
  sched/core: Distinguish between idle_cpu() calls based on desired effect, introduce available_idle_cpu()
  sched/wait: Include <linux/wait.h> in <linux/swait.h>
  sched/numa: Stagger NUMA balancing scan periods for new threads
  sched/core: Don't schedule threads on pre-empted vCPUs
  sched/fair: Avoid calling sync_entity_load_avg() unnecessarily
  sched/fair: Rearrange select_task_rq_fair() to optimize it
parents d9b446e2 2539fc82
Documentation/scheduler/sched-deadline.txt  +24 −1
@@ -49,7 +49,7 @@ CONTENTS
 2.1 Main algorithm
 ------------------
 
- SCHED_DEADLINE uses three parameters, named "runtime", "period", and
+ SCHED_DEADLINE [18] uses three parameters, named "runtime", "period", and
  "deadline", to schedule tasks. A SCHED_DEADLINE task should receive
  "runtime" microseconds of execution time every "period" microseconds, and
  these "runtime" microseconds are available within "deadline" microseconds
@@ -117,6 +117,10 @@ CONTENTS
          scheduling deadline = scheduling deadline + period
          remaining runtime = remaining runtime + runtime
 
+ The SCHED_FLAG_DL_OVERRUN flag in sched_attr's sched_flags field allows a task
+ to get informed about runtime overruns through the delivery of SIGXCPU
+ signals.
+
 
 2.2 Bandwidth reclaiming
 ------------------------
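Opting in to the overrun signal documented in the hunk above is done from user space via sched_setattr(). Below is a minimal sketch, assuming uapi headers recent enough to provide struct sched_attr and SCHED_FLAG_DL_OVERRUN (older systems have to declare these by hand); the 10 ms / 100 ms reservation is purely illustrative, the syscall is issued directly because glibc has no wrapper, and setting SCHED_DEADLINE needs the usual privileges (root or CAP_SYS_NICE).

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/sched.h>	/* SCHED_DEADLINE, SCHED_FLAG_DL_OVERRUN */
#include <linux/sched/types.h>	/* struct sched_attr */

/* Runs whenever the kernel detects that this task overran its runtime. */
static void overrun_handler(int sig)
{
	static const char msg[] = "SIGXCPU: SCHED_DEADLINE runtime overrun\n";

	write(STDERR_FILENO, msg, sizeof(msg) - 1);	/* async-signal-safe */
}

int main(void)
{
	struct sched_attr attr;

	signal(SIGXCPU, overrun_handler);

	memset(&attr, 0, sizeof(attr));
	attr.size           = sizeof(attr);
	attr.sched_policy   = SCHED_DEADLINE;
	attr.sched_flags    = SCHED_FLAG_DL_OVERRUN;	/* ask for SIGXCPU on overrun */
	attr.sched_runtime  = 10 * 1000 * 1000;		/* 10 ms */
	attr.sched_deadline = 100 * 1000 * 1000;	/* 100 ms */
	attr.sched_period   = 100 * 1000 * 1000;	/* 100 ms */

	if (syscall(SYS_sched_setattr, 0, &attr, 0) < 0) {
		perror("sched_setattr");
		return 1;
	}

	/* ... the periodic real-time work would run here ... */
	return 0;
}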
@@ -279,6 +283,19 @@ CONTENTS
    running_bw is incremented.
 
 
+
+2.3 Energy-aware scheduling
+------------------------
+
+ When cpufreq's schedutil governor is selected, SCHED_DEADLINE implements the
+ GRUB-PA [19] algorithm, reducing the CPU operating frequency to the minimum
+ value that still allows to meet the deadlines. This behavior is currently
+ implemented only for ARM architectures.
+
+ A particular care must be taken in case the time needed for changing frequency
+ is of the same order of magnitude of the reservation period. In such cases,
+ setting a fixed CPU frequency results in a lower amount of deadline misses.
+
 3. Scheduling Real-Time Tasks
 =============================
 
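A rough sense of what "minimum frequency that still meets the deadlines" means, with made-up numbers and ignoring both the margin GRUB keeps for reclaiming and the granularity of the available OPPs: a single reservation of 10 ms runtime every 100 ms period has bandwidth 10/100 = 0.1, so on a CPU whose maximum frequency is 2 GHz, GRUB-PA lets schedutil request on the order of 0.1 * 2 GHz = 200 MHz. If, as the last paragraph of the hunk warns, changing OPP itself takes a time comparable to that 100 ms period, pinning the frequency is the safer configuration.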
@@ -505,6 +522,12 @@ CONTENTS
  17 - L. Abeni, G. Lipari, A. Parri, Y. Sun, Multicore CPU reclaiming: parallel
       or sequential?. In Proceedings of the 31st Annual ACM Symposium on Applied
       Computing, 2016.
+ 18 - J. Lelli, C. Scordino, L. Abeni, D. Faggioli, Deadline scheduling in the
+      Linux kernel, Software: Practice and Experience, 46(6): 821-839, June
+      2016.
+ 19 - C. Scordino, L. Abeni, J. Lelli, Energy-Aware Real-Time Scheduling in
+      the Linux Kernel, 33rd ACM/SIGAPP Symposium On Applied Computing (SAC
+      2018), Pau, France, April 2018.
 
 
 4. Bandwidth management
include/linux/sched.h  +1 −0
@@ -1512,6 +1512,7 @@ static inline int task_nice(const struct task_struct *p)
 extern int can_nice(const struct task_struct *p, const int nice);
 extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
+extern int available_idle_cpu(int cpu);
 extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
 extern int sched_setattr(struct task_struct *, const struct sched_attr *);
include/linux/swait.h  +1 −0
@@ -5,6 +5,7 @@
 #include <linux/list.h>
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
+#include <linux/wait.h>
 #include <asm/current.h>
 
 /*
kernel/sched/core.c  +18 −21
@@ -2194,27 +2194,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
 
-#ifdef CONFIG_NUMA_BALANCING
-	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
-		p->mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
-		p->mm->numa_scan_seq = 0;
-	}
-
-	if (clone_flags & CLONE_VM)
-		p->numa_preferred_nid = current->numa_preferred_nid;
-	else
-		p->numa_preferred_nid = -1;
-
-	p->node_stamp = 0ULL;
-	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
-	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
-	p->numa_work.next = &p->numa_work;
-	p->numa_faults = NULL;
-	p->last_task_numa_placement = 0;
-	p->last_sum_exec_runtime = 0;
-
-	p->numa_group = NULL;
-#endif /* CONFIG_NUMA_BALANCING */
+	init_numa_balancing(clone_flags, p);
 }
 
 DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -4049,6 +4029,23 @@ int idle_cpu(int cpu)
 	return 1;
 }
 
+/**
+ * available_idle_cpu - is a given CPU idle for enqueuing work.
+ * @cpu: the CPU in question.
+ *
+ * Return: 1 if the CPU is currently idle. 0 otherwise.
+ */
+int available_idle_cpu(int cpu)
+{
+	if (!idle_cpu(cpu))
+		return 0;
+
+	if (vcpu_is_preempted(cpu))
+		return 0;
+
+	return 1;
+}
+
 /**
  * idle_task - return the idle task for a given CPU.
  * @cpu: the processor in question.
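The distinction matters on the enqueue side: a guest CPU whose vCPU has been preempted by the host still reports idle_cpu() == 1, yet queueing a wakeup there just waits on the hypervisor. A sketch of the intended call-site pattern follows; it assumes kernel context, and pick_wakeup_cpu() plus the candidate mask are illustrative, not the actual select_idle_sibling() code.

#include <linux/cpumask.h>
#include <linux/sched.h>

/*
 * Illustrative only: prefer a CPU that is idle *and* whose vCPU is not
 * preempted by the host; otherwise fall back to the original target.
 */
static int pick_wakeup_cpu(const struct cpumask *candidates, int target)
{
	int cpu;

	for_each_cpu(cpu, candidates) {
		/* available_idle_cpu() == idle_cpu() && !vcpu_is_preempted() */
		if (available_idle_cpu(cpu))
			return cpu;
	}

	return target;
}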
kernel/sched/cpufreq_schedutil.c  +8 −9
@@ -183,22 +183,21 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
 static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
 {
 	struct rq *rq = cpu_rq(sg_cpu->cpu);
-	unsigned long util;
 
-	if (rq->rt.rt_nr_running) {
-		util = sg_cpu->max;
-	} else {
-		util = sg_cpu->util_dl;
-		if (rq->cfs.h_nr_running)
-			util += sg_cpu->util_cfs;
-	}
+	if (rq->rt.rt_nr_running)
+		return sg_cpu->max;
 
 	/*
+	 * Utilization required by DEADLINE must always be granted while, for
+	 * FAIR, we use blocked utilization of IDLE CPUs as a mechanism to
+	 * gracefully reduce the frequency when no tasks show up for longer
+	 * periods of time.
+	 *
 	 * Ideally we would like to set util_dl as min/guaranteed freq and
 	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
 	 * ready for such an interface. So, we only do the latter for now.
 	 */
-	return min(util, sg_cpu->max);
+	return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs));
 }
 
 static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
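To see the behavioural change with numbers, here is a small user-space model of the old and new aggregation (made-up values; max stands for the CPU capacity seen by schedutil, util_dl and util_cfs for the DEADLINE and CFS utilization it tracks). The rq->rt.rt_nr_running case is unchanged: a runnable RT task still drives the request to max.

#include <stdio.h>

int main(void)
{
	unsigned long max = 1024, util_dl = 100, util_cfs = 300;
	int cfs_h_nr_running = 0;	/* CPU momentarily idle: CFS utilization is all blocked */
	unsigned long old_req, new_req;

	/* Old behaviour: blocked CFS utilization dropped when nothing is runnable. */
	old_req = util_dl;
	if (cfs_h_nr_running)
		old_req += util_cfs;
	if (old_req > max)
		old_req = max;

	/*
	 * New behaviour: CFS utilization always included, so the requested
	 * frequency decays gradually as the blocked utilization decays
	 * instead of collapsing straight to the DEADLINE-only request.
	 */
	new_req = util_dl + util_cfs;
	if (new_req > max)
		new_req = max;

	printf("old request: %lu/1024, new request: %lu/1024\n", old_req, new_req);
	return 0;
}

For these values it prints "old request: 100/1024, new request: 400/1024".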