Commit 5b9b28a6 authored by Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main scheduler changes in this cycle were:

   - various sched/deadline fixes and enhancements

   - rescheduling latency fixes/cleanups

   - rework the rq->clock code to be more consistent and more robust

   - minor micro-optimizations

   - ->avg.decay_count fixes

   - add a stack overflow check to might_sleep()

   - idle-poll handler fix, possibly resulting in power savings

   - misc smaller updates and fixes"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/Documentation: Remove unneeded word
  sched/wait: Introduce wait_on_bit_timeout()
  sched: Pull resched loop to __schedule() callers
  sched/deadline: Remove cpu_active_mask from cpudl_find()
  sched: Fix hrtick_start() on UP
  sched/deadline: Avoid pointless __setscheduler()
  sched/deadline: Fix stale yield state
  sched/deadline: Fix hrtick for a non-leftmost task
  sched/deadline: Modify cpudl::free_cpus to reflect rd->online
  sched/idle: Add missing checks to the exit condition of cpu_idle_poll()
  sched: Fix missing preemption opportunity
  sched/rt: Reduce rq lock contention by eliminating locking of non-feasible target
  sched/debug: Print rq->clock_task
  sched/core: Rework rq->clock update skips
  sched/core: Validate rq_clock*() serialization
  sched/core: Remove check of p->sched_class
  sched/fair: Fix sched_entity::avg::decay_count initialization
  sched/debug: Fix potential call to __ffs(0) in sched_show_task()
  sched/debug: Check for stack overflow in ___might_sleep()
  sched/fair: Fix the dealing with decay_count in __synchronize_entity_decay()
parents a4cbbf54 139b6fd2
include/linux/wait.h  +26 −0
@@ -989,6 +989,32 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
				       mode);
}

/**
 * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
 * @word: the word being waited on, a kernel virtual address
 * @bit: the bit of the word being waited on
 * @mode: the task state to sleep in
 * @timeout: timeout, in jiffies
 *
 * Use the standard hashed waitqueue table to wait for a bit
 * to be cleared. This is similar to wait_on_bit(), except also takes a
 * timeout parameter.
 *
 * Returned value will be zero if the bit was cleared before the
 * @timeout elapsed, or non-zero if the @timeout elapsed or process
 * received a signal and the mode permitted wakeup on that signal.
 */
static inline int
wait_on_bit_timeout(void *word, int bit, unsigned mode, unsigned long timeout)
{
	might_sleep();
	if (!test_bit(bit, word))
		return 0;
	return out_of_line_wait_on_bit_timeout(word, bit,
					       bit_wait_timeout,
					       mode, timeout);
}

/**
 * wait_on_bit_action - wait for a bit to be cleared
 * @word: the word being waited on, a kernel virtual address
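
A minimal usage sketch of the new helper, for context; the flag word, bit name and wrapper function below are hypothetical and not part of this series, only the wait_on_bit_timeout() call itself is:

#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

#define EXAMPLE_BIT_BUSY	0		/* hypothetical flag bit */

static unsigned long example_flags;		/* bit 0 set while the device is busy */

static int example_wait_until_idle(void)
{
	/*
	 * Sleep uninterruptibly until another context clears
	 * EXAMPLE_BIT_BUSY, giving up after one second (HZ jiffies).
	 * Returns 0 if the bit was cleared in time, non-zero on timeout.
	 */
	if (wait_on_bit_timeout(&example_flags, EXAMPLE_BIT_BUSY,
				TASK_UNINTERRUPTIBLE, HZ))
		return -ETIMEDOUT;

	return 0;
}

The clearing side pairs this with clear_bit() followed by wake_up_bit() on the same word and bit, as with the other wait_on_bit*() helpers.
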
kernel/locking/mutex.c  +1 −1
@@ -81,7 +81,7 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count);
 * The mutex must later on be released by the same task that
 * acquired it. Recursive locking is not allowed. The task
 * may not exit without first unlocking the mutex. Also, kernel
 * memory where the mutex resides mutex must not be freed with
 * memory where the mutex resides must not be freed with
 * the mutex still locked. The mutex must first be initialized
 * (or statically defined) before it can be locked. memset()-ing
 * the mutex to 0 is not allowed.
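
The rules spelled out in this comment boil down to the usual pattern below; the lock name and critical section are hypothetical:

#include <linux/mutex.h>

static DEFINE_MUTEX(example_lock);	/* statically defined, so never memset() or re-initialize it */

static void example_update(void)
{
	mutex_lock(&example_lock);	/* may sleep; not usable from interrupt context */
	/* ... touch data protected by example_lock ... */
	mutex_unlock(&example_lock);	/* must be released by the same task that locked it */
}
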
kernel/sched/core.c  +69 −38
@@ -119,7 +119,9 @@ void update_rq_clock(struct rq *rq)
{
	s64 delta;

	if (rq->skip_clock_update > 0)
	lockdep_assert_held(&rq->lock);

	if (rq->clock_skip_update & RQCF_ACT_SKIP)
		return;

	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
@@ -490,6 +492,11 @@ static __init void init_hrtick(void)
 */
void hrtick_start(struct rq *rq, u64 delay)
{
	/*
	 * Don't schedule slices shorter than 10000ns, that just
	 * doesn't make sense. Rely on vruntime for fairness.
	 */
	delay = max_t(u64, delay, 10000LL);
	__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
			HRTIMER_MODE_REL_PINNED, 0);
}
@@ -1046,7 +1053,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
	 * this case, we can save a useless back to back clock update.
	 */
	if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
		rq->skip_clock_update = 1;
		rq_clock_skip_update(rq, true);
}

#ifdef CONFIG_SMP
@@ -1836,6 +1843,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
	p->se.prev_sum_exec_runtime	= 0;
	p->se.nr_migrations		= 0;
	p->se.vruntime			= 0;
#ifdef CONFIG_SMP
	p->se.avg.decay_count		= 0;
#endif
	INIT_LIST_HEAD(&p->se.group_node);

#ifdef CONFIG_SCHEDSTATS
@@ -2755,6 +2765,10 @@ again:
 *          - explicit schedule() call
 *          - return from syscall or exception to user-space
 *          - return from interrupt-handler to user-space
 *
 * WARNING: all callers must re-check need_resched() afterward and reschedule
 * accordingly in case an event triggered the need for rescheduling (such as
 * an interrupt waking up a task) while preemption was disabled in __schedule().
 */
static void __sched __schedule(void)
{
@@ -2763,7 +2777,6 @@ static void __sched __schedule(void)
	struct rq *rq;
	int cpu;

need_resched:
	preempt_disable();
	cpu = smp_processor_id();
	rq = cpu_rq(cpu);
@@ -2783,6 +2796,8 @@ need_resched:
	smp_mb__before_spinlock();
	raw_spin_lock_irq(&rq->lock);

	rq->clock_skip_update <<= 1; /* promote REQ to ACT */

	switch_count = &prev->nivcsw;
	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
		if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -2807,13 +2822,13 @@ need_resched:
		switch_count = &prev->nvcsw;
	}

	if (task_on_rq_queued(prev) || rq->skip_clock_update < 0)
	if (task_on_rq_queued(prev))
		update_rq_clock(rq);

	next = pick_next_task(rq, prev);
	clear_tsk_need_resched(prev);
	clear_preempt_need_resched();
	rq->skip_clock_update = 0;
	rq->clock_skip_update = 0;

	if (likely(prev != next)) {
		rq->nr_switches++;
@@ -2828,8 +2843,6 @@ need_resched:
	post_schedule(rq);

	sched_preempt_enable_no_resched();
	if (need_resched())
		goto need_resched;
}

static inline void sched_submit_work(struct task_struct *tsk)
@@ -2849,7 +2862,9 @@ asmlinkage __visible void __sched schedule(void)
	struct task_struct *tsk = current;

	sched_submit_work(tsk);
	do {
		__schedule();
	} while (need_resched());
}
EXPORT_SYMBOL(schedule);

@@ -2884,6 +2899,21 @@ void __sched schedule_preempt_disabled(void)
	preempt_disable();
}

static void preempt_schedule_common(void)
{
	do {
		__preempt_count_add(PREEMPT_ACTIVE);
		__schedule();
		__preempt_count_sub(PREEMPT_ACTIVE);

		/*
		 * Check again in case we missed a preemption opportunity
		 * between schedule and now.
		 */
		barrier();
	} while (need_resched());
}

#ifdef CONFIG_PREEMPT
/*
 * this is the entry point to schedule() from in-kernel preemption
@@ -2899,17 +2929,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
	if (likely(!preemptible()))
		return;

	do {
		__preempt_count_add(PREEMPT_ACTIVE);
		__schedule();
		__preempt_count_sub(PREEMPT_ACTIVE);

		/*
		 * Check again in case we missed a preemption opportunity
		 * between schedule and now.
		 */
		barrier();
	} while (need_resched());
	preempt_schedule_common();
}
NOKPROBE_SYMBOL(preempt_schedule);
EXPORT_SYMBOL(preempt_schedule);
@@ -3405,6 +3425,20 @@ static bool check_same_owner(struct task_struct *p)
	return match;
}

static bool dl_param_changed(struct task_struct *p,
		const struct sched_attr *attr)
{
	struct sched_dl_entity *dl_se = &p->dl;

	if (dl_se->dl_runtime != attr->sched_runtime ||
		dl_se->dl_deadline != attr->sched_deadline ||
		dl_se->dl_period != attr->sched_period ||
		dl_se->flags != attr->sched_flags)
		return true;

	return false;
}

static int __sched_setscheduler(struct task_struct *p,
				const struct sched_attr *attr,
				bool user)
@@ -3533,7 +3567,7 @@ recheck:
			goto change;
		if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
			goto change;
		if (dl_policy(policy))
		if (dl_policy(policy) && dl_param_changed(p, attr))
			goto change;

		p->sched_reset_on_fork = reset_on_fork;
@@ -4225,17 +4259,10 @@ SYSCALL_DEFINE0(sched_yield)
	return 0;
}

static void __cond_resched(void)
{
	__preempt_count_add(PREEMPT_ACTIVE);
	__schedule();
	__preempt_count_sub(PREEMPT_ACTIVE);
}

int __sched _cond_resched(void)
{
	if (should_resched()) {
		__cond_resched();
		preempt_schedule_common();
		return 1;
	}
	return 0;
@@ -4260,7 +4287,7 @@ int __cond_resched_lock(spinlock_t *lock)
	if (spin_needbreak(lock) || resched) {
		spin_unlock(lock);
		if (resched)
			__cond_resched();
			preempt_schedule_common();
		else
			cpu_relax();
		ret = 1;
@@ -4276,7 +4303,7 @@ int __sched __cond_resched_softirq(void)

	if (should_resched()) {
		local_bh_enable();
		__cond_resched();
		preempt_schedule_common();
		local_bh_disable();
		return 1;
	}
@@ -4531,9 +4558,10 @@ void sched_show_task(struct task_struct *p)
{
	unsigned long free = 0;
	int ppid;
	unsigned state;
	unsigned long state = p->state;

	state = p->state ? __ffs(p->state) + 1 : 0;
	if (state)
		state = __ffs(state) + 1;
	printk(KERN_INFO "%-15.15s %c", p->comm,
		state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
#if BITS_PER_LONG == 32
@@ -4766,7 +4794,7 @@ static struct rq *move_queued_task(struct task_struct *p, int new_cpu)

void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
	if (p->sched_class && p->sched_class->set_cpus_allowed)
	if (p->sched_class->set_cpus_allowed)
		p->sched_class->set_cpus_allowed(p, new_mask);

	cpumask_copy(&p->cpus_allowed, new_mask);
@@ -7275,6 +7303,11 @@ void __init sched_init(void)
	atomic_inc(&init_mm.mm_count);
	enter_lazy_tlb(&init_mm, current);

	/*
	 * During early bootup we pretend to be a normal task:
	 */
	current->sched_class = &fair_sched_class;

	/*
	 * Make us the idle thread. Technically, schedule() should not be
	 * called from this thread, however somewhere below it might be,
@@ -7285,11 +7318,6 @@ void __init sched_init(void)

	calc_load_update = jiffies + LOAD_FREQ;

	/*
	 * During early bootup we pretend to be a normal task:
	 */
	current->sched_class = &fair_sched_class;

#ifdef CONFIG_SMP
	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
	/* May be allocated at isolcpus cmdline parse time */
@@ -7350,6 +7378,9 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
			in_atomic(), irqs_disabled(),
			current->pid, current->comm);

	if (task_stack_end_corrupted(current))
		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");

	debug_show_held_locks(current);
	if (irqs_disabled())
		print_irqtrace_events(current);
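
The RQCF_REQ_SKIP/RQCF_ACT_SKIP flags tested above are defined in kernel/sched/sched.h, which is not part of this excerpt. As an approximation of what the same series adds there: rq_clock_skip_update() records a skip request under the rq lock, and the rq->clock_skip_update <<= 1 statement in __schedule() promotes that request (0x01) to the active skip bit (0x02) that update_rq_clock() honours.

#define RQCF_REQ_SKIP	0x01	/* skip requested, e.g. by check_preempt_curr() */
#define RQCF_ACT_SKIP	0x02	/* request promoted by __schedule(), checked by update_rq_clock() */

static inline void rq_clock_skip_update(struct rq *rq, bool skip)
{
	lockdep_assert_held(&rq->lock);
	if (skip)
		rq->clock_skip_update |= RQCF_REQ_SKIP;
	else
		rq->clock_skip_update &= ~RQCF_REQ_SKIP;
}
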
kernel/sched/cpudeadline.c  +23 −4
@@ -107,7 +107,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
	int best_cpu = -1;
	const struct sched_dl_entity *dl_se = &p->dl;

	if (later_mask && cpumask_and(later_mask, later_mask, cp->free_cpus)) {
	if (later_mask &&
	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
		best_cpu = cpumask_any(later_mask);
		goto out;
	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
@@ -185,6 +186,26 @@ out:
	raw_spin_unlock_irqrestore(&cp->lock, flags);
}

/*
 * cpudl_set_freecpu - Set the cpudl.free_cpus
 * @cp: the cpudl max-heap context
 * @cpu: rd attached cpu
 */
void cpudl_set_freecpu(struct cpudl *cp, int cpu)
{
	cpumask_set_cpu(cpu, cp->free_cpus);
}

/*
 * cpudl_clear_freecpu - Clear the cpudl.free_cpus
 * @cp: the cpudl max-heap context
 * @cpu: rd attached cpu
 */
void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
{
	cpumask_clear_cpu(cpu, cp->free_cpus);
}

/*
 * cpudl_init - initialize the cpudl structure
 * @cp: the cpudl max-heap context
@@ -203,7 +224,7 @@ int cpudl_init(struct cpudl *cp)
	if (!cp->elements)
		return -ENOMEM;

	if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
	if (!zalloc_cpumask_var(&cp->free_cpus, GFP_KERNEL)) {
		kfree(cp->elements);
		return -ENOMEM;
	}
@@ -211,8 +232,6 @@ int cpudl_init(struct cpudl *cp)
	for_each_possible_cpu(i)
		cp->elements[i].idx = IDX_INVALID;

	cpumask_setall(cp->free_cpus);

	return 0;
}

kernel/sched/cpudeadline.h  +2 −0
@@ -24,6 +24,8 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
	       struct cpumask *later_mask);
void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
int cpudl_init(struct cpudl *cp);
void cpudl_set_freecpu(struct cpudl *cp, int cpu);
void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
void cpudl_cleanup(struct cpudl *cp);
#endif /* CONFIG_SMP */
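
The new free_cpus helpers are driven from the deadline class's root-domain online/offline hooks in kernel/sched/deadline.c, which are not shown in this excerpt; a hypothetical sketch of the intended usage:

/* Illustration only: the real hooks (rq_online_dl()/rq_offline_dl()) do more than this. */
static void example_rq_online_dl(struct rq *rq)
{
	/* CPU joins the root domain: cpudl_find() may now pick it */
	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
}

static void example_rq_offline_dl(struct rq *rq)
{
	/* CPU leaves the root domain: it must no longer appear free */
	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
}
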
