Commit 91a9a90d authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'sched_urgent_for_5.8_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Borislav Petkov:
 "The most anticipated fix in this pull request is probably the horrible
  build fix for the RANDSTRUCT fail that didn't make -rc2. Also included
  is the cleanup that removes those BUILD_BUG_ON()s and replaces it with
  ugly unions.

  Also included is the try_to_wake_up() race fix that was first
  triggered by Paul's RCU-torture runs, but was independently hit by
  Dave Chinner's fstest runs as well"

* tag 'sched_urgent_for_5.8_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/cfs: change initial value of runnable_avg
  smp, irq_work: Continue smp_call_function*() and irq_work*() integration
  sched/core: s/WF_ON_RQ/WQ_ON_CPU/
  sched/core: Fix ttwu() race
  sched/core: Fix PI boosting between RT and DEADLINE tasks
  sched/deadline: Initialize ->dl_boosted
  sched/core: Check cpus_mask, not cpus_ptr in __set_cpus_allowed_ptr(), to fix mask corruption
  sched/core: Fix CONFIG_GCC_PLUGIN_RANDSTRUCT build fail
parents 098c7938 e21cf434
Loading
Loading
Loading
Loading
+8 −18
Original line number Diff line number Diff line
@@ -2,7 +2,7 @@
#ifndef _LINUX_IRQ_WORK_H
#define _LINUX_IRQ_WORK_H

#include <linux/llist.h>
#include <linux/smp_types.h>

/*
 * An entry can be in one of four states:
@@ -13,24 +13,14 @@
 * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
 */

/* flags share CSD_FLAG_ space */

#define IRQ_WORK_PENDING	BIT(0)
#define IRQ_WORK_BUSY		BIT(1)

/* Doesn't want IPI, wait for tick: */
#define IRQ_WORK_LAZY		BIT(2)
/* Run hard IRQ context, even on RT */
#define IRQ_WORK_HARD_IRQ	BIT(3)

#define IRQ_WORK_CLAIMED	(IRQ_WORK_PENDING | IRQ_WORK_BUSY)

/*
 * structure shares layout with single_call_data_t.
 */
struct irq_work {
	union {
		struct __call_single_node node;
		struct {
			struct llist_node llnode;
			atomic_t flags;
		};
	};
	void (*func)(struct irq_work *);
};

+1 −2
Original line number Diff line number Diff line
@@ -654,9 +654,8 @@ struct task_struct {
	unsigned int			ptrace;

#ifdef CONFIG_SMP
	struct llist_node		wake_entry;
	unsigned int			wake_entry_type;
	int				on_cpu;
	struct __call_single_node	wake_entry;
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/* Current CPU: */
	unsigned int			cpu;
+8 −15
Original line number Diff line number Diff line
@@ -12,29 +12,22 @@
#include <linux/list.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/llist.h>
#include <linux/smp_types.h>

typedef void (*smp_call_func_t)(void *info);
typedef bool (*smp_cond_func_t)(int cpu, void *info);

enum {
	CSD_FLAG_LOCK		= 0x01,

	/* IRQ_WORK_flags */

	CSD_TYPE_ASYNC		= 0x00,
	CSD_TYPE_SYNC		= 0x10,
	CSD_TYPE_IRQ_WORK	= 0x20,
	CSD_TYPE_TTWU		= 0x30,
	CSD_FLAG_TYPE_MASK	= 0xF0,
};

/*
 * structure shares (partial) layout with struct irq_work
 */
struct __call_single_data {
	union {
		struct __call_single_node node;
		struct {
			struct llist_node llist;
			unsigned int flags;
		};
	};
	smp_call_func_t func;
	void *info;
};
+66 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_SMP_TYPES_H
#define __LINUX_SMP_TYPES_H

#include <linux/llist.h>

enum {
	CSD_FLAG_LOCK		= 0x01,

	IRQ_WORK_PENDING	= 0x01,
	IRQ_WORK_BUSY		= 0x02,
	IRQ_WORK_LAZY		= 0x04, /* No IPI, wait for tick */
	IRQ_WORK_HARD_IRQ	= 0x08, /* IRQ context on PREEMPT_RT */

	IRQ_WORK_CLAIMED	= (IRQ_WORK_PENDING | IRQ_WORK_BUSY),

	CSD_TYPE_ASYNC		= 0x00,
	CSD_TYPE_SYNC		= 0x10,
	CSD_TYPE_IRQ_WORK	= 0x20,
	CSD_TYPE_TTWU		= 0x30,

	CSD_FLAG_TYPE_MASK	= 0xF0,
};

/*
 * struct __call_single_node is the primary type on
 * smp.c:call_single_queue.
 *
 * flush_smp_call_function_queue() only reads the type from
 * __call_single_node::u_flags as a regular load, the above
 * (anonymous) enum defines all the bits of this word.
 *
 * Other bits are not modified until the type is known.
 *
 * CSD_TYPE_SYNC/ASYNC:
 *	struct {
 *		struct llist_node node;
 *		unsigned int flags;
 *		smp_call_func_t func;
 *		void *info;
 *	};
 *
 * CSD_TYPE_IRQ_WORK:
 *	struct {
 *		struct llist_node node;
 *		atomic_t flags;
 *		void (*func)(struct irq_work *);
 *	};
 *
 * CSD_TYPE_TTWU:
 *	struct {
 *		struct llist_node node;
 *		unsigned int flags;
 *	};
 *
 */

struct __call_single_node {
	struct llist_node	llist;
	union {
		unsigned int	u_flags;
		atomic_t	a_flags;
	};
};

#endif /* __LINUX_SMP_TYPES_H */
+34 −10
Original line number Diff line number Diff line
@@ -1637,7 +1637,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
		goto out;
	}

	if (cpumask_equal(p->cpus_ptr, new_mask))
	if (cpumask_equal(&p->cpus_mask, new_mask))
		goto out;

	/*
@@ -2293,8 +2293,15 @@ void sched_ttwu_pending(void *arg)
	rq_lock_irqsave(rq, &rf);
	update_rq_clock(rq);

	llist_for_each_entry_safe(p, t, llist, wake_entry)
	llist_for_each_entry_safe(p, t, llist, wake_entry.llist) {
		if (WARN_ON_ONCE(p->on_cpu))
			smp_cond_load_acquire(&p->on_cpu, !VAL);

		if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq)))
			set_task_cpu(p, cpu_of(rq));

		ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
	}

	rq_unlock_irqrestore(rq, &rf);
}
@@ -2322,7 +2329,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
	p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);

	WRITE_ONCE(rq->ttwu_pending, 1);
	__smp_call_single_queue(cpu, &p->wake_entry);
	__smp_call_single_queue(cpu, &p->wake_entry.llist);
}

void wake_up_if_idle(int cpu)
@@ -2369,7 +2376,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
	 * the soon-to-be-idle CPU as the current CPU is likely busy.
	 * nr_running is checked to avoid unnecessary task stacking.
	 */
	if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1)
	if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1)
		return true;

	return false;
@@ -2378,6 +2385,9 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
{
	if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
		if (WARN_ON_ONCE(cpu == smp_processor_id()))
			return false;

		sched_clock_cpu(cpu); /* Sync clocks across CPUs */
		__ttwu_queue_wakelist(p, cpu, wake_flags);
		return true;
@@ -2528,7 +2538,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
			goto out;

		success = 1;
		cpu = task_cpu(p);
		trace_sched_waking(p);
		p->state = TASK_RUNNING;
		trace_sched_wakeup(p);
@@ -2550,7 +2559,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)

	/* We're going to change ->state: */
	success = 1;
	cpu = task_cpu(p);

	/*
	 * Ensure we load p->on_rq _after_ p->state, otherwise it would
@@ -2614,8 +2622,21 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
	 * which potentially sends an IPI instead of spinning on p->on_cpu to
	 * let the waker make forward progress. This is safe because IRQs are
	 * disabled and the IPI will deliver after on_cpu is cleared.
	 *
	 * Ensure we load task_cpu(p) after p->on_cpu:
	 *
	 * set_task_cpu(p, cpu);
	 *   STORE p->cpu = @cpu
	 * __schedule() (switch to task 'p')
	 *   LOCK rq->lock
	 *   smp_mb__after_spin_lock()		smp_cond_load_acquire(&p->on_cpu)
	 *   STORE p->on_cpu = 1		LOAD p->cpu
	 *
	 * to ensure we observe the correct CPU on which the task is currently
	 * scheduling.
	 */
	if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ))
	if (smp_load_acquire(&p->on_cpu) &&
	    ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
		goto unlock;

	/*
@@ -2635,6 +2656,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
		psi_ttwu_dequeue(p);
		set_task_cpu(p, cpu);
	}
#else
	cpu = task_cpu(p);
#endif /* CONFIG_SMP */

	ttwu_queue(p, cpu, wake_flags);
@@ -2642,7 +2665,7 @@ unlock:
	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
out:
	if (success)
		ttwu_stat(p, cpu, wake_flags);
		ttwu_stat(p, task_cpu(p), wake_flags);
	preempt_enable();

	return success;
@@ -2763,7 +2786,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
#endif
	init_numa_balancing(clone_flags, p);
#ifdef CONFIG_SMP
	p->wake_entry_type = CSD_TYPE_TTWU;
	p->wake_entry.u_flags = CSD_TYPE_TTWU;
#endif
}

@@ -4533,7 +4556,8 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
	 */
	if (dl_prio(prio)) {
		if (!dl_prio(p->normal_prio) ||
		    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
		    (pi_task && dl_prio(pi_task->prio) &&
		     dl_entity_preempt(&pi_task->dl, &p->dl))) {
			p->dl.dl_boosted = 1;
			queue_flag |= ENQUEUE_REPLENISH;
		} else
Loading