Commit a1488664, authored by Peter Zijlstra, committed by Ingo Molnar
Browse files

sched: Replace rq::wake_list



The recent commit: 90b5363a ("sched: Clean up scheduler_ipi()")
got smp_call_function_single_async() subtly wrong. Even though it will
return -EBUSY when trying to re-use a csd, that condition is not
atomic and still requires external serialization.

The change in ttwu_queue_remote() got this wrong.

While on first reading ttwu_queue_remote() has an atomic test-and-set
that appears to serialize the use, the matching 'release' is not in
the right place to actually guarantee this serialization.

The actual race is vs the sched_ttwu_pending() call in the idle loop;
that can run the wakeup-list without consuming the CSD.

Instead of trying to chain the lists, merge them.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200526161908.129371594@infradead.org
parent 126c2092
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -654,6 +654,7 @@ struct task_struct {

#ifdef CONFIG_SMP
	struct llist_node		wake_entry;
	unsigned int			wake_entry_type;
	int				on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/* Current CPU: */
+1 −0
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@ enum {
	CSD_TYPE_ASYNC		= 0x00,
	CSD_TYPE_SYNC		= 0x10,
	CSD_TYPE_IRQ_WORK	= 0x20,
	CSD_TYPE_TTWU		= 0x30,
	CSD_FLAG_TYPE_MASK	= 0xF0,
};

+7 −18
Original line number Diff line number Diff line
@@ -1538,7 +1538,7 @@ static int migration_cpu_stop(void *data)
	 * __migrate_task() such that we will not miss enforcing cpus_ptr
	 * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
	 */
	sched_ttwu_pending();
	flush_smp_call_function_from_idle();

	raw_spin_lock(&p->pi_lock);
	rq_lock(rq, &rf);
@@ -2272,14 +2272,13 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
}

#ifdef CONFIG_SMP
void sched_ttwu_pending(void)
void sched_ttwu_pending(void *arg)
{
	struct llist_node *llist = arg;
	struct rq *rq = this_rq();
	struct llist_node *llist;
	struct task_struct *p, *t;
	struct rq_flags rf;

	llist = llist_del_all(&rq->wake_list);
	if (!llist)
		return;

@@ -2299,11 +2298,6 @@ void sched_ttwu_pending(void)
	rq_unlock_irqrestore(rq, &rf);
}

static void wake_csd_func(void *info)
{
	sched_ttwu_pending();
}

void send_call_function_single_ipi(int cpu)
{
	struct rq *rq = cpu_rq(cpu);
@@ -2327,12 +2321,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
	p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);

	WRITE_ONCE(rq->ttwu_pending, 1);
	if (llist_add(&p->wake_entry, &rq->wake_list)) {
		if (!set_nr_if_polling(rq->idle))
			smp_call_function_single_async(cpu, &rq->wake_csd);
		else
			trace_sched_wake_idle_without_ipi(cpu);
	}
	__smp_call_single_queue(cpu, &p->wake_entry);
}

void wake_up_if_idle(int cpu)
@@ -2772,6 +2761,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
	p->capture_control = NULL;
#endif
	init_numa_balancing(clone_flags, p);
#ifdef CONFIG_SMP
	p->wake_entry_type = CSD_TYPE_TTWU;
#endif
}

DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -6564,7 +6556,6 @@ int sched_cpu_dying(unsigned int cpu)
	struct rq_flags rf;

	/* Handle pending wakeups and then migrate everything off */
	sched_ttwu_pending();
	sched_tick_stop(cpu);

	rq_lock_irqsave(rq, &rf);
@@ -6763,8 +6754,6 @@ void __init sched_init(void)
		rq->avg_idle = 2*sysctl_sched_migration_cost;
		rq->max_idle_balance_cost = sysctl_sched_migration_cost;

		rq_csd_init(rq, &rq->wake_csd, wake_csd_func);

		INIT_LIST_HEAD(&rq->cfs_tasks);

		rq_attach_root(rq, &def_root_domain);
+0 −1
Original line number Diff line number Diff line
@@ -294,7 +294,6 @@ static void do_idle(void)
	 * critical section.
	 */
	flush_smp_call_function_from_idle();
	sched_ttwu_pending();
	schedule_idle();

	if (unlikely(klp_patch_pending(current)))
+0 −8
Original line number Diff line number Diff line
@@ -1023,11 +1023,6 @@ struct rq {
	unsigned int		ttwu_local;
#endif

#ifdef CONFIG_SMP
	call_single_data_t	wake_csd;
	struct llist_head	wake_list;
#endif

#ifdef CONFIG_CPU_IDLE
	/* Must be inspected within a rcu lock section */
	struct cpuidle_state	*idle_state;
@@ -1371,8 +1366,6 @@ queue_balance_callback(struct rq *rq,
	rq->balance_callback = head;
}

extern void sched_ttwu_pending(void);

#define rcu_dereference_check_sched_domain(p) \
	rcu_dereference_check((p), \
			      lockdep_is_held(&sched_domains_mutex))
@@ -1512,7 +1505,6 @@ extern void flush_smp_call_function_from_idle(void);

#else /* !CONFIG_SMP: */
static inline void flush_smp_call_function_from_idle(void) { }
static inline void sched_ttwu_pending(void) { }
#endif

#include "stats.h"
Loading