Commit 4b44a21d authored by Peter Zijlstra, committed by Ingo Molnar

irq_work, smp: Allow irq_work on call_single_queue



Currently irq_work_queue_on() will issue an unconditional
arch_send_call_function_single_ipi() and has the handler do
irq_work_run().

This is unfortunate in that it makes the IPI handler look at a second
cacheline and it misses the opportunity to avoid the IPI. Instead note
that struct irq_work and struct __call_single_data are very similar in
layout, so use a few bits in the flags word to encode a type and stick
the irq_work on the call_single_queue list.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200526161908.011635912@infradead.org
parent b2a02fc4
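
The trick in the diffs below depends on the two structures keeping their shared fields at identical offsets, with the type tag tucked into flag bits neither user needs. A minimal userspace sketch of the idea (all names here are illustrative stand-ins, not the kernel's):

	#include <assert.h>
	#include <stddef.h>
	#include <stdio.h>

	/* Hypothetical stand-ins for struct __call_single_data / struct irq_work. */
	struct fake_csd {
		void		*llist;		/* list linkage; first member in both */
		unsigned int	flags;		/* low bits: state, high nibble: type */
		void		(*func)(void *info);
		void		*info;
	};

	struct fake_irq_work {
		void		*llnode;
		unsigned int	flags;		/* shares the same flag space */
		void		(*func)(struct fake_irq_work *work);
	};

	#define TYPE_IRQ_WORK	0x20
	#define TYPE_MASK	0xF0

	/* The overlay is only sound because the shared fields line up. */
	static_assert(offsetof(struct fake_csd, flags) ==
		      offsetof(struct fake_irq_work, flags), "flags offset");
	static_assert(offsetof(struct fake_csd, func) ==
		      offsetof(struct fake_irq_work, func), "func offset");

	static void dispatch(struct fake_csd *csd)
	{
		if ((csd->flags & TYPE_MASK) == TYPE_IRQ_WORK) {
			/* Matching offsets let us reinterpret the entry. */
			struct fake_irq_work *work = (void *)csd;
			work->func(work);
		} else {
			csd->func(csd->info);
		}
	}

	static void say(struct fake_irq_work *work)
	{
		(void)work;
		puts("irq_work path");
	}

	int main(void)
	{
		struct fake_irq_work w = { .flags = TYPE_IRQ_WORK, .func = say };

		dispatch((struct fake_csd *)&w);	/* prints "irq_work path" */
		return 0;
	}

This mirrors what the patch does: flush_smp_call_function_queue() reads the type out of the shared flags word and, for CSD_TYPE_IRQ_WORK entries, hands the same pointer to irq_work_single().
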
include/linux/irq_work.h  +6 −1
@@ -13,6 +13,8 @@
  * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
  */
 
+/* flags share CSD_FLAG_ space */
+
 #define IRQ_WORK_PENDING	BIT(0)
 #define IRQ_WORK_BUSY		BIT(1)
 
@@ -23,9 +25,12 @@
 
 #define IRQ_WORK_CLAIMED	(IRQ_WORK_PENDING | IRQ_WORK_BUSY)
 
+/*
+ * structure shares layout with single_call_data_t.
+ */
 struct irq_work {
-	atomic_t flags;
 	struct llist_node llnode;
+	atomic_t flags;
 	void (*func)(struct irq_work *);
 };
 
include/linux/smp.h  +22 −1
@@ -16,17 +16,38 @@
 
 typedef void (*smp_call_func_t)(void *info);
 typedef bool (*smp_cond_func_t)(int cpu, void *info);
 
+enum {
+	CSD_FLAG_LOCK		= 0x01,
+
+	/* IRQ_WORK_flags */
+
+	CSD_TYPE_ASYNC		= 0x00,
+	CSD_TYPE_SYNC		= 0x10,
+	CSD_TYPE_IRQ_WORK	= 0x20,
+	CSD_FLAG_TYPE_MASK	= 0xF0,
+};
+
+/*
+ * structure shares (partial) layout with struct irq_work
+ */
 struct __call_single_data {
 	struct llist_node llist;
+	unsigned int flags;
 	smp_call_func_t func;
 	void *info;
-	unsigned int flags;
 };
 
 /* Use __aligned() to avoid to use 2 cache lines for 1 csd */
 typedef struct __call_single_data call_single_data_t
 	__aligned(sizeof(struct __call_single_data));
 
+/*
+ * Enqueue a llist_node on the call_single_queue; be very careful, read
+ * flush_smp_call_function_queue() in detail.
+ */
+extern void __smp_call_single_queue(int cpu, struct llist_node *node);
+
 /* total number of cpus in this system (may exceed NR_CPUS) */
 extern unsigned int total_cpus;
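
Note that the type values start at 0x10 precisely so they stay clear of the state bits both users already keep in the low bits: CSD_FLAG_LOCK is bit 0, and IRQ_WORK_PENDING/IRQ_WORK_BUSY are bits 0 and 1. A standalone compile-time check of that split, as a sketch using the values from the hunks above:

	#include <assert.h>

	#define BIT(n)			(1U << (n))
	#define IRQ_WORK_PENDING	BIT(0)
	#define IRQ_WORK_BUSY		BIT(1)
	#define CSD_FLAG_LOCK		0x01
	#define CSD_FLAG_TYPE_MASK	0xF0

	/* The low-bit state flags must never leak into the type nibble. */
	static_assert(((IRQ_WORK_PENDING | IRQ_WORK_BUSY | CSD_FLAG_LOCK) &
		       CSD_FLAG_TYPE_MASK) == 0, "flag spaces overlap");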

kernel/irq_work.c  +29 −24
@@ -31,7 +31,7 @@ static bool irq_work_claim(struct irq_work *work)
 {
 	int oflags;
 
-	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags);
+	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags);
 	/*
 	 * If the work is already pending, no need to raise the IPI.
 	 * The pairing atomic_fetch_andnot() in irq_work_run() makes sure
@@ -102,8 +102,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
 	if (cpu != smp_processor_id()) {
 		/* Arch remote IPI send/receive backend aren't NMI safe */
 		WARN_ON_ONCE(in_nmi());
-		if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
-			arch_send_call_function_single_ipi(cpu);
+		__smp_call_single_queue(cpu, &work->llnode);
 	} else {
 		__irq_work_queue_local(work);
 	}
@@ -131,19 +130,11 @@ bool irq_work_needs_cpu(void)
 	return true;
 }
 
-static void irq_work_run_list(struct llist_head *list)
+void irq_work_single(void *arg)
 {
-	struct irq_work *work, *tmp;
-	struct llist_node *llnode;
-
-	BUG_ON(!irqs_disabled());
-
-	if (llist_empty(list))
-		return;
-
-	llnode = llist_del_all(list);
-	llist_for_each_entry_safe(work, tmp, llnode, llnode) {
+	struct irq_work *work = arg;
+	int flags;
 
 	/*
 	 * Clear the PENDING bit, after this point the @work
 	 * can be re-used.
@@ -163,6 +154,20 @@ static void irq_work_run_list(struct llist_head *list)
 	flags &= ~IRQ_WORK_PENDING;
 	(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
 }
 
+static void irq_work_run_list(struct llist_head *list)
+{
+	struct irq_work *work, *tmp;
+	struct llist_node *llnode;
+
+	BUG_ON(!irqs_disabled());
+
+	if (llist_empty(list))
+		return;
+
+	llnode = llist_del_all(list);
+	llist_for_each_entry_safe(work, tmp, llnode, llnode)
+		irq_work_single(work);
+}
+
 /*
kernel/smp.c  +73 −46
@@ -23,10 +23,8 @@
 
 #include "smpboot.h"
 
-enum {
-	CSD_FLAG_LOCK		= 0x01,
-	CSD_FLAG_SYNCHRONOUS	= 0x02,
-};
+#define CSD_TYPE(_csd)	((_csd)->flags & CSD_FLAG_TYPE_MASK)
 
 struct call_function_data {
 	call_single_data_t	__percpu *csd;
@@ -137,15 +135,33 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 
 extern void send_call_function_single_ipi(int cpu);
 
+void __smp_call_single_queue(int cpu, struct llist_node *node)
+{
+	/*
+	 * The list addition should be visible before sending the IPI
+	 * handler locks the list to pull the entry off it because of
+	 * normal cache coherency rules implied by spinlocks.
+	 *
+	 * If IPIs can go out of order to the cache coherency protocol
+	 * in an architecture, sufficient synchronisation should be added
+	 * to arch code to make it appear to obey cache coherency WRT
+	 * locking and barrier primitives. Generic code isn't really
+	 * equipped to do the right thing...
+	 */
+	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+		send_call_function_single_ipi(cpu);
+}
+
 /*
  * Insert a previously allocated call_single_data_t element
  * for execution on the given CPU. data must already have
  * ->func, ->info, and ->flags set.
  */
-static int generic_exec_single(int cpu, call_single_data_t *csd,
-			       smp_call_func_t func, void *info)
+static int generic_exec_single(int cpu, call_single_data_t *csd)
 {
 	if (cpu == smp_processor_id()) {
+		smp_call_func_t func = csd->func;
+		void *info = csd->info;
 		unsigned long flags;
 
 		/*
@@ -159,28 +175,12 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
 		return 0;
 	}
 
-
 	if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 		csd_unlock(csd);
 		return -ENXIO;
 	}
 
-	csd->func = func;
-	csd->info = info;
-
-	/*
-	 * The list addition should be visible before sending the IPI
-	 * handler locks the list to pull the entry off it because of
-	 * normal cache coherency rules implied by spinlocks.
-	 *
-	 * If IPIs can go out of order to the cache coherency protocol
-	 * in an architecture, sufficient synchronisation should be added
-	 * to arch code to make it appear to obey cache coherency WRT
-	 * locking and barrier primitives. Generic code isn't really
-	 * equipped to do the right thing...
-	 */
-	if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
-		send_call_function_single_ipi(cpu);
+	__smp_call_single_queue(cpu, &csd->llist);
 
 	return 0;
 }
@@ -194,16 +194,10 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
 void generic_smp_call_function_single_interrupt(void)
 {
 	flush_smp_call_function_queue(true);
-
-	/*
-	 * Handle irq works queued remotely by irq_work_queue_on().
-	 * Smp functions above are typically synchronous so they
-	 * better run first since some other CPUs may be busy waiting
-	 * for them.
-	 */
-	irq_work_run();
 }
 
+extern void irq_work_single(void *);
+
 /**
  * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
 *
@@ -241,9 +235,21 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 		 * We don't have to use the _safe() variant here
 		 * because we are not invoking the IPI handlers yet.
 		 */
-		llist_for_each_entry(csd, entry, llist)
-			pr_warn("IPI callback %pS sent to offline CPU\n",
-				csd->func);
+		llist_for_each_entry(csd, entry, llist) {
+			switch (CSD_TYPE(csd)) {
+			case CSD_TYPE_ASYNC:
+			case CSD_TYPE_SYNC:
+			case CSD_TYPE_IRQ_WORK:
+				pr_warn("IPI callback %pS sent to offline CPU\n",
+					csd->func);
+				break;
+
+			default:
+				pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
+					CSD_TYPE(csd));
+				break;
+			}
+		}
 	}
 
 	/*
@@ -251,16 +257,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 	 */
 	prev = NULL;
 	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-		smp_call_func_t func = csd->func;
-		void *info = csd->info;
-
 		/* Do we wait until *after* callback? */
-		if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
+		if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
+			smp_call_func_t func = csd->func;
+			void *info = csd->info;
+
 			if (prev) {
 				prev->next = &csd_next->llist;
 			} else {
 				entry = &csd_next->llist;
 			}
 
 			func(info);
 			csd_unlock(csd);
 		} else {
@@ -272,11 +279,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 	 * Second; run all !SYNC callbacks.
 	 */
 	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-		smp_call_func_t func = csd->func;
-		void *info = csd->info;
+		int type = CSD_TYPE(csd);
 
-		csd_unlock(csd);
-		func(info);
+		if (type == CSD_TYPE_ASYNC) {
+			smp_call_func_t func = csd->func;
+			void *info = csd->info;
+
+			csd_unlock(csd);
+			func(info);
+		} else if (type == CSD_TYPE_IRQ_WORK) {
+			irq_work_single(csd);
+		}
 	}
 }

@@ -305,7 +318,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 {
 	call_single_data_t *csd;
 	call_single_data_t csd_stack = {
-		.flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS,
+		.flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC,
 	};
 	int this_cpu;
 	int err;
@@ -339,7 +352,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 		csd_lock(csd);
 	}
 
-	err = generic_exec_single(cpu, csd, func, info);
+	csd->func = func;
+	csd->info = info;
+
+	err = generic_exec_single(cpu, csd);
 
 	if (wait)
 		csd_lock_wait(csd);
@@ -385,7 +401,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
 	csd->flags = CSD_FLAG_LOCK;
 	smp_wmb();
 
-	err = generic_exec_single(cpu, csd, csd->func, csd->info);
+	err = generic_exec_single(cpu, csd);
 
 out:
 	preempt_enable();
@@ -500,7 +516,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 
 		csd_lock(csd);
 		if (wait)
-			csd->flags |= CSD_FLAG_SYNCHRONOUS;
+			csd->flags |= CSD_TYPE_SYNC;
 		csd->func = func;
 		csd->info = info;
 		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
@@ -632,6 +648,17 @@ void __init smp_init(void)
 {
 	int num_nodes, num_cpus;
 
+	/*
+	 * Ensure struct irq_work layout matches so that
+	 * flush_smp_call_function_queue() can do horrible things.
+	 */
+	BUILD_BUG_ON(offsetof(struct irq_work, llnode) !=
+		     offsetof(struct __call_single_data, llist));
+	BUILD_BUG_ON(offsetof(struct irq_work, func) !=
+		     offsetof(struct __call_single_data, func));
+	BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
+		     offsetof(struct __call_single_data, flags));
+
 	idle_threads_init();
 	cpuhp_threads_init();
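
Taken together, the flush path now runs in two passes: SYNC entries first, because some other CPU is spin-waiting on each of them, then ASYNC and IRQ_WORK entries, which nobody waits on. A simplified, self-contained sketch of that ordering (hypothetical types and helpers, not the kernel's):

	#include <stdio.h>

	enum {
		TYPE_ASYNC	= 0x00,
		TYPE_SYNC	= 0x10,
		TYPE_IRQ_WORK	= 0x20,
		TYPE_MASK	= 0xF0,
	};

	struct entry {
		struct entry	*next;
		unsigned int	flags;
		const char	*name;
	};

	/*
	 * Two passes, mirroring flush_smp_call_function_queue(): SYNC
	 * entries run first since their senders are busy-waiting;
	 * everything nobody waits on runs in the second pass.
	 */
	static void flush_sketch(struct entry *head)
	{
		struct entry *e;

		for (e = head; e; e = e->next)
			if ((e->flags & TYPE_MASK) == TYPE_SYNC)
				printf("pass 1 (sync):  %s\n", e->name);

		for (e = head; e; e = e->next)
			if ((e->flags & TYPE_MASK) != TYPE_SYNC)
				printf("pass 2 (other): %s\n", e->name);
	}

	int main(void)
	{
		struct entry c = { NULL, TYPE_IRQ_WORK, "irq_work" };
		struct entry b = { &c, TYPE_SYNC, "sync csd" };
		struct entry a = { &b, TYPE_ASYNC, "async csd" };

		flush_sketch(&a);	/* "sync csd" runs before the other two */
		return 0;
	}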