Commit 4b44a21d authored by Peter Zijlstra, committed by Ingo Molnar

irq_work, smp: Allow irq_work on call_single_queue



Currently irq_work_queue_on() will issue an unconditional
arch_send_call_function_single_ipi() and has the handler do
irq_work_run().

This is unfortunate in that it makes the IPI handler look at a second
cacheline and it misses the opportunity to avoid the IPI. Instead note
that struct irq_work and struct __call_single_data are very similar in
layout, so use a few bits in the flags word to encode a type and stick
the irq_work on the call_single_queue list.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200526161908.011635912@infradead.org
parent b2a02fc4
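
The trick in the diffs below depends on the two structures keeping their shared fields at identical offsets, with the type tag tucked into flag bits neither user needs. A minimal userspace sketch of the idea (all names here are illustrative stand-ins, not the kernel's):

	#include <assert.h>
	#include <stddef.h>
	#include <stdio.h>

	/* Hypothetical stand-ins for struct __call_single_data / struct irq_work. */
	struct fake_csd {
		void		*llist;		/* list linkage; first member in both */
		unsigned int	flags;		/* low bits: state, high nibble: type */
		void		(*func)(void *info);
		void		*info;
	};

	struct fake_irq_work {
		void		*llnode;
		unsigned int	flags;		/* shares the same flag space */
		void		(*func)(struct fake_irq_work *work);
	};

	#define TYPE_IRQ_WORK	0x20
	#define TYPE_MASK	0xF0

	/* The overlay is only sound because the shared fields line up. */
	static_assert(offsetof(struct fake_csd, flags) ==
		      offsetof(struct fake_irq_work, flags), "flags offset");
	static_assert(offsetof(struct fake_csd, func) ==
		      offsetof(struct fake_irq_work, func), "func offset");

	static void dispatch(struct fake_csd *csd)
	{
		if ((csd->flags & TYPE_MASK) == TYPE_IRQ_WORK) {
			/* Matching offsets let us reinterpret the entry. */
			struct fake_irq_work *work = (void *)csd;
			work->func(work);
		} else {
			csd->func(csd->info);
		}
	}

	static void say(struct fake_irq_work *work)
	{
		(void)work;
		puts("irq_work path");
	}

	int main(void)
	{
		struct fake_irq_work w = { .flags = TYPE_IRQ_WORK, .func = say };

		dispatch((struct fake_csd *)&w);	/* prints "irq_work path" */
		return 0;
	}

This mirrors what the patch does: flush_smp_call_function_queue() reads the type out of the shared flags word and, for CSD_TYPE_IRQ_WORK entries, hands the same pointer to irq_work_single().
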
include/linux/irq_work.h  +6 −1
@@ -13,6 +13,8 @@
  * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
  */
 
+/* flags share CSD_FLAG_ space */
+
 #define IRQ_WORK_PENDING	BIT(0)
 #define IRQ_WORK_BUSY		BIT(1)
 
@@ -23,9 +25,12 @@
 
 #define IRQ_WORK_CLAIMED	(IRQ_WORK_PENDING | IRQ_WORK_BUSY)
 
+/*
+ * structure shares layout with single_call_data_t.
+ */
 struct irq_work {
-	atomic_t flags;
 	struct llist_node llnode;
+	atomic_t flags;
 	void (*func)(struct irq_work *);
 };
 
include/linux/smp.h  +22 −1
@@ -16,17 +16,38 @@
 
 typedef void (*smp_call_func_t)(void *info);
 typedef bool (*smp_cond_func_t)(int cpu, void *info);
 
+enum {
+	CSD_FLAG_LOCK		= 0x01,
+
+	/* IRQ_WORK_flags */
+
+	CSD_TYPE_ASYNC		= 0x00,
+	CSD_TYPE_SYNC		= 0x10,
+	CSD_TYPE_IRQ_WORK	= 0x20,
+	CSD_FLAG_TYPE_MASK	= 0xF0,
+};
+
+/*
+ * structure shares (partial) layout with struct irq_work
+ */
 struct __call_single_data {
 	struct llist_node llist;
+	unsigned int flags;
 	smp_call_func_t func;
 	void *info;
-	unsigned int flags;
 };
 
 /* Use __aligned() to avoid to use 2 cache lines for 1 csd */
 typedef struct __call_single_data call_single_data_t
 	__aligned(sizeof(struct __call_single_data));
 
+/*
+ * Enqueue a llist_node on the call_single_queue; be very careful, read
+ * flush_smp_call_function_queue() in detail.
+ */
+extern void __smp_call_single_queue(int cpu, struct llist_node *node);
+
 /* total number of cpus in this system (may exceed NR_CPUS) */
 extern unsigned int total_cpus;
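
Note that the type values start at 0x10 precisely so they stay clear of the state bits both users already keep in the low bits: CSD_FLAG_LOCK is bit 0, and IRQ_WORK_PENDING/IRQ_WORK_BUSY are bits 0 and 1. A standalone compile-time check of that split, as a sketch using the values from the hunks above:

	#include <assert.h>

	#define BIT(n)			(1U << (n))
	#define IRQ_WORK_PENDING	BIT(0)
	#define IRQ_WORK_BUSY		BIT(1)
	#define CSD_FLAG_LOCK		0x01
	#define CSD_FLAG_TYPE_MASK	0xF0

	/* The low-bit state flags must never leak into the type nibble. */
	static_assert(((IRQ_WORK_PENDING | IRQ_WORK_BUSY | CSD_FLAG_LOCK) &
		       CSD_FLAG_TYPE_MASK) == 0, "flag spaces overlap");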

kernel/irq_work.c  +29 −24
@@ -31,7 +31,7 @@ static bool irq_work_claim(struct irq_work *work)
 {
 	int oflags;
 
-	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags);
+	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags);
 	/*
 	 * If the work is already pending, no need to raise the IPI.
 	 * The pairing atomic_fetch_andnot() in irq_work_run() makes sure
@@ -102,8 +102,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
 	if (cpu != smp_processor_id()) {
 		/* Arch remote IPI send/receive backend aren't NMI safe */
 		WARN_ON_ONCE(in_nmi());
-		if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
-			arch_send_call_function_single_ipi(cpu);
+		__smp_call_single_queue(cpu, &work->llnode);
 	} else {
 		__irq_work_queue_local(work);
 	}
@@ -131,19 +130,11 @@ bool irq_work_needs_cpu(void)
 	return true;
 }
 
-static void irq_work_run_list(struct llist_head *list)
+void irq_work_single(void *arg)
 {
-	struct irq_work *work, *tmp;
-	struct llist_node *llnode;
-
-	BUG_ON(!irqs_disabled());
-
-	if (llist_empty(list))
-		return;
-
-	llnode = llist_del_all(list);
-	llist_for_each_entry_safe(work, tmp, llnode, llnode) {
+	struct irq_work *work = arg;
+	int flags;
 
 	/*
 	 * Clear the PENDING bit, after this point the @work
 	 * can be re-used.
@@ -163,6 +154,20 @@ static void irq_work_run_list(struct llist_head *list)
 	flags &= ~IRQ_WORK_PENDING;
 	(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
 }
 
+static void irq_work_run_list(struct llist_head *list)
+{
+	struct irq_work *work, *tmp;
+	struct llist_node *llnode;
+
+	BUG_ON(!irqs_disabled());
+
+	if (llist_empty(list))
+		return;
+
+	llnode = llist_del_all(list);
+	llist_for_each_entry_safe(work, tmp, llnode, llnode)
+		irq_work_single(work);
+}
+
 /*
kernel/smp.c  +73 −46
@@ -23,10 +23,8 @@
 
 #include "smpboot.h"
 
-enum {
-	CSD_FLAG_LOCK		= 0x01,
-	CSD_FLAG_SYNCHRONOUS	= 0x02,
-};
+#define CSD_TYPE(_csd)	((_csd)->flags & CSD_FLAG_TYPE_MASK)
 
 struct call_function_data {
 	call_single_data_t	__percpu *csd;
@@ -137,15 +135,33 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 
 extern void send_call_function_single_ipi(int cpu);
 
+void __smp_call_single_queue(int cpu, struct llist_node *node)
+{
+	/*
+	 * The list addition should be visible before sending the IPI
+	 * handler locks the list to pull the entry off it because of
+	 * normal cache coherency rules implied by spinlocks.
+	 *
+	 * If IPIs can go out of order to the cache coherency protocol
+	 * in an architecture, sufficient synchronisation should be added
+	 * to arch code to make it appear to obey cache coherency WRT
+	 * locking and barrier primitives. Generic code isn't really
+	 * equipped to do the right thing...
+	 */
+	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+		send_call_function_single_ipi(cpu);
+}
+
 /*
  * Insert a previously allocated call_single_data_t element
  * for execution on the given CPU. data must already have
  * ->func, ->info, and ->flags set.
  */
-static int generic_exec_single(int cpu, call_single_data_t *csd,
-			       smp_call_func_t func, void *info)
+static int generic_exec_single(int cpu, call_single_data_t *csd)
 {
 	if (cpu == smp_processor_id()) {
+		smp_call_func_t func = csd->func;
+		void *info = csd->info;
 		unsigned long flags;
 
 		/*
@@ -159,28 +175,12 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
 		return 0;
 	}
 
-
 	if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 		csd_unlock(csd);
 		return -ENXIO;
 	}
 
-	csd->func = func;
-	csd->info = info;
-
-	/*
-	 * The list addition should be visible before sending the IPI
-	 * handler locks the list to pull the entry off it because of
-	 * normal cache coherency rules implied by spinlocks.
-	 *
-	 * If IPIs can go out of order to the cache coherency protocol
-	 * in an architecture, sufficient synchronisation should be added
-	 * to arch code to make it appear to obey cache coherency WRT
-	 * locking and barrier primitives. Generic code isn't really
-	 * equipped to do the right thing...
-	 */
-	if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
-		send_call_function_single_ipi(cpu);
+	__smp_call_single_queue(cpu, &csd->llist);
 
 	return 0;
 }
@@ -194,16 +194,10 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
 void generic_smp_call_function_single_interrupt(void)
 {
 	flush_smp_call_function_queue(true);
-
-	/*
-	 * Handle irq works queued remotely by irq_work_queue_on().
-	 * Smp functions above are typically synchronous so they
-	 * better run first since some other CPUs may be busy waiting
-	 * for them.
-	 */
-	irq_work_run();
 }
 
+extern void irq_work_single(void *);
+
 /**
  * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
 *
@@ -241,9 +235,21 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 		 * We don't have to use the _safe() variant here
 		 * because we are not invoking the IPI handlers yet.
 		 */
-		llist_for_each_entry(csd, entry, llist)
-			pr_warn("IPI callback %pS sent to offline CPU\n",
-				csd->func);
+		llist_for_each_entry(csd, entry, llist) {
+			switch (CSD_TYPE(csd)) {
+			case CSD_TYPE_ASYNC:
+			case CSD_TYPE_SYNC:
+			case CSD_TYPE_IRQ_WORK:
+				pr_warn("IPI callback %pS sent to offline CPU\n",
+					csd->func);
+				break;
+
+			default:
+				pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
+					CSD_TYPE(csd));
+				break;
+			}
+		}
 	}
 
 	/*
@@ -251,16 +257,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 	 */
 	prev = NULL;
 	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-		smp_call_func_t func = csd->func;
-		void *info = csd->info;
-
 		/* Do we wait until *after* callback? */
-		if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
+		if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
+			smp_call_func_t func = csd->func;
+			void *info = csd->info;
+
 			if (prev) {
 				prev->next = &csd_next->llist;
 			} else {
 				entry = &csd_next->llist;
 			}
 
 			func(info);
 			csd_unlock(csd);
 		} else {
@@ -272,11 +279,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 	 * Second; run all !SYNC callbacks.
 	 */
 	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-		smp_call_func_t func = csd->func;
-		void *info = csd->info;
+		int type = CSD_TYPE(csd);
 
-		csd_unlock(csd);
-		func(info);
+		if (type == CSD_TYPE_ASYNC) {
+			smp_call_func_t func = csd->func;
+			void *info = csd->info;
+
+			csd_unlock(csd);
+			func(info);
+		} else if (type == CSD_TYPE_IRQ_WORK) {
+			irq_work_single(csd);
+		}
 	}
 }

@@ -305,7 +318,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 {
 	call_single_data_t *csd;
 	call_single_data_t csd_stack = {
-		.flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS,
+		.flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC,
 	};
 	int this_cpu;
 	int err;
@@ -339,7 +352,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 		csd_lock(csd);
 	}
 
-	err = generic_exec_single(cpu, csd, func, info);
+	csd->func = func;
+	csd->info = info;
+
+	err = generic_exec_single(cpu, csd);
 
 	if (wait)
 		csd_lock_wait(csd);
@@ -385,7 +401,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
 	csd->flags = CSD_FLAG_LOCK;
 	smp_wmb();
 
-	err = generic_exec_single(cpu, csd, csd->func, csd->info);
+	err = generic_exec_single(cpu, csd);
 
 out:
 	preempt_enable();
@@ -500,7 +516,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 
 		csd_lock(csd);
 		if (wait)
-			csd->flags |= CSD_FLAG_SYNCHRONOUS;
+			csd->flags |= CSD_TYPE_SYNC;
 		csd->func = func;
 		csd->info = info;
 		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
@@ -632,6 +648,17 @@ void __init smp_init(void)
 {
 	int num_nodes, num_cpus;
 
+	/*
+	 * Ensure struct irq_work layout matches so that
+	 * flush_smp_call_function_queue() can do horrible things.
+	 */
+	BUILD_BUG_ON(offsetof(struct irq_work, llnode) !=
+		     offsetof(struct __call_single_data, llist));
+	BUILD_BUG_ON(offsetof(struct irq_work, func) !=
+		     offsetof(struct __call_single_data, func));
+	BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
+		     offsetof(struct __call_single_data, flags));
+
 	idle_threads_init();
 	cpuhp_threads_init();
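
Taken together, the flush path now runs in two passes: SYNC entries first, because some other CPU is spin-waiting on each of them, then ASYNC and IRQ_WORK entries, which nobody waits on. A simplified, self-contained sketch of that ordering (hypothetical types and helpers, not the kernel's):

	#include <stdio.h>

	enum {
		TYPE_ASYNC	= 0x00,
		TYPE_SYNC	= 0x10,
		TYPE_IRQ_WORK	= 0x20,
		TYPE_MASK	= 0xF0,
	};

	struct entry {
		struct entry	*next;
		unsigned int	flags;
		const char	*name;
	};

	/*
	 * Two passes, mirroring flush_smp_call_function_queue(): SYNC
	 * entries run first since their senders are busy-waiting;
	 * everything nobody waits on runs in the second pass.
	 */
	static void flush_sketch(struct entry *head)
	{
		struct entry *e;

		for (e = head; e; e = e->next)
			if ((e->flags & TYPE_MASK) == TYPE_SYNC)
				printf("pass 1 (sync):  %s\n", e->name);

		for (e = head; e; e = e->next)
			if ((e->flags & TYPE_MASK) != TYPE_SYNC)
				printf("pass 2 (other): %s\n", e->name);
	}

	int main(void)
	{
		struct entry c = { NULL, TYPE_IRQ_WORK, "irq_work" };
		struct entry b = { &c, TYPE_SYNC, "sync csd" };
		struct entry a = { &b, TYPE_ASYNC, "async csd" };

		flush_sketch(&a);	/* "sync csd" runs before the other two */
		return 0;
	}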