Commit 452cddbf authored by Peter Zijlstra, committed by Ingo Molnar
Browse files

static_call: Add static_call_cond()



Extend the static_call infrastructure to optimize the following common
pattern:

	if (func_ptr)
		func_ptr(args...)

For the trampoline (which is in effect a tail-call), we patch the
JMP.d32 into a RET, which then directly consumes the trampoline call.

For the in-line sites we replace the CALL with a NOP5.

NOTE: this is 'obviously' limited to functions with a 'void' return type.

NOTE: DEFINE_STATIC_CALL_NULL() only requires a typename, as opposed
      to a full function.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20200818135805.042977182@infradead.org
parent c43a43e4
Loading
Loading
Loading
Loading
+9 −3
Original line number Diff line number Diff line
@@ -20,15 +20,21 @@
 * it does tail-call optimization on the call; since you cannot compute the
 * relative displacement across sections.
 */
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)			\

/*
 * Emit the trampoline for static call @name as top-level inline asm.
 *
 * The trampoline is placed in the .static_call.text section ("ax"), aligned
 * with ".align 4", exported with .globl under the STATIC_CALL_TRAMP_STR(name)
 * symbol, and given a function .type and a .size covering its body.
 *
 * @name:  static call name; used to build the trampoline symbol.
 * @insns: string literal with the assembler statements forming the body.
 *
 * NOTE(review): the two lines that emit a hard-coded "jmp.d32" to #func look
 * like removed lines of the diff interleaved with the new body -- "func" is
 * not a parameter of this macro, so '#func' cannot be stringified here.
 * Confirm against the tree; only the "insns" line is expected to remain.
 */
#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns)			\
	asm(".pushsection .static_call.text, \"ax\"		\n"	\
	    ".align 4						\n"	\
	    ".globl " STATIC_CALL_TRAMP_STR(name) "		\n"	\
	    STATIC_CALL_TRAMP_STR(name) ":			\n"	\
	    "	.byte 0xe9 # jmp.d32				\n"	\
	    "	.long " #func " - (. + 4)			\n"	\
	    insns "						\n"	\
	    ".type " STATIC_CALL_TRAMP_STR(name) ", @function	\n"	\
	    ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
	    ".popsection					\n")

/*
 * Trampoline for static call @name that tail-calls @func.
 *
 * The body is a hand-encoded jmp.d32: opcode byte 0xe9 followed by a 32-bit
 * displacement. The displacement is computed as "func - (. + 4)", i.e.
 * relative to the address just past the 4-byte .long, which is the end of the
 * 5-byte instruction.
 */
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)			\
	__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")

/*
 * Trampoline for static call @name that has no target: the body is a "ret"
 * followed by four "nop"s, so a call into the trampoline returns immediately.
 *
 * NOTE(review): the four nops presumably pad the body to the same 5 bytes as
 * the jmp.d32 used by ARCH_DEFINE_STATIC_CALL_TRAMP() so it can be patched to
 * a JMP later -- confirm.
 */
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)			\
	__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop")

#endif /* _ASM_STATIC_CALL_H */
+32 −10
Original line number Diff line number Diff line
@@ -4,19 +4,41 @@
#include <linux/bug.h>
#include <asm/text-patching.h>

static void __static_call_transform(void *insn, u8 opcode, void *func)
/*
 * Kind of instruction to be written at a static call site or trampoline.
 * "site" refers to an in-line call site, "tramp" to the trampoline.
 */
enum insn_type {
	/* site call */
	CALL = 0,
	/* site cond-call */
	NOP = 1,
	/* tramp / site tail-call */
	JMP = 2,
	/* tramp / site cond-tail-call */
	RET = 3,
};

/*
 * Rewrite the instruction at @insn into the form selected by @type.
 *
 * @insn: address of the instruction to patch (call site or trampoline).
 * @type: which instruction to write, see enum insn_type.
 * @func: target passed to text_gen_insn() for the generated instruction.
 *
 * The replacement bytes are built per @type, compared against what is
 * currently at @insn, and written with text_poke_bp() only when they differ.
 *
 * NOTE(review): this rendering interleaves removed and added diff lines. The
 * statements that use "opcode" (not a parameter of this signature), the first
 * "code" declaration, and the memcmp()/text_poke_bp() calls that pass
 * CALL_INSN_SIZE appear to be the removed lines -- confirm against the tree.
 */
static void __static_call_transform(void *insn, enum insn_type type, void *func)
{
	const void *code = text_gen_insn(opcode, insn, func);

	if (WARN_ONCE(*(u8 *)insn != opcode,
		      "unexpected static call insn opcode 0x%x at %pS\n",
		      opcode, insn))
		return;
	/* Default patch length; only the RET case overrides it. */
	int size = CALL_INSN_SIZE;
	const void *code;

	switch (type) {
	case CALL:
		/* Call instruction from @insn to @func. */
		code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
		break;

	case NOP:
		/* No target: use the NOP_ATOMIC5 entry of ideal_nops[]. */
		code = ideal_nops[NOP_ATOMIC5];
		break;

	case JMP:
		/* Jump (tail-call) from @insn to @func. */
		code = text_gen_insn(JMP32_INSN_OPCODE, insn, func);
		break;

	case RET:
		/* Return instruction; patched with RET_INSN_SIZE, not CALL_INSN_SIZE. */
		code = text_gen_insn(RET_INSN_OPCODE, insn, func);
		size = RET_INSN_SIZE;
		break;
	}

	if (memcmp(insn, code, CALL_INSN_SIZE) == 0)
	/* Nothing to do if the desired bytes are already in place. */
	if (memcmp(insn, code, size) == 0)
		return;

	text_poke_bp(insn, code, CALL_INSN_SIZE, NULL);
	text_poke_bp(insn, code, size, NULL);
}

void arch_static_call_transform(void *site, void *tramp, void *func)
@@ -24,10 +46,10 @@ void arch_static_call_transform(void *site, void *tramp, void *func)
	mutex_lock(&text_mutex);

	if (tramp)
		__static_call_transform(tramp, JMP32_INSN_OPCODE, func);
		__static_call_transform(tramp, func ? JMP : RET, func);

	if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
		__static_call_transform(site, CALL_INSN_OPCODE, func);
		__static_call_transform(site, func ? CALL : NOP, func);

	mutex_unlock(&text_mutex);
}
+86 −0
Original line number Diff line number Diff line
@@ -16,7 +16,9 @@
 *
 *   DECLARE_STATIC_CALL(name, func);
 *   DEFINE_STATIC_CALL(name, func);
 *   DEFINE_STATIC_CALL_NULL(name, typename);
 *   static_call(name)(args...);
 *   static_call_cond(name)(args...);
 *   static_call_update(name, func);
 *
 * Usage example:
@@ -52,6 +54,43 @@
 *   rather than calling through the trampoline.  This requires objtool or a
 *   compiler plugin to detect all the static_call() sites and annotate them
 *   in the .static_call_sites section.
 *
 *
 * Notes on NULL function pointers:
 *
 *   Static_call()s support NULL functions, with many of the caveats that
 *   regular function pointers have.
 *
 *   Clearly calling a NULL function pointer is 'BAD', so too for
 *   static_call()s (although when HAVE_STATIC_CALL it might not be immediately
 *   fatal). A NULL static_call can be the result of:
 *
 *     DEFINE_STATIC_CALL_NULL(my_static_call, void (*)(int));
 *
 *   which is equivalent to declaring a NULL function pointer with just a
 *   typename:
 *
 *     void (*my_func_ptr)(int arg1) = NULL;
 *
 *   or using static_call_update() with a NULL function. In both cases the
 *   HAVE_STATIC_CALL implementation will patch the trampoline with a RET
 *   instruction, instead of an immediate tail-call JMP. HAVE_STATIC_CALL_INLINE
 *   architectures can patch the trampoline call to a NOP.
 *
 *   In all cases, any argument evaluation is unconditional. Unlike a regular
 *   conditional function pointer call:
 *
 *     if (my_func_ptr)
 *         my_func_ptr(arg1)
 *
 *   where the argument evaluation also depends on the pointer value.
 *
 *   When calling a static_call that can be NULL, use:
 *
 *     static_call_cond(name)(arg1);
 *
 *   which will include the required value tests to avoid NULL-pointer
 *   dereferences.
 */

#include <linux/types.h>
@@ -120,7 +159,16 @@ extern int static_call_text_reserved(void *start, void *end);
	};								\
	ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

/*
 * Define static call @name with no initial target.
 *
 * Declares the static call via DECLARE_STATIC_CALL(), defines its key with
 * .func = NULL and .type = 1, and emits the NULL trampoline through
 * ARCH_DEFINE_STATIC_CALL_NULL_TRAMP().
 *
 * @_func is only forwarded to DECLARE_STATIC_CALL(); the usage summary at the
 * top of this header lists this argument as a typename rather than a function.
 *
 * NOTE(review): the meaning of .type = 1 is not visible in this hunk --
 * confirm against struct static_call_key.
 */
#define DEFINE_STATIC_CALL_NULL(name, _func)				\
	DECLARE_STATIC_CALL(name, _func);				\
	struct static_call_key STATIC_CALL_KEY(name) = {		\
		.func = NULL,						\
		.type = 1,						\
	};								\
	ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)

/* Callee expression for static call @name; forwards to __static_call(). */
#define static_call(name)	__static_call(name)
/*
 * Variant for static calls whose target may be NULL. Expands to the same
 * callee as static_call(); because a call binds tighter than a cast, the
 * leading (void) applies to the result of the whole call expression, so the
 * call cannot be used for its value.
 */
#define static_call_cond(name)	(void)__static_call(name)

#define EXPORT_STATIC_CALL(name)					\
	EXPORT_SYMBOL(STATIC_CALL_KEY(name));				\
@@ -143,7 +191,15 @@ struct static_call_key {
	};								\
	ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func)

/*
 * Define static call @name with no initial target.
 *
 * Declares the static call via DECLARE_STATIC_CALL(), defines its key with
 * .func = NULL, and emits the NULL trampoline through
 * ARCH_DEFINE_STATIC_CALL_NULL_TRAMP().
 *
 * @_func is only forwarded to DECLARE_STATIC_CALL(); the usage summary at the
 * top of this header lists this argument as a typename rather than a function.
 */
#define DEFINE_STATIC_CALL_NULL(name, _func)				\
	DECLARE_STATIC_CALL(name, _func);				\
	struct static_call_key STATIC_CALL_KEY(name) = {		\
		.func = NULL,						\
	};								\
	ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)

/* Callee expression for static call @name; forwards to __static_call(). */
#define static_call(name)	__static_call(name)
/*
 * Variant for static calls whose target may be NULL. Expands to the same
 * callee as static_call(); because a call binds tighter than a cast, the
 * leading (void) applies to the result of the whole call expression, so the
 * call cannot be used for its value.
 */
#define static_call_cond(name)	(void)__static_call(name)

static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
@@ -179,9 +235,39 @@ struct static_call_key {
		.func = _func,						\
	}

/*
 * Define static call @name with no initial target.
 *
 * Declares the static call via DECLARE_STATIC_CALL() and defines its key with
 * .func = NULL. This variant emits no trampoline; only the key is defined.
 *
 * @_func is only forwarded to DECLARE_STATIC_CALL(); the usage summary at the
 * top of this header lists this argument as a typename rather than a function.
 */
#define DEFINE_STATIC_CALL_NULL(name, _func)				\
	DECLARE_STATIC_CALL(name, _func);				\
	struct static_call_key STATIC_CALL_KEY(name) = {		\
		.func = NULL,						\
	}

/*
 * Callee expression for static call @name in the plain function-pointer
 * implementation: reads the key's .func and casts it to a pointer to the type
 * of STATIC_CALL_TRAMP(name) so the call is type-checked against the
 * declaration.
 *
 * No NULL check is made here; calling through a key whose .func is NULL
 * calls a NULL pointer.
 */
#define static_call(name)						\
	((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))

/*
 * Empty function substituted by __static_call_cond() when the key's func
 * pointer is NULL.
 */
static inline void __static_call_nop(void)
{
}

/*
 * Callee expression for a static call @name whose target may be NULL.
 *
 * Evaluates to a function pointer of the trampoline's declared type: the
 * key's current .func, or &__static_call_nop when .func is NULL.
 *
 * This horrific hack takes care of two things:
 *
 *  - it ensures the compiler will only load the function pointer ONCE
 *    (via READ_ONCE() into a local), which avoids a reload race between
 *    the NULL test and the call.
 *
 *  - it ensures the argument evaluation is unconditional, similar
 *    to the HAVE_STATIC_CALL variant: the arguments belong to the call
 *    made on the resulting pointer, not to the NULL test.
 *
 * Sadly current GCC/Clang (10 for both) do not optimize this properly
 * and will emit an indirect call for the NULL case :-(
 */
#define __static_call_cond(name)					\
({									\
	void *func = READ_ONCE(STATIC_CALL_KEY(name).func);		\
	if (!func)							\
		func = &__static_call_nop;				\
	(typeof(STATIC_CALL_TRAMP(name))*)func;				\
})

/*
 * NULL-tolerant call of static call @name. The callee comes from
 * __static_call_cond(); because a call binds tighter than a cast, the leading
 * (void) applies to the result of the whole call expression.
 */
#define static_call_cond(name)	(void)__static_call_cond(name)

static inline
void __static_call_update(struct static_call_key *key, void *tramp, void *func)
{