Commit 3b23e499 authored by Torsten Duwe's avatar Torsten Duwe Committed by Mark Rutland
Browse files

arm64: implement ftrace with regs



This patch implements FTRACE_WITH_REGS for arm64, which allows a traced
function's arguments (and some other registers) to be captured into a
struct pt_regs, allowing these to be inspected and/or modified. This is
a building block for live-patching, where a function's arguments may be
forwarded to another function. This is also necessary to enable ftrace
and in-kernel pointer authentication at the same time, as it allows the
LR value to be captured and adjusted prior to signing.

Using GCC's -fpatchable-function-entry=N option, we can have the
compiler insert a configurable number of NOPs between the function entry
point and the usual prologue. This also ensures functions are AAPCS
compliant (e.g. disabling inter-procedural register allocation).

For example, with -fpatchable-function-entry=2, GCC 8.1.0 compiles the
following:

| unsigned long bar(void);
|
| unsigned long foo(void)
| {
|         return bar() + 1;
| }

... to:

| <foo>:
|         nop
|         nop
|         stp     x29, x30, [sp, #-16]!
|         mov     x29, sp
|         bl      0 <bar>
|         add     x0, x0, #0x1
|         ldp     x29, x30, [sp], #16
|         ret

This patch builds the kernel with -fpatchable-function-entry=2,
prefixing each function with two NOPs. To trace a function, we replace
these NOPs with a sequence that saves the LR into a GPR, then calls an
ftrace entry assembly function which saves this and other relevant
registers:

| mov	x9, x30
| bl	<ftrace-entry>

Since patchable functions are AAPCS compliant (and the kernel does not
use x18 as a platform register), x9-x18 can be safely clobbered in the
patched sequence and the ftrace entry code.

There are now two ftrace entry functions, ftrace_regs_entry (which saves
all GPRs), and ftrace_entry (which saves the bare minimum). A PLT is
allocated for each within modules.

Signed-off-by: default avatarTorsten Duwe <duwe@suse.de>
[Mark: rework asm, comments, PLTs, initialization, commit message]
Signed-off-by: default avatarMark Rutland <mark.rutland@arm.com>
Reviewed-by: default avatarAmit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: default avatarArd Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: default avatarTorsten Duwe <duwe@suse.de>
Tested-by: default avatarAmit Daniel Kachhap <amit.kachhap@arm.com>
Tested-by: default avatarTorsten Duwe <duwe@suse.de>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Julien Thierry <jthierry@redhat.com>
Cc: Will Deacon <will@kernel.org>
parent 1f377e04
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -143,6 +143,8 @@ config ARM64
	select HAVE_DEBUG_KMEMLEAK
	select HAVE_DMA_CONTIGUOUS
	select HAVE_DYNAMIC_FTRACE
	select HAVE_DYNAMIC_FTRACE_WITH_REGS \
		if $(cc-option,-fpatchable-function-entry=2)
	select HAVE_EFFICIENT_UNALIGNED_ACCESS
	select HAVE_FAST_GUP
	select HAVE_FTRACE_MCOUNT_RECORD
+5 −0
Original line number Diff line number Diff line
@@ -95,6 +95,11 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
KBUILD_LDS_MODULE	+= $(srctree)/arch/arm64/kernel/module.lds
endif

ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y)
  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
endif

# Default value
head-y		:= arch/arm64/kernel/head.o

+23 −0
Original line number Diff line number Diff line
@@ -11,9 +11,20 @@
#include <asm/insn.h>

#define HAVE_FUNCTION_GRAPH_FP_TEST

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
#define ARCH_SUPPORTS_FTRACE_OPS 1
#else
#define MCOUNT_ADDR		((unsigned long)_mcount)
#endif

/* The BL at the callsite's adjusted rec->ip */
#define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE

#define FTRACE_PLT_IDX		0
#define FTRACE_REGS_PLT_IDX	1
#define NR_FTRACE_PLTS		2

/*
 * Currently, gcc tends to save the link register after the local variables
 * on the stack. This causes the max stack tracer to report the function
@@ -43,6 +54,12 @@ extern void return_to_handler(void);

static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
	/*
	 * Adjust addr to point at the BL in the callsite.
	 * See ftrace_init_nop() for the callsite sequence.
	 */
	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
		return addr + AARCH64_INSN_SIZE;
	/*
	 * addr is the address of the mcount call instruction.
	 * recordmcount does the necessary offset calculation.
@@ -50,6 +67,12 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
	return addr;
}

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
struct dyn_ftrace;
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
#define ftrace_init_nop ftrace_init_nop
#endif

#define ftrace_return_address(n) return_address(n)

/*
+1 −1
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@ struct mod_arch_specific {
	struct mod_plt_sec	init;

	/* for CONFIG_DYNAMIC_FTRACE */
	struct plt_entry 	*ftrace_trampoline;
	struct plt_entry	*ftrace_trampolines;
};
#endif

+135 −5
Original line number Diff line number Diff line
@@ -7,10 +7,137 @@
 */

#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/ftrace.h>
#include <asm/insn.h>

#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
/*
 * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before
 * the regular function prologue. For an enabled callsite, ftrace_init_nop() and
 * ftrace_make_call() have patched those NOPs to:
 *
 * 	MOV	X9, LR
 * 	BL	<entry>
 *
 * ... where <entry> is either ftrace_caller or ftrace_regs_caller.
 *
 * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are
 * live, and x9-x18 are safe to clobber.
 *
 * We save the callsite's context into a pt_regs before invoking any ftrace
 * callbacks. So that we can get a sensible backtrace, we create a stack record
 * for the callsite and the ftrace entry assembly. This is not sufficient for
 * reliable stacktrace: until we create the callsite stack record, its caller
 * is missing from the LR and existing chain of frame records.
 */
	.macro  ftrace_regs_entry, allregs=0
	/* Make room for pt_regs, plus a callee frame */
	sub	sp, sp, #(S_FRAME_SIZE + 16)

	/* Save function arguments (and x9 for simplicity) */
	stp	x0, x1, [sp, #S_X0]
	stp	x2, x3, [sp, #S_X2]
	stp	x4, x5, [sp, #S_X4]
	stp	x6, x7, [sp, #S_X6]
	stp	x8, x9, [sp, #S_X8]

	/* Optionally save the callee-saved registers, always save the FP */
	.if \allregs == 1
	stp	x10, x11, [sp, #S_X10]
	stp	x12, x13, [sp, #S_X12]
	stp	x14, x15, [sp, #S_X14]
	stp	x16, x17, [sp, #S_X16]
	stp	x18, x19, [sp, #S_X18]
	stp	x20, x21, [sp, #S_X20]
	stp	x22, x23, [sp, #S_X22]
	stp	x24, x25, [sp, #S_X24]
	stp	x26, x27, [sp, #S_X26]
	stp	x28, x29, [sp, #S_X28]
	.else
	str	x29, [sp, #S_FP]
	.endif

	/* Save the callsite's SP and LR */
	add	x10, sp, #(S_FRAME_SIZE + 16)
	stp	x9, x10, [sp, #S_LR]

	/* Save the PC after the ftrace callsite */
	str	x30, [sp, #S_PC]

	/* Create a frame record for the callsite above pt_regs */
	stp	x29, x9, [sp, #S_FRAME_SIZE]
	add	x29, sp, #S_FRAME_SIZE

	/* Create our frame record within pt_regs. */
	stp	x29, x30, [sp, #S_STACKFRAME]
	add	x29, sp, #S_STACKFRAME
	.endm

ENTRY(ftrace_regs_caller)
	ftrace_regs_entry	1
	b	ftrace_common
ENDPROC(ftrace_regs_caller)

ENTRY(ftrace_caller)
	ftrace_regs_entry	0
	b	ftrace_common
ENDPROC(ftrace_caller)

ENTRY(ftrace_common)
	sub	x0, x30, #AARCH64_INSN_SIZE	// ip (callsite's BL insn)
	mov	x1, x9				// parent_ip (callsite's LR)
	ldr_l	x2, function_trace_op		// op
	mov	x3, sp				// regs

GLOBAL(ftrace_call)
	bl	ftrace_stub

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)		// ftrace_graph_caller();
	nop				// If enabled, this will be replaced
					// "b ftrace_graph_caller"
#endif

/*
 * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
 * x19-x29 per the AAPCS, and we created frame records upon entry, so we need
 * to restore x0-x8, x29, and x30.
 */
ftrace_common_return:
	/* Restore function arguments */
	ldp	x0, x1, [sp]
	ldp	x2, x3, [sp, #S_X2]
	ldp	x4, x5, [sp, #S_X4]
	ldp	x6, x7, [sp, #S_X6]
	ldr	x8, [sp, #S_X8]

	/* Restore the callsite's FP, LR, PC */
	ldr	x29, [sp, #S_FP]
	ldr	x30, [sp, #S_LR]
	ldr	x9, [sp, #S_PC]

	/* Restore the callsite's SP */
	add	sp, sp, #S_FRAME_SIZE + 16

	ret	x9
ENDPROC(ftrace_common)

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
	ldr	x0, [sp, #S_PC]
	sub	x0, x0, #AARCH64_INSN_SIZE	// ip (callsite's BL insn)
	add	x1, sp, #S_LR			// parent_ip (callsite's LR)
	ldr	x2, [sp, #S_FRAME_SIZE]	   	// parent fp (callsite's FP)
	bl	prepare_ftrace_return
	b	ftrace_common_return
ENDPROC(ftrace_graph_caller)
#else
#endif

#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */

/*
 * Gcc with -pg will put the following code in the beginning of each function:
 *      mov x0, x30
@@ -160,11 +287,6 @@ GLOBAL(ftrace_graph_call) // ftrace_graph_caller();

	mcount_exit
ENDPROC(ftrace_caller)
#endif /* CONFIG_DYNAMIC_FTRACE */

ENTRY(ftrace_stub)
	ret
ENDPROC(ftrace_stub)

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
@@ -184,7 +306,15 @@ ENTRY(ftrace_graph_caller)

	mcount_exit
ENDPROC(ftrace_graph_caller)
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */

ENTRY(ftrace_stub)
	ret
ENDPROC(ftrace_stub)

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * void return_to_handler(void)
 *
Loading