Commit 3803a229 authored by Arnaldo Carvalho de Melo
Browse files

perf trace: Put the per-syscall entry/exit prog_array BPF map infrastructure in place

I.e. look for the "syscalls_sys_enter" and "syscalls_sys_exit" BPF maps of
type PROG_ARRAY and populate them with the handlers specified per
syscall. For now only 'open' is wired to something; in time all
syscalls that need to copy arguments when entering a syscall or returning
from one will set these to the right handlers, reusing pre-existing ones
when possible.

Next step is to use bpf_tail_call() into that.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Luis Cláudio Gonçalves <lclaudio@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lkml.kernel.org/n/tip-t0p4u43i9vbpzs1xtowna3gb@git.kernel.org


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 6ff8fff4
Loading
Loading
Loading
Loading
+72 −4
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0-only
/*
 * builtin-trace.c
 *
@@ -83,6 +82,10 @@ struct trace {
		int		max;
		struct syscall  *table;
		struct bpf_map  *map;
		struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
			struct bpf_map  *sys_enter,
					*sys_exit;
		}		prog_array;
		struct {
			struct perf_evsel *sys_enter,
					  *sys_exit,
@@ -1619,6 +1622,22 @@ out_free:
	goto out;
}

/*
 * Tell whether syscall 'id' is selected by the event qualifier.
 *
 * With no qualifier ids at all, every syscall is enabled.  Otherwise 'id' is
 * looked up in trace->ev_qualifier_ids.entries with bsearch(), which requires
 * that array to be sorted in intcmp order.  An id that is listed is enabled
 * unless trace->not_ev_qualifier is set; an id that is not listed is enabled
 * only when trace->not_ev_qualifier is set.
 */
static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id)
{
	const void *match;

	if (trace->ev_qualifier_ids.nr == 0)
		return true;

	match = bsearch(&id, trace->ev_qualifier_ids.entries,
			trace->ev_qualifier_ids.nr, sizeof(int), intcmp);

	/* Listed ids follow the qualifier, unlisted ids follow its negation. */
	return match != NULL ? !trace->not_ev_qualifier : trace->not_ev_qualifier;
}

/*
 * args is to be interpreted as a series of longs but we need to handle
 * 8-byte unaligned accesses. args points to raw_data within the event
@@ -2784,6 +2803,18 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
	}
}

/*
 * Return the fd of the BPF program to use at entry of syscall 'id'.
 *
 * When trace__syscall_info() has no entry for 'id', the fd of
 * trace->syscalls.unaugmented_prog is returned instead.
 */
static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
{
	struct syscall *sc = trace__syscall_info(trace, NULL, id);

	if (sc == NULL)
		return bpf_program__fd(trace->syscalls.unaugmented_prog);

	return bpf_program__fd(sc->bpf_prog.sys_enter);
}

/*
 * Return the fd of the BPF program to use at exit of syscall 'id'.
 *
 * When trace__syscall_info() has no entry for 'id', the fd of
 * trace->syscalls.unaugmented_prog is returned instead.
 */
static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
{
	struct syscall *sc = trace__syscall_info(trace, NULL, id);

	if (sc == NULL)
		return bpf_program__fd(trace->syscalls.unaugmented_prog);

	return bpf_program__fd(sc->bpf_prog.sys_exit);
}

static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
{
	struct syscall *sc = trace__syscall_info(trace, NULL, id);
@@ -2837,10 +2868,8 @@ static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
	int err = 0, key;

	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
		if (enabled) {
		if (enabled)
			trace__init_bpf_map_syscall_args(trace, key, &value);
			trace__init_syscall_bpf_progs(trace, key);
		}

		err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
		if (err)
@@ -2859,6 +2888,34 @@ static int trace__init_syscalls_bpf_map(struct trace *trace)

	return __trace__init_syscalls_bpf_map(trace, enabled);
}

/*
 * Fill the per-syscall sys_enter/sys_exit prog array maps.
 *
 * For every syscall id in the syscall table that trace__syscall_enabled()
 * accepts, store the fd of its sys_enter program in
 * trace->syscalls.prog_array.sys_enter and the fd of its sys_exit program in
 * trace->syscalls.prog_array.sys_exit, both keyed by the syscall id.
 *
 * Returns 0 when all updates succeed, otherwise the first non-zero value
 * returned by bpf_map_update_elem(); later syscalls are then left untouched.
 */
static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
{
	int enter_map_fd = bpf_map__fd(trace->syscalls.prog_array.sys_enter);
	int exit_map_fd  = bpf_map__fd(trace->syscalls.prog_array.sys_exit);
	int key;

	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
		int fd, err;

		if (!trace__syscall_enabled(trace, key))
			continue;

		trace__init_syscall_bpf_progs(trace, key);

		// It'll get at least the "!raw_syscalls:unaugmented"
		fd = trace__bpf_prog_sys_enter_fd(trace, key);
		err = bpf_map_update_elem(enter_map_fd, &key, &fd, BPF_ANY);
		if (err)
			return err;

		fd = trace__bpf_prog_sys_exit_fd(trace, key);
		err = bpf_map_update_elem(exit_map_fd, &key, &fd, BPF_ANY);
		if (err)
			return err;
	}

	return 0;
}
#else
static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
{
@@ -2875,6 +2932,11 @@ static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace
{
	return NULL;
}

/*
 * Stub used when HAVE_LIBBPF_SUPPORT is not defined: there are no prog array
 * maps to populate, so just report success.
 */
static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace __maybe_unused)
{
	return 0;
}
#endif // HAVE_LIBBPF_SUPPORT

static int trace__set_ev_qualifier_filter(struct trace *trace)
@@ -3129,6 +3191,10 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
	if (trace->syscalls.map)
		trace__init_syscalls_bpf_map(trace);

	if (trace->syscalls.prog_array.sys_enter)
		trace__init_syscalls_bpf_prog_array_maps(trace);


	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
@@ -3754,6 +3820,8 @@ static void trace__set_bpf_map_filtered_pids(struct trace *trace)
/*
 * Look up, by name, the BPF maps used for per-syscall handling and cache the
 * results in trace->syscalls:
 *
 *   "syscalls"           -> syscalls.map
 *   "syscalls_sys_enter" -> syscalls.prog_array.sys_enter
 *   "syscalls_sys_exit"  -> syscalls.prog_array.sys_exit
 *
 * NOTE(review): callers test these pointers before use, so a missing map is
 * presumably represented as NULL - confirm against trace__find_bpf_map_by_name().
 */
static void trace__set_bpf_map_syscalls(struct trace *trace)
{
	trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
	trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
	trace->syscalls.prog_array.sys_exit  = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
}

static int trace__config(const char *var, const char *value, void *arg)
+14 −0
Original line number Diff line number Diff line
@@ -33,6 +33,20 @@ struct syscall {

bpf_map(syscalls, ARRAY, int, struct syscall, 512);

/*
 * What to augment at entry?
 *
 * Pointer arg payloads (filenames, etc) passed from userspace to the kernel
 *
 * perf trace finds this map by its name ("syscalls_sys_enter") and stores one
 * BPF program fd per syscall id key.
 */
bpf_map(syscalls_sys_enter, PROG_ARRAY, u32, u32, 512);

/*
 * What to augment at exit?
 *
 * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace.
 *
 * perf trace finds this map by its name ("syscalls_sys_exit") and stores one
 * BPF program fd per syscall id key.
 */
bpf_map(syscalls_sys_exit, PROG_ARRAY, u32, u32, 512);

struct syscall_enter_args {
	unsigned long long common_tp_fields;
	long		   syscall_nr;