Commit bed9441b authored by Ingo Molnar's avatar Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo-20160411' of...

Merge tag 'perf-core-for-mingo-20160411' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements from Arnaldo Carvalho de Melo:

User visible changes:

- Automagically create a 'bpf-output' event, easing the setup of BPF
  C "scripts" that produce output via the perf ring buffer. Now it is
  just a matter of calling any perf tool, such as 'trace', with a C
  source file that references the __bpf_stdout__ output channel and
  that channel will be created and connected to the script:

  # trace -e nanosleep --event test_bpf_stdout.c usleep 1
    0.013 ( 0.013 ms): usleep/2818 nanosleep(rqtp: 0x7ffcead45f40                                        ) ...
    0.013 (         ): __bpf_stdout__:Raise a BPF event!..)
    0.015 (         ): perf_bpf_probe:func_begin:(ffffffff81112460))
    0.261 (         ): __bpf_stdout__:Raise a BPF event!..)
    0.262 (         ): perf_bpf_probe:func_end:(ffffffff81112460 <- ffffffff81003d92))
    0.264 ( 0.264 ms): usleep/2818  ... [continued]: nanosleep()) = 0
  #

  Further work is needed to reduce the number of lines in a perf bpf C source
  file, this being the part where we greatly reduce the command line setup (Wang Nan)

- 'perf trace' now supports callchains, with 'trace --call-graph dwarf' using
  libunwind, just like 'perf top', to ask the kernel for stack dumps for CFI
  processing. This reduces the overhead by asking just for userspace callchains
  and also only for the syscall exit tracepoint (raw_syscalls:sys_exit)
  (Milian Wolff, Arnaldo Carvalho de Melo)

  Try it with, for instance:

     # perf trace --call dwarf ping 127.0.0.1

  An excerpt of a system wide 'perf trace --call dwarf" session is at:

   https://fedorapeople.org/~acme/perf/perf-trace--call-graph-dwarf--all-cpus.txt



  You may need to bump the number of mmap pages, using -m/--mmap-pages,
  but on a Broadwell machine the defaults allowed system wide tracing to
  work without losing that many records, experiment with just some
  syscalls, like:

    # perf trace --call dwarf -e nanosleep,futex

  All the targets available for 'perf record', 'perf top' (--pid, --tid, --cpu,
  etc) should work. Also --duration may be interesting to try.

  To get filenames from in various syscalls pointer args (open, ettc), add this
  to the mix:

  # perf probe 'vfs_getname=getname_flags:72 pathname=filename:string'

  Making this work is next in line:

     # trace --call dwarf --ev sched:sched_switch/call-graph=fp/ usleep 1

  I.e. honouring per-tracepoint callchains in 'perf trace' in addition to
  in raw_syscalls:sys_exit.

Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents aeaae7d6 00768a2b
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -117,6 +117,15 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
--syscalls::
	Trace system calls. This options is enabled by default.

--call-graph [mode,type,min[,limit],order[,key][,branch]]::
        Setup and enable call-graph (stack chain/backtrace) recording.
        See `--call-graph` section in perf-record and perf-report
        man pages for details. The ones that are most useful in 'perf trace'
        are 'dwarf' and 'lbr', where available, try: 'perf trace --call-graph dwarf'.

--kernel-syscall-graph::
	 Show the kernel callchains on the syscall exit path.

--event::
	Trace other events, see 'perf list' for a complete list.

+1 −1
Original line number Diff line number Diff line
@@ -71,7 +71,7 @@ int test__perf_time_to_tsc(int subtest __maybe_unused)

	CHECK__(parse_events(evlist, "cycles:u", NULL));

	perf_evlist__config(evlist, &opts);
	perf_evlist__config(evlist, &opts, NULL);

	evsel = perf_evlist__first(evlist);

+4 −4
Original line number Diff line number Diff line
@@ -63,6 +63,8 @@ struct pt_regs_offset {
# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
#endif

/* TODO: switching by dwarf address size */
#ifndef __x86_64__
static const struct pt_regs_offset x86_32_regoffset_table[] = {
	REG_OFFSET_NAME_32("%ax",	eax),
	REG_OFFSET_NAME_32("%cx",	ecx),
@@ -75,6 +77,8 @@ static const struct pt_regs_offset x86_32_regoffset_table[] = {
	REG_OFFSET_END,
};

#define regoffset_table x86_32_regoffset_table
#else
static const struct pt_regs_offset x86_64_regoffset_table[] = {
	REG_OFFSET_NAME_64("%ax",	rax),
	REG_OFFSET_NAME_64("%dx",	rdx),
@@ -95,11 +99,7 @@ static const struct pt_regs_offset x86_64_regoffset_table[] = {
	REG_OFFSET_END,
};

/* TODO: switching by dwarf address size */
#ifdef __x86_64__
#define regoffset_table x86_64_regoffset_table
#else
#define regoffset_table x86_32_regoffset_table
#endif

/* Minus 1 for the ending REG_OFFSET_END */
+1 −1
Original line number Diff line number Diff line
@@ -982,7 +982,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
	struct perf_evlist *evlist = kvm->evlist;
	char sbuf[STRERR_BUFSIZE];

	perf_evlist__config(evlist, &kvm->opts);
	perf_evlist__config(evlist, &kvm->opts, NULL);

	/*
	 * Note: exclude_{guest,host} do not apply here.
+9 −1
Original line number Diff line number Diff line
@@ -284,7 +284,7 @@ static int record__open(struct record *rec)
	struct record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);
	perf_evlist__config(evlist, opts, &callchain_param);

	evlist__for_each(evlist, pos) {
try_again:
@@ -1276,6 +1276,14 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
	if (err)
		return err;

	err = bpf__setup_stdout(rec->evlist);
	if (err) {
		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n",
			 errbuf);
		return err;
	}

	err = -ENOMEM;

	symbol__init(NULL);
Loading