Commit 9243ae5b authored by Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo-20160415' of...

Merge tag 'perf-core-for-mingo-20160415' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements from Arnaldo Carvalho de Melo:

User visible changes:

- Wire the callchain unwinding "max-stack" now to 'perf script --max-stack',
  allowing the depth of callchains to be limited, possibly reducing
  processing time (Arnaldo Carvalho de Melo)

- Ditto for 'perf trace --max-stack' (Arnaldo Carvalho de Melo)

- Introduce a --min-stack filter for 'perf trace', to show only syscalls that
  had a userspace callchain leading to them at least min-stack deep (Arnaldo Carvalho de Melo)

- Make 'perf trace' work with multiple threads and the --duration filter,
  i.e. do not print the start of an interrupted syscall followed by "..."
  to print interrupts from other threads, as we need to wait for the sys_exit
  syscall tracepoint to calculate the duration, duh. (Arnaldo Carvalho de Melo)

  System wide --duration now works as expected:

   [root@jouet ~]# trace --duration 100
     152.393 (145.147 ms): Timer/24358 futex(uaddr: 0x7f5ed98e56cc, op: WAIT_BITSET|PRIV|CLKRT, val: 7055125, utime: 0x7f5ecdbfec30, val3: 4294967295) = -1 ETIMEDOUT Connection timed out
     152.438 (145.040 ms): firefox/24321 poll(ufds: 0x7f5ec388b460, nfds: 6, timeout_msecs: 4294967295) = 1
     358.580 (158.279 ms): Xorg/2025 select(n: 512, inp: 0x83a8e0, tvp: 0x7ffdcbb63610) = 0 Timeout
     358.687 (148.285 ms): gnome-terminal/2711 poll(ufds: 0x55b7e6811ad0, nfds: 15, timeout_msecs: 249) = 1
     370.150 (169.569 ms): gnome-shell/2287 poll(ufds: 0x55e623d65490, nfds: 86, timeout_msecs: 4294967295) = 1

- Now 'perf trace's --max-stack and --min-stack will automatically set
  "--call-graph dwarf", if --call-graph is not present on the command line:

   [root@jouet ~]# perf trace -e nanosleep --max-stack 3 usleep 1
     0.299 ( 0.057 ms): usleep/29658 nanosleep(rqtp: 0x7fff80f3b230) = 0
                                       __nanosleep+0x10 (/usr/lib64/libc-2.22.so)
                                       usleep+0x34 (/usr/lib64/libc-2.22.so)
                                       main+0x1eb (/usr/bin/usleep)
   [root@jouet ~]#

- Bump 'perf trace --mmap-pages' for root when using callchains and not
  specifying --mmap-pages explicitly (Arnaldo Carvalho de Melo)

Build fixes:

- The python binding object had missing symbols, do some refactoring
  to fix that (Arnaldo Carvalho de Melo)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 0b22cd27 f3e459d1
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -259,6 +259,16 @@ include::itrace.txt[]
--full-source-path::
	Show the full path for source files for srcline output.

--max-stack::
        Set the stack depth limit when parsing the callchain, anything
        beyond the specified depth will be ignored. This is a trade-off
        between information loss and faster processing especially for
        workloads that can have a very long callchain stack.
        Note that when using the --itrace option the synthesized callchain size
        will override this value if the synthesized callchain size is bigger.

        Default: 127

--ns::
	Use 9 decimal places when displaying time (i.e. show the nanoseconds)

+23 −0
Original line number Diff line number Diff line
@@ -123,12 +123,35 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
        man pages for details. The ones that are most useful in 'perf trace'
        are 'dwarf' and 'lbr', where available, try: 'perf trace --call-graph dwarf'.

        Using this will, for the root user, bump the value of --mmap-pages to 4
        times the maximum for non-root users, based on the kernel.perf_event_mlock_kb
        sysctl. This is done only if the user doesn't specify a --mmap-pages value.

--kernel-syscall-graph::
	 Show the kernel callchains on the syscall exit path.

--event::
	Trace other events, see 'perf list' for a complete list.

--max-stack::
        Set the stack depth limit when parsing the callchain, anything
        beyond the specified depth will be ignored. Note that at this point
        this only affects the presentation part, i.e. the kernel is still
        not limiting the callchain depth; the overhead of callchains needs
        to be controlled via the knobs in --call-graph dwarf.

        Implies '--call-graph dwarf' when --call-graph not present on the
        command line, on systems where DWARF unwinding was built in.

        Default: 127

--min-stack::
        Set the minimum stack depth when parsing the callchain, callchains
        shallower than the specified depth will be ignored. Disabled by default.

        Implies '--call-graph dwarf' when --call-graph not present on the
        command line, on systems where DWARF unwinding was built in.

--proc-map-timeout::
	When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
	because the file may be huge. A time out is needed in such cases.
+1 −1
Original line number Diff line number Diff line
@@ -375,7 +375,7 @@ static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
	}

	al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
	sample__resolve_callchain(sample, NULL, evsel, &al, 16);
	sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);

	callchain_cursor_commit(&callchain_cursor);
	while (true) {
+20 −15
Original line number Diff line number Diff line
@@ -930,45 +930,50 @@ out_delete_session:
	return status;
}

static void callchain_debug(void)
static void callchain_debug(struct callchain_param *callchain)
{
	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };

	pr_debug("callchain: type %s\n", str[callchain_param.record_mode]);
	pr_debug("callchain: type %s\n", str[callchain->record_mode]);

	if (callchain_param.record_mode == CALLCHAIN_DWARF)
	if (callchain->record_mode == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 callchain_param.dump_size);
			 callchain->dump_size);
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
int record_opts__parse_callchain(struct record_opts *record,
				 struct callchain_param *callchain,
				 const char *arg, bool unset)
{
	int ret;
	struct record_opts *record = (struct record_opts *)opt->value;

	record->callgraph_set = true;
	callchain_param.enabled = !unset;
	callchain->enabled = !unset;

	/* --no-call-graph */
	if (unset) {
		callchain_param.record_mode = CALLCHAIN_NONE;
		callchain->record_mode = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = parse_callchain_record_opt(arg, &callchain_param);
	ret = parse_callchain_record_opt(arg, callchain);
	if (!ret) {
		/* Enable data address sampling for DWARF unwind. */
		if (callchain_param.record_mode == CALLCHAIN_DWARF)
		if (callchain->record_mode == CALLCHAIN_DWARF)
			record->sample_address = true;
		callchain_debug();
		callchain_debug(callchain);
	}

	return ret;
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
@@ -981,7 +986,7 @@ int record_callchain_opt(const struct option *opt,
	if (callchain_param.record_mode == CALLCHAIN_NONE)
		callchain_param.record_mode = CALLCHAIN_FP;

	callchain_debug();
	callchain_debug(&callchain_param);
	return 0;
}

+25 −14
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
#include "util/thread_map.h"
#include "util/stat.h"
#include <linux/bitmap.h>
#include <linux/stringify.h>
#include "asm/bug.h"
#include "util/mem-events.h"

@@ -569,18 +570,23 @@ static void print_sample_bts(struct perf_sample *sample,
	/* print branch_from information */
	if (PRINT_FIELD(IP)) {
		unsigned int print_opts = output[attr->type].print_ip_opts;
		struct callchain_cursor *cursor = NULL, cursor_callchain;

		if (symbol_conf.use_callchain && sample->callchain) {
			printf("\n");
		} else {
			printf(" ");
		if (symbol_conf.use_callchain && sample->callchain &&
		    thread__resolve_callchain(al->thread, &cursor_callchain, evsel,
					      sample, NULL, NULL, scripting_max_stack) == 0)
			cursor = &cursor_callchain;

		if (cursor == NULL) {
			putchar(' ');
			if (print_opts & EVSEL__PRINT_SRCLINE) {
				print_srcline_last = true;
				print_opts &= ~EVSEL__PRINT_SRCLINE;
			}
		}
		perf_evsel__fprintf_sym(evsel, sample, al, 0, print_opts,
					scripting_max_stack, stdout);
		} else
			putchar('\n');

		sample__fprintf_sym(sample, al, 0, print_opts, cursor, stdout);
	}

	/* print branch_to information */
@@ -783,14 +789,15 @@ static void process_event(struct perf_script *script,
		printf("%16" PRIu64, sample->weight);

	if (PRINT_FIELD(IP)) {
		if (!symbol_conf.use_callchain)
			printf(" ");
		else
			printf("\n");
		struct callchain_cursor *cursor = NULL, cursor_callchain;

		if (symbol_conf.use_callchain &&
		    thread__resolve_callchain(al->thread, &cursor_callchain, evsel,
					      sample, NULL, NULL, scripting_max_stack) == 0)
			cursor = &cursor_callchain;

		perf_evsel__fprintf_sym(evsel, sample, al, 0,
					output[attr->type].print_ip_opts,
					scripting_max_stack, stdout);
		putchar(cursor ? '\n' : ' ');
		sample__fprintf_sym(sample, al, 0, output[attr->type].print_ip_opts, cursor, stdout);
	}

	if (PRINT_FIELD(IREGS))
@@ -2021,6 +2028,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
		   "only consider symbols in these pids"),
	OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
		   "only consider symbols in these tids"),
	OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
		     "Set the maximum stack depth when parsing the callchain, "
		     "anything beyond the specified depth will be ignored. "
		     "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_BOOLEAN('I', "show-info", &show_full_info,
		    "display extended information from perf.data file"),
	OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
Loading