Commit c5ab6ad7 authored by Ingo Molnar
Browse files

Merge tag 'perf-core-for-mingo-20160413' of...

Merge tag 'perf-core-for-mingo-20160413' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

 into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

- Also print the callchains requested for events specified via 'perf trace --event':
  (Arnaldo Carvalho de Melo)

  # trace -e nanosleep --call dwarf --event sched:sched_switch/call-graph=fp/ usleep 1
   0.346 (0.005 ms): usleep/24428 nanosleep(rqtp: 0x7fffa15a0540) ...
   0.346 (        ): sched:sched_switch:usleep:24428 [120] S ==> swapper/3:0 [120])
                                    __schedule+0xfe200402 ([kernel.kallsyms])
                                    schedule+0xfe200035 ([kernel.kallsyms])
                                    do_nanosleep+0xfe20006f ([kernel.kallsyms])
                                    hrtimer_nanosleep+0xfe2000dc ([kernel.kallsyms])
                                    sys_nanosleep+0xfe20007a ([kernel.kallsyms])
                                    do_syscall_64+0xfe200062 ([kernel.kallsyms])
                                    return_from_SYSCALL_64+0xfe200000 ([kernel.kallsyms])
                                    __nanosleep+0xffff005b8d602010 (/usr/lib64/libc-2.22.so)
   0.400 (0.059 ms): usleep/24428  ... [continued]: nanosleep()) = 0
                                    __nanosleep+0x10 (/usr/lib64/libc-2.22.so)
                                    usleep+0x34 (/usr/lib64/libc-2.22.so)
                                    main+0x1eb (/usr/bin/usleep)
                                    __libc_start_main+0xf0 (/usr/lib64/libc-2.22.so)
                                    _start+0x29 (/usr/bin/usleep)

- Allow requesting that some CPUs or PIDs be highlighted in 'perf sched map' (Jiri Olsa)

- Compact 'perf sched map' to show just CPUs with activity, improving the output
  in high core count systems (Jiri Olsa)

- Fix segfault with 'perf trace --no-syscalls -e syscall-names' by rejecting
  such requests: it doesn't make sense to ask for no syscalls and then specify
  which ones should be printed (Arnaldo Carvalho de Melo)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 31d50c55 59247e33
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -50,6 +50,22 @@ OPTIONS
--dump-raw-trace=::
        Display verbose dump of the sched data.

OPTIONS for 'perf sched map'
----------------------------

--compact::
	Show only CPUs with activity. Helps visualization on high core
	count systems.

--cpus::
	Show just entries with activities for the given CPUs.

--color-cpus::
	Highlight the given cpus.

--color-pids::
	Highlight the given pids.

SEE ALSO
--------
linkperf:perf-record[1]
+186 −12
Original line number Diff line number Diff line
@@ -11,6 +11,8 @@
#include "util/session.h"
#include "util/tool.h"
#include "util/cloexec.h"
#include "util/thread_map.h"
#include "util/color.h"

#include <subcmd/parse-options.h>
#include "util/trace-event.h"
@@ -122,6 +124,21 @@ struct trace_sched_handler {
				  struct machine *machine);
};

/* Colors 'perf sched map' uses to highlight the selected PIDs and CPUs. */
#define COLOR_PIDS PERF_COLOR_BLUE
#define COLOR_CPUS PERF_COLOR_BG_RED

/*
 * State for 'perf sched map': the raw option strings from the command line
 * and the maps built from them, plus the bookkeeping for compact mode.
 */
struct perf_sched_map {
	/* compact mode: one bit per CPU that has already been seen */
	DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
	/* compact mode: CPU numbers in the order they were first seen */
	int			*comp_cpus;
	/* set by --compact */
	bool			 comp;
	/* threads to highlight, parsed from color_pids_str */
	struct thread_map	*color_pids;
	/* --color-pids argument, NULL when not given */
	const char		*color_pids_str;
	/* CPUs to highlight, parsed from color_cpus_str */
	struct cpu_map		*color_cpus;
	/* --color-cpus argument, NULL when not given */
	const char		*color_cpus_str;
	/* CPUs to display, parsed from cpus_str; NULL means no filtering */
	struct cpu_map		*cpus;
	/* --cpus argument, NULL when not given */
	const char		*cpus_str;
};

struct perf_sched {
	struct perf_tool tool;
	const char	 *sort_order;
@@ -173,6 +190,7 @@ struct perf_sched {
	struct list_head sort_list, cmp_pid;
	bool force;
	bool skip_merge;
	struct perf_sched_map map;
};

static u64 get_nsecs(void)
@@ -1339,6 +1357,38 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
	return 0;
}

/*
 * Overlay that lets the per-thread highlight flag travel in the thread's
 * private pointer: it is stored through ->ptr with thread__set_priv() and
 * read back through ->color.
 */
union map_priv {
	void	*ptr;
	bool	 color;
};

static bool thread__has_color(struct thread *thread)
{
	union map_priv priv = {
		.ptr = thread__priv(thread),
	};

	return priv.color;
}

static struct thread*
map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
{
	struct thread *thread = machine__findnew_thread(machine, pid, tid);
	union map_priv priv = {
		.color = false,
	};

	if (!sched->map.color_pids || !thread || thread__priv(thread))
		return thread;

	if (thread_map__has(sched->map.color_pids, tid))
		priv.color = true;

	thread__set_priv(thread, priv.ptr);
	return thread;
}

static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
			    struct perf_sample *sample, struct machine *machine)
{
@@ -1347,13 +1397,25 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
	int new_shortname;
	u64 timestamp0, timestamp = sample->time;
	s64 delta;
	int cpu, this_cpu = sample->cpu;
	int i, this_cpu = sample->cpu;
	int cpus_nr;
	bool new_cpu = false;
	const char *color = PERF_COLOR_NORMAL;

	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);

	if (this_cpu > sched->max_cpu)
		sched->max_cpu = this_cpu;

	if (sched->map.comp) {
		cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
		if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
			sched->map.comp_cpus[cpus_nr++] = this_cpu;
			new_cpu = true;
		}
	} else
		cpus_nr = sched->max_cpu;

	timestamp0 = sched->cpu_last_switched[this_cpu];
	sched->cpu_last_switched[this_cpu] = timestamp;
	if (timestamp0)
@@ -1366,7 +1428,7 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
		return -1;
	}

	sched_in = machine__findnew_thread(machine, -1, next_pid);
	sched_in = map__findnew_thread(sched, machine, -1, next_pid);
	if (sched_in == NULL)
		return -1;

@@ -1400,26 +1462,52 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
		new_shortname = 1;
	}

	for (cpu = 0; cpu <= sched->max_cpu; cpu++) {
	for (i = 0; i < cpus_nr; i++) {
		int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
		struct thread *curr_thread = sched->curr_thread[cpu];
		const char *pid_color = color;
		const char *cpu_color = color;

		if (curr_thread && thread__has_color(curr_thread))
			pid_color = COLOR_PIDS;

		if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
			continue;

		if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
			cpu_color = COLOR_CPUS;

		if (cpu != this_cpu)
			printf(" ");
			color_fprintf(stdout, cpu_color, " ");
		else
			printf("*");
			color_fprintf(stdout, cpu_color, "*");

		if (sched->curr_thread[cpu])
			printf("%2s ", sched->curr_thread[cpu]->shortname);
			color_fprintf(stdout, pid_color, "%2s ", sched->curr_thread[cpu]->shortname);
		else
			printf("   ");
			color_fprintf(stdout, color, "   ");
	}

	printf("  %12.6f secs ", (double)timestamp/1e9);
	if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
		goto out;

	color_fprintf(stdout, color, "  %12.6f secs ", (double)timestamp/1e9);
	if (new_shortname) {
		printf("%s => %s:%d\n",
		const char *pid_color = color;

		if (thread__has_color(sched_in))
			pid_color = COLOR_PIDS;

		color_fprintf(stdout, pid_color, "%s => %s:%d",
		       sched_in->shortname, thread__comm_str(sched_in), sched_in->tid);
	} else {
		printf("\n");
	}

	if (sched->map.comp && new_cpu)
		color_fprintf(stdout, color, " (CPU %d)", this_cpu);

out:
	color_fprintf(stdout, color, "\n");

	thread__put(sched_in);

	return 0;
@@ -1675,10 +1763,76 @@ static int perf_sched__lat(struct perf_sched *sched)
	return 0;
}

static int perf_sched__map(struct perf_sched *sched)
static int setup_map_cpus(struct perf_sched *sched)
{
	struct cpu_map *map;

	sched->max_cpu  = sysconf(_SC_NPROCESSORS_CONF);

	if (sched->map.comp) {
		sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
		if (!sched->map.comp_cpus)
			return -1;
	}

	if (!sched->map.cpus_str)
		return 0;

	map = cpu_map__new(sched->map.cpus_str);
	if (!map) {
		pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
		return -1;
	}

	sched->map.cpus = map;
	return 0;
}

/*
 * Turn the --color-pids argument into sched->map.color_pids.
 *
 * Succeeds without doing anything when the option was not given.
 * Returns 0 on success, -1 if the thread map cannot be created.
 */
static int setup_color_pids(struct perf_sched *sched)
{
	const char *spec = sched->map.color_pids_str;

	if (spec) {
		struct thread_map *threads = thread_map__new_by_tid_str(spec);

		if (threads == NULL) {
			pr_err("failed to get thread map from %s\n", spec);
			return -1;
		}

		sched->map.color_pids = threads;
	}

	return 0;
}

/*
 * Turn the --color-cpus argument into sched->map.color_cpus.
 *
 * Succeeds without doing anything when the option was not given.
 * Returns 0 on success, -1 if the CPU map cannot be created.
 */
static int setup_color_cpus(struct perf_sched *sched)
{
	struct cpu_map *map;

	if (!sched->map.color_cpus_str)
		return 0;

	map = cpu_map__new(sched->map.color_cpus_str);
	if (!map) {
		/* This parses a CPU list, so report it as one. */
		pr_err("failed to get cpus map from %s\n", sched->map.color_cpus_str);
		return -1;
	}

	sched->map.color_cpus = map;
	return 0;
}

static int perf_sched__map(struct perf_sched *sched)
{
	if (setup_map_cpus(sched))
		return -1;

	if (setup_color_pids(sched))
		return -1;

	if (setup_color_cpus(sched))
		return -1;

	setup_pager();
	if (perf_sched__read_events(sched))
		return -1;
@@ -1831,6 +1985,17 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
		    "dump raw trace in ASCII"),
	OPT_END()
	};
	const struct option map_options[] = {
	OPT_BOOLEAN(0, "compact", &sched.map.comp,
		    "map output in compact mode"),
	OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
		   "highlight given pids in map"),
	OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
                    "highlight given CPUs in map"),
	OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
                    "display given CPUs in map"),
	OPT_END()
	};
	const char * const latency_usage[] = {
		"perf sched latency [<options>]",
		NULL
@@ -1839,6 +2004,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
		"perf sched replay [<options>]",
		NULL
	};
	const char * const map_usage[] = {
		"perf sched map [<options>]",
		NULL
	};
	const char *const sched_subcommands[] = { "record", "latency", "map",
						  "replay", "script", NULL };
	const char *sched_usage[] = {
@@ -1887,6 +2056,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__lat(&sched);
	} else if (!strcmp(argv[0], "map")) {
		if (argc) {
			argc = parse_options(argc, argv, map_options, map_usage, 0);
			if (argc)
				usage_with_options(map_usage, map_options);
		}
		sched.tp_handler = &map_ops;
		setup_sorting(&sched, latency_options, latency_usage);
		return perf_sched__map(&sched);
+7 −7
Original line number Diff line number Diff line
@@ -317,19 +317,19 @@ static void set_print_ip_opts(struct perf_event_attr *attr)

	output[type].print_ip_opts = 0;
	if (PRINT_FIELD(IP))
		output[type].print_ip_opts |= PRINT_IP_OPT_IP;
		output[type].print_ip_opts |= EVSEL__PRINT_IP;

	if (PRINT_FIELD(SYM))
		output[type].print_ip_opts |= PRINT_IP_OPT_SYM;
		output[type].print_ip_opts |= EVSEL__PRINT_SYM;

	if (PRINT_FIELD(DSO))
		output[type].print_ip_opts |= PRINT_IP_OPT_DSO;
		output[type].print_ip_opts |= EVSEL__PRINT_DSO;

	if (PRINT_FIELD(SYMOFFSET))
		output[type].print_ip_opts |= PRINT_IP_OPT_SYMOFFSET;
		output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET;

	if (PRINT_FIELD(SRCLINE))
		output[type].print_ip_opts |= PRINT_IP_OPT_SRCLINE;
		output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE;
}

/*
@@ -574,9 +574,9 @@ static void print_sample_bts(struct perf_sample *sample,
			printf("\n");
		} else {
			printf(" ");
			if (print_opts & PRINT_IP_OPT_SRCLINE) {
			if (print_opts & EVSEL__PRINT_SRCLINE) {
				print_srcline_last = true;
				print_opts &= ~PRINT_IP_OPT_SRCLINE;
				print_opts &= ~EVSEL__PRINT_SRCLINE;
			}
		}
		perf_evsel__fprintf_sym(evsel, sample, al, 0, print_opts,
+33 −15
Original line number Diff line number Diff line
@@ -2114,6 +2114,28 @@ out_put:
	return err;
}

/*
 * Print the callchain attached to 'sample' to trace->output.
 *
 * Returns 0 when the sample carries no callchain or when it cannot be
 * resolved (an error is logged in that case); otherwise returns the result
 * of perf_evsel__fprintf_callchain().
 */
static int trace__fprintf_callchain(struct trace *trace, struct perf_evsel *evsel,
				    struct perf_sample *sample)
{
	/* TODO: user-configurable print_opts */
	const unsigned int print_opts = EVSEL__PRINT_SYM |
				        EVSEL__PRINT_DSO |
				        EVSEL__PRINT_UNKNOWN_AS_ADDR;
	struct addr_location al;

	if (!sample->callchain)
		return 0;

	if (machine__resolve(trace->host, &al, sample) >= 0) {
		return perf_evsel__fprintf_callchain(evsel, sample, &al, 38, print_opts,
						     scripting_max_stack, trace->output);
	}

	pr_err("Problem processing %s callchain, skipping...\n",
		perf_evsel__name(evsel));
	return 0;
}

static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
@@ -2193,21 +2215,7 @@ signed_print:

	fputc('\n', trace->output);

	if (sample->callchain) {
		struct addr_location al;
		/* TODO: user-configurable print_opts */
		const unsigned int print_opts = PRINT_IP_OPT_SYM |
					        PRINT_IP_OPT_DSO |
					        PRINT_IP_OPT_UNKNOWN_AS_ADDR;

		if (machine__resolve(trace->host, &al, sample) < 0) {
			pr_err("problem processing %d event, skipping it.\n",
			       event->header.type);
			goto out_put;
		}
		perf_evsel__fprintf_callchain(evsel, sample, &al, 38, print_opts,
					      scripting_max_stack, trace->output);
	}
	trace__fprintf_callchain(trace, evsel, sample);
out:
	ttrace->entry_pending = false;
	err = 0;
@@ -2355,6 +2363,9 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
	}

	fprintf(trace->output, ")\n");

	trace__fprintf_callchain(trace, evsel, sample);

	return 0;
}

@@ -3333,6 +3344,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
		goto out;
	}

	err = -1;

	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
@@ -3357,6 +3370,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
		return -1;
	}

	if (!trace.trace_syscalls && ev_qualifier_str) {
		pr_err("The -e option can't be used with --no-syscalls.\n");
		goto out;
	}

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
+12 −0
Original line number Diff line number Diff line
@@ -587,3 +587,15 @@ int cpu__setup_cpunode_map(void)
	closedir(dir1);
	return 0;
}

/* Return true if 'cpu' is one of the cpus->nr entries of cpus->map. */
bool cpu_map__has(struct cpu_map *cpus, int cpu)
{
	int idx = 0;

	while (idx < cpus->nr) {
		if (cpus->map[idx] == cpu)
			return true;
		idx++;
	}

	return false;
}
Loading