Commit d7e7a451 authored by Stephane Eranian, committed by Arnaldo Carvalho de Melo
Browse files

perf stat: Add per processor socket count aggregation



This patch adds per-processor socket count aggregation for system-wide
mode measurements. This is a useful mode to detect imbalance between
sockets.

To enable this mode, use --aggr-socket in addition
to -a (system-wide).

The output includes the socket number and the number of online
processors on that socket. This is useful to gauge the amount of
aggregation.

 # ./perf stat -I 1000 -a --aggr-socket -e cycles sleep 2
 #           time socket cpus             counts events
      1.000097680 S0        4          5,788,785 cycles
      2.000379943 S0        4         27,361,546 cycles
      2.001167808 S0        4            818,275 cycles

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1360161962-9675-3-git-send-email-eranian@google.com


[ committer note: Added missing man page entry based on above comments ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 5ac59a8a
Loading
Loading
Loading
Loading
+8 −1
Original line number Diff line number Diff line
@@ -116,9 +116,16 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m

-I msecs::
--interval-print msecs::
	print count deltas every N milliseconds (minimum: 100ms)
	Print count deltas every N milliseconds (minimum: 100ms)
	example: perf stat -I 1000 -e cycles -a sleep 5

--aggr-socket::
Aggregate counts per processor socket for system-wide mode measurements.  This
is a useful mode to detect imbalance between sockets.  To enable this mode,
use --aggr-socket in addition to -a (system-wide).  The output includes the
socket number and the number of online processors on that socket. This is
useful to gauge the amount of aggregation.

EXAMPLES
--------

+115 −11
Original line number Diff line number Diff line
@@ -68,6 +68,7 @@
static void print_stat(int argc, const char **argv);
static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
static void print_counter(struct perf_evsel *counter, char *prefix);
static void print_aggr_socket(char *prefix);

static struct perf_evlist	*evsel_list;

@@ -79,6 +80,7 @@ static int run_count = 1;
static bool			no_inherit			= false;
static bool			scale				=  true;
static bool			no_aggr				= false;
/* set by the --aggr-socket option: aggregate counts per processor socket */
static bool			aggr_socket			= false;
static pid_t			child_pid			= -1;
static bool			null_run			=  false;
static int			detailed_run			=  0;
@@ -93,6 +95,7 @@ static const char *post_cmd = NULL;
static bool			sync_run			= false;
static unsigned int		interval			= 0;
static struct timespec		ref_time;
/*
 * Socket map filled in by cpu_map__build_socket_map() from evsel_list->cpus
 * when aggr_socket is set; walked by print_aggr_socket().
 */
static struct cpu_map		*sock_map;

static volatile int done = 0;

@@ -312,7 +315,9 @@ static void print_interval(void)
	sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);

	if (num_print_interval == 0 && !csv_output) {
		if (no_aggr)
		if (aggr_socket)
			fprintf(output, "#           time socket cpus             counts events\n");
		else if (no_aggr)
			fprintf(output, "#           time CPU                 counts events\n");
		else
			fprintf(output, "#           time             counts events\n");
@@ -321,7 +326,9 @@ static void print_interval(void)
	if (++num_print_interval == 25)
		num_print_interval = 0;

	if (no_aggr) {
	if (aggr_socket)
		print_aggr_socket(prefix);
	else if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter(counter, prefix);
	} else {
@@ -349,6 +356,12 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
		ts.tv_nsec = 0;
	}

	if (aggr_socket
	    && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
		perror("cannot build socket map");
		return -1;
	}

	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
		perror("failed to create pipes");
		return -1;
@@ -529,13 +542,21 @@ static void print_noise(struct perf_evsel *evsel, double avg)
	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}

static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
	double msecs = avg / 1e6;
	char cpustr[16] = { '\0', };
	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";

	if (no_aggr)
	if (aggr_socket)
		sprintf(cpustr, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			cpu,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
	else if (no_aggr)
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -734,7 +755,7 @@ static void print_ll_cache_misses(int cpu,
	fprintf(output, " of all LL-cache hits   ");
}

static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
{
	double total, ratio = 0.0;
	char cpustr[16] = { '\0', };
@@ -747,7 +768,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
	else
		fmt = "%s%18.0f%s%-25s";

	if (no_aggr)
	if (aggr_socket)
		sprintf(cpustr, "S%*d%s%*d%s",
			csv_output ? 0 : -5,
			cpu,
			csv_sep,
			csv_output ? 0 : 4,
			nr,
			csv_sep);
	else if (no_aggr)
		sprintf(cpustr, "CPU%*d%s",
			csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -853,6 +882,70 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
	}
}

/*
 * Print all counters aggregated per processor socket.
 *
 * For every entry of sock_map and every counter on evsel_list, the per-cpu
 * val/ena/run values of the cpus whose socket matches are summed, and one
 * output line is emitted: the optional prefix, the socket id, the number of
 * cpus that were summed, and then either the aggregated count or the
 * CNTR_NOT_COUNTED / CNTR_NOT_SUPPORTED marker.
 *
 * @prefix: string written at the start of each line, or NULL for none.
 *
 * Does nothing when sock_map has not been built.
 */
static void print_aggr_socket(char *prefix)
{
	struct perf_evsel *counter;
	u64 ena, run, val;
	int cpu, s, s2, sock, nr;

	if (!sock_map)
		return;

	for (s = 0; s < sock_map->nr; s++) {
		/* socket id stored at index s of the socket map */
		sock = cpu_map__socket(sock_map, s);
		list_for_each_entry(counter, &evsel_list->entries, node) {
			val = ena = run = 0;
			nr = 0;
			/* sum only the cpus that belong to the current socket */
			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
				s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
				if (s2 != sock)
					continue;
				val += counter->counts->cpu[cpu].val;
				ena += counter->counts->cpu[cpu].ena;
				run += counter->counts->cpu[cpu].run;
				nr++;
			}
			if (prefix)
				fprintf(output, "%s", prefix);

			if (run == 0 || ena == 0) {
				/*
				 * Label the line with the socket id (sock), not
				 * the loop index, so it matches what
				 * nsec_printout()/abs_printout() print below.
				 */
				fprintf(output, "S%*d%s%*d%s%*s%s%*s",
					csv_output ? 0 : -5,
					sock,
					csv_sep,
					csv_output ? 0 : 4,
					nr,
					csv_sep,
					csv_output ? 0 : 18,
					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
					csv_sep,
					csv_output ? 0 : -24,
					perf_evsel__name(counter));
				if (counter->cgrp)
					fprintf(output, "%s%s",
						csv_sep, counter->cgrp->name);

				fputc('\n', output);
				continue;
			}

			if (nsec_counter(counter))
				nsec_printout(sock, nr, counter, val);
			else
				abs_printout(sock, nr, counter, val);

			if (!csv_output) {
				print_noise(counter, 1.0);

				/* counter was not running the whole time it was enabled */
				if (run != ena)
					fprintf(output, "  (%.2f%%)",
						100.0 * run / ena);
			}
			fputc('\n', output);
		}
	}
}

/*
 * Print out the results of a single counter:
 * aggregated counts in system-wide mode
@@ -882,9 +975,9 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
	}

	if (nsec_counter(counter))
		nsec_printout(-1, counter, avg);
		nsec_printout(-1, 0, counter, avg);
	else
		abs_printout(-1, counter, avg);
		abs_printout(-1, 0, counter, avg);

	print_noise(counter, avg);

@@ -940,9 +1033,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
		}

		if (nsec_counter(counter))
			nsec_printout(cpu, counter, val);
			nsec_printout(cpu, 0, counter, val);
		else
			abs_printout(cpu, counter, val);
			abs_printout(cpu, 0, counter, val);

		if (!csv_output) {
			print_noise(counter, 1.0);
@@ -980,7 +1073,9 @@ static void print_stat(int argc, const char **argv)
		fprintf(output, ":\n\n");
	}

	if (no_aggr) {
	if (aggr_socket)
		print_aggr_socket(NULL);
	else if (no_aggr) {
		list_for_each_entry(counter, &evsel_list->entries, node)
			print_counter(counter, NULL);
	} else {
@@ -1228,6 +1323,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
			"command to run after to the measured command"),
	OPT_UINTEGER('I', "interval-print", &interval,
		    "print counts at regular interval in ms (>= 100)"),
	OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
	OPT_END()
	};
	const char * const stat_usage[] = {
@@ -1314,6 +1410,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
		usage_with_options(stat_usage, options);
	}

	if (aggr_socket) {
		if (!perf_target__has_cpu(&target)) {
			fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
			usage_with_options(stat_usage, options);
		}
		no_aggr = true;
	}

	if (add_default_attributes())
		goto out;