Commit 4fc4d8df authored by Jin Yao's avatar Jin Yao Committed by Arnaldo Carvalho de Melo
Browse files

perf stat: Support 'percore' event qualifier



With this patch, we can use the 'percore' event qualifier in perf-stat.

  root@skl:/tmp# perf stat -e cpu/event=0,umask=0x3,percore=1/,cpu/event=0,umask=0x3/ -a -A -I1000
    1.000773050 S0-C0   98,352,832 cpu/event=0,umask=0x3,percore=1/  (50.01%)
    1.000773050 S0-C1  103,763,057 cpu/event=0,umask=0x3,percore=1/  (50.02%)
    1.000773050 S0-C2  196,776,995 cpu/event=0,umask=0x3,percore=1/  (50.02%)
    1.000773050 S0-C3  176,493,779 cpu/event=0,umask=0x3,percore=1/  (50.02%)
    1.000773050 CPU0    47,699,641 cpu/event=0,umask=0x3/            (50.02%)
    1.000773050 CPU1    49,052,451 cpu/event=0,umask=0x3/            (49.98%)
    1.000773050 CPU2   102,771,422 cpu/event=0,umask=0x3/            (49.98%)
    1.000773050 CPU3   100,784,662 cpu/event=0,umask=0x3/            (49.98%)
    1.000773050 CPU4    43,171,342 cpu/event=0,umask=0x3/            (49.98%)
    1.000773050 CPU5    54,152,158 cpu/event=0,umask=0x3/            (49.98%)
    1.000773050 CPU6    93,618,410 cpu/event=0,umask=0x3/            (49.98%)
    1.000773050 CPU7    74,477,589 cpu/event=0,umask=0x3/            (49.99%)

In this example, we count the event 'ref-cycles' per-core and per-CPU in
one perf stat command-line. From the output, we can see:

  S0-C0 = CPU0 + CPU4
  S0-C1 = CPU1 + CPU5
  S0-C2 = CPU2 + CPU6
  S0-C3 = CPU3 + CPU7

So the result is expected (tiny difference is ignored).

Note that, the 'percore' event qualifier needs to use with option '-A'.

Signed-off-by: default avatarJin Yao <yao.jin@linux.intel.com>
Tested-by: default avatarRavi Bangoria <ravi.bangoria@linux.ibm.com>
Acked-by: default avatarJiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1555077590-27664-4-git-send-email-yao.jin@linux.intel.com


Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent 40480a81
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -43,6 +43,10 @@ report::
	  param1 and param2 are defined as formats for the PMU in
	  /sys/bus/event_source/devices/<pmu>/format/*

	  'percore' is a event qualifier that sums up the event counts for both
	  hardware threads in a core. For example:
	  perf stat -A -a -e cpu/event,percore=1/,otherevent ...

	- a symbolically formed event like 'pmu/config=M,config1=N,config2=K/'
	  where M, N, K are numbers (in decimal, hex, octal format).
	  Acceptable values for each of 'config', 'config1' and 'config2'
+21 −0
Original line number Diff line number Diff line
@@ -847,6 +847,18 @@ static int perf_stat__get_core_cached(struct perf_stat_config *config,
	return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
}

static bool term_percore_set(void)
{
	struct perf_evsel *counter;

	evlist__for_each_entry(evsel_list, counter) {
		if (counter->percore)
			return true;
	}

	return false;
}

static int perf_stat_init_aggr_mode(void)
{
	int nr;
@@ -867,6 +879,15 @@ static int perf_stat_init_aggr_mode(void)
		stat_config.aggr_get_id = perf_stat__get_core_cached;
		break;
	case AGGR_NONE:
		if (term_percore_set()) {
			if (cpu_map__build_core_map(evsel_list->cpus,
						    &stat_config.aggr_map)) {
				perror("cannot build core map");
				return -1;
			}
			stat_config.aggr_get_id = perf_stat__get_core_cached;
		}
		break;
	case AGGR_GLOBAL:
	case AGGR_THREAD:
	case AGGR_UNSET:
+39 −4
Original line number Diff line number Diff line
@@ -88,9 +88,17 @@ static void aggr_printout(struct perf_stat_config *config,
			config->csv_sep);
			break;
	case AGGR_NONE:
		if (evsel->percore) {
			fprintf(config->output, "S%d-C%*d%s",
				cpu_map__id_to_socket(id),
				config->csv_output ? 0 : -5,
				cpu_map__id_to_cpu(id), config->csv_sep);
		} else {
			fprintf(config->output, "CPU%*d%s ",
			config->csv_output ? 0 : -4,
			perf_evsel__cpus(evsel)->map[id], config->csv_sep);
				config->csv_output ? 0 : -5,
				perf_evsel__cpus(evsel)->map[id],
				config->csv_sep);
		}
		break;
	case AGGR_THREAD:
		fprintf(config->output, "%*s-%*d%s",
@@ -1103,6 +1111,30 @@ static void print_footer(struct perf_stat_config *config)
			"the same PMU. Try reorganizing the group.\n");
}

static void print_percore(struct perf_stat_config *config,
			  struct perf_evsel *counter, char *prefix)
{
	bool metric_only = config->metric_only;
	FILE *output = config->output;
	int s;
	bool first = true;

	if (!(config->aggr_map || config->aggr_get_id))
		return;

	for (s = 0; s < config->aggr_map->nr; s++) {
		if (prefix && metric_only)
			fprintf(output, "%s", prefix);

		print_counter_aggrdata(config, counter, s,
				       prefix, metric_only,
				       &first);
	}

	if (metric_only)
		fputc('\n', output);
}

void
perf_evlist__print_counters(struct perf_evlist *evlist,
			    struct perf_stat_config *config,
@@ -1153,6 +1185,9 @@ perf_evlist__print_counters(struct perf_evlist *evlist,
			print_no_aggr_metric(config, evlist, prefix);
		else {
			evlist__for_each_entry(evlist, counter) {
				if (counter->percore)
					print_percore(config, counter, prefix);
				else
					print_counter(config, counter, prefix);
			}
		}
+5 −3
Original line number Diff line number Diff line
@@ -277,9 +277,11 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
		if (!evsel->snapshot)
			perf_evsel__compute_deltas(evsel, cpu, thread, count);
		perf_counts_values__scale(count, config->scale, NULL);
		if (config->aggr_mode == AGGR_NONE)
			perf_stat__update_shadow_stats(evsel, count->val, cpu,
						       &rt_stat);
		if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
			perf_stat__update_shadow_stats(evsel, count->val,
						       cpu, &rt_stat);
		}

		if (config->aggr_mode == AGGR_THREAD) {
			if (config->stats)
				perf_stat__update_shadow_stats(evsel,