Commit 4b49ab70 authored by Andi Kleen, committed by Arnaldo Carvalho de Melo

perf stat: Use affinity for reading



Restructure event reading to use affinity to minimize the number of IPIs
needed.

Before, on a large test case with 94 CPUs:

  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
    3.16    0.106079           4     22082           read

After:

    3.43    0.081295           3     22082           read
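
The before/after tables are strace -c summaries for the read syscall: the same 22082 reads are issued, but the cost per call drops, because a read issued on the CPU where the event is counting can be satisfied locally, while a cross-CPU read forces the kernel to interrupt the remote CPU (an IPI) to fetch the current count.

Below is a minimal, self-contained sketch of that idea, not code from this patch; the hard-coded CPU, the cycles event, and the perf_event_open() syscall wrapper are illustrative assumptions:

/*
 * Hypothetical standalone demo: read a per-CPU cycle counter while
 * pinned to that CPU, so the count can be read without an IPI.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	cpu_set_t mask;
	uint64_t count;
	int cpu = 0;	/* one CPU for brevity; perf stat walks all of them */
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	/* pid == -1, cpu >= 0: count everything that runs on this CPU */
	fd = perf_event_open(&attr, -1, cpu, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/* Pin the reading thread to the event's CPU first ... */
	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	sched_setaffinity(0, sizeof(mask), &mask);

	/* ... so this read() can be satisfied locally, without an IPI. */
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cpu%d cycles: %llu\n", cpu, (unsigned long long)count);
	close(fd);
	return 0;
}

perf stat amortizes the migration cost by pinning itself to each CPU once and reading every event active there before moving on, which is what the diff below implements.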

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lore.kernel.org/lkml/20191121001522.180827-11-andi@firstfloor.org


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 4804e011
tools/perf/builtin-stat.c  +56 −41  (this view hides whitespace-only re-indentation)

@@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
  */
-static int read_counter(struct evsel *counter, struct timespec *rs)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
 {
 	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
-	int ncpus, cpu, thread;
-
-	if (target__has_cpu(&target) && !target__has_per_thread(&target))
-		ncpus = perf_evsel__nr_cpus(counter);
-	else
-		ncpus = 1;
+	int thread;
 
 	if (!counter->supported)
 		return -ENOENT;
@@ -283,14 +278,13 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 		nthreads = 1;
 
 	for (thread = 0; thread < nthreads; thread++) {
-		for (cpu = 0; cpu < ncpus; cpu++) {
 		struct perf_counts_values *count;
 
 		count = perf_counts(counter->counts, cpu, thread);
 
 		/*
 		 * The leader's group read loads data into its group members
-		 * (via perf_evsel__read_counter) and sets threir count->loaded.
+		 * (via perf_evsel__read_counter()) and sets their count->loaded.
 		 */
 		if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
 		    read_single_counter(counter, cpu, thread, rs)) {
@@ -317,7 +311,6 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 					count->val, count->ena, count->run);
 		}
 	}
-	}
 
 	return 0;
 }
@@ -325,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 
 static void read_counters(struct timespec *rs)
 {
 	struct evsel *counter;
-	int ret;
+	struct affinity affinity;
+	int i, ncpus, cpu;
+
+	if (affinity__setup(&affinity) < 0)
+		return;
+
+	ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+	if (!target__has_cpu(&target) || target__has_per_thread(&target))
+		ncpus = 1;
+	evlist__for_each_cpu(evsel_list, i, cpu) {
+		if (i >= ncpus)
+			break;
+		affinity__set(&affinity, cpu);
+
+		evlist__for_each_entry(evsel_list, counter) {
+			if (evsel__cpu_iter_skip(counter, cpu))
+				continue;
+			if (!counter->err) {
+				counter->err = read_counter_cpu(counter, rs,
+								counter->cpu_iter - 1);
+			}
+		}
+	}
+	affinity__cleanup(&affinity);
 
 	evlist__for_each_entry(evsel_list, counter) {
-		ret = read_counter(counter, rs);
-		if (ret)
-			pr_debug("failed to read counter %s\n", counter->name);
-
-		if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+		if (counter->err)
+			pr_debug("failed to read counter %s\n", counter->name);
+		if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
			pr_warning("failed to process counter %s\n", counter->name);
+		counter->err = 0;
 	}
 }
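
The restructuring inverts the old nesting: read_counters() now walks CPUs in the outer loop, pins itself to each CPU once via affinity__set(), and reads only the events active on that CPU, with evsel__cpu_iter_skip() passing over events whose CPU map does not include it. Because each counter is now visited once per CPU rather than once in total, read failures are parked in counter->err, and the pr_debug()/perf_stat_process_counter() reporting runs in a separate per-counter pass once affinity__cleanup() has run.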

tools/perf/util/evsel.h  +1 −0

@@ -86,6 +86,7 @@ struct evsel {
 	struct list_head	config_terms;
 	struct bpf_object	*bpf_obj;
 	int			bpf_fd;
+	int			err;
 	bool			auto_merge_stats;
 	bool			merged_stat;
 	const char *		metric_expr;
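
The single new field in struct evsel is what makes the deferred reporting above work: err remembers a failed read across the per-CPU loop, and read_counters() resets it (counter->err = 0) after each reporting pass so the next read interval starts clean.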