Commit 4804e011 authored by Andi Kleen, committed by Arnaldo Carvalho de Melo

perf stat: Use affinity for opening events



Restructure the event opening in perf stat to cycle through the events
by CPU after setting affinity to that CPU.

This eliminates IPI overhead in the perf API.

We have to loop through the CPUs in the outer builtin-stat code instead
of leaving that to the low level functions.
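
As a rough standalone sketch of the pattern (not the perf tool code itself;
the event choice, privileges and error handling are purely illustrative), the
idea is to pin the opening thread to each CPU before calling
perf_event_open() for that CPU, so the event setup runs locally instead of
being bounced to the target CPU through an IPI:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
			       int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	int cpu;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	for (cpu = 0; cpu < ncpus; cpu++) {
		cpu_set_t set;
		int fd;

		/* Bind to the target CPU first: the open is then CPU-local. */
		CPU_ZERO(&set);
		CPU_SET(cpu, &set);
		if (sched_setaffinity(0, sizeof(set), &set))
			continue;

		/* Open a cycles counter on this CPU, counting all tasks;
		 * may fail without sufficient privileges. */
		fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
		if (fd < 0)
			perror("perf_event_open");
		else
			close(fd);
	}
	return 0;
}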

This requires changing the weak group fallback strategy slightly: since we
cannot easily undo the opens already done for other CPUs, the weak group
retry is moved to a separate loop.
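
A much simplified, self-contained illustration of that two-pass fallback
(toy structures and a stubbed open, not the real evsel/evlist code): the
first per-CPU pass only marks a failed weak group for retry, and a second
per-CPU pass reopens the marked events ungrouped:

#include <stdbool.h>
#include <stdio.h>

#define NCPUS	4
#define NEVENTS	3

struct toy_event {
	const char *name;
	bool weak_group;	/* may fall back to non-group opens */
	bool reset_group;	/* marked in pass 1, reopened in pass 2 */
	int fd[NCPUS];
};

/* Stub for perf_event_open(): pretend grouped opens fail on CPU 2. */
static int toy_open(struct toy_event *ev, int cpu, bool grouped)
{
	if (grouped && ev->weak_group && cpu == 2)
		return -1;
	return 100 + cpu;	/* fake fd */
}

int main(void)
{
	struct toy_event evs[NEVENTS] = {
		{ .name = "cycles",	  .weak_group = true  },
		{ .name = "instructions", .weak_group = true  },
		{ .name = "cache-misses", .weak_group = false },
	};
	bool second_pass = false;
	int cpu, i;

	/* Pass 1: open per CPU; on weak group failure only mark and defer. */
	for (cpu = 0; cpu < NCPUS; cpu++) {
		/* (the real code sets affinity to 'cpu' here) */
		for (i = 0; i < NEVENTS; i++) {
			if (evs[i].reset_group)
				continue;
			evs[i].fd[cpu] = toy_open(&evs[i], cpu, true);
			if (evs[i].fd[cpu] < 0 && evs[i].weak_group) {
				evs[i].reset_group = true;
				second_pass = true;
			}
		}
	}

	/*
	 * Pass 2: walk the CPUs again and reopen only the marked events,
	 * this time ungrouped (the real code also closes whatever pass 1
	 * already opened for them before reopening).
	 */
	if (second_pass) {
		for (cpu = 0; cpu < NCPUS; cpu++) {
			for (i = 0; i < NEVENTS; i++) {
				if (!evs[i].reset_group)
					continue;
				evs[i].fd[cpu] = toy_open(&evs[i], cpu, false);
			}
		}
	}

	for (i = 0; i < NEVENTS; i++)
		printf("%-13s reopened ungrouped: %s\n", evs[i].name,
		       evs[i].reset_group ? "yes" : "no");
	return 0;
}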

Before with a large test case with 94 CPUs:

  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
   42.75    4.050910          67     60046       110 perf_event_open

After:

   26.86    0.944396          16     58069       110 perf_event_open

(the call count changes slightly because the weak group retries now work
differently and the test case relies on weak groups)
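
In per-call terms that is roughly 4.05 s / 60046 calls = ~67 usecs per
perf_event_open() before versus 0.94 s / 58069 calls = ~16 usecs after,
about a 4x reduction, matching the usecs/call column above.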

Committer notes:

Added one of the hunks in a patch provided by Andi after I noticed that
the "event times" 'perf test' entry was segfaulting.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lore.kernel.org/lkml/20191121001522.180827-10-andi@firstfloor.org
Link: http://lore.kernel.org/lkml/20191127232657.GL84886@tassilo.jf.intel.com # Fix
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent e0e6a6ca
+1 −1
@@ -832,7 +832,7 @@ try_again:
			if ((errno == EINVAL || errno == EBADF) &&
			    pos->leader != pos &&
			    pos->weak_group) {
-			        pos = perf_evlist__reset_weak_group(evlist, pos);
+			        pos = perf_evlist__reset_weak_group(evlist, pos, true);
				goto try_again;
			}
			rc = -errno;
+102 −19
@@ -65,6 +65,7 @@
#include "util/target.h"
#include "util/time-utils.h"
#include "util/top.h"
#include "util/affinity.h"
#include "asm/bug.h"

#include <linux/time64.h>
@@ -440,6 +441,11 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
			ui__warning("%s event is not supported by the kernel.\n",
				    perf_evsel__name(counter));
		counter->supported = false;
		/*
		 * errored is a sticky flag that means one of the counter's
		 * cpu event had a problem and needs to be reexamined.
		 */
		counter->errored = true;

		if ((counter->leader != counter) ||
		    !(counter->leader->core.nr_members > 1))
@@ -484,6 +490,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
	int status = 0;
	const bool forks = (argc > 0);
	bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
	struct affinity affinity;
	int i, cpu;
	bool second_pass = false;

	if (interval) {
		ts.tv_sec  = interval / USEC_PER_MSEC;
@@ -508,16 +517,35 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
	if (group)
		perf_evlist__set_leader(evsel_list);

	if (affinity__setup(&affinity) < 0)
		return -1;

	evlist__for_each_cpu (evsel_list, i, cpu) {
		affinity__set(&affinity, cpu);

		evlist__for_each_entry(evsel_list, counter) {
			if (evsel__cpu_iter_skip(counter, cpu))
				continue;
			if (counter->reset_group || counter->errored)
				continue;
try_again:
-		if (create_perf_stat_counter(counter, &stat_config, &target) < 0) {
+			if (create_perf_stat_counter(counter, &stat_config, &target,
+						     counter->cpu_iter - 1) < 0) {

-			/* Weak group failed. Reset the group. */
+				/*
+				 * Weak group failed. We cannot just undo this here
+				 * because earlier CPUs might be in group mode, and the kernel
+				 * doesn't support mixing group and non group reads. Defer
+				 * it to later.
+				 * Don't close here because we're in the wrong affinity.
+				 */
				if ((errno == EINVAL || errno == EBADF) &&
				    counter->leader != counter &&
				    counter->weak_group) {
-				counter = perf_evlist__reset_weak_group(evsel_list, counter);
-				goto try_again;
+					perf_evlist__reset_weak_group(evsel_list, counter, false);
+					assert(counter->reset_group);
+					second_pass = true;
+					continue;
				}

				switch (stat_handle_error(counter)) {
@@ -530,8 +558,63 @@ try_again:
				default:
					break;
				}

			}
			counter->supported = true;
		}
	}

	if (second_pass) {
		/*
		 * Now redo all the weak group after closing them,
		 * and also close errored counters.
		 */

		evlist__for_each_cpu(evsel_list, i, cpu) {
			affinity__set(&affinity, cpu);
			/* First close errored or weak retry */
			evlist__for_each_entry(evsel_list, counter) {
				if (!counter->reset_group && !counter->errored)
					continue;
				if (evsel__cpu_iter_skip_no_inc(counter, cpu))
					continue;
				perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
			}
			/* Now reopen weak */
			evlist__for_each_entry(evsel_list, counter) {
				if (!counter->reset_group && !counter->errored)
					continue;
				if (evsel__cpu_iter_skip(counter, cpu))
					continue;
				if (!counter->reset_group)
					continue;
try_again_reset:
				pr_debug2("reopening weak %s\n", perf_evsel__name(counter));
				if (create_perf_stat_counter(counter, &stat_config, &target,
							     counter->cpu_iter - 1) < 0) {

					switch (stat_handle_error(counter)) {
					case COUNTER_FATAL:
						return -1;
					case COUNTER_RETRY:
						goto try_again_reset;
					case COUNTER_SKIP:
						continue;
					default:
						break;
					}
				}
				counter->supported = true;
			}
		}
	}
	affinity__cleanup(&affinity);

	evlist__for_each_entry(evsel_list, counter) {
		if (!counter->supported) {
			perf_evsel__free_fd(&counter->core);
			continue;
		}

		l = strlen(counter->unit);
		if (l > stat_config.unit_width)
+2 −2
@@ -125,7 +125,7 @@ static int attach__cpu_disabled(struct evlist *evlist)

	evsel->core.attr.disabled = 1;

-	err = perf_evsel__open_per_cpu(evsel, cpus);
+	err = perf_evsel__open_per_cpu(evsel, cpus, -1);
	if (err) {
		if (err == -EACCES)
			return TEST_SKIP;
@@ -152,7 +152,7 @@ static int attach__cpu_enabled(struct evlist *evlist)
		return -1;
	}

-	err = perf_evsel__open_per_cpu(evsel, cpus);
+	err = perf_evsel__open_per_cpu(evsel, cpus, -1);
	if (err == -EACCES)
		return TEST_SKIP;

+8 −2
@@ -1636,7 +1636,8 @@ void perf_evlist__force_leader(struct evlist *evlist)
}

struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
-						 struct evsel *evsel)
+						 struct evsel *evsel,
+						 bool close)
{
	struct evsel *c2, *leader;
	bool is_open = true;
@@ -1653,10 +1654,15 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
-			if (is_open)
+			if (is_open && close)
				perf_evsel__close(&c2->core);
			c2->leader = c2;
			c2->core.nr_members = 0;
			/*
			 * Set this for all former members of the group
			 * to indicate they get reopened.
			 */
			c2->reset_group = true;
		}
	}
	return leader;
+2 −1
@@ -356,5 +356,6 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist);
void perf_evlist__force_leader(struct evlist *evlist);

struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
-						 struct evsel *evsel);
+						 struct evsel *evsel,
+						 bool close);
#endif /* __PERF_EVLIST_H */