Commit 42bbabed authored by Kan Liang, committed by Arnaldo Carvalho de Melo
Browse files

perf tools: Add hw_idx in struct branch_stack



The low level index of raw branch records for the most recent branch can
be recorded in a sample with PERF_SAMPLE_BRANCH_HW_INDEX
branch_sample_type. Extend struct branch_stack to support it.

However, if PERF_SAMPLE_BRANCH_HW_INDEX is not set, only nr and
entries[] are output by the kernel. The pointer to entries[] could then
be wrong, since the output format differs from the new struct
branch_stack.  Add a variable no_hw_idx in struct perf_sample to
indicate whether the hw_idx is output.  Add get_branch_entry() to return
the corresponding pointer to entries[0].

To make the dummy branch sample consistent with the new branch sample
format, add hw_idx to struct dummy_branch_stack for cs-etm and intel-pt.

Apply the new struct branch_stack for synthetic events as well.

Extend test case sample-parsing to support new struct branch_stack.

Committer notes:

Renamed get_branch_entries() to perf_sample__branch_entries() to have
proper namespacing and pave the way for this to be moved to libperf,
eventually.

Add 'static' to that inline as it is in a header.

Add 'hw_idx' to 'struct dummy_branch_stack' in cs-etm.c to fix the build
on arm64.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Pavel Gerasimov <pavel.gerasimov@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vitaly Slobodskoy <vitaly.slobodskoy@intel.com>
Link: http://lore.kernel.org/lkml/20200228163011.19358-2-kan.liang@linux.intel.com


Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
parent 6339998d
Loading
Loading
Loading
Loading
+37 −33
Original line number Diff line number Diff line
@@ -735,6 +735,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
					struct perf_event_attr *attr, FILE *fp)
{
	struct branch_stack *br = sample->branch_stack;
	struct branch_entry *entries = perf_sample__branch_entries(sample);
	struct addr_location alf, alt;
	u64 i, from, to;
	int printed = 0;
@@ -743,8 +744,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
		return 0;

	for (i = 0; i < br->nr; i++) {
		from = br->entries[i].from;
		to   = br->entries[i].to;
		from = entries[i].from;
		to   = entries[i].to;

		if (PRINT_FIELD(DSO)) {
			memset(&alf, 0, sizeof(alf));
@@ -768,10 +769,10 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
		}

		printed += fprintf(fp, "/%c/%c/%c/%d ",
			mispred_str( br->entries + i),
			br->entries[i].flags.in_tx? 'X' : '-',
			br->entries[i].flags.abort? 'A' : '-',
			br->entries[i].flags.cycles);
			mispred_str(entries + i),
			entries[i].flags.in_tx ? 'X' : '-',
			entries[i].flags.abort ? 'A' : '-',
			entries[i].flags.cycles);
	}

	return printed;
@@ -782,6 +783,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
					   struct perf_event_attr *attr, FILE *fp)
{
	struct branch_stack *br = sample->branch_stack;
	struct branch_entry *entries = perf_sample__branch_entries(sample);
	struct addr_location alf, alt;
	u64 i, from, to;
	int printed = 0;
@@ -793,8 +795,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,

		memset(&alf, 0, sizeof(alf));
		memset(&alt, 0, sizeof(alt));
		from = br->entries[i].from;
		to   = br->entries[i].to;
		from = entries[i].from;
		to   = entries[i].to;

		thread__find_symbol_fb(thread, sample->cpumode, from, &alf);
		thread__find_symbol_fb(thread, sample->cpumode, to, &alt);
@@ -813,10 +815,10 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
			printed += fprintf(fp, ")");
		}
		printed += fprintf(fp, "/%c/%c/%c/%d ",
			mispred_str( br->entries + i),
			br->entries[i].flags.in_tx? 'X' : '-',
			br->entries[i].flags.abort? 'A' : '-',
			br->entries[i].flags.cycles);
			mispred_str(entries + i),
			entries[i].flags.in_tx ? 'X' : '-',
			entries[i].flags.abort ? 'A' : '-',
			entries[i].flags.cycles);
	}

	return printed;
@@ -827,6 +829,7 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
					   struct perf_event_attr *attr, FILE *fp)
{
	struct branch_stack *br = sample->branch_stack;
	struct branch_entry *entries = perf_sample__branch_entries(sample);
	struct addr_location alf, alt;
	u64 i, from, to;
	int printed = 0;
@@ -838,8 +841,8 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,

		memset(&alf, 0, sizeof(alf));
		memset(&alt, 0, sizeof(alt));
		from = br->entries[i].from;
		to   = br->entries[i].to;
		from = entries[i].from;
		to   = entries[i].to;

		if (thread__find_map_fb(thread, sample->cpumode, from, &alf) &&
		    !alf.map->dso->adjust_symbols)
@@ -862,10 +865,10 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
			printed += fprintf(fp, ")");
		}
		printed += fprintf(fp, "/%c/%c/%c/%d ",
			mispred_str(br->entries + i),
			br->entries[i].flags.in_tx ? 'X' : '-',
			br->entries[i].flags.abort ? 'A' : '-',
			br->entries[i].flags.cycles);
			mispred_str(entries + i),
			entries[i].flags.in_tx ? 'X' : '-',
			entries[i].flags.abort ? 'A' : '-',
			entries[i].flags.cycles);
	}

	return printed;
@@ -1053,6 +1056,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
					    struct machine *machine, FILE *fp)
{
	struct branch_stack *br = sample->branch_stack;
	struct branch_entry *entries = perf_sample__branch_entries(sample);
	u64 start, end;
	int i, insn, len, nr, ilen, printed = 0;
	struct perf_insn x;
@@ -1073,31 +1077,31 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
	printed += fprintf(fp, "%c", '\n');

	/* Handle first from jump, of which we don't know the entry. */
	len = grab_bb(buffer, br->entries[nr-1].from,
			br->entries[nr-1].from,
	len = grab_bb(buffer, entries[nr-1].from,
			entries[nr-1].from,
			machine, thread, &x.is64bit, &x.cpumode, false);
	if (len > 0) {
		printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
		printed += ip__fprintf_sym(entries[nr - 1].from, thread,
					   x.cpumode, x.cpu, &lastsym, attr, fp);
		printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
		printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1],
					    &x, buffer, len, 0, fp, &total_cycles);
		if (PRINT_FIELD(SRCCODE))
			printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from);
			printed += print_srccode(thread, x.cpumode, entries[nr - 1].from);
	}

	/* Print all blocks */
	for (i = nr - 2; i >= 0; i--) {
		if (br->entries[i].from || br->entries[i].to)
		if (entries[i].from || entries[i].to)
			pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i,
				 br->entries[i].from,
				 br->entries[i].to);
		start = br->entries[i + 1].to;
		end   = br->entries[i].from;
				 entries[i].from,
				 entries[i].to);
		start = entries[i + 1].to;
		end   = entries[i].from;

		len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
		/* Patch up missing kernel transfers due to ring filters */
		if (len == -ENXIO && i > 0) {
			end = br->entries[--i].from;
			end = entries[--i].from;
			pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end);
			len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
		}
@@ -1110,7 +1114,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,

			printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
			if (ip == end) {
				printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp,
				printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp,
							    &total_cycles);
				if (PRINT_FIELD(SRCCODE))
					printed += print_srccode(thread, x.cpumode, ip);
@@ -1134,9 +1138,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
	 * Hit the branch? In this case we are already done, and the target
	 * has not been executed yet.
	 */
	if (br->entries[0].from == sample->ip)
	if (entries[0].from == sample->ip)
		goto out;
	if (br->entries[0].flags.abort)
	if (entries[0].flags.abort)
		goto out;

	/*
@@ -1147,7 +1151,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
	 * between final branch and sample. When this happens just
	 * continue walking after the last TO until we hit a branch.
	 */
	start = br->entries[0].to;
	start = entries[0].to;
	end = sample->ip;
	if (end < start) {
		/* Missing jump. Scan 128 bytes for the next branch */
+6 −1
Original line number Diff line number Diff line
@@ -99,6 +99,7 @@ static bool samples_same(const struct perf_sample *s1,

	if (type & PERF_SAMPLE_BRANCH_STACK) {
		COMP(branch_stack->nr);
		COMP(branch_stack->hw_idx);
		for (i = 0; i < s1->branch_stack->nr; i++)
			MCOMP(branch_stack->entries[i]);
	}
@@ -186,7 +187,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
		u64 data[64];
	} branch_stack = {
		/* 1 branch_entry */
		.data = {1, 211, 212, 213},
		.data = {1, -1ULL, 211, 212, 213},
	};
	u64 regs[64];
	const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL};
@@ -208,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
		.transaction	= 112,
		.raw_data	= (void *)raw_data,
		.callchain	= &callchain.callchain,
		.no_hw_idx      = false,
		.branch_stack	= &branch_stack.branch_stack,
		.user_regs	= {
			.abi	= PERF_SAMPLE_REGS_ABI_64,
@@ -244,6 +246,9 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
	if (sample_type & PERF_SAMPLE_REGS_INTR)
		evsel.core.attr.sample_regs_intr = sample_regs;

	if (sample_type & PERF_SAMPLE_BRANCH_STACK)
		evsel.core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;

	for (i = 0; i < sizeof(regs); i++)
		*(i + (u8 *)regs) = i & 0xfe;

+22 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include <linux/stddef.h>
#include <linux/perf_event.h>
#include <linux/types.h>
#include "event.h"

struct branch_flags {
	u64 mispred:1;
@@ -39,9 +40,30 @@ struct branch_entry {

/*
 * Branch stack attached to a sample.
 *
 * @nr:      number of valid elements in @entries.
 * @hw_idx:  low level index of the raw branch record for the most recent
 *           branch; only present in the sample data when
 *           PERF_SAMPLE_BRANCH_HW_INDEX was requested.
 * @entries: the branch records themselves, @nr of them.
 *
 * Because @hw_idx may be absent from the raw sample, do not index
 * @entries directly on data that came from the kernel; obtain the entry
 * pointer through perf_sample__branch_entries() instead.
 *
 * @entries is a C99 flexible array member (rather than the GNU
 * zero-length array extension); sizeof(struct branch_stack) is unchanged.
 */
struct branch_stack {
	u64			nr;
	u64			hw_idx;
	struct branch_entry	entries[];
};

/*
 * perf_sample__branch_entries - return the first branch entry of a sample.
 *
 * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
 * Otherwise, the output format of a sample with branch stack is
 * struct branch_stack {
 *	u64			nr;
 *	struct branch_entry	entries[0];
 * }
 * i.e. the entries start one u64 after the beginning of the data instead
 * of two.  sample->no_hw_idx tells which of the two layouts is in use.
 *
 * Returns a pointer to entries[0] for the layout in use, or NULL when the
 * sample carries no branch stack (sample->branch_stack is NULL), so that
 * callers which fetch the pointer before validating the sample never end
 * up holding a bogus non-NULL address.
 */
static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
{
	u64 *entry = (u64 *)sample->branch_stack;

	/*
	 * Skip 'nr', and 'hw_idx' as well when it was recorded.  A NULL
	 * branch stack is passed through untouched: arithmetic on a NULL
	 * pointer is undefined behaviour.
	 */
	if (entry)
		entry += sample->no_hw_idx ? 1 : 2;

	return (struct branch_entry *)entry;
}

struct branch_type_stat {
	bool	branch_to;
	u64	counts[PERF_BR_MAX];
+2 −0
Original line number Diff line number Diff line
@@ -1172,6 +1172,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
	union perf_event *event = tidq->event_buf;
	struct dummy_branch_stack {
		u64			nr;
		u64			hw_idx;
		struct branch_entry	entries;
	} dummy_bs;
	u64 ip;
@@ -1202,6 +1203,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
	if (etm->synth_opts.last_branch) {
		dummy_bs = (struct dummy_branch_stack){
			.nr = 1,
			.hw_idx = -1ULL,
			.entries = {
				.from = sample.ip,
				.to = sample.addr,
+1 −0
Original line number Diff line number Diff line
@@ -139,6 +139,7 @@ struct perf_sample {
	u16 insn_len;
	u8  cpumode;
	u16 misc;
	bool no_hw_idx;		/* No hw_idx collected in branch_stack */
	char insn[MAX_INSN];
	void *raw_data;
	struct ip_callchain *callchain;
Loading