Commit 7f4eb0a6 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "On the kernel side the main changes in this cycle were:

   - Add Intel Kaby Lake CPU support (Srinivas Pandruvada)

   - AMD uncore driver updates for fam17 (Janakarajan Natarajan)

   - Intel/PT updates and core events optimizations and cleanups
     (Alexander Shishkin)

   - cgroups events fixes (David Carrillo-Cisneros)

   - kprobes improvements (Masami Hiramatsu)

   - ... plus misc fixes and updates.

  On the tooling side the main changes were:

   - Support clang build in tools/{perf,lib/{bpf,traceevent,api}} with
     CC=clang, to, for instance, take advantage of better warnings
     (Arnaldo Carvalho de Melo)

   - Introduce the 'delta-abs' 'perf diff' compute method, that orders
     the histogram entries by the absolute value of the percentage delta
     for a function in two perf.data files, i.e. the functions that
     changed the most (increase or decrease in samples) come first
     (Namhyung Kim)

   - Add support for parsing Intel uncore vendor event files and add
     uncore vendor events for the Intel server processors (Haswell,
     Broadwell, IvyBridge), Xeon Phi (Knights Landing) and Broadwell DE
     (Andi Kleen)

   - Introduce 'perf ftrace', a perf front end to the kernel's ftrace
     function and function_graph tracer, defaulting to the
     "function_graph" tracer, more work will be done in reviving this
     effort, forward porting it from its initial patch submission
     (Namhyung Kim)

   - Add 'e' and 'c' hotkeys to expand/collapse call chains for a single
     hist entry in the 'perf report' and 'perf top' TUI (Jiri Olsa)

   - Account thread wait time (off CPU time) separately: sleep, iowait
     and preempt, based on the prev_state of the last event, show the
     breakdown when using "perf sched timehist --state" (Namhyung Kim)

   - Add more triggers to switch the output file (perf.data.TIMESTAMP).

     Now, in addition to switching to a different output file when
     receiving a SIGUSR2, one can also specify file size and time based
     triggers:

           perf record -a --switch-output=signal

     is equivalent to what we had before:

           perf record -a --switch-output

     While we can also ask for the file to be "sliced" by size, taking
     into account that that will happen only when we get woken up by the
     kernel, i.e. one has to take into account the --mmap-pages (the
     size of the perf mmap ring buffer):

           perf record -a --switch-output=2G

     will break the perf.data output into multiple files limited to 2GB
     of samples, right when generating the output.

     For time based samples, alarm() will be used, so to have 1 minute
     limited perf.data output files:

          perf record -a --switch-output=1m

     (Jiri Olsa)

   - Improve 'perf trace' (Arnaldo Carvalho de Melo)

   - 'perf kallsyms' toy tool to look for extended symbol information on
     the running kernel and demonstrate the machine/thread/symbol APIs
     for use in other tools, such as 'perf probe' (Arnaldo Carvalho de
     Melo)

   - ... plus tons of other changes, see the shortlog and Git log for
     details"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (131 commits)
  perf tools: Add missing parse_events_error() prototype
  perf pmu: Fix check for unset alias->unit array
  perf tools: Be consistent on the type of map->symbols[] interator
  perf intel pt decoder: clang has no -Wno-override-init
  perf evsel: Do not put a variable sized type not at the end of a struct
  perf probe: Avoid accessing uninitialized 'map' variable
  perf tools: Do not put a variable sized type not at the end of a struct
  perf record: Do not put a variable sized type not at the end of a struct
  perf tests: Synthesize struct instead of using field after variable sized type
  perf bench numa: Make sure dprintf() is not defined
  Revert "perf bench futex: Sanitize numeric parameters"
  tools lib subcmd: Make it an error to pass a signed value to OPTION_UINTEGER
  tools: Set the maximum optimization level according to the compiler being used
  tools: Suppress request for warning options not existent in clang
  samples/bpf: Reset global variables
  samples/bpf: Ignore already processed ELF sections
  samples/bpf: Add missing header
  perf symbols: dso->name is an array, no need to check it against NULL
  perf tests record: No need to test an array against NULL
  perf symbols: No need to check if sym->name is NULL
  ...
parents 32e2d7c8 0c8967c9
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -87,10 +87,12 @@ endif
ifneq ($(filter 4.%,$(MAKE_VERSION)),)	# make-4
ifneq ($(filter %s ,$(firstword x$(MAKEFLAGS))),)
  quiet=silent_
  tools_silent=s
endif
else					# make-3.8x
ifneq ($(filter s% -s%,$(MAKEFLAGS)),)
  quiet=silent_
  tools_silent=-s
endif
endif

@@ -1607,11 +1609,11 @@ image_name:
# Clear a bunch of variables before executing the submake
tools/: FORCE
	$(Q)mkdir -p $(objtree)/tools
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(shell cd $(objtree) && /bin/pwd) subdir=tools -C $(src)/tools/
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(shell cd $(objtree) && /bin/pwd) subdir=tools -C $(src)/tools/

tools/%: FORCE
	$(Q)mkdir -p $(objtree)/tools
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(shell cd $(objtree) && /bin/pwd) subdir=tools -C $(src)/tools/ $*
	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(shell cd $(objtree) && /bin/pwd) subdir=tools -C $(src)/tools/ $*

# Single targets
# ---------------------------------------------------------------------------
+1 −1
Original line number Diff line number Diff line
@@ -96,7 +96,7 @@ config ARM64
	select HAVE_RCU_TABLE_FREE
	select HAVE_SYSCALL_TRACEPOINTS
	select HAVE_KPROBES
	select HAVE_KRETPROBES if HAVE_KPROBES
	select HAVE_KRETPROBES
	select IOMMU_DMA if IOMMU_SUPPORT
	select IRQ_DOMAIN
	select IRQ_FORCED_THREADING
+3 −10
Original line number Diff line number Diff line
obj-y					+= core.o

obj-$(CONFIG_CPU_SUP_AMD)               += amd/core.o amd/uncore.o
obj-$(CONFIG_PERF_EVENTS_AMD_POWER)	+= amd/power.o
obj-$(CONFIG_X86_LOCAL_APIC)            += amd/ibs.o msr.o
ifdef CONFIG_AMD_IOMMU
obj-$(CONFIG_CPU_SUP_AMD)               += amd/iommu.o
endif

obj-$(CONFIG_CPU_SUP_INTEL)		+= msr.o
obj-y					+= amd/
obj-$(CONFIG_X86_LOCAL_APIC)            += msr.o
obj-$(CONFIG_CPU_SUP_INTEL)		+= intel/
+7 −0
Original line number Diff line number Diff line
obj-$(CONFIG_CPU_SUP_AMD)		+= core.o uncore.o
obj-$(CONFIG_PERF_EVENTS_AMD_POWER)	+= power.o
obj-$(CONFIG_X86_LOCAL_APIC)		+= ibs.o
ifdef CONFIG_AMD_IOMMU
obj-$(CONFIG_CPU_SUP_AMD)		+= iommu.o
endif
+132 −72
Original line number Diff line number Diff line
@@ -22,13 +22,17 @@

#define NUM_COUNTERS_NB		4
#define NUM_COUNTERS_L2		4
#define MAX_COUNTERS		NUM_COUNTERS_NB
#define NUM_COUNTERS_L3		6
#define MAX_COUNTERS		6

#define RDPMC_BASE_NB		6
#define RDPMC_BASE_L2		10
#define RDPMC_BASE_LLC		10

#define COUNTER_SHIFT		16

static int num_counters_llc;
static int num_counters_nb;

static HLIST_HEAD(uncore_unused_list);

struct amd_uncore {
@@ -45,30 +49,30 @@ struct amd_uncore {
};

static struct amd_uncore * __percpu *amd_uncore_nb;
static struct amd_uncore * __percpu *amd_uncore_l2;
static struct amd_uncore * __percpu *amd_uncore_llc;

static struct pmu amd_nb_pmu;
static struct pmu amd_l2_pmu;
static struct pmu amd_llc_pmu;

static cpumask_t amd_nb_active_mask;
static cpumask_t amd_l2_active_mask;
static cpumask_t amd_llc_active_mask;

static bool is_nb_event(struct perf_event *event)
{
	return event->pmu->type == amd_nb_pmu.type;
}

static bool is_l2_event(struct perf_event *event)
static bool is_llc_event(struct perf_event *event)
{
	return event->pmu->type == amd_l2_pmu.type;
	return event->pmu->type == amd_llc_pmu.type;
}

static struct amd_uncore *event_to_amd_uncore(struct perf_event *event)
{
	if (is_nb_event(event) && amd_uncore_nb)
		return *per_cpu_ptr(amd_uncore_nb, event->cpu);
	else if (is_l2_event(event) && amd_uncore_l2)
		return *per_cpu_ptr(amd_uncore_l2, event->cpu);
	else if (is_llc_event(event) && amd_uncore_llc)
		return *per_cpu_ptr(amd_uncore_llc, event->cpu);

	return NULL;
}
@@ -183,16 +187,16 @@ static int amd_uncore_event_init(struct perf_event *event)
		return -ENOENT;

	/*
	 * NB and L2 counters (MSRs) are shared across all cores that share the
	 * same NB / L2 cache. Interrupts can be directed to a single target
	 * core, however, event counts generated by processes running on other
	 * cores cannot be masked out. So we do not support sampling and
	 * per-thread events.
	 * NB and Last level cache counters (MSRs) are shared across all cores
	 * that share the same NB / Last level cache. Interrupts can be directed
	 * to a single target core, however, event counts generated by processes
	 * running on other cores cannot be masked out. So we do not support
	 * sampling and per-thread events.
	 */
	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
		return -EINVAL;

	/* NB and L2 counters do not have usr/os/guest/host bits */
	/* NB and Last level cache counters do not have usr/os/guest/host bits */
	if (event->attr.exclude_user || event->attr.exclude_kernel ||
	    event->attr.exclude_host || event->attr.exclude_guest)
		return -EINVAL;
@@ -226,8 +230,8 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev,

	if (pmu->type == amd_nb_pmu.type)
		active_mask = &amd_nb_active_mask;
	else if (pmu->type == amd_l2_pmu.type)
		active_mask = &amd_l2_active_mask;
	else if (pmu->type == amd_llc_pmu.type)
		active_mask = &amd_llc_active_mask;
	else
		return 0;

@@ -244,30 +248,47 @@ static struct attribute_group amd_uncore_attr_group = {
	.attrs = amd_uncore_attrs,
};

PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15");

static struct attribute *amd_uncore_format_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	NULL,
};

static struct attribute_group amd_uncore_format_group = {
	.name = "format",
	.attrs = amd_uncore_format_attr,
/*
 * Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based
 * on family.
 *
 * Expands to two definitions:
 *   - a show callback named <_dev>_show<_name>() that writes the string
 *     literal _format followed by a newline into @page and returns the
 *     sprintf() result; a BUILD_BUG_ON rejects a _format whose size is
 *     PAGE_SIZE or more;
 *   - a struct device_attribute named format_attr_<_dev><_name>, initialised
 *     with __ATTR_RO(_dev).
 *
 * _name may be left empty to produce the unsuffixed variant.
 *
 * NOTE(review): the initialiser is built from _dev alone, not _dev plus _name,
 * so the suffixed variants presumably start out pointing at the unsuffixed
 * callback until .show is reassigned elsewhere -- confirm against __ATTR_RO.
 */
#define AMD_FORMAT_ATTR(_dev, _name, _format)				     \
static ssize_t								     \
_dev##_show##_name(struct device *dev,					     \
		struct device_attribute *attr,				     \
		char *page)						     \
{									     \
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			     \
	return sprintf(page, _format "\n");				     \
}									     \
static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev);

/*
 * Used for each uncore counter type.
 *
 * For a given _name this emits three objects:
 *   - amd_uncore_format_attr_<_name>[]: NULL-terminated list holding
 *     format_attr_event_<_name> and the shared format_attr_umask;
 *   - amd_uncore_format_group_<_name>: attribute group named "format" over
 *     that list;
 *   - amd_uncore_attr_groups_<_name>[]: NULL-terminated list made of
 *     amd_uncore_attr_group followed by the format group above.
 *
 * The macro takes the address of format_attr_event_<_name> and
 * format_attr_umask, so both must be declared before it is expanded.
 */
#define AMD_ATTRIBUTE(_name)						     \
static struct attribute *amd_uncore_format_attr_##_name[] = {		     \
	&format_attr_event_##_name.attr,				     \
	&format_attr_umask.attr,					     \
	NULL,								     \
};									     \
static struct attribute_group amd_uncore_format_group_##_name = {	     \
	.name = "format",						     \
	.attrs = amd_uncore_format_attr_##_name,			     \
};									     \
static const struct attribute_group *amd_uncore_attr_groups_##_name[] = {    \
	&amd_uncore_attr_group,						     \
	&amd_uncore_format_group_##_name,				     \
	NULL,								     \
};

static const struct attribute_group *amd_uncore_attr_groups[] = {
	&amd_uncore_attr_group,
	&amd_uncore_format_group,
	NULL,
};
/* Unsuffixed format attributes: format_attr_event and format_attr_umask */
AMD_FORMAT_ATTR(event, , "config:0-7,32-35");
AMD_FORMAT_ATTR(umask, , "config:8-15");
/* Suffixed event formats (_df, _l3), each with its own config bit-range string */
AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60");
AMD_FORMAT_ATTR(event, _l3, "config:0-7");
/* Attribute-group tables built on the _df and _l3 event format attributes */
AMD_ATTRIBUTE(df);
AMD_ATTRIBUTE(l3);

static struct pmu amd_nb_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_uncore_attr_groups,
	.name		= "amd_nb",
	.event_init	= amd_uncore_event_init,
	.add		= amd_uncore_add,
	.del		= amd_uncore_del,
@@ -276,10 +297,8 @@ static struct pmu amd_nb_pmu = {
	.read		= amd_uncore_read,
};

static struct pmu amd_l2_pmu = {
static struct pmu amd_llc_pmu = {
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_uncore_attr_groups,
	.name		= "amd_l2",
	.event_init	= amd_uncore_event_init,
	.add		= amd_uncore_add,
	.del		= amd_uncore_del,
@@ -296,14 +315,14 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)

static int amd_uncore_cpu_up_prepare(unsigned int cpu)
{
	struct amd_uncore *uncore_nb = NULL, *uncore_l2;
	struct amd_uncore *uncore_nb = NULL, *uncore_llc;

	if (amd_uncore_nb) {
		uncore_nb = amd_uncore_alloc(cpu);
		if (!uncore_nb)
			goto fail;
		uncore_nb->cpu = cpu;
		uncore_nb->num_counters = NUM_COUNTERS_NB;
		uncore_nb->num_counters = num_counters_nb;
		uncore_nb->rdpmc_base = RDPMC_BASE_NB;
		uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
		uncore_nb->active_mask = &amd_nb_active_mask;
@@ -312,18 +331,18 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu)
		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
	}

	if (amd_uncore_l2) {
		uncore_l2 = amd_uncore_alloc(cpu);
		if (!uncore_l2)
	if (amd_uncore_llc) {
		uncore_llc = amd_uncore_alloc(cpu);
		if (!uncore_llc)
			goto fail;
		uncore_l2->cpu = cpu;
		uncore_l2->num_counters = NUM_COUNTERS_L2;
		uncore_l2->rdpmc_base = RDPMC_BASE_L2;
		uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
		uncore_l2->active_mask = &amd_l2_active_mask;
		uncore_l2->pmu = &amd_l2_pmu;
		uncore_l2->id = -1;
		*per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
		uncore_llc->cpu = cpu;
		uncore_llc->num_counters = num_counters_llc;
		uncore_llc->rdpmc_base = RDPMC_BASE_LLC;
		uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL;
		uncore_llc->active_mask = &amd_llc_active_mask;
		uncore_llc->pmu = &amd_llc_pmu;
		uncore_llc->id = -1;
		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc;
	}

	return 0;
@@ -376,17 +395,17 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore;
	}

	if (amd_uncore_l2) {
	if (amd_uncore_llc) {
		unsigned int apicid = cpu_data(cpu).apicid;
		unsigned int nshared;

		uncore = *per_cpu_ptr(amd_uncore_l2, cpu);
		uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
		cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx);
		nshared = ((eax >> 14) & 0xfff) + 1;
		uncore->id = apicid - (apicid % nshared);

		uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2);
		*per_cpu_ptr(amd_uncore_l2, cpu) = uncore;
		uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
	}

	return 0;
@@ -419,8 +438,8 @@ static int amd_uncore_cpu_online(unsigned int cpu)
	if (amd_uncore_nb)
		uncore_online(cpu, amd_uncore_nb);

	if (amd_uncore_l2)
		uncore_online(cpu, amd_uncore_l2);
	if (amd_uncore_llc)
		uncore_online(cpu, amd_uncore_llc);

	return 0;
}
@@ -456,8 +475,8 @@ static int amd_uncore_cpu_down_prepare(unsigned int cpu)
	if (amd_uncore_nb)
		uncore_down_prepare(cpu, amd_uncore_nb);

	if (amd_uncore_l2)
		uncore_down_prepare(cpu, amd_uncore_l2);
	if (amd_uncore_llc)
		uncore_down_prepare(cpu, amd_uncore_llc);

	return 0;
}
@@ -479,8 +498,8 @@ static int amd_uncore_cpu_dead(unsigned int cpu)
	if (amd_uncore_nb)
		uncore_dead(cpu, amd_uncore_nb);

	if (amd_uncore_l2)
		uncore_dead(cpu, amd_uncore_l2);
	if (amd_uncore_llc)
		uncore_dead(cpu, amd_uncore_llc);

	return 0;
}
@@ -492,6 +511,47 @@ static int __init amd_uncore_init(void)
	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		goto fail_nodev;

	switch(boot_cpu_data.x86) {
		case 23:
			/* Family 17h: */
			num_counters_nb = NUM_COUNTERS_NB;
			num_counters_llc = NUM_COUNTERS_L3;
			/*
			 * For Family17h, the NorthBridge counters are
			 * re-purposed as Data Fabric counters. Also, support is
			 * added for L3 counters. The pmus are exported based on
			 * family as either L2 or L3 and NB or DF.
			 */
			amd_nb_pmu.name = "amd_df";
			amd_llc_pmu.name = "amd_l3";
			format_attr_event_df.show = &event_show_df;
			format_attr_event_l3.show = &event_show_l3;
			break;
		case 22:
			/* Family 16h - may change: */
			num_counters_nb = NUM_COUNTERS_NB;
			num_counters_llc = NUM_COUNTERS_L2;
			amd_nb_pmu.name = "amd_nb";
			amd_llc_pmu.name = "amd_l2";
			format_attr_event_df = format_attr_event;
			format_attr_event_l3 = format_attr_event;
			break;
		default:
			/*
			 * All prior families have the same number of
			 * NorthBridge and Last Level Cache counters
			 */
			num_counters_nb = NUM_COUNTERS_NB;
			num_counters_llc = NUM_COUNTERS_L2;
			amd_nb_pmu.name = "amd_nb";
			amd_llc_pmu.name = "amd_l2";
			format_attr_event_df = format_attr_event;
			format_attr_event_l3 = format_attr_event;
			break;
	}
	amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
	amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;

	if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
		goto fail_nodev;

@@ -510,16 +570,16 @@ static int __init amd_uncore_init(void)
	}

	if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) {
		amd_uncore_l2 = alloc_percpu(struct amd_uncore *);
		if (!amd_uncore_l2) {
		amd_uncore_llc = alloc_percpu(struct amd_uncore *);
		if (!amd_uncore_llc) {
			ret = -ENOMEM;
			goto fail_l2;
			goto fail_llc;
		}
		ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1);
		ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1);
		if (ret)
			goto fail_l2;
			goto fail_llc;

		pr_info("perf: AMD L2I counters detected\n");
		pr_info("perf: AMD LLC counters detected\n");
		ret = 0;
	}

@@ -529,7 +589,7 @@ static int __init amd_uncore_init(void)
	if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
			      "perf/x86/amd/uncore:prepare",
			      amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead))
		goto fail_l2;
		goto fail_llc;

	if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
			      "perf/x86/amd/uncore:starting",
@@ -546,11 +606,11 @@ fail_start:
	cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING);
fail_prep:
	cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP);
fail_l2:
fail_llc:
	if (boot_cpu_has(X86_FEATURE_PERFCTR_NB))
		perf_pmu_unregister(&amd_nb_pmu);
	if (amd_uncore_l2)
		free_percpu(amd_uncore_l2);
	if (amd_uncore_llc)
		free_percpu(amd_uncore_llc);
fail_nb:
	if (amd_uncore_nb)
		free_percpu(amd_uncore_nb);
Loading