Commit 772c1d06 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "Kernel side changes:

   - Improved kbprobes robustness

   - Intel PEBS support for PT hardware tracing

   - Other Intel PT improvements: high order pages memory footprint
     reduction and various related cleanups

   - Misc cleanups

  The perf tooling side has been very busy in this cycle, with over 300
  commits. This is an incomplete high-level summary of the many
  improvements done by over 30 developers:

   - Lots of updates to the following tools:

      'perf c2c'
      'perf config'
      'perf record'
      'perf report'
      'perf script'
      'perf test'
      'perf top'
      'perf trace'

   - Updates to libperf and libtraceevent, and a consolidation of the
     proliferation of x86 instruction decoder libraries.

   - Vendor event updates for Intel and PowerPC CPUs,

   - Updates to hardware tracing tooling for ARM and Intel CPUs,

   - ... and lots of other changes and cleanups - see the shortlog and
     Git log for details"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (322 commits)
  kprobes: Prohibit probing on BUG() and WARN() address
  perf/x86: Make more stuff static
  x86, perf: Fix the dependency of the x86 insn decoder selftest
  objtool: Ignore intentional differences for the x86 insn decoder
  objtool: Update sync-check.sh from perf's check-headers.sh
  perf build: Ignore intentional differences for the x86 insn decoder
  perf intel-pt: Use shared x86 insn decoder
  perf intel-pt: Remove inat.c from build dependency list
  perf: Update .gitignore file
  objtool: Move x86 insn decoder to a common location
  perf metricgroup: Support multiple events for metricgroup
  perf metricgroup: Scale the metric result
  perf pmu: Change convert_scale from static to global
  perf symbols: Move mem_info and branch_info out of symbol.h
  perf auxtrace: Uninline functions that touch perf_session
  perf tools: Remove needless evlist.h include directives
  perf tools: Remove needless evlist.h include directives
  perf tools: Remove needless thread_map.h include directives
  perf tools: Remove needless thread.h include directives
  perf tools: Remove needless map.h include directives
  ...
parents c7eba51c e336b402
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -171,7 +171,7 @@ config HAVE_MMIOTRACE_SUPPORT

config X86_DECODER_SELFTEST
	bool "x86 instruction decoder selftest"
	depends on DEBUG_KERNEL && KPROBES
	depends on DEBUG_KERNEL && INSTRUCTION_DECODER
	depends on !COMPILE_TEST
	---help---
	 Perform x86 instruction decoder selftests at build time.
+34 −0
Original line number Diff line number Diff line
@@ -1005,6 +1005,27 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,

	/* current number of events already accepted */
	n = cpuc->n_events;
	if (!cpuc->n_events)
		cpuc->pebs_output = 0;

	if (!cpuc->is_fake && leader->attr.precise_ip) {
		/*
		 * For PEBS->PT, if !aux_event, the group leader (PT) went
		 * away, the group was broken down and this singleton event
		 * can't schedule any more.
		 */
		if (is_pebs_pt(leader) && !leader->aux_event)
			return -EINVAL;

		/*
		 * pebs_output: 0: no PEBS so far, 1: PT, 2: DS
		 */
		if (cpuc->pebs_output &&
		    cpuc->pebs_output != is_pebs_pt(leader) + 1)
			return -EINVAL;

		cpuc->pebs_output = is_pebs_pt(leader) + 1;
	}

	if (is_x86_event(leader)) {
		if (n >= max_count)
@@ -2241,6 +2262,17 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value)
	return 0;
}

static int x86_pmu_aux_output_match(struct perf_event *event)
{
	if (!(pmu.capabilities & PERF_PMU_CAP_AUX_OUTPUT))
		return 0;

	if (x86_pmu.aux_output_match)
		return x86_pmu.aux_output_match(event);

	return 0;
}

static struct pmu pmu = {
	.pmu_enable		= x86_pmu_enable,
	.pmu_disable		= x86_pmu_disable,
@@ -2266,6 +2298,8 @@ static struct pmu pmu = {
	.sched_task		= x86_pmu_sched_task,
	.task_ctx_size          = sizeof(struct x86_perf_task_context),
	.check_period		= x86_pmu_check_period,

	.aux_output_match	= x86_pmu_aux_output_match,
};

void arch_perf_update_userpage(struct perf_event *event,
+18 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/intel-family.h>
#include <asm/intel_pt.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>

@@ -3298,6 +3299,13 @@ static int intel_pmu_hw_config(struct perf_event *event)
		}
	}

	if (event->attr.aux_output) {
		if (!event->attr.precise_ip)
			return -EINVAL;

		event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
	}

	if (event->attr.type != PERF_TYPE_RAW)
		return 0;

@@ -3816,6 +3824,14 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
	return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
}

static int intel_pmu_aux_output_match(struct perf_event *event)
{
	if (!x86_pmu.intel_cap.pebs_output_pt_available)
		return 0;

	return is_intel_pt_event(event);
}

PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");

PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -3940,6 +3956,8 @@ static __initconst const struct x86_pmu intel_pmu = {
	.sched_task		= intel_pmu_sched_task,

	.check_period		= intel_pmu_check_period,

	.aux_output_match	= intel_pmu_aux_output_match,
};

static __init void intel_clovertown_quirk(void)
+2 −2
Original line number Diff line number Diff line
@@ -446,7 +446,7 @@ static int cstate_cpu_init(unsigned int cpu)
	return 0;
}

const struct attribute_group *core_attr_update[] = {
static const struct attribute_group *core_attr_update[] = {
	&group_cstate_core_c1,
	&group_cstate_core_c3,
	&group_cstate_core_c6,
@@ -454,7 +454,7 @@ const struct attribute_group *core_attr_update[] = {
	NULL,
};

const struct attribute_group *pkg_attr_update[] = {
static const struct attribute_group *pkg_attr_update[] = {
	&group_cstate_pkg_c2,
	&group_cstate_pkg_c3,
	&group_cstate_pkg_c6,
+50 −1
Original line number Diff line number Diff line
@@ -902,6 +902,9 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 */
static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
{
	if (cpuc->n_pebs == cpuc->n_pebs_via_pt)
		return false;

	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}

@@ -919,6 +922,9 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
	u64 threshold;
	int reserved;

	if (cpuc->n_pebs_via_pt)
		return;

	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
		reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
	else
@@ -1059,10 +1065,40 @@ void intel_pmu_pebs_add(struct perf_event *event)
	cpuc->n_pebs++;
	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
		cpuc->n_large_pebs++;
	if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
		cpuc->n_pebs_via_pt++;

	pebs_update_state(needed_cb, cpuc, event, true);
}

static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!is_pebs_pt(event))
		return;

	if (!(cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK))
		cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
}

static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	struct debug_store *ds = cpuc->ds;

	if (!is_pebs_pt(event))
		return;

	if (!(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
		cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;

	cpuc->pebs_enabled |= PEBS_OUTPUT_PT;

	wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
}

void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1100,6 +1136,8 @@ void intel_pmu_pebs_enable(struct perf_event *event)
	} else {
		ds->pebs_event_reset[hwc->idx] = 0;
	}

	intel_pmu_pebs_via_pt_enable(event);
}

void intel_pmu_pebs_del(struct perf_event *event)
@@ -1111,6 +1149,8 @@ void intel_pmu_pebs_del(struct perf_event *event)
	cpuc->n_pebs--;
	if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
		cpuc->n_large_pebs--;
	if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
		cpuc->n_pebs_via_pt--;

	pebs_update_state(needed_cb, cpuc, event, false);
}
@@ -1120,7 +1160,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	if (cpuc->n_pebs == cpuc->n_large_pebs)
	if (cpuc->n_pebs == cpuc->n_large_pebs &&
	    cpuc->n_pebs != cpuc->n_pebs_via_pt)
		intel_pmu_drain_pebs_buffer();

	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -1131,6 +1172,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
		cpuc->pebs_enabled &= ~(1ULL << 63);

	intel_pmu_pebs_via_pt_disable(event);

	if (cpuc->enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

@@ -2031,6 +2074,12 @@ void __init intel_ds_init(void)
					  PERF_SAMPLE_REGS_INTR);
			}
			pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);

			if (x86_pmu.intel_cap.pebs_output_pt_available) {
				pr_cont("PEBS-via-PT, ");
				x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
			}

			break;

		default:
Loading