Commit b34133fe authored by Linus Torvalds

Merge tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf event updates from Ingo Molnar:
 "HW support updates:

   - Add uncore support for Intel Comet Lake

   - Add RAPL support for Hygon Fam18h

   - Add Intel "IIO stack to PMON mapping" support on Skylake-SP CPUs,
     which enumerates per-device performance counters via sysfs and
     enables the perf stat --iiostat functionality

   - Add support for Intel "Architectural LBRs", which generalizes the
     model-specific LBR hardware tracing feature into a
     model-independent, architected performance monitoring feature (a
     CPUID detection sketch follows this summary).

     Usage is mostly seamless to tooling, as the pre-existing LBR
     features are kept, but there are a couple of advantages under the
     hood, such as faster context switching, faster LBR reads, cleaner
     exposure of LBR features to guest kernels, etc.

     ( Since architectural LBRs are supported via XSAVE, there's related
       changes to the x86 FPU code as well. )

  ftrace/perf updates:

   - Add a "text poke" event to record changes to kernel text (i.e.
     self-modifying code), so that tracers like Intel PT can decode
     through jump labels, kprobes and ftrace trampolines (a
     perf_event_open() sketch follows this summary).

  Misc cleanups, smaller fixes..."
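As a quick illustration of the discovery step: architectural LBRs are
enumerated via CPUID rather than per-model checks. A minimal user-space
sketch, assuming the bit positions documented in the public Intel SDM
(leaf 0x7 EDX[19] for the feature flag, leaf 0x1C for the LBR
configuration); these details come from the SDM, not from this merge:

#include <stdio.h>
#include <cpuid.h>	/* GCC/Clang __get_cpuid_count() */

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID.(EAX=07H, ECX=0):EDX[19] advertises architectural LBRs. */
	if (!__get_cpuid_count(0x07, 0, &eax, &ebx, &ecx, &edx) ||
	    !(edx & (1u << 19))) {
		puts("architectural LBRs not supported");
		return 0;
	}

	/* Leaf 0x1C describes the LBR configuration; EAX[7:0] is a bitmap
	 * of supported depths (bit n set => depth 8*(n+1) is supported). */
	__get_cpuid_count(0x1c, 0, &eax, &ebx, &ecx, &edx);
	printf("architectural LBRs supported, depth bitmap 0x%02x\n",
	       eax & 0xff);
	return 0;
}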
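Likewise, a sketch of how a tool might subscribe to the new text poke
records. The uapi names (attr.text_poke, PERF_RECORD_TEXT_POKE) are
recalled from the 5.9-era headers, and the dummy-event setup is
illustrative, not a copy of the perf tool's code:

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Open a dummy software event that exists only to receive sideband
 * records; with attr.text_poke set, the kernel emits a
 * PERF_RECORD_TEXT_POKE record (address plus old and new bytes)
 * whenever it modifies kernel text. */
static int open_text_poke_listener(int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_DUMMY;	/* no counting, records only */
	attr.text_poke = 1;			/* request text poke records */
	attr.sample_id_all = 1;

	/* pid == -1: system-wide on this CPU; needs privileges. The
	 * records are then read from the event's mmap'ed ring buffer. */
	return (int)syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
}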

* tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (47 commits)
  perf/x86/rapl: Add Hygon Fam18h RAPL support
  kprobes: Remove unnecessary module_mutex locking from kprobe_optimizer()
  x86/perf: Fix a typo
  perf: <linux/perf_event.h>: drop a duplicated word
  perf/x86/intel/lbr: Support XSAVES for arch LBR read
  perf/x86/intel/lbr: Support XSAVES/XRSTORS for LBR context switch
  x86/fpu/xstate: Add helpers for LBR dynamic supervisor feature
  x86/fpu/xstate: Support dynamic supervisor feature for LBR
  x86/fpu: Use proper mask to replace full instruction mask
  perf/x86: Remove task_ctx_size
  perf/x86/intel/lbr: Create kmem_cache for the LBR context data
  perf/core: Use kmem_cache to allocate the PMU specific data
  perf/core: Factor out functions to allocate/free the task_ctx_data
  perf/x86/intel/lbr: Support Architectural LBR
  perf/x86/intel/lbr: Factor out intel_pmu_store_lbr
  perf/x86/intel/lbr: Factor out rdlbr_all() and wrlbr_all()
  perf/x86/intel/lbr: Mark the {rd,wr}lbr_{to,from} wrappers __always_inline
  perf/x86/intel/lbr: Unify the stored format of LBR information
  perf/x86/intel/lbr: Support LBR_CTL
  perf/x86: Expose CPUID enumeration bits for arch LBR
  ...
parents 9dee8689 d903b6d0
+33 −0
What:           /sys/devices/uncore_iio_x/dieX
Date:           February 2020
Contact:        Roman Sudarikov <roman.sudarikov@linux.intel.com>
Description:
                Each IIO stack (PCIe root port) has its own IIO PMON
                block, so each dieX file (where X is the die number)
                holds the "Segment:Root Bus" pair for the PCIe root port
                that can be monitored by that IIO PMON block.
                For example, on a 4-die Xeon platform with up to 6 IIO
                stacks per die (and therefore 6 IIO PMON blocks per die),
                the mapping of IIO PMON block 0 is exposed as follows:

                $ ls /sys/devices/uncore_iio_0/die*
                -r--r--r-- /sys/devices/uncore_iio_0/die0
                -r--r--r-- /sys/devices/uncore_iio_0/die1
                -r--r--r-- /sys/devices/uncore_iio_0/die2
                -r--r--r-- /sys/devices/uncore_iio_0/die3

                $ tail /sys/devices/uncore_iio_0/die*
                ==> /sys/devices/uncore_iio_0/die0 <==
                0000:00
                ==> /sys/devices/uncore_iio_0/die1 <==
                0000:40
                ==> /sys/devices/uncore_iio_0/die2 <==
                0000:80
                ==> /sys/devices/uncore_iio_0/die3 <==
                0000:c0

                Which means:
                IIO PMU 0 on die 0 belongs to PCI RP on bus 0x00, domain 0x0000
                IIO PMU 0 on die 1 belongs to PCI RP on bus 0x40, domain 0x0000
                IIO PMU 0 on die 2 belongs to PCI RP on bus 0x80, domain 0x0000
                IIO PMU 0 on die 3 belongs to PCI RP on bus 0xc0, domain 0x0000
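
A minimal sketch of consuming this mapping from a program instead of the
shell. Only the "Segment:Root Bus" attribute format is taken from the
documentation above; the rest is illustrative:

#include <stdio.h>

/* Print the "Segment:Root Bus" mapping of IIO PMON block 0 on one die. */
int main(void)
{
	char path[64], buf[16];
	int die = 0;
	FILE *f;

	snprintf(path, sizeof(path), "/sys/devices/uncore_iio_0/die%d", die);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		/* e.g. "0000:00": root bus 0x00 in PCI domain 0x0000 */
		printf("IIO PMU 0, die %d -> %s", die, buf);
	fclose(f);
	return 0;
}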
+18 −10
@@ -71,10 +71,9 @@ u64 x86_perf_event_update(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	int shift = 64 - x86_pmu.cntval_bits;
 	u64 prev_raw_count, new_raw_count;
-	int idx = hwc->idx;
 	u64 delta;
 
-	if (idx == INTEL_PMC_IDX_FIXED_BTS)
+	if (unlikely(!hwc->event_base))
 		return 0;
 
 	/*
@@ -359,6 +358,7 @@ void x86_release_hardware(void)
 	if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
 		release_pmc_hardware();
 		release_ds_buffers();
+		release_lbr_buffers();
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 }
@@ -1097,22 +1097,31 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 				struct cpu_hw_events *cpuc, int i)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	int idx;
 
-	hwc->idx = cpuc->assign[i];
+	idx = hwc->idx = cpuc->assign[i];
 	hwc->last_cpu = smp_processor_id();
 	hwc->last_tag = ++cpuc->tags[i];
 
-	if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
+	switch (hwc->idx) {
+	case INTEL_PMC_IDX_FIXED_BTS:
+	case INTEL_PMC_IDX_FIXED_VLBR:
 		hwc->config_base = 0;
 		hwc->event_base	= 0;
-	} else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
+		break;
+
+	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
-		hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
-	} else {
+		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
+				(idx - INTEL_PMC_IDX_FIXED);
+		hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | 1<<30;
+		break;
+
+	default:
 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
 		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
 		hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
+		break;
 	}
 }
 
@@ -1233,7 +1242,7 @@ int x86_perf_event_set_period(struct perf_event *event)
 	s64 period = hwc->sample_period;
 	int ret = 0, idx = hwc->idx;
 
-	if (idx == INTEL_PMC_IDX_FIXED_BTS)
+	if (unlikely(!hwc->event_base))
 		return 0;
 
 	/*
@@ -2363,7 +2372,6 @@ static struct pmu pmu = {
 
 	.event_idx		= x86_pmu_event_idx,
 	.sched_task		= x86_pmu_sched_task,
-	.task_ctx_size          = sizeof(struct x86_perf_task_context),
 	.swap_task_ctx		= x86_pmu_swap_task_ctx,
 	.check_period		= x86_pmu_check_period,

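One detail worth calling out in the x86_assign_hw_event() hunk: for fixed
counters, event_base_rdpmc is the counter number with bit 30 set, because
RDPMC selects the fixed-counter class via ECX[30] (per the Intel SDM). A
user-space sketch of that encoding; it assumes RDPMC is permitted from
user space (e.g. via an mmap'ed perf event, or /sys/devices/cpu/rdpmc):

#include <stdint.h>

/* RDPMC: ECX bit 30 selects the fixed-counter class, the low bits pick
 * the counter within the class; mirrors event_base_rdpmc above. */
static inline uint64_t rdpmc(uint32_t ecx)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdpmc" : "=a"(lo), "=d"(hi) : "c"(ecx));
	return ((uint64_t)hi << 32) | lo;
}

/* Read fixed counter 0 (instructions retired, when enabled). */
static inline uint64_t read_fixed_ctr0(void)
{
	return rdpmc((1u << 30) | 0);
}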
+86 −41
@@ -2136,8 +2136,35 @@ static inline void intel_pmu_ack_status(u64 ack)
 	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
 }
 
-static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
+static inline bool event_is_checkpointed(struct perf_event *event)
+{
+	return unlikely(event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
+}
+
+static inline void intel_set_masks(struct perf_event *event, int idx)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (event->attr.exclude_host)
+		__set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
+	if (event->attr.exclude_guest)
+		__set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
+	if (event_is_checkpointed(event))
+		__set_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
+}
+
+static inline void intel_clear_masks(struct perf_event *event, int idx)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	__clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
+	__clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
+	__clear_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
+}
+
+static void intel_pmu_disable_fixed(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
 	u64 ctrl_val, mask;
 
@@ -2148,30 +2175,22 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
 	wrmsrl(hwc->config_base, ctrl_val);
 }
 
-static inline bool event_is_checkpointed(struct perf_event *event)
-{
-	return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
-}
-
 static void intel_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int idx = hwc->idx;
 
-	if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
+	if (idx < INTEL_PMC_IDX_FIXED) {
+		intel_clear_masks(event, idx);
+		x86_pmu_disable_event(event);
+	} else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
+		intel_clear_masks(event, idx);
+		intel_pmu_disable_fixed(event);
+	} else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
 		intel_pmu_disable_bts();
 		intel_pmu_drain_bts_buffer();
-		return;
-	}
-
-	cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
-	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
-	cpuc->intel_cp_status &= ~(1ull << hwc->idx);
-
-	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
-		intel_pmu_disable_fixed(hwc);
-	else
-		x86_pmu_disable_event(event);
+	} else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
+		intel_clear_masks(event, idx);
 
 	/*
 	 * Needs to be called after x86_pmu_disable_event,
@@ -2238,33 +2257,23 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 static void intel_pmu_enable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
-	if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
-		if (!__this_cpu_read(cpu_hw_events.enabled))
-			return;
-
-		intel_pmu_enable_bts(hwc->config);
-		return;
-	}
-
-	if (event->attr.exclude_host)
-		cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
-	if (event->attr.exclude_guest)
-		cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
-
-	if (unlikely(event_is_checkpointed(event)))
-		cpuc->intel_cp_status |= (1ull << hwc->idx);
+	int idx = hwc->idx;
 
 	if (unlikely(event->attr.precise_ip))
 		intel_pmu_pebs_enable(event);
 
-	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+	if (idx < INTEL_PMC_IDX_FIXED) {
+		intel_set_masks(event, idx);
+		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+	} else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
+		intel_set_masks(event, idx);
 		intel_pmu_enable_fixed(event);
-		return;
-	}
-
-	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+	} else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
+		if (!__this_cpu_read(cpu_hw_events.enabled))
+			return;
+
+		intel_pmu_enable_bts(hwc->config);
+	} else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
+		intel_set_masks(event, idx);
 }
 
 static void intel_pmu_add_event(struct perf_event *event)
@@ -2614,6 +2623,20 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
+/*
+ * Note: matches a fake event, like Fixed2.
+ */
+static struct event_constraint *
+intel_vlbr_constraints(struct perf_event *event)
+{
+	struct event_constraint *c = &vlbr_constraint;
+
+	if (unlikely(constraint_match(c, event->hw.config)))
+		return c;
+
+	return NULL;
+}
+
 static int intel_alt_er(int idx, u64 config)
 {
 	int alt_idx = idx;
@@ -2804,6 +2827,10 @@ __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 {
 	struct event_constraint *c;
 
+	c = intel_vlbr_constraints(event);
+	if (c)
+		return c;
+
 	c = intel_bts_constraints(event);
 	if (c)
 		return c;
@@ -3951,6 +3978,11 @@ static __initconst const struct x86_pmu core_pmu = {
 	.cpu_dead		= intel_pmu_cpu_dead,
 
 	.check_period		= intel_pmu_check_period,
+
+	.lbr_reset		= intel_pmu_lbr_reset_64,
+	.lbr_read		= intel_pmu_lbr_read_64,
+	.lbr_save		= intel_pmu_lbr_save,
+	.lbr_restore		= intel_pmu_lbr_restore,
 };
 
 static __initconst const struct x86_pmu intel_pmu = {
@@ -3996,6 +4028,11 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.check_period		= intel_pmu_check_period,
 
 	.aux_output_match	= intel_pmu_aux_output_match,
+
+	.lbr_reset		= intel_pmu_lbr_reset_64,
+	.lbr_read		= intel_pmu_lbr_read_64,
+	.lbr_save		= intel_pmu_lbr_save,
+	.lbr_restore		= intel_pmu_lbr_restore,
 };
 
 static __init void intel_clovertown_quirk(void)
@@ -4622,6 +4659,14 @@ __init int intel_pmu_init(void)
 		x86_pmu.intel_cap.capabilities = capabilities;
 	}
 
+	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) {
+		x86_pmu.lbr_reset = intel_pmu_lbr_reset_32;
+		x86_pmu.lbr_read = intel_pmu_lbr_read_32;
+	}
+
+	if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
+		intel_pmu_arch_lbr_init();
+
 	intel_ds_init();
 
 	x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
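
The if/else chains in intel_pmu_{disable,enable}_event() above dispatch
purely on the counter index. A standalone sketch of that classification;
the numeric values are assumptions for illustration, the kernel's real
ones are the INTEL_PMC_IDX_* macros in arch/x86/include/asm/perf_event.h:

/* Illustrative index layout: GP counters occupy [0, 32), fixed counters
 * follow, and BTS plus the fake VLBR event sit above the fixed range. */
#define INTEL_PMC_IDX_FIXED		32
#define INTEL_PMC_IDX_FIXED_BTS		(INTEL_PMC_IDX_FIXED + 15)
#define INTEL_PMC_IDX_FIXED_VLBR	58	/* assumed: any distinct index */

enum ctr_class { CTR_GP, CTR_FIXED, CTR_BTS, CTR_VLBR, CTR_NONE };

/* Same dispatch order as intel_pmu_disable_event() above. */
static enum ctr_class classify_idx(int idx)
{
	if (idx < INTEL_PMC_IDX_FIXED)
		return CTR_GP;		/* generic counter: EVENTSEL MSRs */
	else if (idx < INTEL_PMC_IDX_FIXED_BTS)
		return CTR_FIXED;	/* fixed counter: FIXED_CTR_CTRL MSR */
	else if (idx == INTEL_PMC_IDX_FIXED_BTS)
		return CTR_BTS;		/* BTS: no counter MSRs at all */
	else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
		return CTR_VLBR;	/* fake event for guest LBRs */
	return CTR_NONE;
}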
+3 −3
@@ -954,7 +954,7 @@ static void adaptive_pebs_record_size_update(void)
 	if (pebs_data_cfg & PEBS_DATACFG_XMMS)
 		sz += sizeof(struct pebs_xmm);
 	if (pebs_data_cfg & PEBS_DATACFG_LBRS)
-		sz += x86_pmu.lbr_nr * sizeof(struct pebs_lbr_entry);
+		sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);
 
 	cpuc->pebs_record_size = sz;
 }
@@ -1595,10 +1595,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 	}
 
 	if (format_size & PEBS_DATACFG_LBRS) {
-		struct pebs_lbr *lbr = next_record;
+		struct lbr_entry *lbr = next_record;
 		int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
 					& 0xff) + 1;
-		next_record = next_record + num_lbr*sizeof(struct pebs_lbr_entry);
+		next_record = next_record + num_lbr * sizeof(struct lbr_entry);
 
 		if (has_branch_stack(event)) {
 			intel_pmu_store_pebs_lbrs(lbr);
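
The adaptive-PEBS hunks above size the optional LBR section from a count
packed into format_size. A self-contained sketch of that decode; the bit
positions are assumed from the kernel headers of that era, and struct
lbr_entry is the three-word from/to/info layout the series standardizes on:

#include <stddef.h>
#include <stdint.h>

#define PEBS_DATACFG_LBRS	(1ULL << 3)	/* record has an LBR section */
#define PEBS_DATACFG_LBR_SHIFT	24		/* bits [31:24]: nr of LBRs - 1 */

struct lbr_entry {			/* one branch record */
	uint64_t from, to, info;
};

/* Mirror of the num_lbr computation in setup_pebs_adaptive_sample_data(). */
static unsigned int pebs_num_lbr(uint64_t format_size)
{
	if (!(format_size & PEBS_DATACFG_LBRS))
		return 0;
	return (unsigned int)((format_size >> PEBS_DATACFG_LBR_SHIFT) & 0xff) + 1;
}

/* Bytes the LBR section contributes to one adaptive PEBS record. */
static size_t pebs_lbr_bytes(uint64_t format_size)
{
	return pebs_num_lbr(format_size) * sizeof(struct lbr_entry);
}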
+618 −115

File changed; preview size limit exceeded, changes collapsed.