Commit 90489a72 authored by Linus Torvalds

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "The main kernel changes were:

   - add support for Intel's "adaptive PEBS v4" - which embeds LBR data
     in PEBS records and can thus batch up and reduce the IRQ (NMI) rate
     significantly - reducing overhead and making call-graph profiling
     less intrusive.

   - add Intel CPU core and uncore support updates for Tremont and Icelake,

   - extend the x86 PMU constraints scheduler with 'constraint ranges'
     to better support Icelake hw constraints,

   - make x86 call-chain support work without CONFIG_FRAME_POINTER, by
     using the kernel unwinder on the captured registers

   - misc other changes

  Tooling changes:

   - updates to the main tools: 'perf record', 'perf trace', 'perf
     stat'

   - updated Intel and S/390 vendor events

   - libtraceevent updates

   - misc other updates and fixes"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (69 commits)
  perf/x86: Make perf callchains work without CONFIG_FRAME_POINTER
  watchdog: Fix typo in comment
  perf/x86/intel: Add Tremont core PMU support
  perf/x86/intel/uncore: Add Intel Icelake uncore support
  perf/x86/msr: Add Icelake support
  perf/x86/intel/rapl: Add Icelake support
  perf/x86/intel/cstate: Add Icelake support
  perf/x86/intel: Add Icelake support
  perf/x86: Support constraint ranges
  perf/x86/lbr: Avoid reading the LBRs when adaptive PEBS handles them
  perf/x86/intel: Support adaptive PEBS v4
  perf/x86/intel/ds: Extract code of event update in short period
  perf/x86/intel: Extract memory code PEBS parser for reuse
  perf/x86: Support outputting XMM registers
  perf/x86/intel: Force resched when TFA sysctl is modified
  perf/core: Add perf_pmu_resched() as global function
  perf/headers: Fix stale comment for struct perf_addr_filter
  perf/core: Make perf_swevent_init_cpu() static
  perf/x86: Add sanity checks to x86_schedule_events()
  perf/x86: Optimize x86_schedule_events()
  ...
parents 007dc78f d15d3568
+71 −24
@@ -560,6 +560,21 @@ int x86_pmu_hw_config(struct perf_event *event)
			return -EINVAL;
	}

	/* sample_regs_user never supports XMM registers */
	if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS))
		return -EINVAL;
	/*
	 * Besides the general purpose registers, XMM registers may
	 * be collected in PEBS on some platforms, e.g. Icelake
	 */
	if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) {
		if (x86_pmu.pebs_no_xmm_regs)
			return -EINVAL;

		if (!event->attr.precise_ip)
			return -EINVAL;
	}

	return x86_setup_perfctr(event);
}
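Editor's note (illustration, not part of the commit): the new x86_pmu_hw_config() check above only admits XMM registers in sample_regs_intr for precise (PEBS) events, and only on PMUs that can collect them. A minimal userspace sketch of such a request follows; the mask bits used for XMM0..XMM15 (bits 32..63) are an assumption based on this series' uapi layout and are not shown in the excerpt above.

/*
 * Editorial sketch: open a precise cycles event that samples XMM
 * registers at interrupt time. Kernels without this support (or a
 * request without precise_ip) fail the check above with -EINVAL.
 */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static int open_precise_cycles_with_xmm(pid_t pid, int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100003;
	attr.precise_ip = 2;				/* request PEBS */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_INTR;
	attr.sample_regs_intr = ~0ULL << 32;		/* assumed XMM bits */

	return syscall(SYS_perf_event_open, &attr, pid, cpu,
		       -1 /* group_fd */, 0 /* flags */);
}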

@@ -661,6 +676,10 @@ static inline int is_x86_event(struct perf_event *event)
	return event->pmu == &pmu;
}

struct pmu *x86_get_pmu(void)
{
	return &pmu;
}
/*
 * Event scheduler state:
 *
@@ -849,18 +868,43 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
	struct event_constraint *c;
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	struct perf_event *e;
	int i, wmin, wmax, unsched = 0;
	int n0, i, wmin, wmax, unsched = 0;
	struct hw_perf_event *hwc;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);

	/*
	 * Compute the number of events already present; see x86_pmu_add(),
	 * validate_group() and x86_pmu_commit_txn(). For the former two
	 * cpuc->n_events hasn't been updated yet, while for the latter
	 * cpuc->n_txn contains the number of events added in the current
	 * transaction.
	 */
	n0 = cpuc->n_events;
	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
		n0 -= cpuc->n_txn;

	if (x86_pmu.start_scheduling)
		x86_pmu.start_scheduling(cpuc);

	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
		cpuc->event_constraint[i] = NULL;
		c = cpuc->event_constraint[i];

		/*
		 * Previously scheduled events should have a cached constraint,
		 * while new events should not have one.
		 */
		WARN_ON_ONCE((c && i >= n0) || (!c && i < n0));

		/*
		 * Request constraints for new events; or for those events that
		 * have a dynamic constraint -- for those the constraint can
		 * change due to external factors (sibling state, allow_tfa).
		 */
		if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) {
			c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
			cpuc->event_constraint[i] = c;
		}

		wmin = min(wmin, c->weight);
		wmax = max(wmax, c->weight);
@@ -925,25 +969,20 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
	if (!unsched && assign) {
		for (i = 0; i < n; i++) {
			e = cpuc->event_list[i];
			e->hw.flags |= PERF_X86_EVENT_COMMITTED;
			if (x86_pmu.commit_scheduling)
				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
		}
	} else {
		for (i = 0; i < n; i++) {
		for (i = n0; i < n; i++) {
			e = cpuc->event_list[i];
			/*
			 * do not put_constraint() on comitted events,
			 * because they are good to go
			 */
			if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
				continue;

			/*
			 * release events that failed scheduling
			 */
			if (x86_pmu.put_event_constraints)
				x86_pmu.put_event_constraints(cpuc, e);

			cpuc->event_constraint[i] = NULL;
		}
	}

@@ -1372,11 +1411,6 @@ static void x86_pmu_del(struct perf_event *event, int flags)
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int i;

	/*
	 * event is descheduled
	 */
	event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;

	/*
	 * If we're called during a txn, we only need to undo x86_pmu.add.
	 * The events never got scheduled and ->cancel_txn will truncate
@@ -1413,6 +1447,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
		cpuc->event_list[i-1] = cpuc->event_list[i];
		cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
	}
	cpuc->event_constraint[i-1] = NULL;
	--cpuc->n_events;

	perf_event_update_userpage(event);
@@ -2024,7 +2059,7 @@ static int validate_event(struct perf_event *event)
	if (IS_ERR(fake_cpuc))
		return PTR_ERR(fake_cpuc);

	c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
	c = x86_pmu.get_event_constraints(fake_cpuc, 0, event);

	if (!c || !c->weight)
		ret = -EINVAL;
@@ -2072,8 +2107,7 @@ static int validate_group(struct perf_event *event)
	if (n < 0)
		goto out;

	fake_cpuc->n_events = n;

	fake_cpuc->n_events = 0;
	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);

out:
@@ -2348,6 +2382,15 @@ void arch_perf_update_userpage(struct perf_event *event,
	cyc2ns_read_end();
}

/*
 * Determine whether the regs were taken from an irq/exception handler rather
 * than from perf_arch_fetch_caller_regs().
 */
static bool perf_hw_regs(struct pt_regs *regs)
{
	return regs->flags & X86_EFLAGS_FIXED;
}

void
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
@@ -2359,11 +2402,15 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
		return;
	}

	if (perf_hw_regs(regs)) {
		if (perf_callchain_store(entry, regs->ip))
			return;
		unwind_start(&state, current, regs, NULL);
	} else {
		unwind_start(&state, current, NULL, (void *)regs->sp);
	}

	for (unwind_start(&state, current, regs, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
	for (; !unwind_done(&state); unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr || perf_callchain_store(entry, addr))
			return;
+277 −19
@@ -239,6 +239,35 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
	EVENT_EXTRA_END
};

static struct event_constraint intel_icl_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
	INTEL_UEVENT_CONSTRAINT(0x1c0, 0),	/* INST_RETIRED.PREC_DIST */
	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
	FIXED_EVENT_CONSTRAINT(0x0400, 3),	/* SLOTS */
	INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
	INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
	INTEL_EVENT_CONSTRAINT(0x32, 0xf),	/* SW_PREFETCH_ACCESS.* */
	INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf),
	INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
	INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff),  /* CYCLE_ACTIVITY.STALLS_TOTAL */
	INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff),  /* CYCLE_ACTIVITY.STALLS_MEM_ANY */
	INTEL_EVENT_CONSTRAINT(0xa3, 0xf),      /* CYCLE_ACTIVITY.* */
	INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf),
	INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf),
	INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf),
	INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf),
	EVENT_CONSTRAINT_END
};

static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1),
	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
	EVENT_EXTRA_END
};

EVENT_ATTR_STR(mem-loads,	mem_ld_nhm,	"event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads,	mem_ld_snb,	"event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores,	mem_st_snb,	"event=0xcd,umask=0x2");
@@ -1827,6 +1856,45 @@ static __initconst const u64 glp_hw_cache_extra_regs
	},
};

#define TNT_LOCAL_DRAM			BIT_ULL(26)
#define TNT_DEMAND_READ			GLM_DEMAND_DATA_RD
#define TNT_DEMAND_WRITE		GLM_DEMAND_RFO
#define TNT_LLC_ACCESS			GLM_ANY_RESPONSE
#define TNT_SNP_ANY			(SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \
					 SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM)
#define TNT_LLC_MISS			(TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM)

static __initconst const u64 tnt_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= TNT_DEMAND_READ|
						  TNT_LLC_ACCESS,
			[C(RESULT_MISS)]	= TNT_DEMAND_READ|
						  TNT_LLC_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= TNT_DEMAND_WRITE|
						  TNT_LLC_ACCESS,
			[C(RESULT_MISS)]	= TNT_DEMAND_WRITE|
						  TNT_LLC_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= 0x0,
			[C(RESULT_MISS)]	= 0x0,
		},
	},
};

static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
	/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
	INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0),
	INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1),
	EVENT_EXTRA_END
};

#define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL	BIT_ULL(21)
@@ -2015,7 +2083,7 @@ static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int
	/*
	 * We're going to use PMC3, make sure TFA is set before we touch it.
	 */
	if (cntr == 3 && !cpuc->is_fake)
	if (cntr == 3)
		intel_set_tfa(cpuc, true);
}

@@ -2149,6 +2217,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
	bits <<= (idx * 4);
	mask = 0xfULL << (idx * 4);

	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
		bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
		mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
	}

	rdmsrl(hwc->config_base, ctrl_val);
	ctrl_val &= ~mask;
	ctrl_val |= bits;
@@ -2692,7 +2765,7 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,

	if (x86_pmu.event_constraints) {
		for_each_event_constraint(c, x86_pmu.event_constraints) {
			if ((event->hw.config & c->cmask) == c->code) {
			if (constraint_match(c, event->hw.config)) {
				event->hw.flags |= c->flags;
				return c;
			}
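Editor's note: the constraint_match() helper used above comes from the "perf/x86: Support constraint ranges" change in this series; a constraint can now cover a whole range of event codes rather than a single code, which is what the INTEL_EVENT_CONSTRAINT_RANGE() entries in the Icelake table rely on. A hypothetical, self-contained illustration of the idea (names and fields assumed, not the kernel's actual definitions):

/*
 * Hypothetical sketch of range matching, for illustration only: a
 * constraint covers either a single event code (size == 0) or the
 * inclusive range [code, code + size].
 */
struct example_constraint {
	unsigned long long	code;	/* first event code covered */
	unsigned long long	cmask;	/* config bits that are compared */
	unsigned int		size;	/* extra codes covered past 'code' */
};

static int example_constraint_match(const struct example_constraint *c,
				    unsigned long long ecode)
{
	ecode &= c->cmask;
	return ecode >= c->code && ecode <= c->code + c->size;
}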
@@ -2842,7 +2915,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
	struct intel_excl_states *xlo;
	int tid = cpuc->excl_thread_id;
	int is_excl, i;
	int is_excl, i, w;

	/*
	 * validating a group does not require
@@ -2898,36 +2971,40 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
	 * SHARED   : sibling counter measuring non-exclusive event
	 * UNUSED   : sibling counter unused
	 */
	w = c->weight;
	for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
		/*
		 * exclusive event in sibling counter
		 * our corresponding counter cannot be used
		 * regardless of our event
		 */
		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) {
			__clear_bit(i, c->idxmsk);
			w--;
			continue;
		}
		/*
		 * if measuring an exclusive event, sibling
		 * measuring non-exclusive, then counter cannot
		 * be used
		 */
		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) {
			__clear_bit(i, c->idxmsk);
			w--;
			continue;
		}
	}

	/*
	 * recompute actual bit weight for scheduling algorithm
	 */
	c->weight = hweight64(c->idxmsk64);

	/*
	 * if we return an empty mask, then switch
	 * back to static empty constraint to avoid
	 * the cost of freeing later on
	 */
	if (c->weight == 0)
	if (!w)
		c = &emptyconstraint;

	c->weight = w;

	return c;
}

@@ -2935,10 +3012,8 @@ static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			    struct perf_event *event)
{
	struct event_constraint *c1 = NULL;
	struct event_constraint *c2;
	struct event_constraint *c1, *c2;

	if (idx >= 0) /* fake does < 0 */
	c1 = cpuc->event_constraint[idx];

	/*
@@ -2947,7 +3022,8 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
	 * - dynamic constraint: handled by intel_get_excl_constraints()
	 */
	c2 = __intel_get_event_constraints(cpuc, idx, event);
	if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
	if (c1) {
	        WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC));
		bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
		c1->weight = c2->weight;
		c2 = c1;
@@ -3370,6 +3446,12 @@ static struct event_constraint counter0_constraint =
static struct event_constraint counter2_constraint =
			EVENT_CONSTRAINT(0, 0x4, 0);

static struct event_constraint fixed0_constraint =
			FIXED_EVENT_CONSTRAINT(0x00c0, 0);

static struct event_constraint fixed0_counter0_constraint =
			INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);

static struct event_constraint *
hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
@@ -3388,6 +3470,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
	return c;
}

static struct event_constraint *
icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{
	/*
	 * Fixed counter 0 has less skid.
	 * Force instruction:ppp in Fixed counter 0
	 */
	if ((event->attr.precise_ip == 3) &&
	    constraint_match(&fixed0_constraint, event->hw.config))
		return &fixed0_constraint;

	return hsw_get_event_constraints(cpuc, idx, event);
}

static struct event_constraint *
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
@@ -3403,6 +3500,29 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
	return c;
}

static struct event_constraint *
tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{
	struct event_constraint *c;

	/*
	 * :ppp means to do reduced skid PEBS,
	 * which is available on PMC0 and fixed counter 0.
	 */
	if (event->attr.precise_ip == 3) {
		/* Force instruction:ppp on PMC0 and Fixed counter 0 */
		if (constraint_match(&fixed0_constraint, event->hw.config))
			return &fixed0_counter0_constraint;

		return &counter0_constraint;
	}

	c = intel_get_event_constraints(cpuc, idx, event);

	return c;
}

static bool allow_tsx_force_abort = true;

static struct event_constraint *
@@ -3414,7 +3534,7 @@ tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
	/*
	 * Without TFA we must not use PMC3.
	 */
	if (!allow_tsx_force_abort && test_bit(3, c->idxmsk) && idx >= 0) {
	if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
		c = dyn_constraint(cpuc, c, idx);
		c->idxmsk64 &= ~(1ULL << 3);
		c->weight--;
@@ -3511,6 +3631,8 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)

int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
{
	cpuc->pebs_record_size = x86_pmu.pebs_record_size;

	if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
		cpuc->shared_regs = allocate_shared_regs(cpu);
		if (!cpuc->shared_regs)
@@ -4118,6 +4240,42 @@ static struct attribute *hsw_tsx_events_attrs[] = {
	NULL
};

EVENT_ATTR_STR(tx-capacity-read,  tx_capacity_read,  "event=0x54,umask=0x80");
EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2");
EVENT_ATTR_STR(el-capacity-read,  el_capacity_read,  "event=0x54,umask=0x80");
EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2");

static struct attribute *icl_events_attrs[] = {
	EVENT_PTR(mem_ld_hsw),
	EVENT_PTR(mem_st_hsw),
	NULL,
};

static struct attribute *icl_tsx_events_attrs[] = {
	EVENT_PTR(tx_start),
	EVENT_PTR(tx_abort),
	EVENT_PTR(tx_commit),
	EVENT_PTR(tx_capacity_read),
	EVENT_PTR(tx_capacity_write),
	EVENT_PTR(tx_conflict),
	EVENT_PTR(el_start),
	EVENT_PTR(el_abort),
	EVENT_PTR(el_commit),
	EVENT_PTR(el_capacity_read),
	EVENT_PTR(el_capacity_write),
	EVENT_PTR(el_conflict),
	EVENT_PTR(cycles_t),
	EVENT_PTR(cycles_ct),
	NULL,
};

static __init struct attribute **get_icl_events_attrs(void)
{
	return boot_cpu_has(X86_FEATURE_RTM) ?
		merge_attr(icl_events_attrs, icl_tsx_events_attrs) :
		icl_events_attrs;
}

static ssize_t freeze_on_smi_show(struct device *cdev,
				  struct device_attribute *attr,
				  char *buf)
@@ -4157,6 +4315,50 @@ done:
	return count;
}

static void update_tfa_sched(void *ignored)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * check if PMC3 is used
	 * and if so force schedule out for all event types all contexts
	 */
	if (test_bit(3, cpuc->active_mask))
		perf_pmu_resched(x86_get_pmu());
}

static ssize_t show_sysctl_tfa(struct device *cdev,
			      struct device_attribute *attr,
			      char *buf)
{
	return snprintf(buf, 40, "%d\n", allow_tsx_force_abort);
}

static ssize_t set_sysctl_tfa(struct device *cdev,
			      struct device_attribute *attr,
			      const char *buf, size_t count)
{
	bool val;
	ssize_t ret;

	ret = kstrtobool(buf, &val);
	if (ret)
		return ret;

	/* no change */
	if (val == allow_tsx_force_abort)
		return count;

	allow_tsx_force_abort = val;

	get_online_cpus();
	on_each_cpu(update_tfa_sched, NULL, 1);
	put_online_cpus();

	return count;
}


static DEVICE_ATTR_RW(freeze_on_smi);

static ssize_t branches_show(struct device *cdev,
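Editor's note: with the change above, writing the allow_tsx_force_abort attribute no longer just flips a flag; set_sysctl_tfa() runs update_tfa_sched() on every CPU, forcing a reschedule wherever PMC3 is in use so the new setting takes effect immediately. A small userspace sketch follows; the sysfs path is an assumption (the core PMU is normally registered as "cpu"), not something shown in the diff.

/*
 * Editorial sketch: toggle the TSX-force-abort knob from userspace.
 * Assumes the core PMU is exposed as "cpu" under
 * /sys/bus/event_source/devices/.
 */
#include <stdio.h>

static int set_allow_tsx_force_abort(int allow)
{
	const char *path =
		"/sys/bus/event_source/devices/cpu/allow_tsx_force_abort";
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", allow ? 1 : 0);
	return fclose(f);
}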
@@ -4189,7 +4391,9 @@ static struct attribute *intel_pmu_caps_attrs[] = {
       NULL
};

static DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
static DEVICE_ATTR(allow_tsx_force_abort, 0644,
		   show_sysctl_tfa,
		   set_sysctl_tfa);

static struct attribute *intel_pmu_attrs[] = {
	&dev_attr_freeze_on_smi.attr,
@@ -4450,6 +4654,32 @@ __init int intel_pmu_init(void)
		name = "goldmont_plus";
		break;

	case INTEL_FAM6_ATOM_TREMONT_X:
		x86_pmu.late_ack = true;
		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));
		hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;

		intel_pmu_lbr_init_skl();

		x86_pmu.event_constraints = intel_slm_event_constraints;
		x86_pmu.extra_regs = intel_tnt_extra_regs;
		/*
		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
		 * for precise cycles.
		 */
		x86_pmu.pebs_aliases = NULL;
		x86_pmu.pebs_prec_dist = true;
		x86_pmu.lbr_pt_coexist = true;
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.get_event_constraints = tnt_get_event_constraints;
		extra_attr = slm_format_attr;
		pr_cont("Tremont events, ");
		name = "Tremont";
		break;

	case INTEL_FAM6_WESTMERE:
	case INTEL_FAM6_WESTMERE_EP:
	case INTEL_FAM6_WESTMERE_EX:
@@ -4698,13 +4928,41 @@ __init int intel_pmu_init(void)
			x86_pmu.get_event_constraints = tfa_get_event_constraints;
			x86_pmu.enable_all = intel_tfa_pmu_enable_all;
			x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
			intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
			intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr;
		}

		pr_cont("Skylake events, ");
		name = "skylake";
		break;

	case INTEL_FAM6_ICELAKE_MOBILE:
		x86_pmu.late_ack = true;
		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
		hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
		intel_pmu_lbr_init_skl();

		x86_pmu.event_constraints = intel_icl_event_constraints;
		x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints;
		x86_pmu.extra_regs = intel_icl_extra_regs;
		x86_pmu.pebs_aliases = NULL;
		x86_pmu.pebs_prec_dist = true;
		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;

		x86_pmu.hw_config = hsw_hw_config;
		x86_pmu.get_event_constraints = icl_get_event_constraints;
		extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
			hsw_format_attr : nhm_format_attr;
		extra_attr = merge_attr(extra_attr, skl_format_attr);
		x86_pmu.cpu_events = get_icl_events_attrs();
		x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
		x86_pmu.lbr_pt_coexist = true;
		intel_pmu_pebs_data_source_skl(false);
		pr_cont("Icelake events, ");
		name = "icelake";
		break;

	default:
		switch (x86_pmu.version) {
		case 1:
+2 −0
@@ -578,6 +578,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),

	X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates),
	{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
+435 −70 (file changed; preview size limit exceeded, changes collapsed)

+34 −1
@@ -488,6 +488,8 @@ void intel_pmu_lbr_add(struct perf_event *event)
	 * be 'new'. Conversely, a new event can get installed through the
	 * context switch path for the first time.
	 */
	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
		cpuc->lbr_pebs_users++;
	perf_sched_cb_inc(event->ctx->pmu);
	if (!cpuc->lbr_users++ && !event->total_time_running)
		intel_pmu_lbr_reset();
@@ -507,8 +509,11 @@ void intel_pmu_lbr_del(struct perf_event *event)
		task_ctx->lbr_callstack_users--;
	}

	if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
		cpuc->lbr_pebs_users--;
	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);
	WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
	perf_sched_cb_dec(event->ctx->pmu);
}

@@ -658,7 +663,13 @@ void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!cpuc->lbr_users)
	/*
	 * Don't read when all LBR users are using adaptive PEBS.
	 *
	 * This could be smarter and actually check the event,
	 * but this simple approach seems to work for now.
	 */
	if (!cpuc->lbr_users || cpuc->lbr_users == cpuc->lbr_pebs_users)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
@@ -1080,6 +1091,28 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
	}
}

void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int i;

	cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		u64 info = lbr->lbr[i].info;
		struct perf_branch_entry *e = &cpuc->lbr_entries[i];

		e->from		= lbr->lbr[i].from;
		e->to		= lbr->lbr[i].to;
		e->mispred	= !!(info & LBR_INFO_MISPRED);
		e->predicted	= !(info & LBR_INFO_MISPRED);
		e->in_tx	= !!(info & LBR_INFO_IN_TX);
		e->abort	= !!(info & LBR_INFO_ABORT);
		e->cycles	= info & LBR_INFO_CYCLES;
		e->reserved	= 0;
	}
	intel_pmu_lbr_filter(cpuc);
}

/*
 * Map interface branch filters onto LBR filters
 */