Commit 6650cdd9 authored by Peter Zijlstra (Intel), committed by Borislav Petkov
Browse files

x86/split_lock: Enable split lock detection by kernel



A split-lock occurs when an atomic instruction operates on data that spans
two cache lines. In order to maintain atomicity the core takes a global bus
lock.

This is typically >1000 cycles slower than an atomic operation within a
cache line. It also disrupts performance on other cores (which must wait
for the bus lock to be released before their memory operations can
complete). For real-time systems this may mean missing deadlines. For other
systems it may just be very annoying.

Some CPUs have the capability to raise an #AC trap when a split lock is
attempted.

Provide a command line option to give the user choices on how to handle
this:

split_lock_detect=
	off	- not enabled (no traps for split locks)
	warn	- warn once when an application does a
		  split lock, but allow it to continue
		  running.
	fatal	- Send SIGBUS to applications that cause a split lock

On systems that support split lock detection the default is "warn". Note
that if the kernel hits a split lock in any mode other than "off" it will
OOPs.

One implementation wrinkle is that the MSR to control the split lock
detection is per-core, not per thread. This might result in some short
lived races on HT systems in "warn" mode if Linux tries to enable on one
thread while disabling on the other. Race analysis by Sean Christopherson:

  - Toggling of split-lock is only done in "warn" mode.  Worst case
    scenario of a race is that a misbehaving task will generate multiple
    #AC exceptions on the same instruction.  And this race will only occur
    if both siblings are running tasks that generate split-lock #ACs, e.g.
    a race where sibling threads are writing different values will only
    occur if CPUx is disabling split-lock after an #AC and CPUy is
    re-enabling split-lock after *its* previous task generated an #AC.
  - Transitioning between off/warn/fatal modes at runtime isn't supported
    and disabling is tracked per task, so hardware will always reach a steady
    state that matches the configured mode.  I.e. split-lock is guaranteed to
    be enabled in hardware once all _TIF_SLD threads have been scheduled out.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Co-developed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20200126200535.GB30377@agluck-desk2.amr.corp.intel.com
parent 11a48a5a
Loading
Loading
Loading
Loading
+22 −0
Original line number Diff line number Diff line
@@ -4655,6 +4655,28 @@
	spia_pedr=
	spia_peddr=

	split_lock_detect=
			[X86] Enable split lock detection

			When enabled (and if hardware support is present), atomic
			instructions that access data across cache line
			boundaries will result in an alignment check exception.

			off	- not enabled

			warn	- the kernel will emit rate limited warnings
				  about applications triggering the #AC
				  exception. This mode is the default on CPUs
				  that support split lock detection.

			fatal	- the kernel will send SIGBUS to applications
				  that trigger the #AC exception.

			If an #AC exception is hit in the kernel or in
			firmware (i.e. not while executing in user mode)
			the kernel will oops in either "warn" or "fatal"
			mode.

	srcutree.counter_wrap_check [KNL]
			Specifies how frequently to check for
			grace-period sequence counter wrap for the
+12 −0
Original line number Diff line number Diff line
@@ -40,4 +40,16 @@ int mwait_usable(const struct cpuinfo_x86 *);
unsigned int x86_family(unsigned int sig);
unsigned int x86_model(unsigned int sig);
unsigned int x86_stepping(unsigned int sig);
/*
 * Split lock detection hooks.
 *
 * With CONFIG_CPU_SUP_INTEL set, only the prototypes are provided here and
 * the definitions live elsewhere. Without it, the inline stubs below are
 * used instead: cpu_set_core_cap_bits() and switch_to_sld() do nothing, and
 * handle_user_split_lock() always returns false.
 */
#ifdef CONFIG_CPU_SUP_INTEL
extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
/* NOTE(review): tifn presumably carries thread-info flags (TIF_SLD) - confirm at the call site. */
extern void switch_to_sld(unsigned long tifn);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
#else
/* No Intel CPU support configured: empty stubs with the same signatures. */
static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
static inline void switch_to_sld(unsigned long tifn) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
	/* Stub never handles the event. */
	return false;
}
#endif
#endif /* _ASM_X86_CPU_H */
+2 −0
Original line number Diff line number Diff line
@@ -285,6 +285,7 @@
#define X86_FEATURE_CQM_MBM_LOCAL	(11*32+ 3) /* LLC Local MBM monitoring */
#define X86_FEATURE_FENCE_SWAPGS_USER	(11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_SPLIT_LOCK_DETECT	(11*32+ 6) /* #AC for split lock */

/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
@@ -367,6 +368,7 @@
#define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D		(18*32+28) /* Flush L1D cache */
#define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
#define X86_FEATURE_CORE_CAPABILITIES	(18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
#define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */

/*
+9 −0
Original line number Diff line number Diff line
@@ -41,6 +41,10 @@

/* Intel MSRs. Some also available on other CPUs */

/*
 * Test control MSR. Bit 29 is the split lock detection bit; the _BIT macro is
 * the bit index and the unsuffixed macro is the corresponding mask.
 * NOTE(review): presumably setting the bit enables the #AC trap on split
 * locks - confirm against the Intel SDM.
 */
#define MSR_TEST_CTRL				0x00000033
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT	29
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT		BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT)

#define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS			BIT(0)	   /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP_SHIFT		1	   /* Single Thread Indirect Branch Predictor (STIBP) bit */
@@ -70,6 +74,11 @@
 */
#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK	(~0x03U)

/*
 * Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES.
 * Bit 5 is the split lock detection bit; the _BIT macro is the bit index and
 * the unsuffixed macro is the corresponding mask.
 * NOTE(review): presumably the bit reports that the CPU supports split lock
 * detection - confirm against the Intel SDM.
 */
#define MSR_IA32_CORE_CAPS			  0x000000cf
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT  5
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT	  BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)

#define MSR_PKG_CST_CONFIG_CONTROL	0x000000e2
#define NHM_C3_AUTO_DEMOTE		(1UL << 25)
#define NHM_C1_AUTO_DEMOTE		(1UL << 26)
+3 −1
Original line number Diff line number Diff line
@@ -92,6 +92,7 @@ struct thread_info {
#define TIF_NOCPUID		15	/* CPUID is not accessible in userland */
#define TIF_NOTSC		16	/* TSC is not accessible in userland */
#define TIF_IA32		17	/* IA32 compatibility process */
#define TIF_SLD			18	/* Restore split lock detection on context switch */
#define TIF_NOHZ		19	/* in adaptive nohz mode */
#define TIF_MEMDIE		20	/* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
@@ -122,6 +123,7 @@ struct thread_info {
#define _TIF_NOCPUID		(1 << TIF_NOCPUID)
#define _TIF_NOTSC		(1 << TIF_NOTSC)
#define _TIF_IA32		(1 << TIF_IA32)
#define _TIF_SLD		(1 << TIF_SLD)
#define _TIF_NOHZ		(1 << TIF_NOHZ)
#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
@@ -145,7 +147,7 @@ struct thread_info {
/*
 * Flags to check in __switch_to().
 *
 * _TIF_SLD is part of the mask: TIF_SLD marks a task for which split lock
 * detection has to be restored on context switch, so it must be one of the
 * flags examined there.
 */
#define _TIF_WORK_CTXSW_BASE					\
	(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP |		\
	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD)

/*
 * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
Loading