Commit 0b38b5e1 authored by Sven Schnelle's avatar Sven Schnelle Committed by Vasily Gorbik
Browse files

s390: prevent leaking kernel address in BEAR



When userspace executes a syscall or gets interrupted,
BEAR contains a kernel address when returning to userspace.
This make it pretty easy to figure out where the kernel is
mapped even with KASLR enabled. To fix this, add lpswe to
lowcore and always execute it there, so userspace sees only
the lowcore address of lpswe. For this we have to extend
both critical_cleanup and the SWITCH_ASYNC macro to also check
for lpswe addresses in lowcore.

Fixes: b2d24b97 ("s390/kernel: add support for kernel address space layout randomization (KASLR)")
Cc: <stable@vger.kernel.org> # v5.2+
Reviewed-by: default avatarGerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: default avatarSven Schnelle <svens@linux.ibm.com>
Signed-off-by: default avatarVasily Gorbik <gor@linux.ibm.com>
parent d2abfbe4
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -141,7 +141,9 @@ struct lowcore {

	/* br %r1 trampoline */
	__u16	br_r1_trampoline;		/* 0x0400 */
	__u8	pad_0x0402[0x0e00-0x0402];	/* 0x0402 */
	__u32	return_lpswe;			/* 0x0402 */
	__u32	return_mcck_lpswe;		/* 0x0406 */
	__u8	pad_0x040a[0x0e00-0x040a];	/* 0x040a */

	/*
	 * 0xe00 contains the address of the IPL Parameter Information
+1 −0
Original line number Diff line number Diff line
@@ -162,6 +162,7 @@ typedef struct thread_struct thread_struct;
#define INIT_THREAD {							\
	.ksp = sizeof(init_stack) + (unsigned long) &init_stack,	\
	.fpu.regs = (void *) init_task.thread.fpu.fprs,			\
	.last_break = 1,						\
}

/*
+7 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@

#include <linux/bits.h>
#include <uapi/asm/setup.h>
#include <linux/build_bug.h>

#define EP_OFFSET		0x10008
#define EP_STRING		"S390EP"
@@ -162,6 +163,12 @@ static inline unsigned long kaslr_offset(void)
	return __kaslr_offset;
}

static inline u32 gen_lpswe(unsigned long addr)
{
	BUILD_BUG_ON(addr > 0xfff);
	return 0xb2b20000 | addr;
}

#else /* __ASSEMBLY__ */

#define IPL_DEVICE	(IPL_DEVICE_OFFSET)
+2 −0
Original line number Diff line number Diff line
@@ -124,6 +124,8 @@ int main(void)
	OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code);
	OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address);
	OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr);
	OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe);
	OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe);
	OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw);
	OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw);
	OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw);
+39 −26
Original line number Diff line number Diff line
@@ -115,26 +115,29 @@ _LPP_OFFSET = __LC_LPP

	.macro	SWITCH_ASYNC savearea,timer
	tmhh	%r8,0x0001		# interrupting from user ?
	jnz	1f
	jnz	2f
	lgr	%r14,%r9
	cghi	%r14,__LC_RETURN_LPSWE
	je	0f
	slg	%r14,BASED(.Lcritical_start)
	clg	%r14,BASED(.Lcritical_length)
	jhe	0f
	jhe	1f
0:
	lghi	%r11,\savearea		# inside critical section, do cleanup
	brasl	%r14,cleanup_critical
	tmhh	%r8,0x0001		# retest problem state after cleanup
	jnz	1f
0:	lg	%r14,__LC_ASYNC_STACK	# are we already on the target stack?
	jnz	2f
1:	lg	%r14,__LC_ASYNC_STACK	# are we already on the target stack?
	slgr	%r14,%r15
	srag	%r14,%r14,STACK_SHIFT
	jnz	2f
	jnz	3f
	CHECK_STACK \savearea
	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
	j	3f
1:	UPDATE_VTIME %r14,%r15,\timer
	j	4f
2:	UPDATE_VTIME %r14,%r15,\timer
	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
2:	lg	%r15,__LC_ASYNC_STACK	# load async stack
3:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
3:	lg	%r15,__LC_ASYNC_STACK	# load async stack
4:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
	.endm

	.macro UPDATE_VTIME w1,w2,enter_timer
@@ -401,7 +404,7 @@ ENTRY(system_call)
	stpt	__LC_EXIT_TIMER
	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
	lmg	%r11,%r15,__PT_R11(%r11)
	lpswe	__LC_RETURN_PSW
	b	__LC_RETURN_LPSWE(%r0)
.Lsysc_done:

#
@@ -608,43 +611,50 @@ ENTRY(pgm_check_handler)
	BPOFF
	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
	lg	%r10,__LC_LAST_BREAK
	lg	%r12,__LC_CURRENT
	srag	%r11,%r10,12
	jnz	0f
	/* if __LC_LAST_BREAK is < 4096, it contains one of
	 * the lpswe addresses in lowcore. Set it to 1 (initial state)
	 * to prevent leaking that address to userspace.
	 */
	lghi	%r10,1
0:	lg	%r12,__LC_CURRENT
	lghi	%r11,0
	larl	%r13,cleanup_critical
	lmg	%r8,%r9,__LC_PGM_OLD_PSW
	tmhh	%r8,0x0001		# test problem state bit
	jnz	2f			# -> fault in user space
	jnz	3f			# -> fault in user space
#if IS_ENABLED(CONFIG_KVM)
	# cleanup critical section for program checks in sie64a
	lgr	%r14,%r9
	slg	%r14,BASED(.Lsie_critical_start)
	clg	%r14,BASED(.Lsie_critical_length)
	jhe	0f
	jhe	1f
	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
	larl	%r9,sie_exit			# skip forward to sie_exit
	lghi	%r11,_PIF_GUEST_FAULT
#endif
0:	tmhh	%r8,0x4000		# PER bit set in old PSW ?
	jnz	1f			# -> enabled, can't be a double fault
1:	tmhh	%r8,0x4000		# PER bit set in old PSW ?
	jnz	2f			# -> enabled, can't be a double fault
	tm	__LC_PGM_ILC+3,0x80	# check for per exception
	jnz	.Lpgm_svcper		# -> single stepped svc
1:	CHECK_STACK __LC_SAVE_AREA_SYNC
2:	CHECK_STACK __LC_SAVE_AREA_SYNC
	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
	# CHECK_VMAP_STACK branches to stack_overflow or 4f
	CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
2:	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
	# CHECK_VMAP_STACK branches to stack_overflow or 5f
	CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f
3:	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
	lg	%r15,__LC_KERNEL_STACK
	lgr	%r14,%r12
	aghi	%r14,__TASK_thread	# pointer to thread_struct
	lghi	%r13,__LC_PGM_TDB
	tm	__LC_PGM_ILC+2,0x02	# check for transaction abort
	jz	3f
	jz	4f
	mvc	__THREAD_trap_tdb(256,%r14),0(%r13)
3:	stg	%r10,__THREAD_last_break(%r14)
4:	lgr	%r13,%r11
4:	stg	%r10,__THREAD_last_break(%r14)
5:	lgr	%r13,%r11
	la	%r11,STACK_FRAME_OVERHEAD(%r15)
	stmg	%r0,%r7,__PT_R0(%r11)
	# clear user controlled registers to prevent speculative use
@@ -663,14 +673,14 @@ ENTRY(pgm_check_handler)
	stg	%r13,__PT_FLAGS(%r11)
	stg	%r10,__PT_ARGS(%r11)
	tm	__LC_PGM_ILC+3,0x80	# check for per exception
	jz	5f
	jz	6f
	tmhh	%r8,0x0001		# kernel per event ?
	jz	.Lpgm_kprobe
	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
5:	REENABLE_IRQS
6:	REENABLE_IRQS
	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
	larl	%r1,pgm_check_table
	llgh	%r10,__PT_INT_CODE+2(%r11)
@@ -775,7 +785,7 @@ ENTRY(io_int_handler)
	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
.Lio_exit_kernel:
	lmg	%r11,%r15,__PT_R11(%r11)
	lpswe	__LC_RETURN_PSW
	b	__LC_RETURN_LPSWE(%r0)
.Lio_done:

#
@@ -1214,7 +1224,7 @@ ENTRY(mcck_int_handler)
	stpt	__LC_EXIT_TIMER
	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
0:	lmg	%r11,%r15,__PT_R11(%r11)
	lpswe	__LC_RETURN_MCCK_PSW
	b	__LC_RETURN_MCCK_LPSWE

.Lmcck_panic:
	lg	%r15,__LC_NODAT_STACK
@@ -1271,6 +1281,8 @@ ENDPROC(stack_overflow)
#endif

ENTRY(cleanup_critical)
	cghi	%r9,__LC_RETURN_LPSWE
	je	.Lcleanup_lpswe
#if IS_ENABLED(CONFIG_KVM)
	clg	%r9,BASED(.Lcleanup_table_sie)	# .Lsie_gmap
	jl	0f
@@ -1424,6 +1436,7 @@ ENDPROC(cleanup_critical)
	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
	mvc	0(64,%r11),__PT_R8(%r9)
	lmg	%r0,%r7,__PT_R0(%r9)
.Lcleanup_lpswe:
1:	lmg	%r8,%r9,__LC_RETURN_PSW
	BR_EX	%r14,%r11
.Lcleanup_sysc_restore_insn:
Loading