Commit 68b34588 authored by Nicholas Piggin's avatar Nicholas Piggin Committed by Michael Ellerman
Browse files

powerpc/64/syscall: Implement syscall entry/exit logic in C



System call entry and particularly exit code is beyond the limit of
what is reasonable to implement in asm.

This conversion moves all conditional branches out of the asm code,
except for the case that all GPRs should be restored at exit.

Null syscall test is about 5% faster after this patch, because the
exit work is handled under local_irq_disable, and the hard mask and
pending interrupt replay is handled after that, which avoids games
with MSR.

mpe: Includes subsequent fixes from Nick:

This fixes 4 issues caught by TM selftests. First was a tm-syscall bug
that hit due to tabort_syscall being called after interrupts were
reconciled (in a subsequent patch), which led to interrupts being
enabled before tabort_syscall was called. Rather than going through and
un-reconciling interrupts for the return, I just go back to putting
the test early in asm, the C-ification of that wasn't a big win
anyway.

Second is the syscall return _TIF_USER_WORK_MASK check would go into
an infinite loop if _TIF_RESTORE_TM became set. The asm code uses
_TIF_USER_WORK_MASK to branch to slowpath which includes
restore_tm_state.

Third is system call return was not calling restore_tm_state. I missed
this completely (although it's in the return from interrupt C
conversion) because when the asm syscall code encountered problems it
would branch to the interrupt return code.

Fourth is MSR_VEC missing from restore_math, which was caught by
tm-unavailable selftest taking an unexpected facility unavailable
interrupt when testing VSX unavailable exception with MSR.FP=1
MSR.VEC=1. Fourth case also has a fixup in a subsequent patch.

Signed-off-by: default avatarNicholas Piggin <npiggin@gmail.com>
Signed-off-by: default avatarMichal Suchanek <msuchanek@suse.de>
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200225173541.1549955-26-npiggin@gmail.com
parent f14f8a20
Loading
Loading
Loading
Loading
+2 −11
Original line number Diff line number Diff line
@@ -97,6 +97,8 @@ ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp,
unsigned long __init early_init(unsigned long dt_ptr);
void __init machine_init(u64 dt_ptr);
#endif
/* system call entry/exit handling implemented in C */
long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs);
notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs);

long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
		      u32 len_high, u32 len_low);
@@ -104,14 +106,6 @@ long sys_switch_endian(void);
notrace unsigned int __check_irq_replay(void);
void notrace restore_interrupts(void);

/* ptrace */
long do_syscall_trace_enter(struct pt_regs *regs);
void do_syscall_trace_leave(struct pt_regs *regs);

/* process */
void restore_math(struct pt_regs *regs);
void restore_tm_state(struct pt_regs *regs);

/* prom_init (OpenFirmware) */
unsigned long __init prom_init(unsigned long r3, unsigned long r4,
			       unsigned long pp,
@@ -122,9 +116,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
void __init early_setup(unsigned long dt_ptr);
void early_setup_secondary(void);

/* time */
void accumulate_stolen_time(void);

/* misc runtime */
extern u64 __bswapdi2(u64);
extern s64 __lshrdi3(s64, int);
+13 −1
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H

#include <linux/const.h>
#include <asm/reg.h>

#define AMR_KUAP_BLOCK_READ	UL(0x4000000000000000)
#define AMR_KUAP_BLOCK_WRITE	UL(0x8000000000000000)
@@ -56,7 +57,14 @@

#ifdef CONFIG_PPC_KUAP

#include <asm/reg.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>

/*
 * Debug check of the AMR: warn (once) when the SPR does not hold
 * AMR_KUAP_BLOCKED.  Only active when CONFIG_PPC_KUAP_DEBUG is enabled and
 * the MMU reports MMU_FTR_RADIX_KUAP; otherwise this does nothing.
 */
static inline void kuap_check_amr(void)
{
	if (!IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
		return;

	if (!mmu_has_feature(MMU_FTR_RADIX_KUAP))
		return;

	WARN_ON_ONCE(mfspr(SPRN_AMR) != AMR_KUAP_BLOCKED);
}

/*
 * We support individually allowing read or write, but we don't support nesting
@@ -127,6 +135,10 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
		    (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
		    "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
}
#else /* CONFIG_PPC_KUAP */
/* No AMR check when CONFIG_PPC_KUAP is not set. */
static inline void kuap_check_amr(void) { }
#endif /* CONFIG_PPC_KUAP */

#endif /* __ASSEMBLY__ */
+33 −0
Original line number Diff line number Diff line
@@ -43,9 +43,12 @@ static inline unsigned long cputime_to_usecs(const cputime_t ct)
 */
#ifdef CONFIG_PPC64
#define get_accounting(tsk)	(&get_paca()->accounting)
#define raw_get_accounting(tsk)	(&local_paca->accounting)
static inline void arch_vtime_task_switch(struct task_struct *tsk) { }

#else
#define get_accounting(tsk)	(&task_thread_info(tsk)->accounting)
#define raw_get_accounting(tsk)	get_accounting(tsk)
/*
 * Called from the context switch with interrupts disabled, to charge all
 * accumulated times to the current process, and to prepare accounting on
@@ -60,6 +63,36 @@ static inline void arch_vtime_task_switch(struct task_struct *prev)
}
#endif

/*
 * account_cpu_user_entry/exit run "unreconciled", so they can't be traced
 * and can't use get_paca(); raw_get_accounting() is used instead.
 *
 * Entry: add the timebase ticks elapsed since starttime_user to utime and
 * record the current timebase in starttime.
 */
static notrace inline void account_cpu_user_entry(void)
{
	unsigned long now = mftb();
	struct cpu_accounting_data *cad = raw_get_accounting(current);

	cad->utime += now - cad->starttime_user;
	cad->starttime = now;
}

/*
 * Exit: add the timebase ticks elapsed since starttime to stime and record
 * the current timebase in starttime_user.
 */
static notrace inline void account_cpu_user_exit(void)
{
	unsigned long now = mftb();
	struct cpu_accounting_data *cad = raw_get_accounting(current);

	cad->stime += now - cad->starttime;
	cad->starttime_user = now;
}


#endif /* __KERNEL__ */
#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/* Nothing to account without CONFIG_VIRT_CPU_ACCOUNTING_NATIVE. */
static inline void account_cpu_user_entry(void) { }
static inline void account_cpu_user_exit(void) { }
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* __POWERPC_CPUTIME_H */
+4 −0
Original line number Diff line number Diff line
@@ -228,9 +228,13 @@ static inline bool arch_irqs_disabled(void)
/*
 * Low-level hard interrupt enable/disable primitives.  Book3E uses wrtee();
 * other platforms use __mtmsrd() with an explicit combination of MSR_EE and
 * MSR_RI.
 */
#ifdef CONFIG_PPC_BOOK3E
#define __hard_irq_enable()	wrtee(MSR_EE)
#define __hard_irq_disable()	wrtee(0)
/* On Book3E this is the same wrtee(0) as __hard_irq_disable() */
#define __hard_EE_RI_disable()	wrtee(0)
/* No-op on Book3E */
#define __hard_RI_enable()	do { } while (0)
#else
#define __hard_irq_enable()	__mtmsrd(MSR_EE|MSR_RI, 1)
#define __hard_irq_disable()	__mtmsrd(MSR_RI, 1)
/* Writes a value with neither MSR_EE nor MSR_RI set */
#define __hard_EE_RI_disable()	__mtmsrd(0, 1)
/* Same write as __hard_irq_disable(): MSR_RI set, MSR_EE not set */
#define __hard_RI_enable()	__mtmsrd(MSR_RI, 1)
#endif

#define hard_irq_disable()	do {					\
+3 −0
Original line number Diff line number Diff line
@@ -138,6 +138,9 @@ extern unsigned long profile_pc(struct pt_regs *regs);
#define profile_pc(regs) instruction_pointer(regs)
#endif

/* ptrace: syscall trace enter/leave hooks */
long do_syscall_trace_enter(struct pt_regs *regs);
void do_syscall_trace_leave(struct pt_regs *regs);

#define kernel_stack_pointer(regs) ((regs)->gpr[1])
static inline int is_syscall_success(struct pt_regs *regs)
{
Loading