Commit ab037dd8, authored by Christophe Leroy, committed by Michael Ellerman
Browse files

powerpc/vdso: Switch VDSO to generic C implementation.



With the C VDSO, the performance is slightly lower, but it is worth
it as it will ease maintenance and evolution, and also brings clocks
that are not supported with the ASM VDSO.

On an 8xx at 132 MHz, vdsotest with the ASM VDSO:
  gettimeofday:    		  vdso:  828 nsec/call
  clock-getres-realtime-coarse:   vdso:  391 nsec/call
  clock-gettime-realtime-coarse:  vdso:  614 nsec/call
  clock-getres-realtime:    	  vdso:  460 nsec/call
  clock-gettime-realtime:    	  vdso:  876 nsec/call
  clock-getres-monotonic-coarse:  vdso:  399 nsec/call
  clock-gettime-monotonic-coarse: vdso:  691 nsec/call
  clock-getres-monotonic:    	  vdso:  460 nsec/call
  clock-gettime-monotonic:    	  vdso: 1026 nsec/call

On an 8xx at 132 MHz, vdsotest with the C VDSO:
  gettimeofday:    		  vdso:  955 nsec/call
  clock-getres-realtime-coarse:   vdso:  545 nsec/call
  clock-gettime-realtime-coarse:  vdso:  592 nsec/call
  clock-getres-realtime:          vdso:  545 nsec/call
  clock-gettime-realtime:    	  vdso:  941 nsec/call
  clock-getres-monotonic-coarse:  vdso:  545 nsec/call
  clock-gettime-monotonic-coarse: vdso:  591 nsec/call
  clock-getres-monotonic:         vdso:  545 nsec/call
  clock-gettime-monotonic:        vdso:  940 nsec/call

It is even better for gettime with monotonic clocks.

Unsupported clocks with ASM VDSO:
  clock-gettime-boottime:         vdso: 3851 nsec/call
  clock-gettime-tai:      	  vdso: 3852 nsec/call
  clock-gettime-monotonic-raw:    vdso: 3396 nsec/call

Same clocks with C VDSO:
  clock-gettime-tai:              vdso:  941 nsec/call
  clock-gettime-monotonic-raw:    vdso: 1001 nsec/call
  clock-gettime-monotonic-coarse: vdso:  591 nsec/call

On an 8321E at 333 MHz, vdsotest with the ASM VDSO:
  gettimeofday:     		  vdso: 220 nsec/call
  clock-getres-realtime-coarse:   vdso: 102 nsec/call
  clock-gettime-realtime-coarse:  vdso: 178 nsec/call
  clock-getres-realtime:          vdso: 129 nsec/call
  clock-gettime-realtime:    	  vdso: 235 nsec/call
  clock-getres-monotonic-coarse:  vdso: 105 nsec/call
  clock-gettime-monotonic-coarse: vdso: 208 nsec/call
  clock-getres-monotonic:         vdso: 129 nsec/call
  clock-gettime-monotonic:        vdso: 274 nsec/call

On an 8321E at 333 MHz, vdsotest with the C VDSO:
  gettimeofday:    		  vdso: 272 nsec/call
  clock-getres-realtime-coarse:   vdso: 160 nsec/call
  clock-gettime-realtime-coarse:  vdso: 184 nsec/call
  clock-getres-realtime:          vdso: 166 nsec/call
  clock-gettime-realtime:         vdso: 281 nsec/call
  clock-getres-monotonic-coarse:  vdso: 160 nsec/call
  clock-gettime-monotonic-coarse: vdso: 184 nsec/call
  clock-getres-monotonic:         vdso: 169 nsec/call
  clock-gettime-monotonic:        vdso: 275 nsec/call

On a Power9 Nimbus DD2.2 at 3.8GHz, with the ASM VDSO:
  clock-gettime-monotonic:    	  vdso:  35 nsec/call
  clock-getres-monotonic:    	  vdso:  16 nsec/call
  clock-gettime-monotonic-coarse: vdso:  18 nsec/call
  clock-getres-monotonic-coarse:  vdso: 522 nsec/call
  clock-gettime-monotonic-raw:    vdso: 598 nsec/call
  clock-getres-monotonic-raw:     vdso: 520 nsec/call
  clock-gettime-realtime:    	  vdso:  34 nsec/call
  clock-getres-realtime:    	  vdso:  16 nsec/call
  clock-gettime-realtime-coarse:  vdso:  18 nsec/call
  clock-getres-realtime-coarse:   vdso: 517 nsec/call
  getcpu:    			  vdso:   8 nsec/call
  gettimeofday:    		  vdso:  25 nsec/call

And with the C VDSO:
  clock-gettime-monotonic:    	  vdso:  37 nsec/call
  clock-getres-monotonic:    	  vdso:  20 nsec/call
  clock-gettime-monotonic-coarse: vdso:  21 nsec/call
  clock-getres-monotonic-coarse:  vdso:  19 nsec/call
  clock-gettime-monotonic-raw:    vdso:  38 nsec/call
  clock-getres-monotonic-raw:     vdso:  20 nsec/call
  clock-gettime-realtime:    	  vdso:  37 nsec/call
  clock-getres-realtime:    	  vdso:  20 nsec/call
  clock-gettime-realtime-coarse:  vdso:  20 nsec/call
  clock-getres-realtime-coarse:   vdso:  19 nsec/call
  getcpu:    			  vdso:   8 nsec/call
  gettimeofday:    		  vdso:  28 nsec/call

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201126131006.2431205-8-mpe@ellerman.id.au
parent 7fec9f5d
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -176,6 +176,7 @@ config PPC
	select GENERIC_STRNCPY_FROM_USER
	select GENERIC_STRNLEN_USER
	select GENERIC_TIME_VSYSCALL
	select GENERIC_GETTIMEOFDAY
	select HAVE_ARCH_AUDITSYSCALL
	select HAVE_ARCH_HUGE_VMAP		if PPC_BOOK3S_64 && PPC_RADIX_MMU
	select HAVE_ARCH_JUMP_LABEL
@@ -206,6 +207,7 @@ config PPC
	select HAVE_FUNCTION_GRAPH_TRACER
	select HAVE_FUNCTION_TRACER
	select HAVE_GCC_PLUGINS			if GCC_VERSION >= 50200   # plugin support on gcc <= 5.1 is buggy on PPC
	select HAVE_GENERIC_VDSO
	select HAVE_HW_BREAKPOINT		if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
	select HAVE_IDE
	select HAVE_IOREMAP_PROT
+25 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_VDSO_VSYSCALL_H
#define _ASM_POWERPC_VDSO_VSYSCALL_H

#ifndef __ASSEMBLY__

#include <linux/timekeeper_internal.h>
#include <asm/vdso_datapage.h>

/*
 * Kernel-side accessor for the generic vDSO data: hands back the 'data'
 * member reached through the global vdso_data pointer. Defining the macro
 * of the same name below marks this hook as provided before
 * <asm-generic/vdso/vsyscall.h> is included.
 */
static __always_inline struct vdso_data *__arch_get_k_vdso_data(void)
{
	struct vdso_data *generic_data = vdso_data->data;

	return generic_data;
}
#define __arch_get_k_vdso_data __arch_get_k_vdso_data

/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>

#endif /* !__ASSEMBLY__ */

#endif /* _ASM_POWERPC_VDSO_VSYSCALL_H */
+13 −27
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@

#include <linux/unistd.h>
#include <linux/time.h>
#include <vdso/datapage.h>

#define SYSCALL_MAP_SIZE      ((NR_syscalls + 31) / 32)

@@ -45,7 +46,7 @@

#ifdef CONFIG_PPC64

struct vdso_data {
struct vdso_arch_data {
	__u8  eye_catcher[16];		/* Eyecatcher: SYSTEMCFG:PPC64	0x00 */
	struct {			/* Systemcfg version numbers	     */
		__u32 major;		/* Major number			0x10 */
@@ -59,13 +60,13 @@ struct vdso_data {
	__u32 processor;		/* Processor type		0x1C */
	__u64 processorCount;		/* # of physical processors	0x20 */
	__u64 physicalMemorySize;	/* Size of real memory(B)	0x28 */
	__u64 tb_orig_stamp;		/* Timebase at boot		0x30 */
	__u64 tb_orig_stamp;		/* (NU) Timebase at boot	0x30 */
	__u64 tb_ticks_per_sec;		/* Timebase tics / sec		0x38 */
	__u64 tb_to_xs;			/* Inverse of TB to 2^20	0x40 */
	__u64 stamp_xsec;		/*				0x48 */
	__u64 tb_update_count;		/* Timebase atomicity ctr	0x50 */
	__u32 tz_minuteswest;		/* Minutes west of Greenwich	0x58 */
	__u32 tz_dsttime;		/* Type of dst correction	0x5C */
	__u64 tb_to_xs;			/* (NU) Inverse of TB to 2^20	0x40 */
	__u64 stamp_xsec;		/* (NU)				0x48 */
	__u64 tb_update_count;		/* (NU) Timebase atomicity ctr	0x50 */
	__u32 tz_minuteswest;		/* (NU) Min. west of Greenwich	0x58 */
	__u32 tz_dsttime;		/* (NU) Type of dst correction	0x5C */
	__u32 dcache_size;		/* L1 d-cache size		0x60 */
	__u32 dcache_line_size;		/* L1 d-cache line size		0x64 */
	__u32 icache_size;		/* L1 i-cache size		0x68 */
@@ -78,14 +79,10 @@ struct vdso_data {
	__u32 icache_block_size;		/* L1 i-cache block size     */
	__u32 dcache_log_block_size;		/* L1 d-cache log block size */
	__u32 icache_log_block_size;		/* L1 i-cache log block size */
	__u32 stamp_sec_fraction;		/* fractional seconds of stamp_xtime */
	__s32 wtom_clock_nsec;			/* Wall to monotonic clock nsec */
	__s64 wtom_clock_sec;			/* Wall to monotonic clock sec */
	__s64 stamp_xtime_sec;			/* xtime secs as at tb_orig_stamp */
	__s64 stamp_xtime_nsec;			/* xtime nsecs as at tb_orig_stamp */
	__u32 hrtimer_res;			/* hrtimer resolution */
   	__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls  */
   	__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */

	struct vdso_data data[CS_BASES];
};

#else /* CONFIG_PPC64 */
@@ -93,26 +90,15 @@ struct vdso_data {
/*
 * And here is the simpler 32 bits version
 */
struct vdso_data {
	__u64 tb_orig_stamp;		/* Timebase at boot		0x30 */
struct vdso_arch_data {
	__u64 tb_ticks_per_sec;		/* Timebase tics / sec		0x38 */
	__u64 tb_to_xs;			/* Inverse of TB to 2^20	0x40 */
	__u64 stamp_xsec;		/*				0x48 */
	__u32 tb_update_count;		/* Timebase atomicity ctr	0x50 */
	__u32 tz_minuteswest;		/* Minutes west of Greenwich	0x58 */
	__u32 tz_dsttime;		/* Type of dst correction	0x5C */
	__s32 wtom_clock_sec;			/* Wall to monotonic clock */
	__s32 wtom_clock_nsec;
	__s32 stamp_xtime_sec;		/* xtime seconds as at tb_orig_stamp */
	__s32 stamp_xtime_nsec;		/* xtime nsecs as at tb_orig_stamp */
	__u32 stamp_sec_fraction;	/* fractional seconds of stamp_xtime */
	__u32 hrtimer_res;		/* hrtimer resolution */
   	__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
	struct vdso_data data[CS_BASES];
};

#endif /* CONFIG_PPC64 */

extern struct vdso_data *vdso_data;
extern struct vdso_arch_data *vdso_data;

#else /* __ASSEMBLY__ */

+9 −40
Original line number Diff line number Diff line
@@ -398,47 +398,16 @@ int main(void)
#endif /* ! CONFIG_PPC64 */

	/* datapage offsets for use by vdso */
	OFFSET(CFG_TB_ORIG_STAMP, vdso_data, tb_orig_stamp);
	OFFSET(CFG_TB_TICKS_PER_SEC, vdso_data, tb_ticks_per_sec);
	OFFSET(CFG_TB_TO_XS, vdso_data, tb_to_xs);
	OFFSET(CFG_TB_UPDATE_COUNT, vdso_data, tb_update_count);
	OFFSET(CFG_TZ_MINUTEWEST, vdso_data, tz_minuteswest);
	OFFSET(CFG_TZ_DSTTIME, vdso_data, tz_dsttime);
	OFFSET(CFG_SYSCALL_MAP32, vdso_data, syscall_map_32);
	OFFSET(WTOM_CLOCK_SEC, vdso_data, wtom_clock_sec);
	OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec);
	OFFSET(STAMP_XTIME_SEC, vdso_data, stamp_xtime_sec);
	OFFSET(STAMP_XTIME_NSEC, vdso_data, stamp_xtime_nsec);
	OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction);
	OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res);
	OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data);
	OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec);
	OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map_32);
#ifdef CONFIG_PPC64
	OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size);
	OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size);
	OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size);
	OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_data, dcache_log_block_size);
	OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
	OFFSET(TVAL64_TV_SEC, __kernel_old_timeval, tv_sec);
	OFFSET(TVAL64_TV_USEC, __kernel_old_timeval, tv_usec);
#endif
	OFFSET(TSPC64_TV_SEC, __kernel_timespec, tv_sec);
	OFFSET(TSPC64_TV_NSEC, __kernel_timespec, tv_nsec);
	OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
	OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
	OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
	OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
	/* timeval/timezone offsets for use by vdso */
	OFFSET(TZONE_TZ_MINWEST, timezone, tz_minuteswest);
	OFFSET(TZONE_TZ_DSTTIME, timezone, tz_dsttime);

	/* Other bits used by the vdso */
	DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
	DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
	DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
	DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
	DEFINE(CLOCK_MAX, CLOCK_TAI);
	DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
	DEFINE(EINVAL, EINVAL);
	DEFINE(KTIME_LOW_RES, KTIME_LOW_RES);
	OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size);
	OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size);
	OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size);
	OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size);
	OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map_64);
#endif

#ifdef CONFIG_BUG
	DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
+1 −90
Original line number Diff line number Diff line
@@ -82,6 +82,7 @@ static struct clocksource clocksource_timebase = {
	.flags        = CLOCK_SOURCE_IS_CONTINUOUS,
	.mask         = CLOCKSOURCE_MASK(64),
	.read         = timebase_read,
	.vdso_clock_mode	= VDSO_CLOCKMODE_ARCHTIMER,
};

#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
@@ -831,95 +832,6 @@ static notrace u64 timebase_read(struct clocksource *cs)
	return (u64)get_tb();
}


/*
 * Copy the current timekeeping state from @tk into the vdso_data page.
 *
 * @tk: timekeeper to publish; its tkr_mono readout base (clock, mult,
 *      shift, cycle_last, xtime_nsec), xtime_sec and wall_to_monotonic
 *      are read here.
 *
 * Nothing is written unless the timekeeper's clocksource is
 * clocksource_timebase.  The update is bracketed by two increments of
 * tb_update_count (with barriers), so the counter changes value across
 * the update and readers following the protocol described further down
 * can detect a concurrent update and retry.
 */
void update_vsyscall(struct timekeeper *tk)
{
	struct timespec64 xt;
	struct clocksource *clock = tk->tkr_mono.clock;
	u32 mult = tk->tkr_mono.mult;
	u32 shift = tk->tkr_mono.shift;
	u64 cycle_last = tk->tkr_mono.cycle_last;
	u64 new_tb_to_xs, new_stamp_xsec;
	u64 frac_sec;

	/* Only publish when the timebase is the clocksource in use. */
	if (clock != &clocksource_timebase)
		return;

	/*
	 * Snapshot the wall time: whole seconds, plus the shifted-nanosecond
	 * accumulator scaled back down to plain nanoseconds.
	 */
	xt.tv_sec = tk->xtime_sec;
	xt.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);

	/* Make userspace gettimeofday spin until we're done. */
	++vdso_data->tb_update_count;
	smp_mb();

	/*
	 * This computes ((2^20 / 1e9) * mult) >> shift as a
	 * 0.64 fixed-point fraction.
	 * The computation in the else clause below won't overflow
	 * (as long as the timebase frequency is >= 1.049 MHz)
	 * but loses precision because we lose the low bits of the constant
	 * in the shift.  Note that 19342813113834067 ~= 2^(20+64) / 1e9.
	 * For a shift of 24 the error is about 0.5e-9, or about 0.5ns
	 * over a second.  (Shift values are usually 22, 23 or 24.)
	 * For high frequency clocks such as the 512MHz timebase clock
	 * on POWER[6789], the mult value is small (e.g. 32768000)
	 * and so we can shift the constant by 16 initially
	 * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the
	 * remaining shifts after the multiplication, which gives a
	 * more accurate result (e.g. with mult = 32768000, shift = 24,
	 * the error is only about 1.2e-12, or 0.7ns over 10 minutes).
	 */
	if (mult <= 62500000 && clock->shift >= 16)
		new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16);
	else
		new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);

	/*
	 * Compute the fractional second in units of 2^-32 seconds.
	 * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift
	 * in nanoseconds, so multiplying that by 2^32 / 1e9 gives
	 * it in units of 2^-32 seconds.
	 * We assume shift <= 32 because clocks_calc_mult_shift()
	 * generates shift values in the range 0 - 32.
	 */
	frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift);
	do_div(frac_sec, NSEC_PER_SEC);

	/*
	 * Work out new stamp_xsec value for any legacy users of systemcfg.
	 * stamp_xsec is in units of 2^-20 seconds.
	 */
	/* frac_sec is in 2^-32 s units; dropping 12 bits gives 2^-20 s units. */
	new_stamp_xsec = frac_sec >> 12;
	new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC;

	/*
	 * tb_update_count is used to allow the userspace gettimeofday code
	 * to assure itself that it sees a consistent view of the tb_to_xs and
	 * stamp_xsec variables.  It reads the tb_update_count, then reads
	 * tb_to_xs and stamp_xsec and then reads tb_update_count again.  If
	 * the two values of tb_update_count match and are even then the
	 * tb_to_xs and stamp_xsec values are consistent.  If not, then it
	 * loops back and reads them again until this criteria is met.
	 */
	vdso_data->tb_orig_stamp = cycle_last;
	vdso_data->stamp_xsec = new_stamp_xsec;
	vdso_data->tb_to_xs = new_tb_to_xs;
	vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec;
	vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
	vdso_data->stamp_xtime_sec = xt.tv_sec;
	vdso_data->stamp_xtime_nsec = xt.tv_nsec;
	vdso_data->stamp_sec_fraction = frac_sec;
	vdso_data->hrtimer_res = hrtimer_resolution;
	/* Order the data stores above before the closing counter increment. */
	smp_wmb();
	++(vdso_data->tb_update_count);
}

/*
 * Copy the timezone settings held in sys_tz (minutes west of Greenwich and
 * DST correction type) into the matching fields of the vdso_data page.
 * No update counter or barrier is used for these two stores.
 */
void update_vsyscall_tz(void)
{
	vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
	vdso_data->tz_dsttime = sys_tz.tz_dsttime;
}

static void __init clocksource_init(void)
{
	struct clocksource *clock = &clocksource_timebase;
@@ -1079,7 +991,6 @@ void __init time_init(void)
		sys_tz.tz_dsttime = 0;
	}

	vdso_data->tb_update_count = 0;
	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;

	/* initialise and enable the large decrementer (if we have one) */
Loading