Commit 97b67ae5, authored by Jan Beulich, committed by Thomas Gleixner
Browse files

x86-64: Use RIP-relative addressing for most per-CPU accesses



Observing that per-CPU data (in the SMP case) is reachable by
exploiting 64-bit address wraparound (building on the default kernel
load address being at 16MB), the one-byte-shorter RIP-relative
addressing form can be used for most per-CPU accesses. The one
exception is the "stable" reads, where the use of the "P" operand
modifier prevents the compiler from using RIP-relative addressing, but
is unavoidable due to the use of the "p" constraint (side note: with
gcc 4.9.x the intended effect of this is no longer being achieved;
see gcc bug 63637).

With the dependency on the minimum kernel load address, arbitrarily
low values for CONFIG_PHYSICAL_START are no longer possible. A
link-time assertion is added which, when it triggers, points to the
need to increase that value.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/5458A1780200007800044A9D@mail.emea.novell.com


Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent 6d24c5f7
Loading
Loading
Loading
Loading
+44 −15
Original line number Diff line number Diff line
@@ -64,7 +64,7 @@
#define __percpu_prefix		""
#endif

#define __percpu_arg(x)		__percpu_prefix "%P" #x
#define __percpu_arg(x)		__percpu_prefix "%" #x

/*
 * Initialized pointers to per-cpu variables needed for the boot
@@ -179,29 +179,58 @@ do { \
	}								\
} while (0)

#define percpu_from_op(op, var, constraint)		\
#define percpu_from_op(op, var)				\
({							\
	typeof(var) pfo_ret__;				\
	switch (sizeof(var)) {				\
	case 1:						\
		asm(op "b "__percpu_arg(1)",%0"		\
		    : "=q" (pfo_ret__)			\
		    : constraint);			\
		    : "m" (var));			\
		break;					\
	case 2:						\
		asm(op "w "__percpu_arg(1)",%0"		\
		    : "=r" (pfo_ret__)			\
		    : constraint);			\
		    : "m" (var));			\
		break;					\
	case 4:						\
		asm(op "l "__percpu_arg(1)",%0"		\
		    : "=r" (pfo_ret__)			\
		    : constraint);			\
		    : "m" (var));			\
		break;					\
	case 8:						\
		asm(op "q "__percpu_arg(1)",%0"		\
		    : "=r" (pfo_ret__)			\
		    : constraint);			\
		    : "m" (var));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
	pfo_ret__;					\
})

#define percpu_stable_op(op, var)			\
({							\
	typeof(var) pfo_ret__;				\
	switch (sizeof(var)) {				\
	case 1:						\
		asm(op "b "__percpu_arg(P1)",%0"	\
		    : "=q" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 2:						\
		asm(op "w "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 4:						\
		asm(op "l "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 8:						\
		asm(op "q "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
@@ -359,11 +388,11 @@ do { \
 * per-thread variables implemented as per-cpu variables and thus
 * stable for the duration of the respective task.
 */
#define this_cpu_read_stable(var)	percpu_from_op("mov", var, "p" (&(var)))
#define this_cpu_read_stable(var)	percpu_stable_op("mov", var)

#define raw_cpu_read_1(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
#define raw_cpu_read_2(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
#define raw_cpu_read_4(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
#define raw_cpu_read_1(pcp)		percpu_from_op("mov", pcp)
#define raw_cpu_read_2(pcp)		percpu_from_op("mov", pcp)
#define raw_cpu_read_4(pcp)		percpu_from_op("mov", pcp)

#define raw_cpu_write_1(pcp, val)	percpu_to_op("mov", (pcp), val)
#define raw_cpu_write_2(pcp, val)	percpu_to_op("mov", (pcp), val)
@@ -381,9 +410,9 @@ do { \
#define raw_cpu_xchg_2(pcp, val)	percpu_xchg_op(pcp, val)
#define raw_cpu_xchg_4(pcp, val)	percpu_xchg_op(pcp, val)

#define this_cpu_read_1(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_2(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_4(pcp)		percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_1(pcp)		percpu_from_op("mov", pcp)
#define this_cpu_read_2(pcp)		percpu_from_op("mov", pcp)
#define this_cpu_read_4(pcp)		percpu_from_op("mov", pcp)
#define this_cpu_write_1(pcp, val)	percpu_to_op("mov", (pcp), val)
#define this_cpu_write_2(pcp, val)	percpu_to_op("mov", (pcp), val)
#define this_cpu_write_4(pcp, val)	percpu_to_op("mov", (pcp), val)
@@ -435,7 +464,7 @@ do { \
 * 32 bit must fall back to generic operations.
 */
#ifdef CONFIG_X86_64
#define raw_cpu_read_8(pcp)			percpu_from_op("mov", (pcp), "m"(pcp))
#define raw_cpu_read_8(pcp)			percpu_from_op("mov", pcp)
#define raw_cpu_write_8(pcp, val)		percpu_to_op("mov", (pcp), val)
#define raw_cpu_add_8(pcp, val)			percpu_add_op((pcp), val)
#define raw_cpu_and_8(pcp, val)			percpu_to_op("and", (pcp), val)
@@ -444,7 +473,7 @@ do { \
#define raw_cpu_xchg_8(pcp, nval)		percpu_xchg_op(pcp, nval)
#define raw_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(pcp, oval, nval)

#define this_cpu_read_8(pcp)			percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_read_8(pcp)			percpu_from_op("mov", pcp)
#define this_cpu_write_8(pcp, val)		percpu_to_op("mov", (pcp), val)
#define this_cpu_add_8(pcp, val)		percpu_add_op((pcp), val)
#define this_cpu_and_8(pcp, val)		percpu_to_op("and", (pcp), val)
+2 −0
Original line number Diff line number Diff line
@@ -186,6 +186,8 @@ SECTIONS
	 * start another segment - init.
	 */
	PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
	ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
	       "per-CPU data too large - increase CONFIG_PHYSICAL_START")
#endif

	INIT_TEXT_SECTION(PAGE_SIZE)