Commit ba77c568 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'x86-asm-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "A couple of changes, concentrated into the percpu code, to enable
  Clang support on i386 kernels too"

[ And cleans up the macros to generate percpu ops a lot too  - Linus ]

* tag 'x86-asm-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/uaccess: Make __get_user_size() Clang compliant on 32-bit
  x86/percpu: Remove unused PER_CPU() macro
  x86/percpu: Clean up percpu_stable_op()
  x86/percpu: Clean up percpu_cmpxchg_op()
  x86/percpu: Clean up percpu_xchg_op()
  x86/percpu: Clean up percpu_add_return_op()
  x86/percpu: Remove "e" constraint from XADD
  x86/percpu: Clean up percpu_add_op()
  x86/percpu: Clean up percpu_from_op()
  x86/percpu: Clean up percpu_to_op()
  x86/percpu: Introduce size abstraction macros
parents 97c6f57d 158807de
Loading
Loading
Loading
Loading
+172 −338
Original line number Diff line number Diff line
@@ -4,33 +4,15 @@

/*
 * Per-CPU addressing parameters, chosen by word size:
 *   __percpu_seg    - segment register name (gs with CONFIG_X86_64, fs otherwise)
 *   __percpu_mov_op - move mnemonic (movq with CONFIG_X86_64, movl otherwise)
 * Both are spliced as bare tokens into the assembler text of the macros
 * that use them.
 */
#ifdef CONFIG_X86_64
#define __percpu_seg		gs
#define __percpu_mov_op		movq
#else
#define __percpu_seg		fs
#define __percpu_mov_op		movl
#endif

#ifdef __ASSEMBLY__

/*
 * PER_CPU finds an address of a per-cpu variable.
 *
 * Args:
 *    var - variable name
 *    reg - 32bit register
 *
 * The resulting address is stored in the "reg" argument.
 *
 * Example:
 *    PER_CPU(cpu_gdt_descr, %ebx)
 */
/*
 * SMP: PER_CPU() loads this_cpu_off through the __percpu_seg segment into
 * reg and then uses lea to add var's address to it.  PER_CPU_VAR() yields
 * a segment-prefixed operand (%<seg>:var) for direct use in an instruction.
 */
#ifdef CONFIG_SMP
#define PER_CPU(var, reg)						\
	__percpu_mov_op %__percpu_seg:this_cpu_off, reg;		\
	lea var(reg), reg
#define PER_CPU_VAR(var)	%__percpu_seg:var
#else /* ! SMP */
/*
 * !SMP: no segment prefix and no offset; PER_CPU() moves the immediate
 * $var into reg and PER_CPU_VAR() is the bare symbol.
 */
#define PER_CPU(var, reg)	__percpu_mov_op $var, reg
#define PER_CPU_VAR(var)	var
#endif	/* SMP */

@@ -85,213 +67,108 @@

/* For arch-specific code, we can use direct single-insn ops (they
 * don't give an lvalue though). */
extern void __bad_percpu_size(void);

#define percpu_to_op(qual, op, var, val)		\
/*
 * __pcpu_type_<size>: integer type used for the temporaries of a
 * <size>-byte per-CPU operation.  The op macros select it by token
 * pasting (__pcpu_type_##size), so <size> must be the literal 1, 2, 4 or 8.
 */
#define __pcpu_type_1 u8
#define __pcpu_type_2 u16
#define __pcpu_type_4 u32
#define __pcpu_type_8 u64

/*
 * __pcpu_cast_<size>(val): convert val to the <size>-byte integer type
 * used by the per-CPU op macros.
 *
 * The 1/2/4-byte variants go through unsigned long and mask to the low
 * <size> bytes before the final cast; the 8-byte variant casts to u64
 * directly.
 *
 * val is parenthesized in every variant.  A cast binds tighter than any
 * binary operator, so without the parentheses an argument such as
 * "ptr + 1" would expand to "(unsigned long) ptr + 1" and add 1 to the
 * address instead of advancing the pointer by one element.
 */
#define __pcpu_cast_1(val) ((u8)(((unsigned long)(val)) & 0xff))
#define __pcpu_cast_2(val) ((u16)(((unsigned long)(val)) & 0xffff))
#define __pcpu_cast_4(val) ((u32)(((unsigned long)(val)) & 0xffffffff))
#define __pcpu_cast_8(val) ((u64)(val))

/*
 * Instruction-template builders.  All arguments are string literals that
 * are joined by adjacent-literal concatenation.
 *
 * __pcpu_op1_<size>(op, dst): mnemonic + size suffix (b/w/l/q) + " " + dst
 */
#define __pcpu_op1_1(op, dst) op "b " dst
#define __pcpu_op1_2(op, dst) op "w " dst
#define __pcpu_op1_4(op, dst) op "l " dst
#define __pcpu_op1_8(op, dst) op "q " dst

/*
 * __pcpu_op2_<size>(op, src, dst): the two-operand form, written as the
 * one-operand form whose operand text is "src, dst".
 */
#define __pcpu_op2_1(op, src, dst) __pcpu_op1_1(op, src ", " dst)
#define __pcpu_op2_2(op, src, dst) __pcpu_op1_2(op, src ", " dst)
#define __pcpu_op2_4(op, src, dst) __pcpu_op1_4(op, src ", " dst)
#define __pcpu_op2_8(op, src, dst) __pcpu_op1_8(op, src ", " dst)

/*
 * __pcpu_reg_<size>(mod, x): asm operand "mod<constraint>" (x) that must
 * live in a register.  mod is the constraint modifier supplied by the
 * caller ("=", "+", or left empty for a plain input).  The 1-byte variant
 * uses "q", which per GCC's x86 machine constraints restricts the choice
 * to registers that have a byte-sized low part; the others use "r".
 */
#define __pcpu_reg_1(mod, x) mod "q" (x)
#define __pcpu_reg_2(mod, x) mod "r" (x)
#define __pcpu_reg_4(mod, x) mod "r" (x)
#define __pcpu_reg_8(mod, x) mod "r" (x)

/*
 * __pcpu_reg_imm_<size>(x): input operand that may be either a register
 * or an immediate.  The 8-byte variant uses "e" rather than "i"; per
 * GCC's x86 machine constraints "e" is a 32-bit signed integer constant.
 */
#define __pcpu_reg_imm_1(x) "qi" (x)
#define __pcpu_reg_imm_2(x) "ri" (x)
#define __pcpu_reg_imm_4(x) "ri" (x)
#define __pcpu_reg_imm_8(x) "re" (x)

#define percpu_to_op(size, qual, op, _var, _val)			\
do {									\
	typedef typeof(var) pto_T__;			\
	__pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val);	\
	if (0) {		                                        \
		pto_T__ pto_tmp__;			\
		pto_tmp__ = (val);			\
		typeof(_var) pto_tmp__;					\
		pto_tmp__ = (_val);					\
		(void)pto_tmp__;					\
	}								\
	switch (sizeof(var)) {				\
	case 1:						\
		asm qual (op "b %1,"__percpu_arg(0)	\
		    : "+m" (var)			\
		    : "qi" ((pto_T__)(val)));		\
		break;					\
	case 2:						\
		asm qual (op "w %1,"__percpu_arg(0)	\
		    : "+m" (var)			\
		    : "ri" ((pto_T__)(val)));		\
		break;					\
	case 4:						\
		asm qual (op "l %1,"__percpu_arg(0)	\
		    : "+m" (var)			\
		    : "ri" ((pto_T__)(val)));		\
		break;					\
	case 8:						\
		asm qual (op "q %1,"__percpu_arg(0)	\
		    : "+m" (var)			\
		    : "re" ((pto_T__)(val)));		\
		break;					\
	default: __bad_percpu_size();			\
	}						\
	asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var]))	\
	    : [var] "+m" (_var)						\
	    : [val] __pcpu_reg_imm_##size(pto_val__));			\
} while (0)

/*
 * percpu_unary_op - emit one single-operand instruction on a per-CPU variable.
 *
 *   size - operand size in bytes (literal 1, 2, 4 or 8); selects the
 *          mnemonic suffix through __pcpu_op1_##size
 *   qual - tokens placed between "asm" and "(" (callers pass either
 *          nothing or "volatile")
 *   op   - mnemonic without size suffix, as a string literal
 *   _var - the per-CPU variable; bound as a read-write memory operand ("+m")
 */
#define percpu_unary_op(size, qual, op, _var)				\
({									\
	asm qual (__pcpu_op1_##size(op, __percpu_arg([var]))		\
	    : [var] "+m" (_var));					\
})

/*
 * Generate a percpu add to memory instruction and optimize code
 * if one is added or subtracted.
 */
#define percpu_add_op(qual, var, val)					\
#define percpu_add_op(size, qual, var, val)				\
do {									\
	typedef typeof(var) pao_T__;					\
	const int pao_ID__ = (__builtin_constant_p(val) &&		\
			      ((val) == 1 || (val) == -1)) ?		\
				(int)(val) : 0;				\
	if (0) {							\
		pao_T__ pao_tmp__;					\
		typeof(var) pao_tmp__;					\
		pao_tmp__ = (val);					\
		(void)pao_tmp__;					\
	}								\
	switch (sizeof(var)) {						\
	case 1:								\
	if (pao_ID__ == 1)						\
			asm qual ("incb "__percpu_arg(0) : "+m" (var));	\
		percpu_unary_op(size, qual, "inc", var);		\
	else if (pao_ID__ == -1)					\
			asm qual ("decb "__percpu_arg(0) : "+m" (var));	\
		percpu_unary_op(size, qual, "dec", var);		\
	else								\
			asm qual ("addb %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "qi" ((pao_T__)(val)));			\
		break;							\
	case 2:								\
		if (pao_ID__ == 1)					\
			asm qual ("incw "__percpu_arg(0) : "+m" (var));	\
		else if (pao_ID__ == -1)				\
			asm qual ("decw "__percpu_arg(0) : "+m" (var));	\
		else							\
			asm qual ("addw %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "ri" ((pao_T__)(val)));			\
		break;							\
	case 4:								\
		if (pao_ID__ == 1)					\
			asm qual ("incl "__percpu_arg(0) : "+m" (var));	\
		else if (pao_ID__ == -1)				\
			asm qual ("decl "__percpu_arg(0) : "+m" (var));	\
		else							\
			asm qual ("addl %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "ri" ((pao_T__)(val)));			\
		break;							\
	case 8:								\
		if (pao_ID__ == 1)					\
			asm qual ("incq "__percpu_arg(0) : "+m" (var));	\
		else if (pao_ID__ == -1)				\
			asm qual ("decq "__percpu_arg(0) : "+m" (var));	\
		else							\
			asm qual ("addq %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "re" ((pao_T__)(val)));			\
		break;							\
	default: __bad_percpu_size();					\
	}								\
		percpu_to_op(size, qual, "add", var, val);		\
} while (0)

#define percpu_from_op(qual, op, var)			\
#define percpu_from_op(size, qual, op, _var)				\
({									\
	typeof(var) pfo_ret__;				\
	switch (sizeof(var)) {				\
	case 1:						\
		asm qual (op "b "__percpu_arg(1)",%0"	\
		    : "=q" (pfo_ret__)			\
		    : "m" (var));			\
		break;					\
	case 2:						\
		asm qual (op "w "__percpu_arg(1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "m" (var));			\
		break;					\
	case 4:						\
		asm qual (op "l "__percpu_arg(1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "m" (var));			\
		break;					\
	case 8:						\
		asm qual (op "q "__percpu_arg(1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "m" (var));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
	pfo_ret__;					\
	__pcpu_type_##size pfo_val__;					\
	asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]")	\
	    : [val] __pcpu_reg_##size("=", pfo_val__)			\
	    : [var] "m" (_var));					\
	(typeof(_var))(unsigned long) pfo_val__;			\
})

#define percpu_stable_op(op, var)			\
#define percpu_stable_op(size, op, _var)				\
({									\
	typeof(var) pfo_ret__;				\
	switch (sizeof(var)) {				\
	case 1:						\
		asm(op "b "__percpu_arg(P1)",%0"	\
		    : "=q" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 2:						\
		asm(op "w "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 4:						\
		asm(op "l "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 8:						\
		asm(op "q "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
	pfo_ret__;					\
})

#define percpu_unary_op(qual, op, var)			\
({							\
	switch (sizeof(var)) {				\
	case 1:						\
		asm qual (op "b "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	case 2:						\
		asm qual (op "w "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	case 4:						\
		asm qual (op "l "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	case 8:						\
		asm qual (op "q "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
	__pcpu_type_##size pfo_val__;					\
	asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]")	\
	    : [val] __pcpu_reg_##size("=", pfo_val__)			\
	    : [var] "p" (&(_var)));					\
	(typeof(_var))(unsigned long) pfo_val__;			\
})

/*
 * Add return operation
 */
#define percpu_add_return_op(qual, var, val)				\
#define percpu_add_return_op(size, qual, _var, _val)			\
({									\
	typeof(var) paro_ret__ = val;					\
	switch (sizeof(var)) {						\
	case 1:								\
		asm qual ("xaddb %0, "__percpu_arg(1)			\
			    : "+q" (paro_ret__), "+m" (var)		\
			    : : "memory");				\
		break;							\
	case 2:								\
		asm qual ("xaddw %0, "__percpu_arg(1)			\
			    : "+r" (paro_ret__), "+m" (var)		\
	__pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val);	\
	asm qual (__pcpu_op2_##size("xadd", "%[tmp]",			\
				     __percpu_arg([var]))		\
		  : [tmp] __pcpu_reg_##size("+", paro_tmp__),		\
		    [var] "+m" (_var)					\
		  : : "memory");					\
		break;							\
	case 4:								\
		asm qual ("xaddl %0, "__percpu_arg(1)			\
			    : "+r" (paro_ret__), "+m" (var)		\
			    : : "memory");				\
		break;							\
	case 8:								\
		asm qual ("xaddq %0, "__percpu_arg(1)			\
			    : "+re" (paro_ret__), "+m" (var)		\
			    : : "memory");				\
		break;							\
	default: __bad_percpu_size();					\
	}								\
	paro_ret__ += val;						\
	paro_ret__;							\
	(typeof(_var))(unsigned long) (paro_tmp__ + _val);		\
})

/*
@@ -299,85 +176,38 @@ do { \
 * expensive due to the implied lock prefix.  The processor cannot prefetch
 * cachelines if xchg is used.
 */
#define percpu_xchg_op(qual, var, nval)					\
#define percpu_xchg_op(size, qual, _var, _nval)				\
({									\
	typeof(var) pxo_ret__;						\
	typeof(var) pxo_new__ = (nval);					\
	switch (sizeof(var)) {						\
	case 1:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%al"		\
		    "\n1:\tcmpxchgb %2, "__percpu_arg(1)		\
		    "\n\tjnz 1b"					\
			    : "=&a" (pxo_ret__), "+m" (var)		\
			    : "q" (pxo_new__)				\
			    : "memory");				\
		break;							\
	case 2:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%ax"		\
		    "\n1:\tcmpxchgw %2, "__percpu_arg(1)		\
	__pcpu_type_##size pxo_old__;					\
	__pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval);	\
	asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]),		\
				    "%[oval]")				\
		  "\n1:\t"						\
		  __pcpu_op2_##size("cmpxchg", "%[nval]",		\
				    __percpu_arg([var]))		\
		  "\n\tjnz 1b"						\
			    : "=&a" (pxo_ret__), "+m" (var)		\
			    : "r" (pxo_new__)				\
		  : [oval] "=&a" (pxo_old__),				\
		    [var] "+m" (_var)					\
		  : [nval] __pcpu_reg_##size(, pxo_new__)		\
		  : "memory");						\
		break;							\
	case 4:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%eax"		\
		    "\n1:\tcmpxchgl %2, "__percpu_arg(1)		\
		    "\n\tjnz 1b"					\
			    : "=&a" (pxo_ret__), "+m" (var)		\
			    : "r" (pxo_new__)				\
			    : "memory");				\
		break;							\
	case 8:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%rax"		\
		    "\n1:\tcmpxchgq %2, "__percpu_arg(1)		\
		    "\n\tjnz 1b"					\
			    : "=&a" (pxo_ret__), "+m" (var)		\
			    : "r" (pxo_new__)				\
			    : "memory");				\
		break;							\
	default: __bad_percpu_size();					\
	}								\
	pxo_ret__;							\
	(typeof(_var))(unsigned long) pxo_old__;			\
})

/*
 * cmpxchg has no such implied lock semantics; as a result, it is much
 * more efficient for cpu-local operations.
 */
#define percpu_cmpxchg_op(qual, var, oval, nval)			\
#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval)		\
({									\
	typeof(var) pco_ret__;						\
	typeof(var) pco_old__ = (oval);					\
	typeof(var) pco_new__ = (nval);					\
	switch (sizeof(var)) {						\
	case 1:								\
		asm qual ("cmpxchgb %2, "__percpu_arg(1)		\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "q" (pco_new__), "0" (pco_old__)		\
			    : "memory");				\
		break;							\
	case 2:								\
		asm qual ("cmpxchgw %2, "__percpu_arg(1)		\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "r" (pco_new__), "0" (pco_old__)		\
			    : "memory");				\
		break;							\
	case 4:								\
		asm qual ("cmpxchgl %2, "__percpu_arg(1)		\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "r" (pco_new__), "0" (pco_old__)		\
	__pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval);	\
	__pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval);	\
	asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]",		\
				    __percpu_arg([var]))		\
		  : [oval] "+a" (pco_old__),				\
		    [var] "+m" (_var)					\
		  : [nval] __pcpu_reg_##size(, pco_new__)		\
		  : "memory");						\
		break;							\
	case 8:								\
		asm qual ("cmpxchgq %2, "__percpu_arg(1)		\
			    : "=a" (pco_ret__), "+m" (var)		\
			    : "r" (pco_new__), "0" (pco_old__)		\
			    : "memory");				\
		break;							\
	default: __bad_percpu_size();					\
	}								\
	pco_ret__;							\
	(typeof(_var))(unsigned long) pco_old__;			\
})

/*
@@ -389,24 +219,28 @@ do { \
 * per-thread variables implemented as per-cpu variables and thus
 * stable for the duration of the respective task.
 */
#define this_cpu_read_stable(var)	percpu_stable_op("mov", var)

#define raw_cpu_read_1(pcp)		percpu_from_op(, "mov", pcp)
#define raw_cpu_read_2(pcp)		percpu_from_op(, "mov", pcp)
#define raw_cpu_read_4(pcp)		percpu_from_op(, "mov", pcp)

#define raw_cpu_write_1(pcp, val)	percpu_to_op(, "mov", (pcp), val)
#define raw_cpu_write_2(pcp, val)	percpu_to_op(, "mov", (pcp), val)
#define raw_cpu_write_4(pcp, val)	percpu_to_op(, "mov", (pcp), val)
#define raw_cpu_add_1(pcp, val)		percpu_add_op(, (pcp), val)
#define raw_cpu_add_2(pcp, val)		percpu_add_op(, (pcp), val)
#define raw_cpu_add_4(pcp, val)		percpu_add_op(, (pcp), val)
#define raw_cpu_and_1(pcp, val)		percpu_to_op(, "and", (pcp), val)
#define raw_cpu_and_2(pcp, val)		percpu_to_op(, "and", (pcp), val)
#define raw_cpu_and_4(pcp, val)		percpu_to_op(, "and", (pcp), val)
#define raw_cpu_or_1(pcp, val)		percpu_to_op(, "or", (pcp), val)
#define raw_cpu_or_2(pcp, val)		percpu_to_op(, "or", (pcp), val)
#define raw_cpu_or_4(pcp, val)		percpu_to_op(, "or", (pcp), val)
/*
 * Size-specific stable reads: each one issues a "mov" through
 * percpu_stable_op() for a 1/2/4/8-byte variable.  this_cpu_read_stable()
 * picks among them via __pcpu_size_call_return(), which is defined outside
 * this header excerpt (presumably it dispatches on sizeof(pcp) - confirm).
 */
#define this_cpu_read_stable_1(pcp)	percpu_stable_op(1, "mov", pcp)
#define this_cpu_read_stable_2(pcp)	percpu_stable_op(2, "mov", pcp)
#define this_cpu_read_stable_4(pcp)	percpu_stable_op(4, "mov", pcp)
#define this_cpu_read_stable_8(pcp)	percpu_stable_op(8, "mov", pcp)
#define this_cpu_read_stable(pcp)	__pcpu_size_call_return(this_cpu_read_stable_, pcp)

/*
 * raw_cpu_*_{1,2,4}: 1-, 2- and 4-byte per-CPU read/write/add/and/or.
 * The first argument of each helper is the operand size in bytes; the
 * second (asm qualifier) is left empty, so the helpers emit a plain
 * "asm" statement rather than "asm volatile".
 */
#define raw_cpu_read_1(pcp)		percpu_from_op(1, , "mov", pcp)
#define raw_cpu_read_2(pcp)		percpu_from_op(2, , "mov", pcp)
#define raw_cpu_read_4(pcp)		percpu_from_op(4, , "mov", pcp)

#define raw_cpu_write_1(pcp, val)	percpu_to_op(1, , "mov", (pcp), val)
#define raw_cpu_write_2(pcp, val)	percpu_to_op(2, , "mov", (pcp), val)
#define raw_cpu_write_4(pcp, val)	percpu_to_op(4, , "mov", (pcp), val)
#define raw_cpu_add_1(pcp, val)		percpu_add_op(1, , (pcp), val)
#define raw_cpu_add_2(pcp, val)		percpu_add_op(2, , (pcp), val)
#define raw_cpu_add_4(pcp, val)		percpu_add_op(4, , (pcp), val)
#define raw_cpu_and_1(pcp, val)		percpu_to_op(1, , "and", (pcp), val)
#define raw_cpu_and_2(pcp, val)		percpu_to_op(2, , "and", (pcp), val)
#define raw_cpu_and_4(pcp, val)		percpu_to_op(4, , "and", (pcp), val)
#define raw_cpu_or_1(pcp, val)		percpu_to_op(1, , "or", (pcp), val)
#define raw_cpu_or_2(pcp, val)		percpu_to_op(2, , "or", (pcp), val)
#define raw_cpu_or_4(pcp, val)		percpu_to_op(4, , "or", (pcp), val)

/*
 * raw_cpu_xchg() can use a load-store since it is not required to be
@@ -423,38 +257,38 @@ do { \
#define raw_cpu_xchg_2(pcp, val)	raw_percpu_xchg_op(pcp, val)
#define raw_cpu_xchg_4(pcp, val)	raw_percpu_xchg_op(pcp, val)

#define this_cpu_read_1(pcp)		percpu_from_op(volatile, "mov", pcp)
#define this_cpu_read_2(pcp)		percpu_from_op(volatile, "mov", pcp)
#define this_cpu_read_4(pcp)		percpu_from_op(volatile, "mov", pcp)
#define this_cpu_write_1(pcp, val)	percpu_to_op(volatile, "mov", (pcp), val)
#define this_cpu_write_2(pcp, val)	percpu_to_op(volatile, "mov", (pcp), val)
#define this_cpu_write_4(pcp, val)	percpu_to_op(volatile, "mov", (pcp), val)
#define this_cpu_add_1(pcp, val)	percpu_add_op(volatile, (pcp), val)
#define this_cpu_add_2(pcp, val)	percpu_add_op(volatile, (pcp), val)
#define this_cpu_add_4(pcp, val)	percpu_add_op(volatile, (pcp), val)
#define this_cpu_and_1(pcp, val)	percpu_to_op(volatile, "and", (pcp), val)
#define this_cpu_and_2(pcp, val)	percpu_to_op(volatile, "and", (pcp), val)
#define this_cpu_and_4(pcp, val)	percpu_to_op(volatile, "and", (pcp), val)
#define this_cpu_or_1(pcp, val)		percpu_to_op(volatile, "or", (pcp), val)
#define this_cpu_or_2(pcp, val)		percpu_to_op(volatile, "or", (pcp), val)
#define this_cpu_or_4(pcp, val)		percpu_to_op(volatile, "or", (pcp), val)
#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)
#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)
#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)

#define raw_cpu_add_return_1(pcp, val)		percpu_add_return_op(, pcp, val)
#define raw_cpu_add_return_2(pcp, val)		percpu_add_return_op(, pcp, val)
#define raw_cpu_add_return_4(pcp, val)		percpu_add_return_op(, pcp, val)
#define raw_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)
#define raw_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)
#define raw_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)

#define this_cpu_add_return_1(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_add_return_2(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_add_return_4(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
#define this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
#define this_cpu_read_1(pcp)		percpu_from_op(1, volatile, "mov", pcp)
#define this_cpu_read_2(pcp)		percpu_from_op(2, volatile, "mov", pcp)
#define this_cpu_read_4(pcp)		percpu_from_op(4, volatile, "mov", pcp)
#define this_cpu_write_1(pcp, val)	percpu_to_op(1, volatile, "mov", (pcp), val)
#define this_cpu_write_2(pcp, val)	percpu_to_op(2, volatile, "mov", (pcp), val)
#define this_cpu_write_4(pcp, val)	percpu_to_op(4, volatile, "mov", (pcp), val)
#define this_cpu_add_1(pcp, val)	percpu_add_op(1, volatile, (pcp), val)
#define this_cpu_add_2(pcp, val)	percpu_add_op(2, volatile, (pcp), val)
#define this_cpu_add_4(pcp, val)	percpu_add_op(4, volatile, (pcp), val)
#define this_cpu_and_1(pcp, val)	percpu_to_op(1, volatile, "and", (pcp), val)
#define this_cpu_and_2(pcp, val)	percpu_to_op(2, volatile, "and", (pcp), val)
#define this_cpu_and_4(pcp, val)	percpu_to_op(4, volatile, "and", (pcp), val)
#define this_cpu_or_1(pcp, val)		percpu_to_op(1, volatile, "or", (pcp), val)
#define this_cpu_or_2(pcp, val)		percpu_to_op(2, volatile, "or", (pcp), val)
#define this_cpu_or_4(pcp, val)		percpu_to_op(4, volatile, "or", (pcp), val)
#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(1, volatile, pcp, nval)
#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(2, volatile, pcp, nval)
#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(4, volatile, pcp, nval)

#define raw_cpu_add_return_1(pcp, val)		percpu_add_return_op(1, , pcp, val)
#define raw_cpu_add_return_2(pcp, val)		percpu_add_return_op(2, , pcp, val)
#define raw_cpu_add_return_4(pcp, val)		percpu_add_return_op(4, , pcp, val)
#define raw_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(1, , pcp, oval, nval)
#define raw_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(2, , pcp, oval, nval)
#define raw_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(4, , pcp, oval, nval)

#define this_cpu_add_return_1(pcp, val)		percpu_add_return_op(1, volatile, pcp, val)
#define this_cpu_add_return_2(pcp, val)		percpu_add_return_op(2, volatile, pcp, val)
#define this_cpu_add_return_4(pcp, val)		percpu_add_return_op(4, volatile, pcp, val)
#define this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
#define this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(4, volatile, pcp, oval, nval)

#ifdef CONFIG_X86_CMPXCHG64
#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2)		\
@@ -478,23 +312,23 @@ do { \
 * 32 bit must fall back to generic operations.
 */
#ifdef CONFIG_X86_64
#define raw_cpu_read_8(pcp)			percpu_from_op(, "mov", pcp)
#define raw_cpu_write_8(pcp, val)		percpu_to_op(, "mov", (pcp), val)
#define raw_cpu_add_8(pcp, val)			percpu_add_op(, (pcp), val)
#define raw_cpu_and_8(pcp, val)			percpu_to_op(, "and", (pcp), val)
#define raw_cpu_or_8(pcp, val)			percpu_to_op(, "or", (pcp), val)
#define raw_cpu_add_return_8(pcp, val)		percpu_add_return_op(, pcp, val)
/*
 * 8-byte raw_cpu_* operations (size argument 8, empty asm qualifier).
 * raw_cpu_xchg_8() goes through raw_percpu_xchg_op() instead of an
 * instruction-level helper.
 */
#define raw_cpu_read_8(pcp)			percpu_from_op(8, , "mov", pcp)
#define raw_cpu_write_8(pcp, val)		percpu_to_op(8, , "mov", (pcp), val)
#define raw_cpu_add_8(pcp, val)			percpu_add_op(8, , (pcp), val)
#define raw_cpu_and_8(pcp, val)			percpu_to_op(8, , "and", (pcp), val)
#define raw_cpu_or_8(pcp, val)			percpu_to_op(8, , "or", (pcp), val)
#define raw_cpu_add_return_8(pcp, val)		percpu_add_return_op(8, , pcp, val)
#define raw_cpu_xchg_8(pcp, nval)		raw_percpu_xchg_op(pcp, nval)
#define raw_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)

#define this_cpu_read_8(pcp)			percpu_from_op(volatile, "mov", pcp)
#define this_cpu_write_8(pcp, val)		percpu_to_op(volatile, "mov", (pcp), val)
#define this_cpu_add_8(pcp, val)		percpu_add_op(volatile, (pcp), val)
#define this_cpu_and_8(pcp, val)		percpu_to_op(volatile, "and", (pcp), val)
#define this_cpu_or_8(pcp, val)			percpu_to_op(volatile, "or", (pcp), val)
#define this_cpu_add_return_8(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_xchg_8(pcp, nval)		percpu_xchg_op(volatile, pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
#define raw_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(8, , pcp, oval, nval)

/*
 * 8-byte this_cpu_* operations: size argument 8 with the "volatile" asm
 * qualifier.  These sit inside the CONFIG_X86_64 section of the header.
 */
#define this_cpu_read_8(pcp)			percpu_from_op(8, volatile, "mov", pcp)
#define this_cpu_write_8(pcp, val)		percpu_to_op(8, volatile, "mov", (pcp), val)
#define this_cpu_add_8(pcp, val)		percpu_add_op(8, volatile, (pcp), val)
#define this_cpu_and_8(pcp, val)		percpu_to_op(8, volatile, "and", (pcp), val)
#define this_cpu_or_8(pcp, val)			percpu_to_op(8, volatile, "or", (pcp), val)
#define this_cpu_add_return_8(pcp, val)		percpu_add_return_op(8, volatile, pcp, val)
#define this_cpu_xchg_8(pcp, nval)		percpu_xchg_op(8, volatile, pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(8, volatile, pcp, oval, nval)

/*
 * Pretty complex macro to generate cmpxchg16 instruction.  The instruction
+4 −1
Original line number Diff line number Diff line
@@ -314,11 +314,14 @@ do { \

#define __get_user_size(x, ptr, size, retval)				\
do {									\
	unsigned char x_u8__;						\
									\
	retval = 0;							\
	__chk_user_ptr(ptr);						\
	switch (size) {							\
	case 1:								\
		__get_user_asm(x, ptr, retval, "b", "=q");		\
		__get_user_asm(x_u8__, ptr, retval, "b", "=q");		\
		(x) = x_u8__;						\
		break;							\
	case 2:								\
		__get_user_asm(x, ptr, retval, "w", "=r");		\