Commit 5d2bd700 authored by Suresh Siddha's avatar Suresh Siddha Committed by H. Peter Anvin
Browse files

x86, fpu: decouple non-lazy/eager fpu restore from xsave



Decouple non-lazy/eager fpu restore policy from the existence of the xsave
feature. Introduce a synthetic CPUID flag to represent the eagerfpu
policy. "eagerfpu=on" boot parameter will enable the policy.

Requested-by: H. Peter Anvin <hpa@zytor.com>
Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/1347300665-6209-2-git-send-email-suresh.b.siddha@intel.com


Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
parent 304bceda
Loading
Loading
Loading
Loading
+4 −0
Original line number Original line Diff line number Diff line
@@ -1833,6 +1833,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
			and restore using xsave. The kernel will fallback to
			and restore using xsave. The kernel will fallback to
			enabling legacy floating-point and sse state.
			enabling legacy floating-point and sse state.


	eagerfpu=	[X86]
			on	enable eager fpu restore
			off	disable eager fpu restore

	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
			wfi(ARM) instruction doesn't work correctly and not to
			wfi(ARM) instruction doesn't work correctly and not to
			use it. This is also useful when using JTAG debugger.
			use it. This is also useful when using JTAG debugger.
+2 −0
Original line number Original line Diff line number Diff line
@@ -97,6 +97,7 @@
#define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM     (3*32+27) /* multi-node processor */
#define X86_FEATURE_AMD_DCM     (3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
#define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU	(3*32+29) /* "eagerfpu" Non lazy FPU restore */


/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
#define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
@@ -305,6 +306,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)


#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg		1
# define cpu_has_invlpg		1
+39 −15
Original line number Original line Diff line number Diff line
@@ -38,6 +38,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,


extern unsigned int mxcsr_feature_mask;
extern unsigned int mxcsr_feature_mask;
extern void fpu_init(void);
extern void fpu_init(void);
extern void eager_fpu_init(void);


DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);


@@ -84,6 +85,11 @@ static inline int is_x32_frame(void)


#define X87_FSW_ES (1 << 7)	/* Exception Summary */
#define X87_FSW_ES (1 << 7)	/* Exception Summary */


static __always_inline __pure bool use_eager_fpu(void)
{
	return static_cpu_has(X86_FEATURE_EAGER_FPU);
}

static __always_inline __pure bool use_xsaveopt(void)
static __always_inline __pure bool use_xsaveopt(void)
{
{
	return static_cpu_has(X86_FEATURE_XSAVEOPT);
	return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -99,6 +105,14 @@ static __always_inline __pure bool use_fxsr(void)
        return static_cpu_has(X86_FEATURE_FXSR);
        return static_cpu_has(X86_FEATURE_FXSR);
}
}


static inline void fx_finit(struct i387_fxsave_struct *fx)
{
	memset(fx, 0, xstate_size);
	fx->cwd = 0x37f;
	if (cpu_has_xmm)
		fx->mxcsr = MXCSR_DEFAULT;
}

extern void __sanitize_i387_state(struct task_struct *);
extern void __sanitize_i387_state(struct task_struct *);


static inline void sanitize_i387_state(struct task_struct *tsk)
static inline void sanitize_i387_state(struct task_struct *tsk)
@@ -291,13 +305,13 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk)
static inline void __thread_fpu_end(struct task_struct *tsk)
static inline void __thread_fpu_end(struct task_struct *tsk)
{
{
	__thread_clear_has_fpu(tsk);
	__thread_clear_has_fpu(tsk);
	if (!use_xsave())
	if (!use_eager_fpu())
		stts();
		stts();
}
}


static inline void __thread_fpu_begin(struct task_struct *tsk)
static inline void __thread_fpu_begin(struct task_struct *tsk)
{
{
	if (!use_xsave())
	if (!use_eager_fpu())
		clts();
		clts();
	__thread_set_has_fpu(tsk);
	__thread_set_has_fpu(tsk);
}
}
@@ -327,10 +341,14 @@ static inline void drop_fpu(struct task_struct *tsk)


static inline void drop_init_fpu(struct task_struct *tsk)
static inline void drop_init_fpu(struct task_struct *tsk)
{
{
	if (!use_xsave())
	if (!use_eager_fpu())
		drop_fpu(tsk);
		drop_fpu(tsk);
	else
	else {
		if (use_xsave())
			xrstor_state(init_xstate_buf, -1);
			xrstor_state(init_xstate_buf, -1);
		else
			fxrstor_checking(&init_xstate_buf->i387);
	}
}
}


/*
/*
@@ -370,7 +388,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
	 * If the task has used the math, pre-load the FPU on xsave processors
	 * If the task has used the math, pre-load the FPU on xsave processors
	 * or if the past 5 consecutive context-switches used math.
	 * or if the past 5 consecutive context-switches used math.
	 */
	 */
	fpu.preload = tsk_used_math(new) && (use_xsave() ||
	fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
					     new->fpu_counter > 5);
					     new->fpu_counter > 5);
	if (__thread_has_fpu(old)) {
	if (__thread_has_fpu(old)) {
		if (!__save_init_fpu(old))
		if (!__save_init_fpu(old))
@@ -383,14 +401,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
			new->fpu_counter++;
			new->fpu_counter++;
			__thread_set_has_fpu(new);
			__thread_set_has_fpu(new);
			prefetch(new->thread.fpu.state);
			prefetch(new->thread.fpu.state);
		} else if (!use_xsave())
		} else if (!use_eager_fpu())
			stts();
			stts();
	} else {
	} else {
		old->fpu_counter = 0;
		old->fpu_counter = 0;
		old->thread.fpu.last_cpu = ~0;
		old->thread.fpu.last_cpu = ~0;
		if (fpu.preload) {
		if (fpu.preload) {
			new->fpu_counter++;
			new->fpu_counter++;
			if (!use_xsave() && fpu_lazy_restore(new, cpu))
			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
				fpu.preload = 0;
				fpu.preload = 0;
			else
			else
				prefetch(new->thread.fpu.state);
				prefetch(new->thread.fpu.state);
@@ -452,6 +470,14 @@ static inline void user_fpu_begin(void)
	preempt_enable();
	preempt_enable();
}
}


static inline void __save_fpu(struct task_struct *tsk)
{
	if (use_xsave())
		xsave_state(&tsk->thread.fpu.state->xsave, -1);
	else
		fpu_fxsave(&tsk->thread.fpu);
}

/*
/*
 * These disable preemption on their own and are safe
 * These disable preemption on their own and are safe
 */
 */
@@ -459,8 +485,8 @@ static inline void save_init_fpu(struct task_struct *tsk)
{
{
	WARN_ON_ONCE(!__thread_has_fpu(tsk));
	WARN_ON_ONCE(!__thread_has_fpu(tsk));


	if (use_xsave()) {
	if (use_eager_fpu()) {
		xsave_state(&tsk->thread.fpu.state->xsave, -1);
		__save_fpu(tsk);
		return;
		return;
	}
	}


@@ -526,11 +552,9 @@ static inline void fpu_free(struct fpu *fpu)


static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
{
{
	if (use_xsave()) {
	if (use_eager_fpu()) {
		struct xsave_struct *xsave = &dst->thread.fpu.state->xsave;
		memset(&dst->thread.fpu.state->xsave, 0, xstate_size);

		__save_fpu(dst);
		memset(&xsave->xsave_hdr, 0, sizeof(struct xsave_hdr_struct));
		xsave_state(xsave, -1);
	} else {
	} else {
		struct fpu *dfpu = &dst->thread.fpu;
		struct fpu *dfpu = &dst->thread.fpu;
		struct fpu *sfpu = &src->thread.fpu;
		struct fpu *sfpu = &src->thread.fpu;
+0 −2
Original line number Original line Diff line number Diff line
@@ -1297,7 +1297,6 @@ void __cpuinit cpu_init(void)
	dbg_restore_debug_regs();
	dbg_restore_debug_regs();


	fpu_init();
	fpu_init();
	xsave_init();


	raw_local_save_flags(kernel_eflags);
	raw_local_save_flags(kernel_eflags);


@@ -1352,6 +1351,5 @@ void __cpuinit cpu_init(void)
	dbg_restore_debug_regs();
	dbg_restore_debug_regs();


	fpu_init();
	fpu_init();
	xsave_init();
}
}
#endif
#endif
+8 −17
Original line number Original line Diff line number Diff line
@@ -22,9 +22,8 @@
/*
/*
 * Were we in an interrupt that interrupted kernel mode?
 * Were we in an interrupt that interrupted kernel mode?
 *
 *
 * For now, on xsave platforms we will return interrupted
 * For now, with eagerfpu we will return interrupted kernel FPU
 * kernel FPU as not-idle. TBD: As we use non-lazy FPU restore
 * state as not-idle. TBD: Ideally we can change the return value
 * for xsave platforms, ideally we can change the return value
 * to something like __thread_has_fpu(current). But we need to
 * to something like __thread_has_fpu(current). But we need to
 * be careful of doing __thread_clear_has_fpu() before saving
 * be careful of doing __thread_clear_has_fpu() before saving
 * the FPU etc for supporting nested uses etc. For now, take
 * the FPU etc for supporting nested uses etc. For now, take
@@ -38,7 +37,7 @@
 */
 */
static inline bool interrupted_kernel_fpu_idle(void)
static inline bool interrupted_kernel_fpu_idle(void)
{
{
	if (use_xsave())
	if (use_eager_fpu())
		return 0;
		return 0;


	return !__thread_has_fpu(current) &&
	return !__thread_has_fpu(current) &&
@@ -84,7 +83,7 @@ void kernel_fpu_begin(void)
		__save_init_fpu(me);
		__save_init_fpu(me);
		__thread_clear_has_fpu(me);
		__thread_clear_has_fpu(me);
		/* We do 'stts()' in kernel_fpu_end() */
		/* We do 'stts()' in kernel_fpu_end() */
	} else if (!use_xsave()) {
	} else if (!use_eager_fpu()) {
		this_cpu_write(fpu_owner_task, NULL);
		this_cpu_write(fpu_owner_task, NULL);
		clts();
		clts();
	}
	}
@@ -93,7 +92,7 @@ EXPORT_SYMBOL(kernel_fpu_begin);


void kernel_fpu_end(void)
void kernel_fpu_end(void)
{
{
	if (use_xsave())
	if (use_eager_fpu())
		math_state_restore();
		math_state_restore();
	else
	else
		stts();
		stts();
@@ -122,7 +121,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
{
{
	unsigned long mask = 0;
	unsigned long mask = 0;


	clts();
	if (cpu_has_fxsr) {
	if (cpu_has_fxsr) {
		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
		asm volatile("fxsave %0" : : "m" (fx_scratch));
		asm volatile("fxsave %0" : : "m" (fx_scratch));
@@ -131,7 +129,6 @@ static void __cpuinit mxcsr_feature_mask_init(void)
			mask = 0x0000ffbf;
			mask = 0x0000ffbf;
	}
	}
	mxcsr_feature_mask &= mask;
	mxcsr_feature_mask &= mask;
	stts();
}
}


static void __cpuinit init_thread_xstate(void)
static void __cpuinit init_thread_xstate(void)
@@ -185,9 +182,8 @@ void __cpuinit fpu_init(void)
		init_thread_xstate();
		init_thread_xstate();


	mxcsr_feature_mask_init();
	mxcsr_feature_mask_init();
	/* clean state in init */
	xsave_init();
	current_thread_info()->status = 0;
	eager_fpu_init();
	clear_used_math();
}
}


void fpu_finit(struct fpu *fpu)
void fpu_finit(struct fpu *fpu)
@@ -198,12 +194,7 @@ void fpu_finit(struct fpu *fpu)
	}
	}


	if (cpu_has_fxsr) {
	if (cpu_has_fxsr) {
		struct i387_fxsave_struct *fx = &fpu->state->fxsave;
		fx_finit(&fpu->state->fxsave);

		memset(fx, 0, xstate_size);
		fx->cwd = 0x37f;
		if (cpu_has_xmm)
			fx->mxcsr = MXCSR_DEFAULT;
	} else {
	} else {
		struct i387_fsave_struct *fp = &fpu->state->fsave;
		struct i387_fsave_struct *fp = &fpu->state->fsave;
		memset(fp, 0, xstate_size);
		memset(fp, 0, xstate_size);
Loading