Commit b1378a56 authored by Chang S. Bae, committed by Ingo Molnar
Browse files

x86/fsgsbase/64: Introduce FS/GS base helper functions



Introduce FS/GS base access functionality via <asm/fsgsbase.h>,
not yet used by anything directly.

Factor out task_seg_base() from x86/ptrace.c and rename it to
x86_fsgsbase_read_task() to make it part of the new helpers.

This will allow us to enhance FSGSBASE support and eventually enable
the FSBASE/GSBASE instructions.

An "inactive" GS base refers to a base saved at kernel entry
and being part of an inactive, non-running/stopped user-task.
(The typical ptrace model.)

Here are the new functions:

  x86_fsbase_read_task()
  x86_gsbase_read_task()
  x86_fsbase_write_task()
  x86_gsbase_write_task()
  x86_fsbase_read_cpu()
  x86_fsbase_write_cpu()
  x86_gsbase_read_cpu_inactive()
  x86_gsbase_write_cpu_inactive()

As an advantage of the unified namespace we can now see all FS/GSBASE
API use in the kernel via the following 'git grep' pattern:

  $ git grep x86_.*sbase

[ mingo: Wrote new changelog. ]

Based-on-code-from: Andy Lutomirski <luto@kernel.org>
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Markus T Metzger <markus.t.metzger@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1537312139-5580-3-git-send-email-chang.seok.bae@intel.com


Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 07e1d88a
Loading
Loading
Loading
Loading
+50 −0
Original line number Original line Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_FSGSBASE_H
#define _ASM_FSGSBASE_H 1

#ifndef __ASSEMBLY__

#ifdef CONFIG_X86_64

#include <asm/msr-index.h>

/*
 * Return the segment base that @selector resolves to for @task, or 0 if
 * the selector does not refer to a usable descriptor.
 */
unsigned long x86_fsgsbase_read_task(struct task_struct *task,
				     unsigned short selector);

/*
 * Read/write a task's fsbase or gsbase. This returns the value that
 * the FS/GS base would have (if the task were to be resumed). These
 * work on current or on a different non-running task.
 */
unsigned long x86_fsbase_read_task(struct task_struct *task);
unsigned long x86_gsbase_read_task(struct task_struct *task);
int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);

/* Helper functions for reading/writing FS/GS base */

/* Read the FS base currently held in this CPU's MSR_FS_BASE. */
static inline unsigned long x86_fsbase_read_cpu(void)
{
	unsigned long msr_val;

	rdmsrl(MSR_FS_BASE, msr_val);

	return msr_val;
}

void x86_fsbase_write_cpu(unsigned long fsbase);

/* Read the inactive GS base from this CPU's MSR_KERNEL_GS_BASE. */
static inline unsigned long x86_gsbase_read_cpu_inactive(void)
{
	unsigned long msr_val;

	rdmsrl(MSR_KERNEL_GS_BASE, msr_val);

	return msr_val;
}

void x86_gsbase_write_cpu_inactive(unsigned long gsbase);

#endif /* CONFIG_X86_64 */

#endif /* __ASSEMBLY__ */

#endif /* _ASM_FSGSBASE_H */
+124 −0
Original line number Original line Diff line number Diff line
@@ -54,6 +54,7 @@
#include <asm/vdso.h>
#include <asm/vdso.h>
#include <asm/intel_rdt_sched.h>
#include <asm/intel_rdt_sched.h>
#include <asm/unistd.h>
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
#ifdef CONFIG_IA32_EMULATION
#ifdef CONFIG_IA32_EMULATION
/* Not included via unistd.h */
/* Not included via unistd.h */
#include <asm/unistd_32_ia32.h>
#include <asm/unistd_32_ia32.h>
@@ -286,6 +287,129 @@ static __always_inline void load_seg_legacy(unsigned short prev_index,
	}
	}
}
}


/*
 * Look up the base address of the segment that @selector names for @task.
 *
 * GDT selectors (TI bit clear) are resolved against the task's TLS array;
 * any GDT index outside the TLS range yields 0.  LDT selectors (TI bit
 * set) are resolved against the LDT of the task's mm under context.lock
 * when CONFIG_MODIFY_LDT_SYSCALL is enabled, and yield 0 otherwise.
 *
 * Returns the descriptor base, or 0 if the selector does not refer to a
 * usable descriptor.
 */
unsigned long x86_fsgsbase_read_task(struct task_struct *task,
				     unsigned short selector)
{
	unsigned short idx = selector >> 3;
	unsigned long base;

	if (likely((selector & SEGMENT_TI_MASK) == 0)) {
		if (unlikely(idx >= GDT_ENTRIES))
			return 0;

		/*
		 * There are no user segments in the GDT with nonzero bases
		 * other than the TLS segments.
		 */
		if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
			return 0;

		idx -= GDT_ENTRY_TLS_MIN;
		base = get_desc_base(&task->thread.tls_array[idx]);
	} else {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		struct ldt_struct *ldt;

		/*
		 * If performance here mattered, we could protect the LDT
		 * with RCU.  This is a slow path, though, so we can just
		 * take the mutex.
		 */
		mutex_lock(&task->mm->context.lock);
		ldt = task->mm->context.ldt;
		/*
		 * context.ldt stays NULL until the mm installs an LDT, so an
		 * LDT selector may arrive with no table behind it.  Treat
		 * that like an out-of-range index instead of dereferencing
		 * a NULL pointer.
		 */
		if (unlikely(!ldt || idx >= ldt->nr_entries))
			base = 0;
		else
			base = get_desc_base(ldt->entries + idx);
		mutex_unlock(&task->mm->context.lock);
#else
		base = 0;
#endif
	}

	return base;
}

/*
 * Install @fsbase as the FS base on this CPU: load selector 0 into FS,
 * then write the new base to MSR_FS_BASE.
 */
void x86_fsbase_write_cpu(unsigned long fsbase)
{
	/*
	 * Set the selector to 0 to record that the segment base has been
	 * overwritten; the context switch code checks for this in order
	 * to skip the segment load.
	 */
	loadseg(FS, 0);
	wrmsrl(MSR_FS_BASE, fsbase);
}

/*
 * Install @gsbase as the inactive GS base on this CPU: load selector 0
 * into GS, then write the new base to MSR_KERNEL_GS_BASE.
 */
void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
{
	/* Set the selector to 0 for the same reason as in x86_fsbase_write_cpu(). */
	loadseg(GS, 0);
	wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
}

/*
 * Return the FS base of @task.
 *
 * For the current task the value is read from the CPU.  For any other
 * task it is either the base saved in thread state (selector 0) or the
 * base of the descriptor named by the saved FS selector.
 */
unsigned long x86_fsbase_read_task(struct task_struct *task)
{
	if (task == current)
		return x86_fsbase_read_cpu();

	/* Selector 0: use the base stored in the thread state. */
	if (task->thread.fsindex == 0)
		return task->thread.fsbase;

	return x86_fsgsbase_read_task(task, task->thread.fsindex);
}

/*
 * Return the GS base of @task.
 *
 * For the current task the inactive GS base is read from the CPU.  For
 * any other task it is either the base saved in thread state (selector 0)
 * or the base of the descriptor named by the saved GS selector.
 */
unsigned long x86_gsbase_read_task(struct task_struct *task)
{
	if (task == current)
		return x86_gsbase_read_cpu_inactive();

	/* Selector 0: use the base stored in the thread state. */
	if (task->thread.gsindex == 0)
		return task->thread.gsbase;

	return x86_fsgsbase_read_task(task, task->thread.gsindex);
}

/*
 * Set the FS base of @task to @fsbase.
 *
 * The new base is stored in the task's thread state and the saved FS
 * selector index is cleared to 0.  If @task is the current task, the
 * base is also written to the CPU.
 *
 * Returns 0 on success, or -EPERM if @fsbase is at or above
 * TASK_SIZE_MAX.
 */
int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
{
	/*
	 * Not strictly needed for %fs, but do it for symmetry
	 * with %gs
	 */
	if (unlikely(fsbase >= TASK_SIZE_MAX))
		return -EPERM;

	/* Update thread state and the CPU without being preempted in between. */
	preempt_disable();
	task->thread.fsbase = fsbase;
	if (task == current)
		x86_fsbase_write_cpu(fsbase);
	task->thread.fsindex = 0;
	preempt_enable();

	return 0;
}

/*
 * Set the GS base of @task to @gsbase.
 *
 * The new base is stored in the task's thread state and the saved GS
 * selector index is cleared to 0.  If @task is the current task, the
 * base is also written to the CPU as the inactive GS base.
 *
 * Returns 0 on success, or -EPERM if @gsbase is at or above
 * TASK_SIZE_MAX.
 */
int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
{
	/* Reject bases at or above TASK_SIZE_MAX. */
	if (unlikely(gsbase >= TASK_SIZE_MAX))
		return -EPERM;

	/* Update thread state and the CPU without being preempted in between. */
	preempt_disable();
	task->thread.gsbase = gsbase;
	if (task == current)
		x86_gsbase_write_cpu_inactive(gsbase);
	task->thread.gsindex = 0;
	preempt_enable();

	return 0;
}

int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
		unsigned long arg, struct task_struct *p, unsigned long tls)
		unsigned long arg, struct task_struct *p, unsigned long tls)
{
{
+5 −46
Original line number Original line Diff line number Diff line
@@ -39,7 +39,7 @@
#include <asm/hw_breakpoint.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
#include <asm/traps.h>
#include <asm/syscall.h>
#include <asm/syscall.h>
#include <asm/mmu_context.h>
#include <asm/fsgsbase.h>


#include "tls.h"
#include "tls.h"


@@ -343,49 +343,6 @@ static int set_segment_reg(struct task_struct *task,
	return 0;
	return 0;
}
}


/*
 * Look up the base address of the segment that @selector names for @task.
 *
 * GDT selectors (TI bit clear) are resolved against the task's TLS array;
 * any GDT index outside the TLS range yields 0.  LDT selectors (TI bit
 * set) are resolved against the LDT of the task's mm under context.lock
 * when CONFIG_MODIFY_LDT_SYSCALL is enabled, and yield 0 otherwise.
 *
 * Returns the descriptor base, or 0 if the selector does not refer to a
 * usable descriptor.
 */
static unsigned long task_seg_base(struct task_struct *task,
				   unsigned short selector)
{
	unsigned short idx = selector >> 3;
	unsigned long base;

	if (likely((selector & SEGMENT_TI_MASK) == 0)) {
		if (unlikely(idx >= GDT_ENTRIES))
			return 0;

		/*
		 * There are no user segments in the GDT with nonzero bases
		 * other than the TLS segments.
		 */
		if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
			return 0;

		idx -= GDT_ENTRY_TLS_MIN;
		base = get_desc_base(&task->thread.tls_array[idx]);
	} else {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		struct ldt_struct *ldt;

		/*
		 * If performance here mattered, we could protect the LDT
		 * with RCU.  This is a slow path, though, so we can just
		 * take the mutex.
		 */
		mutex_lock(&task->mm->context.lock);
		ldt = task->mm->context.ldt;
		/*
		 * context.ldt stays NULL until the mm installs an LDT, so an
		 * LDT selector may arrive with no table behind it.  Treat
		 * that like an out-of-range index instead of dereferencing
		 * a NULL pointer.
		 */
		if (unlikely(!ldt || idx >= ldt->nr_entries))
			base = 0;
		else
			base = get_desc_base(ldt->entries + idx);
		mutex_unlock(&task->mm->context.lock);
#else
		base = 0;
#endif
	}

	return base;
}

#endif	/* CONFIG_X86_32 */
#endif	/* CONFIG_X86_32 */


static unsigned long get_flags(struct task_struct *task)
static unsigned long get_flags(struct task_struct *task)
@@ -482,13 +439,15 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset)
		if (task->thread.fsindex == 0)
		if (task->thread.fsindex == 0)
			return task->thread.fsbase;
			return task->thread.fsbase;
		else
		else
			return task_seg_base(task, task->thread.fsindex);
			return x86_fsgsbase_read_task(task,
						      task->thread.fsindex);
	}
	}
	case offsetof(struct user_regs_struct, gs_base): {
	case offsetof(struct user_regs_struct, gs_base): {
		if (task->thread.gsindex == 0)
		if (task->thread.gsindex == 0)
			return task->thread.gsbase;
			return task->thread.gsbase;
		else
		else
			return task_seg_base(task, task->thread.gsindex);
			return x86_fsgsbase_read_task(task,
						      task->thread.gsindex);
	}
	}
#endif
#endif
	}
	}