Commit 78be3706 authored by Rusty Russell's avatar Rusty Russell Committed by Andi Kleen
Browse files

[PATCH] i386: Allow a kernel not to be in ring 0



We allow for the fact that the guest kernel may not run in ring 0.  This
requires some abstraction in a few places when setting %cs or checking
privilege level (user vs kernel).

This is Chris' [RFC PATCH 15/33] move segment checks to subarch, except rather
than using #define USER_MODE_MASK which depends on a config option, we use
Zach's more flexible approach of assuming ring 3 == userspace.  I also used
"get_kernel_rpl()" over "get_kernel_cs()" because I think it reads better in
the code...

1) Remove the hardcoded 3 and introduce #define SEGMENT_RPL_MASK 3 2) Add a
get_kernel_rpl() macro, and don't assume it's zero.

And:

Clean up of patch for letting kernel run other than ring 0:

a. Add some comments about the SEGMENT_IS_*_CODE() macros.
b. Add a USER_RPL macro.  (Code was comparing a value to a mask
   in some places and to the magic number 3 in other places.)
c. Add macros for table indicator field and use them.
d. Change the entry.S tests for LDT stack segment to use the macros

Signed-off-by: default avatarRusty Russell <rusty@rustcorp.com.au>
Signed-off-by: default avatarZachary Amsden <zach@vmware.com>
Signed-off-by: default avatarJeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarAndi Kleen <ak@suse.de>
parent 0da5db31
Loading
Loading
Loading
Loading
+5 −4
Original line number Diff line number Diff line
@@ -240,8 +240,9 @@ ret_from_intr:
check_userspace:
	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
	movb CS(%esp), %al
	testl $(VM_MASK | 3), %eax
	jz resume_kernel
	andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
	cmpl $USER_RPL, %eax
	jb resume_kernel		# not returning to v8086 or userspace
ENTRY(resume_userspace)
 	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
					# setting need_resched or sigpending
@@ -377,8 +378,8 @@ restore_all:
	# See comments in process.c:copy_thread() for details.
	movb OLDSS(%esp), %ah
	movb CS(%esp), %al
	andl $(VM_MASK | (4 << 8) | 3), %eax
	cmpl $((4 << 8) | 3), %eax
	andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
	CFI_REMEMBER_STATE
	je ldt_ss			# returning to user-space with LDT SS
restore_nocheck:
+1 −1
Original line number Diff line number Diff line
@@ -338,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
	regs.xes = __USER_DS;
	regs.orig_eax = -1;
	regs.eip = (unsigned long) kernel_thread_helper;
	regs.xcs = __KERNEL_CS;
	regs.xcs = __KERNEL_CS | get_kernel_rpl();
	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;

	/* Ok, create the new process.. */
+1 −1
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
	const struct exception_table_entry *fixup;

#ifdef CONFIG_PNPBIOS
	if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3)))
	if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs)))
	{
		extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
		extern u32 pnp_bios_is_utter_crap;
+4 −7
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/kdebug.h>
#include <asm/segment.h>

extern void die(const char *,struct pt_regs *,long);

@@ -113,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
	}

	/* The standard kernel/user address space limit. */
	*eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
	
	/* By far the most common cases. */
	if (likely(seg == __USER_CS || seg == __KERNEL_CS))
	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
		return eip;

	/* Check the segment exists, is within the current LDT/GDT size,
@@ -430,10 +431,6 @@ good_area:
	write = 0;
	switch (error_code & 3) {
		default:	/* 3: write, present */
#ifdef TEST_VERIFY_AREA
			if (regs->cs == KERNEL_CS)
				printk("WP fault at %08lx\n", regs->eip);
#endif
				/* fall through */
		case 2:		/* write, not present */
			if (!(vma->vm_flags & VM_WRITE))
+3 −2
Original line number Diff line number Diff line
@@ -60,6 +60,7 @@ struct pt_regs {
#ifdef __KERNEL__

#include <asm/vm86.h>
#include <asm/segment.h>

struct task_struct;
extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
@@ -73,11 +74,11 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int erro
 */
static inline int user_mode(struct pt_regs *regs)
{
	return (regs->xcs & 3) != 0;
	return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL;
}
static inline int user_mode_vm(struct pt_regs *regs)
{
	return ((regs->xcs & 3) | (regs->eflags & VM_MASK)) != 0;
	return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL;
}
#define instruction_pointer(regs) ((regs)->eip)
extern unsigned long profile_pc(struct pt_regs *regs);
Loading